/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 132743 2004-07-28 04:47:35Z kan $ */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
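/* For example, MODE_INDEX (SImode) is 2, selecting the SImode column of
   the multiply and divide cost arrays in the tables below.  */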

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  {3, 3, 3, 3, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  2,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  2,					/* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {6, 6, 6, 6, 6},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  23,					/* cost of FADD and FSUB insns.  */
  27,					/* cost of FMUL instruction.  */
  88,					/* cost of FDIV instruction.  */
  22,					/* cost of FABS instruction.  */
  24,					/* cost of FCHS instruction.  */
  122,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {12, 12, 12, 12, 12},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  8,					/* cost of FADD and FSUB insns.  */
  16,					/* cost of FMUL instruction.  */
  73,					/* cost of FDIV instruction.  */
  3,					/* cost of FABS instruction.  */
  3,					/* cost of FCHS instruction.  */
  83,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  {11, 11, 11, 11, 11},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  3,					/* cost of FMUL instruction.  */
  39,					/* cost of FDIV instruction.  */
  1,					/* cost of FABS instruction.  */
  1,					/* cost of FCHS instruction.  */
  70,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {4, 4, 4, 4, 4},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  5,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 3, 3, 3, 3},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},			/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {5, 5, 5, 5, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  24,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 4, 3, 4, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  19,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  4,					/* constant shift costs */
  {15, 15, 15, 15, 15},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  5,					/* cost of FADD and FSUB insns.  */
  7,					/* cost of FMUL instruction.  */
  43,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  43,					/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
#define m_K8  (1<<PROCESSOR_K8)
#define m_ATHLON_K8  (m_K8 | m_ATHLON)

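/* Each flag below is a mask of the processors the feature applies to;
   it is tested against the current processor with bits such as
   (1 << ix86_tune), e.g. via TUNEMASK from i386.h.  */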
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where types and dependencies are resolved on SSE register
   parts instead of whole registers, so we may maintain just the lower part of
   scalar values in proper format, leaving the upper part undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes the partial-register FPS special case, thus avoiding the
   need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;

/* If the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
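/* For example, regclass_map[1] is DREG: gcc register number 1 is %edx,
   and DREG is the smallest class containing it.  */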

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
  8,9,10,11,12,13,14,15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
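/* With the 64-bit values of REGPARM_MAX (6 integer registers of 8 bytes
   each) and SSE_REGPARM_MAX (8 SSE registers of 16 bytes each), this
   comes to the usual 176 byte register save area.  */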

/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU we are scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which CPU and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;

static int local_symbolic_operand (rtx, enum machine_mode);
static int tls_symbolic_operand_1 (rtx, enum tls_model);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx maybe_get_pool_constant (rtx);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int memory_address_length (rtx addr);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
static void ix86_dump_ppro_packet (FILE *);
static void ix86_reorder_insn (rtx *, rtx *);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ix86_sched_reorder_ppro (rtx *, rtx *);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static void ix86_sched_init (FILE *, int, int);
static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
static int ix86_variable_issue (FILE *, int, rtx, int);
static int ia32_use_dfa_pipeline_interface (void);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
};
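/* For example, the address 4(%ebx,%ecx,2) decomposes into base = %ebx,
   index = %ecx, scale = 2 and disp = 4, with the default segment.  */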

static int ix86_decompose_address (rtx, struct ix86_address *);
static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
				      enum rtx_code *, enum rtx_code *);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static void k8_avoid_jump_misspredicts (void);

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor (rtx, int);
#endif

/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class: gcc
   just uses an SF or DFmode move instead of DImode to avoid reformatting
   penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
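/* An argument is described by at most MAX_CLASSES eightbytes (32 bytes);
   anything classify_argument cannot describe this way is passed in
   memory.  */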
static int classify_argument (enum machine_mode, tree,
			      enum x86_64_reg_class [MAX_CLASSES], int);
static int examine_argument (enum machine_mode, tree, int, int *, int *);
static rtx construct_container (enum machine_mode, tree, int, int, int,
				const int *, int);
static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
					    enum x86_64_reg_class);

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
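/* The five table entries hold the values loaded by the x87 fldl2t,
   fldl2e, fldpi, fldlg2 and fldln2 instructions, respectively.  */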

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

struct gcc_target targetm = TARGET_INITIALIZER;

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
    };
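
  /* For example, the pentium entry above aligns loops and jumps to 16
     bytes while skipping at most 7 padding bytes; the all-zero
     pentium4 entry leaves the alignments at their defaults.  */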

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_SSE3 = 4,
	  PTA_MMX = 8,
	  PTA_PREFETCH_SSE = 16,
	  PTA_3DNOW = 32,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				        | PTA_MMX | PTA_PREFETCH_SSE},
      {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
				        | PTA_MMX | PTA_PREFETCH_SSE},
      {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
				     | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				    | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
			       | PTA_SSE | PTA_SSE2 },
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };
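
  /* For example, -march=athlon-xp selects PROCESSOR_ATHLON and implies
     MMX, SSE, 3DNow! and the Athlon extensions to 3DNow!.  */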

  int const pta_size = ARRAY_SIZE (processor_alias_table);

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(target_flags_explicit & MASK_SSE3))
	  target_flags |= MASK_SSE3;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }
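
  /* target_flags_explicit records the flags the user set explicitly on
     the command line, so the loop above only enables extensions the
     user did not explicitly disable.  */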

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");

	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
   if (TARGET_64BIT)
     ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
1370  /* Validate -mbranch-cost= value, or provide default.  */
1371  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1372  if (ix86_branch_cost_string)
1373    {
1374      i = atoi (ix86_branch_cost_string);
1375      if (i < 0 || i > 5)
1376	error ("-mbranch-cost=%d is not between 0 and 5", i);
1377      else
1378	ix86_branch_cost = i;
1379    }
1380
1381  if (ix86_tls_dialect_string)
1382    {
1383      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1384	ix86_tls_dialect = TLS_DIALECT_GNU;
1385      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1386	ix86_tls_dialect = TLS_DIALECT_SUN;
1387      else
1388	error ("bad value (%s) for -mtls-dialect= switch",
1389	       ix86_tls_dialect_string);
1390    }
1391
1392  /* Keep nonleaf frame pointers.  */
1393  if (TARGET_OMIT_LEAF_FRAME_POINTER)
1394    flag_omit_frame_pointer = 1;
1395
1396  /* If we're doing fast math, we don't care about comparison order
1397     wrt NaNs.  This lets us use a shorter comparison sequence.  */
1398  if (flag_unsafe_math_optimizations)
1399    target_flags &= ~MASK_IEEE_FP;
1400
1401  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1402     since the insns won't need emulation.  */
1403  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1404    target_flags &= ~MASK_NO_FANCY_MATH_387;
1405
1406  /* Turn on SSE2 builtins for -msse3.  */
1407  if (TARGET_SSE3)
1408    target_flags |= MASK_SSE2;
1409
1410  /* Turn on SSE builtins for -msse2.  */
1411  if (TARGET_SSE2)
1412    target_flags |= MASK_SSE;
1413
1414  if (TARGET_64BIT)
1415    {
1416      if (TARGET_ALIGN_DOUBLE)
1417	error ("-malign-double makes no sense in the 64bit mode");
1418      if (TARGET_RTD)
1419	error ("-mrtd calling convention not supported in the 64bit mode");
1420      /* Enable by default the SSE and MMX builtins.  */
1421      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1422      ix86_fpmath = FPMATH_SSE;
1423     }
1424  else
1425    {
1426      ix86_fpmath = FPMATH_387;
1427      /* The i386 ABI does not specify a red zone.  It still makes sense to use
1428         one when the programmer takes care to keep the stack from being clobbered.  */
1429      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1430        target_flags |= MASK_NO_RED_ZONE;
1431    }
1432
1433  if (ix86_fpmath_string != 0)
1434    {
1435      if (! strcmp (ix86_fpmath_string, "387"))
1436	ix86_fpmath = FPMATH_387;
1437      else if (! strcmp (ix86_fpmath_string, "sse"))
1438	{
1439	  if (!TARGET_SSE)
1440	    {
1441	      warning ("SSE instruction set disabled, using 387 arithmetics");
1442	      ix86_fpmath = FPMATH_387;
1443	    }
1444	  else
1445	    ix86_fpmath = FPMATH_SSE;
1446	}
1447      else if (! strcmp (ix86_fpmath_string, "387,sse")
1448	       || ! strcmp (ix86_fpmath_string, "sse,387"))
1449	{
1450	  if (!TARGET_SSE)
1451	    {
1452	      warning ("SSE instruction set disabled, using 387 arithmetics");
1453	      ix86_fpmath = FPMATH_387;
1454	    }
1455	  else if (!TARGET_80387)
1456	    {
1457	      warning ("387 instruction set disabled, using SSE arithmetics");
1458	      ix86_fpmath = FPMATH_SSE;
1459	    }
1460	  else
1461	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
1462	}
1463      else
1464	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1465    }
1466
1467  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1468     on by -msse.  */
1469  if (TARGET_SSE)
1470    {
1471      target_flags |= MASK_MMX;
1472      x86_prefetch_sse = true;
1473    }
1474
1475  /* If the CPU has 3DNow! it also has MMX, so MMX is turned on by -m3dnow too.  */
1476  if (TARGET_3DNOW)
1477    {
1478      target_flags |= MASK_MMX;
1479      /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1480	 extensions it adds.  */
1481      if (x86_3dnow_a & (1 << ix86_arch))
1482	target_flags |= MASK_3DNOW_A;
1483    }
1484  if ((x86_accumulate_outgoing_args & TUNEMASK)
1485      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1486      && !optimize_size)
1487    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1488
1489  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
1490  {
1491    char *p;
1492    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1493    p = strchr (internal_label_prefix, 'X');
1494    internal_label_prefix_len = p - internal_label_prefix;
1495    *p = '\0';
1496  }
1497}
1498
1499void
1500optimization_options (int level, int size ATTRIBUTE_UNUSED)
1501{
1502  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
1503     make the problem with not enough registers even worse.  */
1504#ifdef INSN_SCHEDULING
1505  if (level > 1)
1506    flag_schedule_insns = 0;
1507#endif
1508
1509  /* The default values of these switches depend on TARGET_64BIT,
1510     which is not known at this point.  Mark these values with 2 and
1511     let the user override them.  In case there is no command line option
1512     specifying them, we will set the defaults in override_options.  */
1513  if (optimize >= 1)
1514    flag_omit_frame_pointer = 2;
1515  flag_pcc_struct_return = 2;
1516  flag_asynchronous_unwind_tables = 2;
1517}
1518
1519/* Table of valid machine attributes.  */
1520const struct attribute_spec ix86_attribute_table[] =
1521{
1522  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1523  /* Stdcall attribute says callee is responsible for popping arguments
1524     if they are not variable.  */
1525  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1526  /* Fastcall attribute says callee is responsible for popping arguments
1527     if they are not variable.  */
1528  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1529  /* Cdecl attribute says the callee is a normal C declaration */
1530  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1531  /* Regparm attribute specifies how many integer arguments are to be
1532     passed in registers.  */
1533  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
1534#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1535  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1536  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1537  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
1538#endif
1539  { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
1540  { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
1541  { NULL,        0, 0, false, false, false, NULL }
1542};
1543
1544/* Decide whether we can make a sibling call to a function.  DECL is the
1545   declaration of the function being targeted by the call and EXP is the
1546   CALL_EXPR representing the call.  */
1547
1548static bool
1549ix86_function_ok_for_sibcall (tree decl, tree exp)
1550{
1551  /* If we are generating position-independent code, we cannot sibcall
1552     optimize any indirect call, or a direct call to a global function,
1553     as the PLT requires %ebx be live.  */
1554  if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1555    return false;
1556
1557  /* If we are returning floats on the 80387 register stack, we cannot
1558     make a sibcall from a function that doesn't return a float to a
1559     function that does or, conversely, from a function that does return
1560     a float to a function that doesn't; the necessary stack adjustment
1561     would not be executed.  */
1562  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1563      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1564    return false;
1565
1566  /* If this call is indirect, we'll need to be able to use a call-clobbered
1567     register for the address of the target function.  Make sure that all
1568     such registers are not used for passing parameters.  */
1569  if (!decl && !TARGET_64BIT)
1570    {
1571      tree type;
1572
1573      /* We're looking at the CALL_EXPR, we need the type of the function.  */
1574      type = TREE_OPERAND (exp, 0);		/* pointer expression */
1575      type = TREE_TYPE (type);			/* pointer type */
1576      type = TREE_TYPE (type);			/* function type */
1577
1578      if (ix86_function_regparm (type, NULL) >= 3)
1579	{
1580	  /* ??? Need to count the actual number of registers to be used,
1581	     not the possible number of registers.  Fix later.  */
1582	  return false;
1583	}
1584    }
1585
1586  /* Otherwise okay.  That also includes certain types of indirect calls.  */
1587  return true;
1588}
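
/* Example (editorial sketch, not part of the original source): on ia32
   under -fpic,

     extern int g (int);
     int f (int x) { return g (x); }

   is rejected by the first test above: g is global, so the call goes
   through the PLT, which needs %ebx to hold the GOT pointer at the
   call site.  */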
1589
1590/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1591   arguments as in struct attribute_spec.handler.  */
1592static tree
1593ix86_handle_cdecl_attribute (tree *node, tree name,
1594			     tree args ATTRIBUTE_UNUSED,
1595			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1596{
1597  if (TREE_CODE (*node) != FUNCTION_TYPE
1598      && TREE_CODE (*node) != METHOD_TYPE
1599      && TREE_CODE (*node) != FIELD_DECL
1600      && TREE_CODE (*node) != TYPE_DECL)
1601    {
1602      warning ("`%s' attribute only applies to functions",
1603	       IDENTIFIER_POINTER (name));
1604      *no_add_attrs = true;
1605    }
1606  else
1607    {
1608      if (is_attribute_p ("fastcall", name))
1609        {
1610          if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1611            {
1612              error ("fastcall and stdcall attributes are not compatible");
1613            }
1614           else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1615            {
1616              error ("fastcall and regparm attributes are not compatible");
1617            }
1618        }
1619      else if (is_attribute_p ("stdcall", name))
1620        {
1621          if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1622            {
1623              error ("fastcall and stdcall attributes are not compatible");
1624            }
1625        }
1626    }
1627
1628  if (TARGET_64BIT)
1629    {
1630      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1631      *no_add_attrs = true;
1632    }
1633
1634  return NULL_TREE;
1635}
1636
1637/* Handle a "regparm" attribute;
1638   arguments as in struct attribute_spec.handler.  */
1639static tree
1640ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1641			       int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1642{
1643  if (TREE_CODE (*node) != FUNCTION_TYPE
1644      && TREE_CODE (*node) != METHOD_TYPE
1645      && TREE_CODE (*node) != FIELD_DECL
1646      && TREE_CODE (*node) != TYPE_DECL)
1647    {
1648      warning ("`%s' attribute only applies to functions",
1649	       IDENTIFIER_POINTER (name));
1650      *no_add_attrs = true;
1651    }
1652  else
1653    {
1654      tree cst;
1655
1656      cst = TREE_VALUE (args);
1657      if (TREE_CODE (cst) != INTEGER_CST)
1658	{
1659	  warning ("`%s' attribute requires an integer constant argument",
1660		   IDENTIFIER_POINTER (name));
1661	  *no_add_attrs = true;
1662	}
1663      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1664	{
1665	  warning ("argument to `%s' attribute larger than %d",
1666		   IDENTIFIER_POINTER (name), REGPARM_MAX);
1667	  *no_add_attrs = true;
1668	}
1669
1670      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1671	{
1672	  error ("fastcall and regparm attributes are not compatible");
1673	}
1674    }
1675
1676  return NULL_TREE;
1677}
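
/* Usage example (editorial note, not part of the original source): the
   attribute validated above is written as

     int sum3 (int, int, int) __attribute__ ((regparm (3)));

   regparm (4) would draw the "larger than" warning, since REGPARM_MAX
   is 3 on ia32 (arguments go to EAX, EDX and ECX).  */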
1678
1679/* Return 0 if the attributes for two types are incompatible, 1 if they
1680   are compatible, and 2 if they are nearly compatible (which causes a
1681   warning to be generated).  */
1682
1683static int
1684ix86_comp_type_attributes (tree type1, tree type2)
1685{
1686  /* Check for mismatch of non-default calling convention.  */
1687  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1688
1689  if (TREE_CODE (type1) != FUNCTION_TYPE)
1690    return 1;
1691
1692  /*  Check for mismatched fastcall types */
1693  if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1694      != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1695    return 0;
1696
1697  /* Check for mismatched return types (cdecl vs stdcall).  */
1698  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1699      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1700    return 0;
1701  if (ix86_function_regparm (type1, NULL)
1702      != ix86_function_regparm (type2, NULL))
1703    return 0;
1704  return 1;
1705}
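
/* Example (editorial note, not part of the original source): assigning
   a plain (default-convention) function to a pointer declared as

     void (__attribute__ ((stdcall)) *fp) (int);

   makes the function above return 0, because exactly one of the two
   types carries the stdcall attribute.  */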
1706
1707/* Return the regparm value for a function with the indicated TYPE and DECL.
1708   DECL may be NULL when calling function indirectly
1709   or considering a libcall.  */
1710
1711static int
1712ix86_function_regparm (tree type, tree decl)
1713{
1714  tree attr;
1715  int regparm = ix86_regparm;
1716  bool user_convention = false;
1717
1718  if (!TARGET_64BIT)
1719    {
1720      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1721      if (attr)
1722	{
1723	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1724	  user_convention = true;
1725	}
1726
1727      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1728	{
1729	  regparm = 2;
1730	  user_convention = true;
1731	}
1732
1733      /* Use register calling convention for local functions when possible.  */
1734      if (!TARGET_64BIT && !user_convention && decl
1735	  && flag_unit_at_a_time && !profile_flag)
1736	{
1737	  struct cgraph_local_info *i = cgraph_local_info (decl);
1738	  if (i && i->local)
1739	    {
1740	      /* We can't use regparm(3) for nested functions as these use
1741		 static chain pointer in third argument.  */
1742	      if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1743		regparm = 2;
1744	      else
1745		regparm = 3;
1746	    }
1747	}
1748    }
1749  return regparm;
1750}
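
/* Illustration (editorial note, not part of the original source): with
   -funit-at-a-time and no profiling, a static function whose callers
   are all visible, e.g.

     static int add (int a, int b) { return a + b; }

   is promoted to regparm by the code above, so A arrives in %eax and
   B in %edx even without an explicit attribute.  */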
1751
1752/* Return true if EAX is live at the start of the function.  Used by
1753   ix86_expand_prologue to determine if we need special help before
1754   calling allocate_stack_worker.  */
1755
1756static bool
1757ix86_eax_live_at_start_p (void)
1758{
1759  /* Cheat.  Don't bother working forward from ix86_function_regparm
1760     to the function type to whether an actual argument is located in
1761     eax.  Instead just look at cfg info, which is still close enough
1762     to correct at this point.  This gives false positives for broken
1763     functions that might use uninitialized data that happens to be
1764     allocated in eax, but who cares?  */
1765  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1766}
1767
1768/* Value is the number of bytes of arguments automatically
1769   popped when returning from a subroutine call.
1770   FUNDECL is the declaration node of the function (as a tree),
1771   FUNTYPE is the data type of the function (as a tree),
1772   or for a library call it is an identifier node for the subroutine name.
1773   SIZE is the number of bytes of arguments passed on the stack.
1774
1775   On the 80386, the RTD insn may be used to pop them if the number
1776     of args is fixed, but if the number is variable then the caller
1777     must pop them all.  RTD can't be used for library calls now
1778     because the library is compiled with the Unix compiler.
1779   Use of RTD is a selectable option, since it is incompatible with
1780   standard Unix calling sequences.  If the option is not selected,
1781   the caller must always pop the args.
1782
1783   The attribute stdcall is equivalent to RTD on a per module basis.  */
1784
1785int
1786ix86_return_pops_args (tree fundecl, tree funtype, int size)
1787{
1788  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1789
1790  /* Cdecl functions override -mrtd, and never pop the stack.  */
1791  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1792
1793    /* Stdcall and fastcall functions will pop the stack if not
1794       variable args.  */
1795    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1796        || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1797      rtd = 1;
1798
1799    if (rtd
1800        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1801	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1802		== void_type_node)))
1803      return size;
1804  }
1805
1806  /* Lose any fake structure return argument if it is passed on the stack.  */
1807  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1808      && !TARGET_64BIT)
1809    {
1810      int nregs = ix86_function_regparm (funtype, fundecl);
1811
1812      if (!nregs)
1813	return GET_MODE_SIZE (Pmode);
1814    }
1815
1816  return 0;
1817}
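
/* Example (editorial sketch, not part of the original source): for

     int __attribute__ ((stdcall)) f (int a, int b);

   the function above returns 8, so the callee pops its own arguments
   with "ret $8"; a cdecl or varargs function yields 0 and the caller
   pops instead.  */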
1818
1819/* Argument support functions.  */
1820
1821/* Return true when register may be used to pass function parameters.  */
1822bool
1823ix86_function_arg_regno_p (int regno)
1824{
1825  int i;
1826  if (!TARGET_64BIT)
1827    return (regno < REGPARM_MAX
1828	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1829  if (SSE_REGNO_P (regno) && TARGET_SSE)
1830    return true;
1831  /* RAX is used as hidden argument to va_arg functions.  */
1832  if (!regno)
1833    return true;
1834  for (i = 0; i < REGPARM_MAX; i++)
1835    if (regno == x86_64_int_parameter_registers[i])
1836      return true;
1837  return false;
1838}
1839
1840/* Initialize a variable CUM of type CUMULATIVE_ARGS
1841   for a call to a function whose data type is FNTYPE.
1842   For a library call, FNTYPE is 0.  */
1843
1844void
1845init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
1846		      tree fntype,	/* tree ptr for function decl */
1847		      rtx libname,	/* SYMBOL_REF of library name or 0 */
1848		      tree fndecl)
1849{
1850  static CUMULATIVE_ARGS zero_cum;
1851  tree param, next_param;
1852
1853  if (TARGET_DEBUG_ARG)
1854    {
1855      fprintf (stderr, "\ninit_cumulative_args (");
1856      if (fntype)
1857	fprintf (stderr, "fntype code = %s, ret code = %s",
1858		 tree_code_name[(int) TREE_CODE (fntype)],
1859		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1860      else
1861	fprintf (stderr, "no fntype");
1862
1863      if (libname)
1864	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1865    }
1866
1867  *cum = zero_cum;
1868
1869  /* Set up the number of registers to use for passing arguments.  */
1870  if (fntype)
1871    cum->nregs = ix86_function_regparm (fntype, fndecl);
1872  else
1873    cum->nregs = ix86_regparm;
1874  cum->sse_nregs = SSE_REGPARM_MAX;
1875  cum->mmx_nregs = MMX_REGPARM_MAX;
1876  cum->warn_sse = true;
1877  cum->warn_mmx = true;
1878  cum->maybe_vaarg = false;
1879
1880  /* Use ecx and edx registers if the function has the fastcall attribute.  */
1881  if (fntype && !TARGET_64BIT)
1882    {
1883      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1884	{
1885	  cum->nregs = 2;
1886	  cum->fastcall = 1;
1887	}
1888    }
1889
1890
1891  /* Determine if this function has variable arguments.  This is
1892     indicated by the last argument being 'void_type_node' if there
1893     are no variable arguments.  If there are variable arguments, then
1894     we won't pass anything in registers.  */
1895
1896  if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1897    {
1898      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1899	   param != 0; param = next_param)
1900	{
1901	  next_param = TREE_CHAIN (param);
1902	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1903	    {
1904	      if (!TARGET_64BIT)
1905		{
1906		  cum->nregs = 0;
1907		  cum->sse_nregs = 0;
1908		  cum->mmx_nregs = 0;
1909		  cum->warn_sse = 0;
1910		  cum->warn_mmx = 0;
1911		  cum->fastcall = 0;
1912		}
1913	      cum->maybe_vaarg = true;
1914	    }
1915	}
1916    }
1917  if ((!fntype && !libname)
1918      || (fntype && !TYPE_ARG_TYPES (fntype)))
1919    cum->maybe_vaarg = 1;
1920
1921  if (TARGET_DEBUG_ARG)
1922    fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1923
1924  return;
1925}
1926
1927/* x86-64 register passing implementation.  See the x86-64 ABI for details.
1928   The goal of this code is to classify each 8-byte chunk of an incoming
1929   argument by register class and assign registers accordingly.  */
1930
1931/* Return the union class of CLASS1 and CLASS2.
1932   See the x86-64 PS ABI for details.  */
1933
1934static enum x86_64_reg_class
1935merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1936{
1937  /* Rule #1: If both classes are equal, this is the resulting class.  */
1938  if (class1 == class2)
1939    return class1;
1940
1941  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1942     the other class.  */
1943  if (class1 == X86_64_NO_CLASS)
1944    return class2;
1945  if (class2 == X86_64_NO_CLASS)
1946    return class1;
1947
1948  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
1949  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1950    return X86_64_MEMORY_CLASS;
1951
1952  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
1953  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1954      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1955    return X86_64_INTEGERSI_CLASS;
1956  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1957      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1958    return X86_64_INTEGER_CLASS;
1959
1960  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
1961  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1962      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1963    return X86_64_MEMORY_CLASS;
1964
1965  /* Rule #6: Otherwise class SSE is used.  */
1966  return X86_64_SSE_CLASS;
1967}
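
/* Worked example (editorial note, not part of the original source):
   for struct { int i; double d; } the first eightbyte merges
   X86_64_INTEGERSI_CLASS (the int) with X86_64_NO_CLASS (the padding)
   into X86_64_INTEGERSI_CLASS by rule #2, and the second eightbyte is
   X86_64_SSEDF_CLASS, so the struct travels in one integer and one
   SSE register.  */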
1968
1969/* Classify the argument of type TYPE and mode MODE.
1970   CLASSES will be filled by the register class used to pass each word
1971   of the operand.  The number of words is returned.  In case the parameter
1972   should be passed in memory, 0 is returned. As a special case for zero
1973   sized containers, classes[0] will be NO_CLASS and 1 is returned.
1974
1975   BIT_OFFSET is used internally for handling records and gives the offset
1976   of the argument within its record, in bits modulo 256, to avoid overflow.
1977
1978   See the x86-64 PS ABI for details.
1979*/
1980
1981static int
1982classify_argument (enum machine_mode mode, tree type,
1983		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1984{
1985  HOST_WIDE_INT bytes =
1986    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1987  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1988
1989  /* Variable sized entities are always passed/returned in memory.  */
1990  if (bytes < 0)
1991    return 0;
1992
1993  if (mode != VOIDmode
1994      && MUST_PASS_IN_STACK (mode, type))
1995    return 0;
1996
1997  if (type && AGGREGATE_TYPE_P (type))
1998    {
1999      int i;
2000      tree field;
2001      enum x86_64_reg_class subclasses[MAX_CLASSES];
2002
2003      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
2004      if (bytes > 16)
2005	return 0;
2006
2007      for (i = 0; i < words; i++)
2008	classes[i] = X86_64_NO_CLASS;
2009
2010      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
2011	 signal the memory class, so handle this as a special case.  */
2012      if (!words)
2013	{
2014	  classes[0] = X86_64_NO_CLASS;
2015	  return 1;
2016	}
2017
2018      /* Classify each field of record and merge classes.  */
2019      if (TREE_CODE (type) == RECORD_TYPE)
2020	{
2021	  /* For classes first merge in the field of the subclasses.  */
2022	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2023	    {
2024	      tree bases = TYPE_BINFO_BASETYPES (type);
2025	      int n_bases = TREE_VEC_LENGTH (bases);
2026	      int i, j;  /* J avoids clobbering the base-class loop counter I.  */
2027
2028	      for (i = 0; i < n_bases; ++i)
2029		{
2030		   tree binfo = TREE_VEC_ELT (bases, i);
2031		   int num;
2032		   int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2033		   tree type = BINFO_TYPE (binfo);
2034
2035		   num = classify_argument (TYPE_MODE (type),
2036					    type, subclasses,
2037					    (offset + bit_offset) % 256);
2038		   if (!num)
2039		     return 0;
2040		   for (j = 0; j < num; j++)
2041		     {
2042		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
2043		       classes[j + pos] =
2044			 merge_classes (subclasses[j], classes[j + pos]);
2045		     }
2046		}
2047	    }
2048	  /* And now merge in the fields of the structure.  */
2049	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2050	    {
2051	      if (TREE_CODE (field) == FIELD_DECL)
2052		{
2053		  int num;
2054
2055		  /* Bitfields are always classified as integer.  Handle them
2056		     early, since later code would consider them to be
2057		     misaligned integers.  */
2058		  if (DECL_BIT_FIELD (field))
2059		    {
2060		      for (i = int_bit_position (field) / 8 / 8;
2061			   i < (int_bit_position (field)
2062			        + tree_low_cst (DECL_SIZE (field), 0)
2063				+ 63) / 8 / 8; i++)
2064			classes[i] =
2065			  merge_classes (X86_64_INTEGER_CLASS,
2066					 classes[i]);
2067		    }
2068		  else
2069		    {
2070		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2071					       TREE_TYPE (field), subclasses,
2072					       (int_bit_position (field)
2073						+ bit_offset) % 256);
2074		      if (!num)
2075			return 0;
2076		      for (i = 0; i < num; i++)
2077			{
2078			  int pos =
2079			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2080			  classes[i + pos] =
2081			    merge_classes (subclasses[i], classes[i + pos]);
2082			}
2083		    }
2084		}
2085	    }
2086	}
2087      /* Arrays are handled as small records.  */
2088      else if (TREE_CODE (type) == ARRAY_TYPE)
2089	{
2090	  int num;
2091	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2092				   TREE_TYPE (type), subclasses, bit_offset);
2093	  if (!num)
2094	    return 0;
2095
2096	  /* The partial classes are now full classes.  */
2097	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2098	    subclasses[0] = X86_64_SSE_CLASS;
2099	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2100	    subclasses[0] = X86_64_INTEGER_CLASS;
2101
2102	  for (i = 0; i < words; i++)
2103	    classes[i] = subclasses[i % num];
2104	}
2105      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
2106      else if (TREE_CODE (type) == UNION_TYPE
2107	       || TREE_CODE (type) == QUAL_UNION_TYPE)
2108	{
2109	  /* For classes first merge in the field of the subclasses.  */
2110	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2111	    {
2112	      tree bases = TYPE_BINFO_BASETYPES (type);
2113	      int n_bases = TREE_VEC_LENGTH (bases);
2114	      int i, j;  /* J avoids clobbering the base-class loop counter I.  */
2115
2116	      for (i = 0; i < n_bases; ++i)
2117		{
2118		   tree binfo = TREE_VEC_ELT (bases, i);
2119		   int num;
2120		   int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2121		   tree type = BINFO_TYPE (binfo);
2122
2123		   num = classify_argument (TYPE_MODE (type),
2124					    type, subclasses,
2125					    (offset + (bit_offset % 64)) % 256);
2126		   if (!num)
2127		     return 0;
2128		   for (j = 0; j < num; j++)
2129		     {
2130		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
2131		       classes[j + pos] =
2132			 merge_classes (subclasses[j], classes[j + pos]);
2133		     }
2134		}
2135	    }
2136	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2137	    {
2138	      if (TREE_CODE (field) == FIELD_DECL)
2139		{
2140		  int num;
2141		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2142					   TREE_TYPE (field), subclasses,
2143					   bit_offset);
2144		  if (!num)
2145		    return 0;
2146		  for (i = 0; i < num; i++)
2147		    classes[i] = merge_classes (subclasses[i], classes[i]);
2148		}
2149	    }
2150	}
2151      else if (TREE_CODE (type) == SET_TYPE)
2152	{
2153	  if (bytes <= 4)
2154	    {
2155	      classes[0] = X86_64_INTEGERSI_CLASS;
2156	      return 1;
2157	    }
2158	  else if (bytes <= 8)
2159	    {
2160	      classes[0] = X86_64_INTEGER_CLASS;
2161	      return 1;
2162	    }
2163	  else if (bytes <= 12)
2164	    {
2165	      classes[0] = X86_64_INTEGER_CLASS;
2166	      classes[1] = X86_64_INTEGERSI_CLASS;
2167	      return 2;
2168	    }
2169	  else
2170	    {
2171	      classes[0] = X86_64_INTEGER_CLASS;
2172	      classes[1] = X86_64_INTEGER_CLASS;
2173	      return 2;
2174	    }
2175	}
2176      else
2177	abort ();
2178
2179      /* Final merger cleanup.  */
2180      for (i = 0; i < words; i++)
2181	{
2182	  /* If one class is MEMORY, everything should be passed in
2183	     memory.  */
2184	  if (classes[i] == X86_64_MEMORY_CLASS)
2185	    return 0;
2186
2187	  /* The X86_64_SSEUP_CLASS should be always preceded by
2188	     X86_64_SSE_CLASS.  */
2189	  if (classes[i] == X86_64_SSEUP_CLASS
2190	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2191	    classes[i] = X86_64_SSE_CLASS;
2192
2193	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
2194	  if (classes[i] == X86_64_X87UP_CLASS
2195	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2196	    classes[i] = X86_64_SSE_CLASS;
2197	}
2198      return words;
2199    }
2200
2201  /* Compute the alignment needed.  We align all types to their natural
2202     boundaries, with the exception of XFmode, which is aligned to 128 bits.  */
2203  if (mode != VOIDmode && mode != BLKmode)
2204    {
2205      int mode_alignment = GET_MODE_BITSIZE (mode);
2206
2207      if (mode == XFmode)
2208	mode_alignment = 128;
2209      else if (mode == XCmode)
2210	mode_alignment = 256;
2211      if (COMPLEX_MODE_P (mode))
2212	mode_alignment /= 2;
2213      /* Misaligned fields are always returned in memory.  */
2214      if (bit_offset % mode_alignment)
2215	return 0;
2216    }
2217
2218  /* Classification of atomic types.  */
2219  switch (mode)
2220    {
2221    case DImode:
2222    case SImode:
2223    case HImode:
2224    case QImode:
2225    case CSImode:
2226    case CHImode:
2227    case CQImode:
2228      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2229	classes[0] = X86_64_INTEGERSI_CLASS;
2230      else
2231	classes[0] = X86_64_INTEGER_CLASS;
2232      return 1;
2233    case CDImode:
2234    case TImode:
2235      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2236      return 2;
2237    case CTImode:
2238      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2239      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2240      return 4;
2241    case SFmode:
2242      if (!(bit_offset % 64))
2243	classes[0] = X86_64_SSESF_CLASS;
2244      else
2245	classes[0] = X86_64_SSE_CLASS;
2246      return 1;
2247    case DFmode:
2248      classes[0] = X86_64_SSEDF_CLASS;
2249      return 1;
2250    case XFmode:
2251      classes[0] = X86_64_X87_CLASS;
2252      classes[1] = X86_64_X87UP_CLASS;
2253      return 2;
2254    case TFmode:
2255    case TCmode:
2256      return 0;
2257    case XCmode:
2258      classes[0] = X86_64_X87_CLASS;
2259      classes[1] = X86_64_X87UP_CLASS;
2260      classes[2] = X86_64_X87_CLASS;
2261      classes[3] = X86_64_X87UP_CLASS;
2262      return 4;
2263    case DCmode:
2264      classes[0] = X86_64_SSEDF_CLASS;
2265      classes[1] = X86_64_SSEDF_CLASS;
2266      return 2;
2267    case SCmode:
2268      classes[0] = X86_64_SSE_CLASS;
2269      return 1;
2270    case V4SFmode:
2271    case V4SImode:
2272    case V16QImode:
2273    case V8HImode:
2274    case V2DFmode:
2275    case V2DImode:
2276      classes[0] = X86_64_SSE_CLASS;
2277      classes[1] = X86_64_SSEUP_CLASS;
2278      return 2;
2279    case V2SFmode:
2280    case V2SImode:
2281    case V4HImode:
2282    case V8QImode:
2283      return 0;
2284    case BLKmode:
2285    case VOIDmode:
2286      return 0;
2287    default:
2288      abort ();
2289    }
2290}
2291
2292/* Examine the argument and set the number of registers required in each
2293   class.  Return 0 iff the parameter should be passed in memory.  */
2294static int
2295examine_argument (enum machine_mode mode, tree type, int in_return,
2296		  int *int_nregs, int *sse_nregs)
2297{
2298  enum x86_64_reg_class class[MAX_CLASSES];
2299  int n = classify_argument (mode, type, class, 0);
2300
2301  *int_nregs = 0;
2302  *sse_nregs = 0;
2303  if (!n)
2304    return 0;
2305  for (n--; n >= 0; n--)
2306    switch (class[n])
2307      {
2308      case X86_64_INTEGER_CLASS:
2309      case X86_64_INTEGERSI_CLASS:
2310	(*int_nregs)++;
2311	break;
2312      case X86_64_SSE_CLASS:
2313      case X86_64_SSESF_CLASS:
2314      case X86_64_SSEDF_CLASS:
2315	(*sse_nregs)++;
2316	break;
2317      case X86_64_NO_CLASS:
2318      case X86_64_SSEUP_CLASS:
2319	break;
2320      case X86_64_X87_CLASS:
2321      case X86_64_X87UP_CLASS:
2322	if (!in_return)
2323	  return 0;
2324	break;
2325      case X86_64_MEMORY_CLASS:
2326	abort ();
2327      }
2328  return 1;
2329}
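
/* Examples (editorial note, not part of the original source): for
   TImode (__int128) the function above sets *int_nregs = 2 and
   *sse_nregs = 0; for DCmode (_Complex double) it sets *int_nregs = 0
   and *sse_nregs = 2.  */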
2330/* Construct container for the argument used by GCC interface.  See
2331   FUNCTION_ARG for the detailed description.  */
2332static rtx
2333construct_container (enum machine_mode mode, tree type, int in_return,
2334		     int nintregs, int nsseregs, const int * intreg,
2335		     int sse_regno)
2336{
2337  enum machine_mode tmpmode;
2338  int bytes =
2339    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2340  enum x86_64_reg_class class[MAX_CLASSES];
2341  int n;
2342  int i;
2343  int nexps = 0;
2344  int needed_sseregs, needed_intregs;
2345  rtx exp[MAX_CLASSES];
2346  rtx ret;
2347
2348  n = classify_argument (mode, type, class, 0);
2349  if (TARGET_DEBUG_ARG)
2350    {
2351      if (!n)
2352	fprintf (stderr, "Memory class\n");
2353      else
2354	{
2355	  fprintf (stderr, "Classes:");
2356	  for (i = 0; i < n; i++)
2357	    {
2358	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2359	    }
2360	   fprintf (stderr, "\n");
2361	}
2362    }
2363  if (!n)
2364    return NULL;
2365  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2366    return NULL;
2367  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2368    return NULL;
2369
2370  /* First construct the simple cases.  Avoid SCmode, since we want to use
2371     a single register to pass this type.  */
2372  if (n == 1 && mode != SCmode)
2373    switch (class[0])
2374      {
2375      case X86_64_INTEGER_CLASS:
2376      case X86_64_INTEGERSI_CLASS:
2377	return gen_rtx_REG (mode, intreg[0]);
2378      case X86_64_SSE_CLASS:
2379      case X86_64_SSESF_CLASS:
2380      case X86_64_SSEDF_CLASS:
2381	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2382      case X86_64_X87_CLASS:
2383	return gen_rtx_REG (mode, FIRST_STACK_REG);
2384      case X86_64_NO_CLASS:
2385	/* Zero sized array, struct or class.  */
2386	return NULL;
2387      default:
2388	abort ();
2389      }
2390  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2391      && mode != BLKmode)
2392    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2393  if (n == 2
2394      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2395    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2396  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2397      && class[1] == X86_64_INTEGER_CLASS
2398      && (mode == CDImode || mode == TImode || mode == TFmode)
2399      && intreg[0] + 1 == intreg[1])
2400    return gen_rtx_REG (mode, intreg[0]);
2401  if (n == 4
2402      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2403      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2404      && mode != BLKmode)
2405    return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2406
2407  /* Otherwise figure out the entries of the PARALLEL.  */
2408  for (i = 0; i < n; i++)
2409    {
2410      switch (class[i])
2411        {
2412	  case X86_64_NO_CLASS:
2413	    break;
2414	  case X86_64_INTEGER_CLASS:
2415	  case X86_64_INTEGERSI_CLASS:
2416	    /* Merge TImodes on aligned occasions here too.  */
2417	    if (i * 8 + 8 > bytes)
2418	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2419	    else if (class[i] == X86_64_INTEGERSI_CLASS)
2420	      tmpmode = SImode;
2421	    else
2422	      tmpmode = DImode;
2423	    /* There is no integer mode of the remaining size; use DImode.  */
2424	    if (tmpmode == BLKmode)
2425	      tmpmode = DImode;
2426	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2427					       gen_rtx_REG (tmpmode, *intreg),
2428					       GEN_INT (i*8));
2429	    intreg++;
2430	    break;
2431	  case X86_64_SSESF_CLASS:
2432	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2433					       gen_rtx_REG (SFmode,
2434							    SSE_REGNO (sse_regno)),
2435					       GEN_INT (i*8));
2436	    sse_regno++;
2437	    break;
2438	  case X86_64_SSEDF_CLASS:
2439	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2440					       gen_rtx_REG (DFmode,
2441							    SSE_REGNO (sse_regno)),
2442					       GEN_INT (i*8));
2443	    sse_regno++;
2444	    break;
2445	  case X86_64_SSE_CLASS:
2446	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2447	      tmpmode = TImode;
2448	    else
2449	      tmpmode = DImode;
2450	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2451					       gen_rtx_REG (tmpmode,
2452							    SSE_REGNO (sse_regno)),
2453					       GEN_INT (i*8));
2454	    if (tmpmode == TImode)
2455	      i++;
2456	    sse_regno++;
2457	    break;
2458	  default:
2459	    abort ();
2460	}
2461    }
2462  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2463  for (i = 0; i < nexps; i++)
2464    XVECEXP (ret, 0, i) = exp [i];
2465  return ret;
2466}
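
/* Example (editorial sketch, not part of the original source): for the
   struct { int i; double d; } argument classified earlier, the loop
   above builds a PARALLEL along the lines of

     (parallel [(expr_list (reg:SI di) (const_int 0))
		(expr_list (reg:DF xmm0) (const_int 8))])

   placing the integer eightbyte at byte offset 0 and the double at
   offset 8.  */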
2467
2468/* Update the data in CUM to advance over an argument
2469   of mode MODE and data type TYPE.
2470   (TYPE is null for libcalls where that information may not be available.)  */
2471
2472void
2473function_arg_advance (CUMULATIVE_ARGS *cum,	/* current arg information */
2474		      enum machine_mode mode,	/* current arg mode */
2475		      tree type,	/* type of the argument or 0 if lib support */
2476		      int named)	/* whether or not the argument was named */
2477{
2478  int bytes =
2479    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2480  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2481
2482  if (TARGET_DEBUG_ARG)
2483    fprintf (stderr,
2484	     "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2485	     words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2486  if (TARGET_64BIT)
2487    {
2488      int int_nregs, sse_nregs;
2489      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2490	cum->words += words;
2491      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2492	{
2493	  cum->nregs -= int_nregs;
2494	  cum->sse_nregs -= sse_nregs;
2495	  cum->regno += int_nregs;
2496	  cum->sse_regno += sse_nregs;
2497	}
2498      else
2499	cum->words += words;
2500    }
2501  else
2502    {
2503      if (TARGET_SSE && SSE_REG_MODE_P (mode)
2504	  && (!type || !AGGREGATE_TYPE_P (type)))
2505	{
2506	  cum->sse_words += words;
2507	  cum->sse_nregs -= 1;
2508	  cum->sse_regno += 1;
2509	  if (cum->sse_nregs <= 0)
2510	    {
2511	      cum->sse_nregs = 0;
2512	      cum->sse_regno = 0;
2513	    }
2514	}
2515      else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2516	       && (!type || !AGGREGATE_TYPE_P (type)))
2517	{
2518	  cum->mmx_words += words;
2519	  cum->mmx_nregs -= 1;
2520	  cum->mmx_regno += 1;
2521	  if (cum->mmx_nregs <= 0)
2522	    {
2523	      cum->mmx_nregs = 0;
2524	      cum->mmx_regno = 0;
2525	    }
2526	}
2527      else
2528	{
2529	  cum->words += words;
2530	  cum->nregs -= words;
2531	  cum->regno += words;
2532
2533	  if (cum->nregs <= 0)
2534	    {
2535	      cum->nregs = 0;
2536	      cum->regno = 0;
2537	    }
2538	}
2539    }
2540  return;
2541}
2542
2543/* Define where to put the arguments to a function.
2544   Value is zero to push the argument on the stack,
2545   or a hard register in which to store the argument.
2546
2547   MODE is the argument's machine mode.
2548   TYPE is the data type of the argument (as a tree).
2549    This is null for libcalls where that information may
2550    not be available.
2551   CUM is a variable of type CUMULATIVE_ARGS which gives info about
2552    the preceding args and about the function being called.
2553   NAMED is nonzero if this argument is a named parameter
2554    (otherwise it is an extra parameter matching an ellipsis).  */
2555
2556rtx
2557function_arg (CUMULATIVE_ARGS *cum,	/* current arg information */
2558	      enum machine_mode mode,	/* current arg mode */
2559	      tree type,	/* type of the argument or 0 if lib support */
2560	      int named)	/* != 0 for normal args, == 0 for ...  args */
2561{
2562  rtx ret   = NULL_RTX;
2563  int bytes =
2564    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2565  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2566  static bool warnedsse, warnedmmx;
2567
2568  /* Handle a hidden AL argument containing number of registers for varargs
2569     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
2570     any AL settings.  */
2571  if (mode == VOIDmode)
2572    {
2573      if (TARGET_64BIT)
2574	return GEN_INT (cum->maybe_vaarg
2575			? (cum->sse_nregs < 0
2576			   ? SSE_REGPARM_MAX
2577			   : cum->sse_regno)
2578			: -1);
2579      else
2580	return constm1_rtx;
2581    }
2582  if (TARGET_64BIT)
2583    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2584			       &x86_64_int_parameter_registers [cum->regno],
2585			       cum->sse_regno);
2586  else
2587    switch (mode)
2588      {
2589	/* For now, pass fp/complex values on the stack.  */
2590      default:
2591	break;
2592
2593      case BLKmode:
2594	if (bytes < 0)
2595	  break;
2596	/* FALLTHRU */
2597      case DImode:
2598      case SImode:
2599      case HImode:
2600      case QImode:
2601	if (words <= cum->nregs)
2602	  {
2603	    int regno = cum->regno;
2604
2605	    /* Fastcall allocates the first two DWORD (SImode) or
2606	       smaller arguments to ECX and EDX.  */
2607	    if (cum->fastcall)
2608	      {
2609	        if (mode == BLKmode || mode == DImode)
2610	          break;
2611
2612	        /* ECX, not EAX, is the first allocated register.  */
2613	        if (regno == 0)
2614		  regno = 2;
2615	      }
2616	    ret = gen_rtx_REG (mode, regno);
2617	  }
2618	break;
2619      case TImode:
2620      case V16QImode:
2621      case V8HImode:
2622      case V4SImode:
2623      case V2DImode:
2624      case V4SFmode:
2625      case V2DFmode:
2626	if (!type || !AGGREGATE_TYPE_P (type))
2627	  {
2628	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2629	      {
2630		warnedsse = true;
2631		warning ("SSE vector argument without SSE enabled "
2632			 "changes the ABI");
2633	      }
2634	    if (cum->sse_nregs)
2635	      ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2636	  }
2637	break;
2638      case V8QImode:
2639      case V4HImode:
2640      case V2SImode:
2641      case V2SFmode:
2642	if (!type || !AGGREGATE_TYPE_P (type))
2643	  {
2644	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2645	      {
2646		warnedmmx = true;
2647		warning ("MMX vector argument without MMX enabled "
2648			 "changes the ABI");
2649	      }
2650	    if (cum->mmx_nregs)
2651	      ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2652	  }
2653	break;
2654      }
2655
2656  if (TARGET_DEBUG_ARG)
2657    {
2658      fprintf (stderr,
2659	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2660	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2661
2662      if (ret)
2663	print_simple_rtl (stderr, ret);
2664      else
2665	fprintf (stderr, ", stack");
2666
2667      fprintf (stderr, " )\n");
2668    }
2669
2670  return ret;
2671}
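
/* Example (editorial note, not part of the original source): for

     void __attribute__ ((fastcall)) f (int a, int b);

   init_cumulative_args sets nregs = 2 and fastcall = 1, so the code
   above rewrites regno 0 to 2 and A lands in %ecx while B follows in
   %edx; any further arguments go to the stack.  */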
2672
2673/* A C expression that indicates when an argument must be passed by
2674   reference.  If nonzero for an argument, a copy of that argument is
2675   made in memory and a pointer to the argument is passed instead of
2676   the argument itself.  The pointer is passed in whatever way is
2677   appropriate for passing a pointer to that type.  */
2678
2679int
2680function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2681				enum machine_mode mode ATTRIBUTE_UNUSED,
2682				tree type, int named ATTRIBUTE_UNUSED)
2683{
2684  if (!TARGET_64BIT)
2685    return 0;
2686
2687  if (type && int_size_in_bytes (type) == -1)
2688    {
2689      if (TARGET_DEBUG_ARG)
2690	fprintf (stderr, "function_arg_pass_by_reference\n");
2691      return 1;
2692    }
2693
2694  return 0;
2695}
2696
2697/* Return true when TYPE should be 128-bit aligned under the 32-bit
2698   argument passing ABI.  */
2699static bool
2700contains_128bit_aligned_vector_p (tree type)
2701{
2702  enum machine_mode mode = TYPE_MODE (type);
2703  if (SSE_REG_MODE_P (mode)
2704      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2705    return true;
2706  if (TYPE_ALIGN (type) < 128)
2707    return false;
2708
2709  if (AGGREGATE_TYPE_P (type))
2710    {
2711      /* Walk the aggregates recursively.  */
2712      if (TREE_CODE (type) == RECORD_TYPE
2713	  || TREE_CODE (type) == UNION_TYPE
2714	  || TREE_CODE (type) == QUAL_UNION_TYPE)
2715	{
2716	  tree field;
2717
2718	  if (TYPE_BINFO (type) != NULL
2719	      && TYPE_BINFO_BASETYPES (type) != NULL)
2720	    {
2721	      tree bases = TYPE_BINFO_BASETYPES (type);
2722	      int n_bases = TREE_VEC_LENGTH (bases);
2723	      int i;
2724
2725	      for (i = 0; i < n_bases; ++i)
2726		{
2727		  tree binfo = TREE_VEC_ELT (bases, i);
2728		  tree type = BINFO_TYPE (binfo);
2729
2730		  if (contains_128bit_aligned_vector_p (type))
2731		    return true;
2732		}
2733	    }
2734	  /* And now walk the fields of the structure.  */
2735	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2736	    {
2737	      if (TREE_CODE (field) == FIELD_DECL
2738		  && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2739		return true;
2740	    }
2741	}
2742      /* Needed just in case some language passes arrays by value.  */
2743      else if (TREE_CODE (type) == ARRAY_TYPE)
2744	{
2745	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2746	    return true;
2747	}
2748      else
2749	abort ();
2750    }
2751  return false;
2752}
2753
2754/* Gives the alignment boundary, in bits, of an argument with the
2755   specified mode and type.  */
2756
2757int
2758ix86_function_arg_boundary (enum machine_mode mode, tree type)
2759{
2760  int align;
2761  if (type)
2762    align = TYPE_ALIGN (type);
2763  else
2764    align = GET_MODE_ALIGNMENT (mode);
2765  if (align < PARM_BOUNDARY)
2766    align = PARM_BOUNDARY;
2767  if (!TARGET_64BIT)
2768    {
2769      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
2770	 make an exception for SSE modes since these require 128bit
2771	 alignment.
2772
2773	 The handling here differs from field_alignment.  ICC aligns MMX
2774	 arguments to 4 byte boundaries, while structure fields are aligned
2775	 to 8 byte boundaries.  */
2776      if (!type)
2777	{
2778	  if (!SSE_REG_MODE_P (mode))
2779	    align = PARM_BOUNDARY;
2780	}
2781      else
2782	{
2783	  if (!contains_128bit_aligned_vector_p (type))
2784	    align = PARM_BOUNDARY;
2785	}
2786    }
2787  if (align > 128)
2788    align = 128;
2789  return align;
2790}
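
/* Example (editorial note, not part of the original source): on ia32 a
   plain int argument is reported at PARM_BOUNDARY (32 bits), while an
   __m128 argument, or an aggregate containing one, is reported at 128
   bits by the code above so the callee may assume movaps-compatible
   alignment.  */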
2791
2792/* Return true if N is a possible register number of function value.  */
2793bool
2794ix86_function_value_regno_p (int regno)
2795{
2796  if (!TARGET_64BIT)
2797    {
2798      return ((regno) == 0
2799	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2800	      || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2801    }
2802  return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2803	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2804	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2805}
2806
2807/* Define how to find the value returned by a function.
2808   VALTYPE is the data type of the value (as a tree).
2809   If the precise function being called is known, FUNC is its FUNCTION_DECL;
2810   otherwise, FUNC is 0.  */
2811rtx
2812ix86_function_value (tree valtype)
2813{
2814  if (TARGET_64BIT)
2815    {
2816      rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2817				     REGPARM_MAX, SSE_REGPARM_MAX,
2818				     x86_64_int_return_registers, 0);
2819      /* For zero sized structures, construct_container returns NULL, but we need
2820         to keep the rest of the compiler happy by returning a meaningful value.  */
2821      if (!ret)
2822	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2823      return ret;
2824    }
2825  else
2826    return gen_rtx_REG (TYPE_MODE (valtype),
2827			ix86_value_regno (TYPE_MODE (valtype)));
2828}
2829
2830/* Return true iff TYPE is returned in memory.  */
2831int
2832ix86_return_in_memory (tree type)
2833{
2834  int needed_intregs, needed_sseregs, size;
2835  enum machine_mode mode = TYPE_MODE (type);
2836
2837  if (TARGET_64BIT)
2838    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2839
2840  if (mode == BLKmode)
2841    return 1;
2842
2843  size = int_size_in_bytes (type);
2844
2845  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2846    return 0;
2847
2848  if (VECTOR_MODE_P (mode) || mode == TImode)
2849    {
2850      /* User-created vectors small enough to fit in EAX.  */
2851      if (size < 8)
2852	return 0;
2853
2854      /* MMX/3dNow values are returned on the stack, since we've
2855	 got to EMMS/FEMMS before returning.  */
2856      if (size == 8)
2857	return 1;
2858
2859      /* SSE values are returned in XMM0.  */
2860      /* ??? Except when it doesn't exist?  We have a choice of
2861	 either (1) being abi incompatible with a -march switch,
2862	 or (2) generating an error here.  Given no good solution,
2863	 I think the safest thing is one warning.  The user won't
2864	 be able to use -Werror, but....  */
2865      if (size == 16)
2866	{
2867	  static bool warned;
2868
2869	  if (TARGET_SSE)
2870	    return 0;
2871
2872	  if (!warned)
2873	    {
2874	      warned = true;
2875	      warning ("SSE vector return without SSE enabled "
2876		       "changes the ABI");
2877	    }
2878	  return 1;
2879	}
2880    }
2881
2882  if (mode == XFmode)
2883    return 0;
2884
2885  if (size > 12)
2886    return 1;
2887  return 0;
2888}
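
/* Examples (editorial note, not part of the original source): on ia32
   any BLKmode aggregate is caught by the first check above and
   returned in memory; an 8-byte __m64 also goes to memory because of
   the EMMS requirement, while a 16-byte __m128 is returned in XMM0
   whenever SSE is enabled.  */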
2889
2890/* Define how to find the value returned by a library function
2891   assuming the value has mode MODE.  */
2892rtx
2893ix86_libcall_value (enum machine_mode mode)
2894{
2895  if (TARGET_64BIT)
2896    {
2897      switch (mode)
2898	{
2899	case SFmode:
2900	case SCmode:
2901	case DFmode:
2902	case DCmode:
2903	  return gen_rtx_REG (mode, FIRST_SSE_REG);
2904	case XFmode:
2905	case XCmode:
2906	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2907	case TFmode:
2908	case TCmode:
2909	  return NULL;
2910	default:
2911	  return gen_rtx_REG (mode, 0);
2912	}
2913    }
2914  else
2915    return gen_rtx_REG (mode, ix86_value_regno (mode));
2916}
2917
2918/* Given a mode, return the register to use for a return value.  */
2919
2920static int
2921ix86_value_regno (enum machine_mode mode)
2922{
2923  /* Floating point return values in %st(0).  */
2924  if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2925    return FIRST_FLOAT_REG;
2926  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
2927     we prevent this case when sse is not available.  */
2928  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2929    return FIRST_SSE_REG;
2930  /* Everything else in %eax.  */
2931  return 0;
2932}
2933
2934/* Create the va_list data type.  */
2935
2936static tree
2937ix86_build_builtin_va_list (void)
2938{
2939  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2940
2941  /* For i386 we use plain pointer to argument area.  */
2942  if (!TARGET_64BIT)
2943    return build_pointer_type (char_type_node);
2944
2945  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2946  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2947
2948  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2949		      unsigned_type_node);
2950  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2951		      unsigned_type_node);
2952  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2953		      ptr_type_node);
2954  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2955		      ptr_type_node);
2956
2957  DECL_FIELD_CONTEXT (f_gpr) = record;
2958  DECL_FIELD_CONTEXT (f_fpr) = record;
2959  DECL_FIELD_CONTEXT (f_ovf) = record;
2960  DECL_FIELD_CONTEXT (f_sav) = record;
2961
2962  TREE_CHAIN (record) = type_decl;
2963  TYPE_NAME (record) = type_decl;
2964  TYPE_FIELDS (record) = f_gpr;
2965  TREE_CHAIN (f_gpr) = f_fpr;
2966  TREE_CHAIN (f_fpr) = f_ovf;
2967  TREE_CHAIN (f_ovf) = f_sav;
2968
2969  layout_type (record);
2970
2971  /* The correct type is an array type of one element.  */
2972  return build_array_type (record, build_index_type (size_zero_node));
2973}
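
/* The record built above matches the familiar C-level declaration
   (editorial sketch, not part of the original source):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag, va_list[1];

   The one-element array type makes va_list decay to a pointer when
   passed between functions, as the psABI requires.  */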
2974
2975/* Perform any actions needed for a function that is receiving a
2976   variable number of arguments.
2977
2978   CUM is as above.
2979
2980   MODE and TYPE are the mode and type of the current parameter.
2981
2982   PRETEND_SIZE is a variable that should be set to the amount of stack
2983   that must be pushed by the prolog to pretend that our caller pushed
2984   it.
2985
2986   Normally, this macro will push all remaining incoming registers on the
2987   stack and set PRETEND_SIZE to the length of the registers pushed.  */
2988
2989void
2990ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2991			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
2992			     int no_rtl)
2993{
2994  CUMULATIVE_ARGS next_cum;
2995  rtx save_area = NULL_RTX, mem;
2996  rtx label;
2997  rtx label_ref;
2998  rtx tmp_reg;
2999  rtx nsse_reg;
3000  int set;
3001  tree fntype;
3002  int stdarg_p;
3003  int i;
3004
3005  if (!TARGET_64BIT)
3006    return;
3007
3008  /* Indicate to allocate space on the stack for varargs save area.  */
3009  ix86_save_varrargs_registers = 1;
3010
3011  cfun->stack_alignment_needed = 128;
3012
3013  fntype = TREE_TYPE (current_function_decl);
3014  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3015	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3016		  != void_type_node));
3017
3018  /* For varargs, we do not want to skip the dummy va_dcl argument.
3019     For stdargs, we do want to skip the last named argument.  */
3020  next_cum = *cum;
3021  if (stdarg_p)
3022    function_arg_advance (&next_cum, mode, type, 1);
3023
3024  if (!no_rtl)
3025    save_area = frame_pointer_rtx;
3026
3027  set = get_varargs_alias_set ();
3028
3029  for (i = next_cum.regno; i < ix86_regparm; i++)
3030    {
3031      mem = gen_rtx_MEM (Pmode,
3032			 plus_constant (save_area, i * UNITS_PER_WORD));
3033      set_mem_alias_set (mem, set);
3034      emit_move_insn (mem, gen_rtx_REG (Pmode,
3035					x86_64_int_parameter_registers[i]));
3036    }
3037
3038  if (next_cum.sse_nregs)
3039    {
3040      /* Now emit code to save SSE registers.  The AX parameter contains the
3041	 number of SSE parameter registers used to call this function.  We use
3042	 the sse_prologue_save insn template, which produces a computed jump
3043	 across the SSE saves.  Some preparation work is needed first.  */
3044
3045      label = gen_label_rtx ();
3046      label_ref = gen_rtx_LABEL_REF (Pmode, label);
3047
3048      /* Compute the address to jump to:
3049         label - 4*eax + nnamed_sse_arguments*4 (each save insn is 4 bytes).  */
3050      tmp_reg = gen_reg_rtx (Pmode);
3051      nsse_reg = gen_reg_rtx (Pmode);
3052      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3053      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3054			      gen_rtx_MULT (Pmode, nsse_reg,
3055					    GEN_INT (4))));
3056      if (next_cum.sse_regno)
3057	emit_move_insn
3058	  (nsse_reg,
3059	   gen_rtx_CONST (DImode,
3060			  gen_rtx_PLUS (DImode,
3061					label_ref,
3062					GEN_INT (next_cum.sse_regno * 4))));
3063      else
3064	emit_move_insn (nsse_reg, label_ref);
3065      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3066
3067      /* Compute the address of the memory block we save into.  We always use a
3068	 pointer pointing 127 bytes past the first byte to store, so that each
3069	 save instruction fits in 4 bytes (signed 8-bit displacements).  */
3070      tmp_reg = gen_reg_rtx (Pmode);
3071      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3072			      plus_constant (save_area,
3073					     8 * REGPARM_MAX + 127)));
3074      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3075      set_mem_alias_set (mem, set);
3076      set_mem_align (mem, BITS_PER_WORD);
3077
3078      /* And finally do the dirty job!  */
3079      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3080					GEN_INT (next_cum.sse_regno), label));
3081    }
3082
3083}
3084
3085/* Implement va_start.  */
3086
3087void
3088ix86_va_start (tree valist, rtx nextarg)
3089{
3090  HOST_WIDE_INT words, n_gpr, n_fpr;
3091  tree f_gpr, f_fpr, f_ovf, f_sav;
3092  tree gpr, fpr, ovf, sav, t;
3093
3094  /* Only the 64-bit target needs anything special.  */
3095  if (!TARGET_64BIT)
3096    {
3097      std_expand_builtin_va_start (valist, nextarg);
3098      return;
3099    }
3100
3101  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3102  f_fpr = TREE_CHAIN (f_gpr);
3103  f_ovf = TREE_CHAIN (f_fpr);
3104  f_sav = TREE_CHAIN (f_ovf);
3105
3106  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3107  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3108  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3109  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3110  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3111
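  /* Per the x86-64 ABI, the four va_list fields are: gp_offset (byte offset
     of the next available integer register slot in the save area), fp_offset
     (offset of the next SSE slot; SSE slots start at 8*REGPARM_MAX and are
     16 bytes each), overflow_arg_area, and reg_save_area.  */
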
3112  /* Count number of gp and fp argument registers used.  */
3113  words = current_function_args_info.words;
3114  n_gpr = current_function_args_info.regno;
3115  n_fpr = current_function_args_info.sse_regno;
3116
3117  if (TARGET_DEBUG_ARG)
3118    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3119	     (int) words, (int) n_gpr, (int) n_fpr);
3120
3121  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3122	     build_int_2 (n_gpr * 8, 0));
3123  TREE_SIDE_EFFECTS (t) = 1;
3124  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3125
3126  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3127	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3128  TREE_SIDE_EFFECTS (t) = 1;
3129  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3130
3131  /* Find the overflow area.  */
3132  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3133  if (words != 0)
3134    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3135	       build_int_2 (words * UNITS_PER_WORD, 0));
3136  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3137  TREE_SIDE_EFFECTS (t) = 1;
3138  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3139
3140  /* Find the register save area.
3141     The prologue of the function saves it right above the stack frame.  */
3142  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3143  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3144  TREE_SIDE_EFFECTS (t) = 1;
3145  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3146}
3147
3148/* Implement va_arg.  */

3149rtx
3150ix86_va_arg (tree valist, tree type)
3151{
3152  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3153  tree f_gpr, f_fpr, f_ovf, f_sav;
3154  tree gpr, fpr, ovf, sav, t;
3155  int size, rsize;
3156  rtx lab_false, lab_over = NULL_RTX;
3157  rtx addr_rtx, r;
3158  rtx container;
3159  int indirect_p = 0;
3160
3161  /* Only the 64-bit target needs anything special.  */
3162  if (!TARGET_64BIT)
3163    {
3164      return std_expand_builtin_va_arg (valist, type);
3165    }
3166
3167  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3168  f_fpr = TREE_CHAIN (f_gpr);
3169  f_ovf = TREE_CHAIN (f_fpr);
3170  f_sav = TREE_CHAIN (f_ovf);
3171
3172  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3173  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3174  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3175  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3176  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3177
3178  size = int_size_in_bytes (type);
3179  if (size == -1)
3180    {
3181      /* Passed by reference.  */
3182      indirect_p = 1;
3183      type = build_pointer_type (type);
3184      size = int_size_in_bytes (type);
3185    }
3186  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3187
3188  container = construct_container (TYPE_MODE (type), type, 0,
3189				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3190  /* Pull the value out of the saved registers ...  */
3193
3194  addr_rtx = gen_reg_rtx (Pmode);
3195
3196  if (container)
3197    {
3198      rtx int_addr_rtx, sse_addr_rtx;
3199      int needed_intregs, needed_sseregs;
3200      int need_temp;
3201
3202      lab_over = gen_label_rtx ();
3203      lab_false = gen_label_rtx ();
3204
3205      examine_argument (TYPE_MODE (type), type, 0,
3206		        &needed_intregs, &needed_sseregs);
3207
3208
3209      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3210		   || TYPE_ALIGN (type) > 128);
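      /* Integer slots in the register save area are 8 bytes (64 bits) apart
	 and SSE slots 16 bytes (128 bits) apart, so a type aligned more
	 strictly than its slot must be copied out through a temporary.  */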
3211
3212      /* In case we are passing a structure, verify that it is a consecutive
3213         block in the register save area.  If not, we need to do moves.  */
3214      if (!need_temp && !REG_P (container))
3215	{
3216	  /* Verify that all registers are strictly consecutive.  */
3217	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3218	    {
3219	      int i;
3220
3221	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3222		{
3223		  rtx slot = XVECEXP (container, 0, i);
3224		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3225		      || INTVAL (XEXP (slot, 1)) != i * 16)
3226		    need_temp = 1;
3227		}
3228	    }
3229	  else
3230	    {
3231	      int i;
3232
3233	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3234		{
3235		  rtx slot = XVECEXP (container, 0, i);
3236		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3237		      || INTVAL (XEXP (slot, 1)) != i * 8)
3238		    need_temp = 1;
3239		}
3240	    }
3241	}
3242      if (!need_temp)
3243	{
3244	  int_addr_rtx = addr_rtx;
3245	  sse_addr_rtx = addr_rtx;
3246	}
3247      else
3248	{
3249	  int_addr_rtx = gen_reg_rtx (Pmode);
3250	  sse_addr_rtx = gen_reg_rtx (Pmode);
3251	}
3252      /* First ensure that we fit completely in registers.  */
3253      if (needed_intregs)
3254	{
3255	  emit_cmp_and_jump_insns (expand_expr
3256				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3257				   GEN_INT ((REGPARM_MAX - needed_intregs +
3258					     1) * 8), GE, const1_rtx, SImode,
3259				   1, lab_false);
3260	}
3261      if (needed_sseregs)
3262	{
3263	  emit_cmp_and_jump_insns (expand_expr
3264				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3265				   GEN_INT ((SSE_REGPARM_MAX -
3266					     needed_sseregs + 1) * 16 +
3267					    REGPARM_MAX * 8), GE, const1_rtx,
3268				   SImode, 1, lab_false);
3269	}
3270
3271      /* Compute index to start of area used for integer regs.  */
3272      if (needed_intregs)
3273	{
3274	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3275	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3276	  if (r != int_addr_rtx)
3277	    emit_move_insn (int_addr_rtx, r);
3278	}
3279      if (needed_sseregs)
3280	{
3281	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3282	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3283	  if (r != sse_addr_rtx)
3284	    emit_move_insn (sse_addr_rtx, r);
3285	}
3286      if (need_temp)
3287	{
3288	  int i;
3289	  rtx mem;
3290	  rtx x;
3291
3292	  /* Never use the temporary's own MEM, as it carries the wrong alias set; take its address and build a fresh MEM instead.  */
3293	  x = XEXP (assign_temp (type, 0, 1, 0), 0);
3294	  mem = gen_rtx_MEM (BLKmode, x);
3295	  force_operand (x, addr_rtx);
3296	  set_mem_alias_set (mem, get_varargs_alias_set ());
3297	  set_mem_align (mem, BITS_PER_UNIT);
3298
3299	  for (i = 0; i < XVECLEN (container, 0); i++)
3300	    {
3301	      rtx slot = XVECEXP (container, 0, i);
3302	      rtx reg = XEXP (slot, 0);
3303	      enum machine_mode mode = GET_MODE (reg);
3304	      rtx src_addr;
3305	      rtx src_mem;
3306	      int src_offset;
3307	      rtx dest_mem;
3308
3309	      if (SSE_REGNO_P (REGNO (reg)))
3310		{
3311		  src_addr = sse_addr_rtx;
3312		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3313		}
3314	      else
3315		{
3316		  src_addr = int_addr_rtx;
3317		  src_offset = REGNO (reg) * 8;
3318		}
3319	      src_mem = gen_rtx_MEM (mode, src_addr);
3320	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
3321	      src_mem = adjust_address (src_mem, mode, src_offset);
3322	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3323	      emit_move_insn (dest_mem, src_mem);
3324	    }
3325	}
3326
3327      if (needed_intregs)
3328	{
3329	  t =
3330	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3331		   build_int_2 (needed_intregs * 8, 0));
3332	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3333	  TREE_SIDE_EFFECTS (t) = 1;
3334	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3335	}
3336      if (needed_sseregs)
3337	{
3338	  t =
3339	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3340		   build_int_2 (needed_sseregs * 16, 0));
3341	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3342	  TREE_SIDE_EFFECTS (t) = 1;
3343	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3344	}
3345
3346      emit_jump_insn (gen_jump (lab_over));
3347      emit_barrier ();
3348      emit_label (lab_false);
3349    }
3350
3351  /* ... otherwise out of the overflow area.  */
3352
3353  /* Care for on-stack alignment if needed.  */
3354  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3355    t = ovf;
3356  else
3357    {
3358      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3359      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3360      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3361    }
3362  t = save_expr (t);
3363
3364  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3365  if (r != addr_rtx)
3366    emit_move_insn (addr_rtx, r);
3367
3368  t =
3369    build (PLUS_EXPR, TREE_TYPE (t), t,
3370	   build_int_2 (rsize * UNITS_PER_WORD, 0));
3371  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3372  TREE_SIDE_EFFECTS (t) = 1;
3373  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3374
3375  if (container)
3376    emit_label (lab_over);
3377
3378  if (indirect_p)
3379    {
3380      r = gen_rtx_MEM (Pmode, addr_rtx);
3381      set_mem_alias_set (r, get_varargs_alias_set ());
3382      emit_move_insn (addr_rtx, r);
3383    }
3384
3385  return addr_rtx;
3386}
3387
3388/* Return nonzero if OP is either a i387 or SSE fp register.  */
3389int
3390any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3391{
3392  return ANY_FP_REG_P (op);
3393}
3394
3395/* Return nonzero if OP is an i387 fp register.  */
3396int
3397fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3398{
3399  return FP_REG_P (op);
3400}
3401
3402/* Return nonzero if OP is a non-fp register_operand.  */
3403int
3404register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3405{
3406  return register_operand (op, mode) && !ANY_FP_REG_P (op);
3407}
3408
3409/* Return nonzero if OP is a register operand other than an
3410   i387 fp register.  */
3411int
3412register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3413{
3414  return register_operand (op, mode) && !FP_REG_P (op);
3415}
3416
3417/* Return nonzero if OP is a general operand representable on x86_64.  */
3418
3419int
3420x86_64_general_operand (rtx op, enum machine_mode mode)
3421{
3422  if (!TARGET_64BIT)
3423    return general_operand (op, mode);
3424  if (nonimmediate_operand (op, mode))
3425    return 1;
3426  return x86_64_sign_extended_value (op);
3427}
3428
3429/* Return nonzero if OP is a general operand representable on x86_64
3430   as either a sign-extended or a zero-extended constant.  */
3431
3432int
3433x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3434{
3435  if (!TARGET_64BIT)
3436    return general_operand (op, mode);
3437  if (nonimmediate_operand (op, mode))
3438    return 1;
3439  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3440}
3441
3442/* Return nonzero if OP is a nonmemory operand representable on x86_64.  */
3443
3444int
3445x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3446{
3447  if (!TARGET_64BIT)
3448    return nonmemory_operand (op, mode);
3449  if (register_operand (op, mode))
3450    return 1;
3451  return x86_64_sign_extended_value (op);
3452}
3453
3454/* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns.  */
3455
3456int
3457x86_64_movabs_operand (rtx op, enum machine_mode mode)
3458{
3459  if (!TARGET_64BIT || !flag_pic)
3460    return nonmemory_operand (op, mode);
3461  if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3462    return 1;
3463  if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3464    return 1;
3465  return 0;
3466}
3467
3468/* Return nonzero if OPNUM's MEM should be matched
3469   in movabs* patterns.  */
3470
3471int
3472ix86_check_movabs (rtx insn, int opnum)
3473{
3474  rtx set, mem;
3475
3476  set = PATTERN (insn);
3477  if (GET_CODE (set) == PARALLEL)
3478    set = XVECEXP (set, 0, 0);
3479  if (GET_CODE (set) != SET)
3480    abort ();
3481  mem = XEXP (set, opnum);
3482  while (GET_CODE (mem) == SUBREG)
3483    mem = SUBREG_REG (mem);
3484  if (GET_CODE (mem) != MEM)
3485    abort ();
3486  return (volatile_ok || !MEM_VOLATILE_P (mem));
3487}
3488
3489/* Return nonzero if OP is a nonmemory operand representable on x86_64
   as either a sign-extended or a zero-extended constant.  */
3490
3491int
3492x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3493{
3494  if (!TARGET_64BIT)
3495    return nonmemory_operand (op, mode);
3496  if (register_operand (op, mode))
3497    return 1;
3498  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3499}
3500
3501/* Return nonzero if OP is an immediate operand representable on x86_64.  */
3502
3503int
3504x86_64_immediate_operand (rtx op, enum machine_mode mode)
3505{
3506  if (!TARGET_64BIT)
3507    return immediate_operand (op, mode);
3508  return x86_64_sign_extended_value (op);
3509}
3510
3511/* Return nonzero if OP is an immediate operand representable on x86_64
   as a zero-extended 32-bit value.  */
3512
3513int
3514x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3515{
3516  return x86_64_zero_extended_value (op);
3517}
3518
3519/* Return nonzero if OP is a CONST_INT >= 1 and <= 31 (a valid operand
3520   for shift & compare patterns, as shifting by 0 does not change flags),
3521   else return zero.  */
3522
3523int
3524const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3525{
3526  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3527}
3528
3529/* Returns 1 if OP is either a symbol reference or a sum of a symbol
3530   reference and a constant.  */
3531
3532int
3533symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3534{
3535  switch (GET_CODE (op))
3536    {
3537    case SYMBOL_REF:
3538    case LABEL_REF:
3539      return 1;
3540
3541    case CONST:
3542      op = XEXP (op, 0);
3543      if (GET_CODE (op) == SYMBOL_REF
3544	  || GET_CODE (op) == LABEL_REF
3545	  || (GET_CODE (op) == UNSPEC
3546	      && (XINT (op, 1) == UNSPEC_GOT
3547		  || XINT (op, 1) == UNSPEC_GOTOFF
3548		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
3549	return 1;
3550      if (GET_CODE (op) != PLUS
3551	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
3552	return 0;
3553
3554      op = XEXP (op, 0);
3555      if (GET_CODE (op) == SYMBOL_REF
3556	  || GET_CODE (op) == LABEL_REF)
3557	return 1;
3558      /* Only @GOTOFF gets offsets.  */
3559      if (GET_CODE (op) != UNSPEC
3560	  || XINT (op, 1) != UNSPEC_GOTOFF)
3561	return 0;
3562
3563      op = XVECEXP (op, 0, 0);
3564      if (GET_CODE (op) == SYMBOL_REF
3565	  || GET_CODE (op) == LABEL_REF)
3566	return 1;
3567      return 0;
3568
3569    default:
3570      return 0;
3571    }
3572}
3573
3574/* Return true if the operand contains a @GOT or @GOTOFF reference.  */
3575
3576int
3577pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3578{
3579  if (GET_CODE (op) != CONST)
3580    return 0;
3581  op = XEXP (op, 0);
3582  if (TARGET_64BIT)
3583    {
3584      if (GET_CODE (op) == UNSPEC
3585	  && XINT (op, 1) == UNSPEC_GOTPCREL)
3586	return 1;
3587      if (GET_CODE (op) == PLUS
3588	  && GET_CODE (XEXP (op, 0)) == UNSPEC
3589	  && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3590	return 1;
3591    }
3592  else
3593    {
3594      if (GET_CODE (op) == UNSPEC)
3595	return 1;
3596      if (GET_CODE (op) != PLUS
3597	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
3598	return 0;
3599      op = XEXP (op, 0);
3600      if (GET_CODE (op) == UNSPEC)
3601	return 1;
3602    }
3603  return 0;
3604}
3605
3606/* Return true if OP is a symbolic operand that resolves locally.  */
3607
3608static int
3609local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3610{
3611  if (GET_CODE (op) == CONST
3612      && GET_CODE (XEXP (op, 0)) == PLUS
3613      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3614    op = XEXP (XEXP (op, 0), 0);
3615
3616  if (GET_CODE (op) == LABEL_REF)
3617    return 1;
3618
3619  if (GET_CODE (op) != SYMBOL_REF)
3620    return 0;
3621
3622  if (SYMBOL_REF_LOCAL_P (op))
3623    return 1;
3624
3625  /* There is, however, a not insubstantial body of code in the rest of
3626     the compiler that assumes it can just stick the results of
3627     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
3628  /* ??? This is a hack.  Should update the body of the compiler to
3629     always create a DECL and invoke targetm.encode_section_info.  */
3630  if (strncmp (XSTR (op, 0), internal_label_prefix,
3631	       internal_label_prefix_len) == 0)
3632    return 1;
3633
3634  return 0;
3635}
3636
3637/* Test for various thread-local symbols.  */
3638
3639int
3640tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3641{
3642  if (GET_CODE (op) != SYMBOL_REF)
3643    return 0;
3644  return SYMBOL_REF_TLS_MODEL (op);
3645}
3646
3647static inline int
3648tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3649{
3650  if (GET_CODE (op) != SYMBOL_REF)
3651    return 0;
3652  return SYMBOL_REF_TLS_MODEL (op) == kind;
3653}
3654
3655int
3656global_dynamic_symbolic_operand (rtx op,
3657				 enum machine_mode mode ATTRIBUTE_UNUSED)
3658{
3659  return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3660}
3661
3662int
3663local_dynamic_symbolic_operand (rtx op,
3664				enum machine_mode mode ATTRIBUTE_UNUSED)
3665{
3666  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3667}
3668
3669int
3670initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3671{
3672  return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3673}
3674
3675int
3676local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3677{
3678  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3679}
3680
3681/* Test for a valid operand for a call instruction.  Don't allow the
3682   arg pointer register or virtual regs since they may decay into
3683   reg + const, which the patterns can't handle.  */
3684
3685int
3686call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3687{
3688  /* Disallow indirect through a virtual register.  This leads to
3689     compiler aborts when trying to eliminate them.  */
3690  if (GET_CODE (op) == REG
3691      && (op == arg_pointer_rtx
3692	  || op == frame_pointer_rtx
3693	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3694	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3695    return 0;
3696
3697  /* Disallow `call 1234'.  Due to varying assembler lameness this
3698     gets either rejected or translated to `call .+1234'.  */
3699  if (GET_CODE (op) == CONST_INT)
3700    return 0;
3701
3702  /* Explicitly allow SYMBOL_REF even if pic.  */
3703  if (GET_CODE (op) == SYMBOL_REF)
3704    return 1;
3705
3706  /* Otherwise we can allow any general_operand in the address.  */
3707  return general_operand (op, Pmode);
3708}
3709
3710/* Test for a valid operand for a call instruction.  Don't allow the
3711   arg pointer register or virtual regs since they may decay into
3712   reg + const, which the patterns can't handle.  */
3713
3714int
3715sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3716{
3717  /* Disallow indirect through a virtual register.  This leads to
3718     compiler aborts when trying to eliminate them.  */
3719  if (GET_CODE (op) == REG
3720      && (op == arg_pointer_rtx
3721	  || op == frame_pointer_rtx
3722	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3723	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3724    return 0;
3725
3726  /* Explicitly allow SYMBOL_REF even if pic.  */
3727  if (GET_CODE (op) == SYMBOL_REF)
3728    return 1;
3729
3730  /* Otherwise we can only allow register operands.  */
3731  return register_operand (op, Pmode);
3732}
3733
3734int
3735constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3736{
3737  if (GET_CODE (op) == CONST
3738      && GET_CODE (XEXP (op, 0)) == PLUS
3739      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3740    op = XEXP (XEXP (op, 0), 0);
3741  return GET_CODE (op) == SYMBOL_REF;
3742}
3743
3744/* Match exactly zero and one.  */
3745
3746int
3747const0_operand (rtx op, enum machine_mode mode)
3748{
3749  return op == CONST0_RTX (mode);
3750}
3751
3752int
3753const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3754{
3755  return op == const1_rtx;
3756}
3757
3758/* Match 2, 4, or 8.  Used for leal multiplicands.  */
3759
3760int
3761const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3762{
3763  return (GET_CODE (op) == CONST_INT
3764	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3765}
3766
3767int
3768const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3769{
3770  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3771}
3772
3773int
3774const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3775{
3776  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3777}
3778
3779int
3780const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3781{
3782  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3783}
3784
3785int
3786const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3787{
3788  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3789}
3790
3791
3792/* True if this is a constant appropriate for an increment or decrement.  */
3793
3794int
3795incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3796{
3797  /* On Pentium 4, the inc and dec operations cause an extra dependency on
3798     the flags register, since the carry flag is not set.  */
3799  if (TARGET_PENTIUM4 && !optimize_size)
3800    return 0;
3801  return op == const1_rtx || op == constm1_rtx;
3802}
3803
3804/* Return nonzero if OP is acceptable as operand of DImode shift
3805   expander.  */
3806
3807int
3808shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3809{
3810  if (TARGET_64BIT)
3811    return nonimmediate_operand (op, mode);
3812  else
3813    return register_operand (op, mode);
3814}
3815
3816/* Return false if this is the stack pointer, or any other fake
3817   register eliminable to the stack pointer.  Otherwise, this is
3818   a register operand.
3819
3820   This is used to prevent esp from being used as an index reg,
3821   which would only happen in pathological cases.  */
3822
3823int
3824reg_no_sp_operand (rtx op, enum machine_mode mode)
3825{
3826  rtx t = op;
3827  if (GET_CODE (t) == SUBREG)
3828    t = SUBREG_REG (t);
3829  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3830    return 0;
3831
3832  return register_operand (op, mode);
3833}
3834
3835int
3836mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3837{
3838  return MMX_REG_P (op);
3839}
3840
3841/* Return false if this is any eliminable register.  Otherwise
3842   general_operand.  */
3843
3844int
3845general_no_elim_operand (rtx op, enum machine_mode mode)
3846{
3847  rtx t = op;
3848  if (GET_CODE (t) == SUBREG)
3849    t = SUBREG_REG (t);
3850  if (t == arg_pointer_rtx || t == frame_pointer_rtx
3851      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3852      || t == virtual_stack_dynamic_rtx)
3853    return 0;
3854  if (REG_P (t)
3855      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3856      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3857    return 0;
3858
3859  return general_operand (op, mode);
3860}
3861
3862/* Return false if this is any eliminable register.  Otherwise
3863   register_operand or const_int.  */
3864
3865int
3866nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3867{
3868  rtx t = op;
3869  if (GET_CODE (t) == SUBREG)
3870    t = SUBREG_REG (t);
3871  if (t == arg_pointer_rtx || t == frame_pointer_rtx
3872      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3873      || t == virtual_stack_dynamic_rtx)
3874    return 0;
3875
3876  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3877}
3878
3879/* Return false if this is any eliminable register or stack register,
3880   otherwise work like register_operand.  */
3881
3882int
3883index_register_operand (rtx op, enum machine_mode mode)
3884{
3885  rtx t = op;
3886  if (GET_CODE (t) == SUBREG)
3887    t = SUBREG_REG (t);
3888  if (!REG_P (t))
3889    return 0;
3890  if (t == arg_pointer_rtx
3891      || t == frame_pointer_rtx
3892      || t == virtual_incoming_args_rtx
3893      || t == virtual_stack_vars_rtx
3894      || t == virtual_stack_dynamic_rtx
3895      || REGNO (t) == STACK_POINTER_REGNUM)
3896    return 0;
3897
3898  return general_operand (op, mode);
3899}
3900
3901/* Return true if op is a Q_REGS class register.  */
3902
3903int
3904q_regs_operand (rtx op, enum machine_mode mode)
3905{
3906  if (mode != VOIDmode && GET_MODE (op) != mode)
3907    return 0;
3908  if (GET_CODE (op) == SUBREG)
3909    op = SUBREG_REG (op);
3910  return ANY_QI_REG_P (op);
3911}
3912
3913/* Return true if OP is the flags register.  */
3914
3915int
3916flags_reg_operand (rtx op, enum machine_mode mode)
3917{
3918  if (mode != VOIDmode && GET_MODE (op) != mode)
3919    return 0;
3920  return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3921}
3922
3923/* Return true if op is a NON_Q_REGS class register.  */
3924
3925int
3926non_q_regs_operand (rtx op, enum machine_mode mode)
3927{
3928  if (mode != VOIDmode && GET_MODE (op) != mode)
3929    return 0;
3930  if (GET_CODE (op) == SUBREG)
3931    op = SUBREG_REG (op);
3932  return NON_QI_REG_P (op);
3933}
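/* Return 1 if OP is a constant-pool memory load of a vector constant
   whose elements, apart from the lowest one, are all zero (i.e. a value
   that a zero-extending scalar load would produce).  */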
3934
3935int
3936zero_extended_scalar_load_operand (rtx op,
3937				   enum machine_mode mode ATTRIBUTE_UNUSED)
3938{
3939  unsigned n_elts;
3940  if (GET_CODE (op) != MEM)
3941    return 0;
3942  op = maybe_get_pool_constant (op);
3943  if (!op)
3944    return 0;
3945  if (GET_CODE (op) != CONST_VECTOR)
3946    return 0;
3947  n_elts =
3948    (GET_MODE_SIZE (GET_MODE (op)) /
3949     GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3950  for (n_elts--; n_elts > 0; n_elts--)
3951    {
3952      rtx elt = CONST_VECTOR_ELT (op, n_elts);
3953      if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3954	return 0;
3955    }
3956  return 1;
3957}
3958
3959/* Return 1 when OP is an operand acceptable for a standard SSE move.  */
3960int
3961vector_move_operand (rtx op, enum machine_mode mode)
3962{
3963  if (nonimmediate_operand (op, mode))
3964    return 1;
3965  if (GET_MODE (op) != mode && mode != VOIDmode)
3966    return 0;
3967  return (op == CONST0_RTX (GET_MODE (op)));
3968}
3969
3970/* Return true if OP is a valid address that does not contain
3971   a segment override.  */
3972
3973int
3974no_seg_address_operand (rtx op, enum machine_mode mode)
3975{
3976  struct ix86_address parts;
3977
3978  if (! address_operand (op, mode))
3979    return 0;
3980
3981  if (! ix86_decompose_address (op, &parts))
3982    abort ();
3983
3984  return parts.seg == SEG_DEFAULT;
3985}
3986
3987/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3988   insns.  */
3989int
3990sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3991{
3992  enum rtx_code code = GET_CODE (op);
3993  switch (code)
3994    {
3995    /* Operations supported directly.  */
3996    case EQ:
3997    case LT:
3998    case LE:
3999    case UNORDERED:
4000    case NE:
4001    case UNGE:
4002    case UNGT:
4003    case ORDERED:
4004      return 1;
4005    /* These are equivalent to the ones above in non-IEEE comparisons.  */
4006    case UNEQ:
4007    case UNLT:
4008    case UNLE:
4009    case LTGT:
4010    case GE:
4011    case GT:
4012      return !TARGET_IEEE_FP;
4013    default:
4014      return 0;
4015    }
4016}

4017/* Return 1 if OP is a valid comparison operator in a valid mode.  */
4018int
4019ix86_comparison_operator (rtx op, enum machine_mode mode)
4020{
4021  enum machine_mode inmode;
4022  enum rtx_code code = GET_CODE (op);
4023  if (mode != VOIDmode && GET_MODE (op) != mode)
4024    return 0;
4025  if (GET_RTX_CLASS (code) != '<')
4026    return 0;
4027  inmode = GET_MODE (XEXP (op, 0));
4028
4029  if (inmode == CCFPmode || inmode == CCFPUmode)
4030    {
4031      enum rtx_code second_code, bypass_code;
4032      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
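      /* Accept only comparisons that can be implemented as a single
	 conditional branch, with no bypass or second branch needed.  */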
4033      return (bypass_code == NIL && second_code == NIL);
4034    }
4035  switch (code)
4036    {
4037    case EQ: case NE:
4038      return 1;
4039    case LT: case GE:
4040      if (inmode == CCmode || inmode == CCGCmode
4041	  || inmode == CCGOCmode || inmode == CCNOmode)
4042	return 1;
4043      return 0;
4044    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4045      if (inmode == CCmode)
4046	return 1;
4047      return 0;
4048    case GT: case LE:
4049      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4050	return 1;
4051      return 0;
4052    default:
4053      return 0;
4054    }
4055}
4056
4057/* Return 1 if OP is a valid comparison operator testing that the carry
4058   flag is set.  */
4059int
4060ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4061{
4062  enum machine_mode inmode;
4063  enum rtx_code code = GET_CODE (op);
4064
4065  if (mode != VOIDmode && GET_MODE (op) != mode)
4066    return 0;
4067  if (GET_RTX_CLASS (code) != '<')
4068    return 0;
4069  inmode = GET_MODE (XEXP (op, 0));
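  /* The comparison must test the flags register (hard register 17,
     i.e. FLAGS_REG) against zero.  */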
4070  if (GET_CODE (XEXP (op, 0)) != REG
4071      || REGNO (XEXP (op, 0)) != 17
4072      || XEXP (op, 1) != const0_rtx)
4073    return 0;
4074
4075  if (inmode == CCFPmode || inmode == CCFPUmode)
4076    {
4077      enum rtx_code second_code, bypass_code;
4078
4079      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4080      if (bypass_code != NIL || second_code != NIL)
4081	return 0;
4082      code = ix86_fp_compare_code_to_integer (code);
4083    }
4084  else if (inmode != CCmode)
4085    return 0;
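  /* LTU is the unsigned "below" comparison, the one implemented by
     testing the carry flag.  */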
4086  return code == LTU;
4087}
4088
4089/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */
4090
4091int
4092fcmov_comparison_operator (rtx op, enum machine_mode mode)
4093{
4094  enum machine_mode inmode;
4095  enum rtx_code code = GET_CODE (op);
4096
4097  if (mode != VOIDmode && GET_MODE (op) != mode)
4098    return 0;
4099  if (GET_RTX_CLASS (code) != '<')
4100    return 0;
4101  inmode = GET_MODE (XEXP (op, 0));
4102  if (inmode == CCFPmode || inmode == CCFPUmode)
4103    {
4104      enum rtx_code second_code, bypass_code;
4105
4106      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4107      if (bypass_code != NIL || second_code != NIL)
4108	return 0;
4109      code = ix86_fp_compare_code_to_integer (code);
4110    }
4111  /* The i387 supports just a limited set of condition codes.  */
4112  switch (code)
4113    {
4114    case LTU: case GTU: case LEU: case GEU:
4115      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4116	return 1;
4117      return 0;
4118    case ORDERED: case UNORDERED:
4119    case EQ: case NE:
4120      return 1;
4121    default:
4122      return 0;
4123    }
4124}
4125
4126/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */
4127
4128int
4129promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4130{
4131  switch (GET_CODE (op))
4132    {
4133    case MULT:
4134      /* Modern CPUs have the same latency for HImode and SImode multiplies,
4135         but the 386 and 486 do HImode multiplies faster.  */
4136      return ix86_tune > PROCESSOR_I486;
4137    case PLUS:
4138    case AND:
4139    case IOR:
4140    case XOR:
4141    case ASHIFT:
4142      return 1;
4143    default:
4144      return 0;
4145    }
4146}
4147
4148/* Nearly general operand, but accept any const_double, since we wish
4149   to be able to drop them into memory rather than have them get pulled
4150   into registers.  */
4151
4152int
4153cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4154{
4155  if (mode != VOIDmode && mode != GET_MODE (op))
4156    return 0;
4157  if (GET_CODE (op) == CONST_DOUBLE)
4158    return 1;
4159  return general_operand (op, mode);
4160}
4161
4162/* Match an SImode or HImode register (or, on 64-bit targets, a DImode register) for a zero_extract.  */
4163
4164int
4165ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4166{
4167  int regno;
4168  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4169      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4170    return 0;
4171
4172  if (!register_operand (op, VOIDmode))
4173    return 0;
4174
4175  /* Be careful to accept only registers having upper parts.  */
4176  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
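  /* Only hard registers 0..3 (ax, dx, cx, bx) have addressable high-byte
     parts; a pseudo (regno > LAST_VIRTUAL_REGISTER) may still end up
     allocated to one of them.  */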
4177  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4178}
4179
4180/* Return 1 if this is a valid binary floating-point operation.
4181   OP is the expression matched, and MODE is its mode.  */
4182
4183int
4184binary_fp_operator (rtx op, enum machine_mode mode)
4185{
4186  if (mode != VOIDmode && mode != GET_MODE (op))
4187    return 0;
4188
4189  switch (GET_CODE (op))
4190    {
4191    case PLUS:
4192    case MINUS:
4193    case MULT:
4194    case DIV:
4195      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4196
4197    default:
4198      return 0;
4199    }
4200}
4201
4202int
4203mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4204{
4205  return GET_CODE (op) == MULT;
4206}
4207
4208int
4209div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4210{
4211  return GET_CODE (op) == DIV;
4212}
4213
4214int
4215arith_or_logical_operator (rtx op, enum machine_mode mode)
4216{
4217  return ((mode == VOIDmode || GET_MODE (op) == mode)
4218          && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4219              || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4220}
4221
4222/* Returns 1 if OP is a memory operand with a displacement.  */
4223
4224int
4225memory_displacement_operand (rtx op, enum machine_mode mode)
4226{
4227  struct ix86_address parts;
4228
4229  if (! memory_operand (op, mode))
4230    return 0;
4231
4232  if (! ix86_decompose_address (XEXP (op, 0), &parts))
4233    abort ();
4234
4235  return parts.disp != NULL_RTX;
4236}
4237
4238/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4239   re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4240
4241   ??? It seems likely that this will only work because cmpsi is an
4242   expander, and no actual insns use this.  */
4243
4244int
4245cmpsi_operand (rtx op, enum machine_mode mode)
4246{
4247  if (nonimmediate_operand (op, mode))
4248    return 1;
4249
4250  if (GET_CODE (op) == AND
4251      && GET_MODE (op) == SImode
4252      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4253      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4254      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4255      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4256      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4257      && GET_CODE (XEXP (op, 1)) == CONST_INT)
4258    return 1;
4259
4260  return 0;
4261}
4262
4263/* Returns 1 if OP is a memory operand that cannot be encoded with just a
4264   modRM byte, i.e. whose address needs SIB and/or displacement bytes.  */
4265
4266int
4267long_memory_operand (rtx op, enum machine_mode mode)
4268{
4269  if (! memory_operand (op, mode))
4270    return 0;
4271
4272  return memory_address_length (op) != 0;
4273}
4274
4275/* Return nonzero if the rtx is known to be aligned.  */
4276
4277int
4278aligned_operand (rtx op, enum machine_mode mode)
4279{
4280  struct ix86_address parts;
4281
4282  if (!general_operand (op, mode))
4283    return 0;
4284
4285  /* Registers and immediate operands are always "aligned".  */
4286  if (GET_CODE (op) != MEM)
4287    return 1;
4288
4289  /* Don't even try to do any aligned optimizations with volatiles.  */
4290  if (MEM_VOLATILE_P (op))
4291    return 0;
4292
4293  op = XEXP (op, 0);
4294
4295  /* Pushes and pops are only valid on the stack pointer.  */
4296  if (GET_CODE (op) == PRE_DEC
4297      || GET_CODE (op) == POST_INC)
4298    return 1;
4299
4300  /* Decode the address.  */
4301  if (! ix86_decompose_address (op, &parts))
4302    abort ();
4303
4304  /* Look for some component that isn't known to be aligned.  */
4305  if (parts.index)
4306    {
4307      if (parts.scale < 4
4308	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4309	return 0;
4310    }
4311  if (parts.base)
4312    {
4313      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4314	return 0;
4315    }
4316  if (parts.disp)
4317    {
4318      if (GET_CODE (parts.disp) != CONST_INT
4319	  || (INTVAL (parts.disp) & 3) != 0)
4320	return 0;
4321    }
4322
4323  /* Didn't find one -- this must be an aligned address.  */
4324  return 1;
4325}
4326
4327/* Initialize the table of extra 80387 mathematical constants.  */
4328
4329static void
4330init_ext_80387_constants (void)
4331{
4332  static const char * cst[5] =
4333  {
4334    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
4335    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
4336    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
4337    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
4338    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
4339  };
4340  int i;
4341
4342  for (i = 0; i < 5; i++)
4343    {
4344      real_from_string (&ext_80387_constants_table[i], cst[i]);
4345      /* Ensure each constant is rounded to XFmode precision.  */
4346      real_convert (&ext_80387_constants_table[i],
4347		    XFmode, &ext_80387_constants_table[i]);
4348    }
4349
4350  ext_80387_constants_init = 1;
4351}
4352
4353/* If X is a constant that can be loaded with a special 80387 instruction,
4354   return an index identifying the instruction (1 for fldz, 2 for fld1,
   3..7 for the constants in ext_80387_constants_table); return 0 for other
   constants, and -1 if X is not a floating-point CONST_DOUBLE.  */
4355
4356int
4357standard_80387_constant_p (rtx x)
4358{
4359  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4360    return -1;
4361
4362  if (x == CONST0_RTX (GET_MODE (x)))
4363    return 1;
4364  if (x == CONST1_RTX (GET_MODE (x)))
4365    return 2;
4366
4367  /* For XFmode constants, try to find a special 80387 instruction on
4368     those CPUs that benefit from them.  */
4369  if (GET_MODE (x) == XFmode
4370      && x86_ext_80387_constants & TUNEMASK)
4371    {
4372      REAL_VALUE_TYPE r;
4373      int i;
4374
4375      if (! ext_80387_constants_init)
4376	init_ext_80387_constants ();
4377
4378      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4379      for (i = 0; i < 5; i++)
4380        if (real_identical (&r, &ext_80387_constants_table[i]))
4381	  return i + 3;
4382    }
4383
4384  return 0;
4385}
4386
4387/* Return the opcode of the special instruction to be used to load
4388   the constant X.  */
4389
4390const char *
4391standard_80387_constant_opcode (rtx x)
4392{
4393  switch (standard_80387_constant_p (x))
4394    {
4395    case 1:
4396      return "fldz";
4397    case 2:
4398      return "fld1";
4399    case 3:
4400      return "fldlg2";
4401    case 4:
4402      return "fldln2";
4403    case 5:
4404      return "fldl2e";
4405    case 6:
4406      return "fldl2t";
4407    case 7:
4408      return "fldpi";
4409    }
4410  abort ();
4411}
4412
4413/* Return the CONST_DOUBLE representing the 80387 constant that is
4414   loaded by the specified special instruction.  The argument IDX
4415   matches the return value from standard_80387_constant_p.  */
4416
4417rtx
4418standard_80387_constant_rtx (int idx)
4419{
4420  int i;
4421
4422  if (! ext_80387_constants_init)
4423    init_ext_80387_constants ();
4424
4425  switch (idx)
4426    {
4427    case 3:
4428    case 4:
4429    case 5:
4430    case 6:
4431    case 7:
4432      i = idx - 3;
4433      break;
4434
4435    default:
4436      abort ();
4437    }
4438
4439  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4440				       XFmode);
4441}
4442
4443/* Return 1 if X is an FP constant we can load into an SSE register
4444   without using memory.  */
4445int
4446standard_sse_constant_p (rtx x)
4447{
4448  if (x == const0_rtx)
4449    return 1;
4450  return (x == CONST0_RTX (GET_MODE (x)));
4451}
4452
4453/* Returns 1 if OP contains a symbol reference.  */
4454
4455int
4456symbolic_reference_mentioned_p (rtx op)
4457{
4458  const char *fmt;
4459  int i;
4460
4461  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4462    return 1;
4463
4464  fmt = GET_RTX_FORMAT (GET_CODE (op));
4465  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4466    {
4467      if (fmt[i] == 'E')
4468	{
4469	  int j;
4470
4471	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4472	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4473	      return 1;
4474	}
4475
4476      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4477	return 1;
4478    }
4479
4480  return 0;
4481}
4482
4483/* Return 1 if it is appropriate to emit `ret' instructions in the
4484   body of a function.  Do this only if the epilogue is simple, needing a
4485   couple of insns.  Prior to reloading, we can't tell how many registers
4486   must be saved, so return 0 then.  Return 0 if there is no frame
4487   marker to de-allocate.
4488
4489   If NON_SAVING_SETJMP is defined and true, then it is not possible
4490   for the epilogue to be simple, so return 0.  This is a special case
4491   since NON_SAVING_SETJMP will not cause regs_ever_live to change
4492   until final, but jump_optimize may need to know sooner if a
4493   `return' is OK.  */
4494
4495int
4496ix86_can_use_return_insn_p (void)
4497{
4498  struct ix86_frame frame;
4499
4500#ifdef NON_SAVING_SETJMP
4501  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4502    return 0;
4503#endif
4504
4505  if (! reload_completed || frame_pointer_needed)
4506    return 0;
4507
4508  /* Don't allow popping more than 32k bytes of arguments, since that is
4509     all we can do with one instruction.  */
4510  if (current_function_pops_args
4511      && current_function_args_size >= 32768)
4512    return 0;
4513
4514  ix86_compute_frame_layout (&frame);
4515  return frame.to_allocate == 0 && frame.nregs == 0;
4516}
4517
4518/* Return 1 if VALUE can be stored in the sign-extended immediate field.  */
4519int
4520x86_64_sign_extended_value (rtx value)
4521{
4522  switch (GET_CODE (value))
4523    {
4524      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4525         to be at least 32, and thus all acceptable constants are
4526	 represented as CONST_INT.  */
4527      case CONST_INT:
4528	if (HOST_BITS_PER_WIDE_INT == 32)
4529	  return 1;
4530	else
4531	  {
4532	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4533	    return trunc_int_for_mode (val, SImode) == val;
4534	  }
4535	break;
4536
4537      /* For certain code models, the symbolic references are known to fit;
4538	 in the CM_SMALL_PIC model we know they fit if local to the shared
4539	 library.  Don't count TLS SYMBOL_REFs here, since they should fit
4540	 only inside of an UNSPEC handled below.  */
4541      case SYMBOL_REF:
4542	/* TLS symbols are not constant.  */
4543	if (tls_symbolic_operand (value, Pmode))
4544	  return false;
4545	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4546
4547      /* For certain code models, the code is near as well.  */
4548      case LABEL_REF:
4549	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4550		|| ix86_cmodel == CM_KERNEL);
4551
4552      /* We may also accept offset memory references in certain special
4553         cases.  */
4554      case CONST:
4555	if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4556	  switch (XINT (XEXP (value, 0), 1))
4557	    {
4558	    case UNSPEC_GOTPCREL:
4559	    case UNSPEC_DTPOFF:
4560	    case UNSPEC_GOTNTPOFF:
4561	    case UNSPEC_NTPOFF:
4562	      return 1;
4563	    default:
4564	      break;
4565	    }
4566	if (GET_CODE (XEXP (value, 0)) == PLUS)
4567	  {
4568	    rtx op1 = XEXP (XEXP (value, 0), 0);
4569	    rtx op2 = XEXP (XEXP (value, 0), 1);
4570	    HOST_WIDE_INT offset;
4571
4572	    if (ix86_cmodel == CM_LARGE)
4573	      return 0;
4574	    if (GET_CODE (op2) != CONST_INT)
4575	      return 0;
4576	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
4577	    switch (GET_CODE (op1))
4578	      {
4579		case SYMBOL_REF:
4580		  /* For CM_SMALL, assume that the last object is 16MB below
4581		     the end of the 31-bit address boundary.  We may also
4582		     accept pretty large negative constants, knowing that all
4583		     objects are in the positive half of the address space.  */
4584		  if (ix86_cmodel == CM_SMALL
4585		      && offset < 16*1024*1024
4586		      && trunc_int_for_mode (offset, SImode) == offset)
4587		    return 1;
4588		  /* For CM_KERNEL we know that all objects reside in the
4589		     negative half of the 32-bit address space.  We must not
4590		     accept negative offsets, since they may fall just outside
4591		     that range, but we may accept pretty large positive ones.  */
4592		  if (ix86_cmodel == CM_KERNEL
4593		      && offset > 0
4594		      && trunc_int_for_mode (offset, SImode) == offset)
4595		    return 1;
4596		  break;
4597		case LABEL_REF:
4598		  /* These conditions are similar to SYMBOL_REF ones, just the
4599		     constraints for code models differ.  */
4600		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4601		      && offset < 16*1024*1024
4602		      && trunc_int_for_mode (offset, SImode) == offset)
4603		    return 1;
4604		  if (ix86_cmodel == CM_KERNEL
4605		      && offset > 0
4606		      && trunc_int_for_mode (offset, SImode) == offset)
4607		    return 1;
4608		  break;
4609		case UNSPEC:
4610		  switch (XINT (op1, 1))
4611		    {
4612		    case UNSPEC_DTPOFF:
4613		    case UNSPEC_NTPOFF:
4614		      if (offset > 0
4615			  && trunc_int_for_mode (offset, SImode) == offset)
4616			return 1;
4617		    }
4618		  break;
4619		default:
4620		  return 0;
4621	      }
4622	  }
4623	return 0;
4624      default:
4625	return 0;
4626    }
4627}
4628
4629/* Return 1 if VALUE can be stored in the zero-extended immediate field.  */
4630int
4631x86_64_zero_extended_value (rtx value)
4632{
4633  switch (GET_CODE (value))
4634    {
4635      case CONST_DOUBLE:
4636	if (HOST_BITS_PER_WIDE_INT == 32)
4637	  return  (GET_MODE (value) == VOIDmode
4638		   && !CONST_DOUBLE_HIGH (value));
4639	else
4640	  return 0;
4641      case CONST_INT:
4642	if (HOST_BITS_PER_WIDE_INT == 32)
4643	  return INTVAL (value) >= 0;
4644	else
4645	  return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4646	break;
4647
4648      /* For certain code models, the symbolic references are known to fit.  */
4649      case SYMBOL_REF:
4650	/* TLS symbols are not constant.  */
4651	if (tls_symbolic_operand (value, Pmode))
4652	  return false;
4653	return ix86_cmodel == CM_SMALL;
4654
4655      /* For certain code models, the code is near as well.  */
4656      case LABEL_REF:
4657	return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4658
4659      /* We may also accept offset memory references in certain special
4660         cases.  */
4661      case CONST:
4662	if (GET_CODE (XEXP (value, 0)) == PLUS)
4663	  {
4664	    rtx op1 = XEXP (XEXP (value, 0), 0);
4665	    rtx op2 = XEXP (XEXP (value, 0), 1);
4666
4667	    if (ix86_cmodel == CM_LARGE)
4668	      return 0;
4669	    switch (GET_CODE (op1))
4670	      {
4671		case SYMBOL_REF:
4672		    return 0;
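		  /* Note: the unconditional return above rejects all
		     SYMBOL_REF + offset forms here (apparently a deliberate
		     local change), so the checks below are unreachable.  */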
4673		  /* For the small code model we may accept pretty large
4674		     positive offsets, since one bit is available for free.
4675		     Negative offsets are limited by the size of the NULL
4676		     pointer area specified by the ABI.  */
4677		  if (ix86_cmodel == CM_SMALL
4678		      && GET_CODE (op2) == CONST_INT
4679		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4680		      && (trunc_int_for_mode (INTVAL (op2), SImode)
4681			  == INTVAL (op2)))
4682		    return 1;
4683	          /* ??? For the kernel, we may accept adjustment of
4684		     -0x10000000, since we know that it will just convert
4685		     negative address space to positive, but perhaps this
4686		     is not worthwhile.  */
4687		  break;
4688		case LABEL_REF:
4689		  /* These conditions are similar to SYMBOL_REF ones, just the
4690		     constraints for code models differ.  */
4691		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4692		      && GET_CODE (op2) == CONST_INT
4693		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4694		      && (trunc_int_for_mode (INTVAL (op2), SImode)
4695			  == INTVAL (op2)))
4696		    return 1;
4697		  break;
4698		default:
4699		  return 0;
4700	      }
4701	  }
4702	return 0;
4703      default:
4704	return 0;
4705    }
4706}
4707
4708/* Value should be nonzero if functions must have frame pointers.
4709   Zero means the frame pointer need not be set up (and parms may
4710   be accessed via the stack pointer) in functions that seem suitable.  */
4711
4712int
4713ix86_frame_pointer_required (void)
4714{
4715  /* If we accessed previous frames, then the generated code expects
4716     to be able to access the saved ebp value in our frame.  */
4717  if (cfun->machine->accesses_prev_frame)
4718    return 1;
4719
4720  /* Several x86 OSes need a frame pointer for other reasons,
4721     usually pertaining to setjmp.  */
4722  if (SUBTARGET_FRAME_POINTER_REQUIRED)
4723    return 1;
4724
4725  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4726     the frame pointer by default.  Turn it back on now if we've not
4727     got a leaf function.  */
4728  if (TARGET_OMIT_LEAF_FRAME_POINTER
4729      && (!current_function_is_leaf))
4730    return 1;
4731
4732  if (current_function_profile)
4733    return 1;
4734
4735  return 0;
4736}
4737
4738/* Record that the current function accesses previous call frames.  */
4739
4740void
4741ix86_setup_frame_addresses (void)
4742{
4743  cfun->machine->accesses_prev_frame = 1;
4744}
4745
4746#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4747# define USE_HIDDEN_LINKONCE 1
4748#else
4749# define USE_HIDDEN_LINKONCE 0
4750#endif
4751
4752static int pic_labels_used;
4753
4754/* Fills in the label name that should be used for a pc thunk for
4755   the given register.  */
4756
4757static void
4758get_pc_thunk_name (char name[32], unsigned int regno)
4759{
4760  if (USE_HIDDEN_LINKONCE)
4761    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4762  else
4763    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4764}
4765
4766
4767/* For each PIC register used, emit a -fpic thunk that loads the register
4768   with the return address of the caller and then returns.  */
4769
4770void
4771ix86_file_end (void)
4772{
4773  rtx xops[2];
4774  int regno;
4775
4776  for (regno = 0; regno < 8; ++regno)
4777    {
4778      char name[32];
4779
4780      if (! ((pic_labels_used >> regno) & 1))
4781	continue;
4782
4783      get_pc_thunk_name (name, regno);
4784
4785      if (USE_HIDDEN_LINKONCE)
4786	{
4787	  tree decl;
4788
4789	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
4790			     error_mark_node);
4791	  TREE_PUBLIC (decl) = 1;
4792	  TREE_STATIC (decl) = 1;
4793	  DECL_ONE_ONLY (decl) = 1;
4794
4795	  (*targetm.asm_out.unique_section) (decl, 0);
4796	  named_section (decl, NULL, 0);
4797
4798	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
4799	  fputs ("\t.hidden\t", asm_out_file);
4800	  assemble_name (asm_out_file, name);
4801	  fputc ('\n', asm_out_file);
4802	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4803	}
4804      else
4805	{
4806	  text_section ();
4807	  ASM_OUTPUT_LABEL (asm_out_file, name);
4808	}
4809
4810      xops[0] = gen_rtx_REG (SImode, regno);
4811      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4812      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4813      output_asm_insn ("ret", xops);
4814    }
4815
4816  if (NEED_INDICATE_EXEC_STACK)
4817    file_end_indicate_exec_stack ();
4818}
4819
4820/* Emit code for the SET_GOT patterns.  */
4821
4822const char *
4823output_set_got (rtx dest)
4824{
4825  rtx xops[3];
4826
4827  xops[0] = dest;
4828  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4829
4830  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4831    {
4832      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4833
4834      if (!flag_pic)
4835	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4836      else
4837	output_asm_insn ("call\t%a2", xops);
4838
4839#if TARGET_MACHO
4840      /* Output the "canonical" label name ("Lxx$pb") here too.  This
4841         is what will be referred to by the Mach-O PIC subsystem.  */
4842      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4843#endif
4844      (*targetm.asm_out.internal_label) (asm_out_file, "L",
4845				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4846
4847      if (flag_pic)
4848	output_asm_insn ("pop{l}\t%0", xops);
4849    }
4850  else
4851    {
4852      char name[32];
4853      get_pc_thunk_name (name, REGNO (dest));
4854      pic_labels_used |= 1 << REGNO (dest);
4855
4856      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4857      xops[2] = gen_rtx_MEM (QImode, xops[2]);
4858      output_asm_insn ("call\t%X2", xops);
4859    }
4860
4861  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4862    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4863  else if (!TARGET_MACHO)
4864    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4865
4866  return "";
4867}
4868
4869/* Generate a "push" pattern for input ARG.  */
4870
4871static rtx
4872gen_push (rtx arg)
4873{
4874  return gen_rtx_SET (VOIDmode,
4875		      gen_rtx_MEM (Pmode,
4876				   gen_rtx_PRE_DEC (Pmode,
4877						    stack_pointer_rtx)),
4878		      arg);
4879}
4880
4881/* Return the number of an unused call-clobbered register if one is
4882   available for the entire function, or INVALID_REGNUM otherwise.  */
4883
4884static unsigned int
4885ix86_select_alt_pic_regnum (void)
4886{
4887  if (current_function_is_leaf && !current_function_profile)
4888    {
4889      int i;
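      /* Scan the call-clobbered integer registers, hard regs 0..2
	 (ax, dx, cx), for one that is never live.  */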
4890      for (i = 2; i >= 0; --i)
4891        if (!regs_ever_live[i])
4892	  return i;
4893    }
4894
4895  return INVALID_REGNUM;
4896}
4897
4898/* Return 1 if we need to save REGNO.  */
4899static int
4900ix86_save_reg (unsigned int regno, int maybe_eh_return)
4901{
4902  if (pic_offset_table_rtx
4903      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4904      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4905	  || current_function_profile
4906	  || current_function_calls_eh_return
4907	  || current_function_uses_const_pool))
4908    {
4909      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4910	return 0;
4911      return 1;
4912    }
4913
4914  if (current_function_calls_eh_return && maybe_eh_return)
4915    {
4916      unsigned i;
4917      for (i = 0; ; i++)
4918	{
4919	  unsigned test = EH_RETURN_DATA_REGNO (i);
4920	  if (test == INVALID_REGNUM)
4921	    break;
4922	  if (test == regno)
4923	    return 1;
4924	}
4925    }
4926
4927  return (regs_ever_live[regno]
4928	  && !call_used_regs[regno]
4929	  && !fixed_regs[regno]
4930	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4931}
4932
4933/* Return number of registers to be saved on the stack.  */
4934
4935static int
4936ix86_nsaved_regs (void)
4937{
4938  int nregs = 0;
4939  int regno;
4940
4941  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4942    if (ix86_save_reg (regno, true))
4943      nregs++;
4944  return nregs;
4945}
4946
4947/* Return the offset between two registers, one to be eliminated, and the other
4948   its replacement, at the start of a routine.  */
4949
4950HOST_WIDE_INT
4951ix86_initial_elimination_offset (int from, int to)
4952{
4953  struct ix86_frame frame;
4954  ix86_compute_frame_layout (&frame);
4955
4956  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4957    return frame.hard_frame_pointer_offset;
4958  else if (from == FRAME_POINTER_REGNUM
4959	   && to == HARD_FRAME_POINTER_REGNUM)
4960    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4961  else
4962    {
4963      if (to != STACK_POINTER_REGNUM)
4964	abort ();
4965      else if (from == ARG_POINTER_REGNUM)
4966	return frame.stack_pointer_offset;
4967      else if (from != FRAME_POINTER_REGNUM)
4968	abort ();
4969      else
4970	return frame.stack_pointer_offset - frame.frame_pointer_offset;
4971    }
4972}
4973
4974/* Fill structure ix86_frame about frame of currently computed function.  */
4975
4976static void
4977ix86_compute_frame_layout (struct ix86_frame *frame)
4978{
4979  HOST_WIDE_INT total_size;
4980  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4981  HOST_WIDE_INT offset;
4982  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4983  HOST_WIDE_INT size = get_frame_size ();
4984
4985  frame->nregs = ix86_nsaved_regs ();
4986  total_size = size;
4987
  /* During the reload iteration the number of registers saved can change.
     Recompute the value as needed.  Do not recompute when the number of
     registers didn't change, as reload makes multiple calls to this function
     and does not expect the decision to change within a single iteration.  */
4992  if (!optimize_size
4993      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4994    {
4995      int count = frame->nregs;
4996
4997      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster, as modern hardware
         can execute the moves in parallel but can't do that for push/pop.

	 Be careful about choosing which prologue to emit:  when the function
	 takes many instructions to execute, we may use the slow version, as
	 well as when the function is known to be outside a hot spot (this is
	 known with feedback only).  Weight the size of the function by the
	 number of registers to save, as it is cheap to use one or two push
	 instructions but very slow to use many of them.  */
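      /* Each saved register beyond the first adds FAST_PROLOGUE_INSN_COUNT
	 to the instruction-count threshold passed to expensive_function_p
	 below, implementing the weighting described above.  */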
5008      if (count)
5009	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5010      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5011	  || (flag_branch_probabilities
5012	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5013        cfun->machine->use_fast_prologue_epilogue = false;
5014      else
5015        cfun->machine->use_fast_prologue_epilogue
5016	   = !expensive_function_p (count);
5017    }
5018  if (TARGET_PROLOGUE_USING_MOVE
5019      && cfun->machine->use_fast_prologue_epilogue)
5020    frame->save_regs_using_mov = true;
5021  else
5022    frame->save_regs_using_mov = false;
5023
5024
5025  /* Skip return address and saved base pointer.  */
5026  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5027
5028  frame->hard_frame_pointer_offset = offset;
5029
  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using these
     features, and they may break easily.  */
5033
5034  if (size && !stack_alignment_needed)
5035    abort ();
5036  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5037    abort ();
5038  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5039    abort ();
5040  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5041    abort ();
5042
5043  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5044    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5045
5046  /* Register save area */
5047  offset += frame->nregs * UNITS_PER_WORD;
5048
5049  /* Va-arg area */
5050  if (ix86_save_varrargs_registers)
5051    {
5052      offset += X86_64_VARARGS_SIZE;
5053      frame->va_arg_size = X86_64_VARARGS_SIZE;
5054    }
5055  else
5056    frame->va_arg_size = 0;
5057
  /* Align the start of the frame for the local variables.  */
5059  frame->padding1 = ((offset + stack_alignment_needed - 1)
5060		     & -stack_alignment_needed) - offset;
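  /* E.g., with offset == 20 and stack_alignment_needed == 16 this gives
     ((20 + 15) & -16) - 20 == 32 - 20 == 12 bytes of padding.  */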
5061
5062  offset += frame->padding1;
5063
5064  /* Frame pointer points here.  */
5065  frame->frame_pointer_offset = offset;
5066
5067  offset += size;
5068
  /* Add the outgoing arguments area.  It can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when the function calls alloca.  The
     alloca expander assumes that the last current_function_outgoing_args_size
     bytes of the stack frame are unused.  */
5074  if (ACCUMULATE_OUTGOING_ARGS
5075      && (!current_function_is_leaf || current_function_calls_alloca))
5076    {
5077      offset += current_function_outgoing_args_size;
5078      frame->outgoing_arguments_size = current_function_outgoing_args_size;
5079    }
5080  else
5081    frame->outgoing_arguments_size = 0;
5082
5083  /* Align stack boundary.  Only needed if we're calling another function
5084     or using alloca.  */
5085  if (!current_function_is_leaf || current_function_calls_alloca)
5086    frame->padding2 = ((offset + preferred_alignment - 1)
5087		       & -preferred_alignment) - offset;
5088  else
5089    frame->padding2 = 0;
5090
5091  offset += frame->padding2;
5092
5093  /* We've reached end of stack frame.  */
5094  frame->stack_pointer_offset = offset;
5095
  /* The size the prologue needs to allocate.  */
5097  frame->to_allocate =
5098    (size + frame->padding1 + frame->padding2
5099     + frame->outgoing_arguments_size + frame->va_arg_size);
5100
5101  if ((!frame->to_allocate && frame->nregs <= 1)
5102      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5103    frame->save_regs_using_mov = false;
5104
5105  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5106      && current_function_is_leaf)
5107    {
5108      frame->red_zone_size = frame->to_allocate;
5109      if (frame->save_regs_using_mov)
5110	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5111      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5112	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5113    }
5114  else
5115    frame->red_zone_size = 0;
5116  frame->to_allocate -= frame->red_zone_size;
5117  frame->stack_pointer_offset -= frame->red_zone_size;
5118#if 0
5119  fprintf (stderr, "nregs: %i\n", frame->nregs);
5120  fprintf (stderr, "size: %i\n", size);
5121  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5122  fprintf (stderr, "padding1: %i\n", frame->padding1);
5123  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5124  fprintf (stderr, "padding2: %i\n", frame->padding2);
5125  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5126  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5127  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5128  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5129	   frame->hard_frame_pointer_offset);
5130  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5131#endif
5132}
5133
5134/* Emit code to save registers in the prologue.  */
5135
5136static void
5137ix86_emit_save_regs (void)
5138{
5139  int regno;
5140  rtx insn;
5141
5142  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5143    if (ix86_save_reg (regno, true))
5144      {
5145	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5146	RTX_FRAME_RELATED_P (insn) = 1;
5147      }
5148}
5149
/* Emit code to save registers using MOV insns.  The first register
   is stored at POINTER + OFFSET.  */
5152static void
5153ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5154{
5155  int regno;
5156  rtx insn;
5157
5158  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5159    if (ix86_save_reg (regno, true))
5160      {
5161	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5162					       Pmode, offset),
5163			       gen_rtx_REG (Pmode, regno));
5164	RTX_FRAME_RELATED_P (insn) = 1;
5165	offset += UNITS_PER_WORD;
5166      }
5167}
5168
/* Expand a prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if the instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive otherwise.  */
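
/* For example, ix86_expand_prologue below allocates the frame with
   pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   GEN_INT (-allocate), -1), the negative STYLE marking the insns as
   frame related.  */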
5174
5175static void
5176pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5177{
5178  rtx insn;
5179
5180  if (! TARGET_64BIT)
5181    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5182  else if (x86_64_immediate_operand (offset, DImode))
5183    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5184  else
5185    {
5186      rtx r11;
      /* r11 is used by indirect sibcall return as well: it is set before
	 the epilogue and used after it.  At the moment an indirect sibcall
	 shouldn't be used together with huge frame sizes in one
	 function because of the frame_size check in sibcall.c.  */
5191      if (style == 0)
5192	abort ();
5193      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5194      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5195      if (style < 0)
5196	RTX_FRAME_RELATED_P (insn) = 1;
5197      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5198							       offset));
5199    }
5200  if (style < 0)
5201    RTX_FRAME_RELATED_P (insn) = 1;
5202}
5203
5204/* Expand the prologue into a bunch of separate insns.  */
5205
5206void
5207ix86_expand_prologue (void)
5208{
5209  rtx insn;
5210  bool pic_reg_used;
5211  struct ix86_frame frame;
5212  HOST_WIDE_INT allocate;
5213
5214  ix86_compute_frame_layout (&frame);
5215
5216  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
5217     slower on all targets.  Also sdb doesn't like it.  */
5218
5219  if (frame_pointer_needed)
5220    {
5221      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5222      RTX_FRAME_RELATED_P (insn) = 1;
5223
5224      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5225      RTX_FRAME_RELATED_P (insn) = 1;
5226    }
5227
5228  allocate = frame.to_allocate;
5229
5230  if (!frame.save_regs_using_mov)
5231    ix86_emit_save_regs ();
5232  else
5233    allocate += frame.nregs * UNITS_PER_WORD;
5234
  /* When using the red zone we may start saving registers before allocating
     the stack frame, saving one cycle of the prologue.  */
5237  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5238    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5239				   : stack_pointer_rtx,
5240				   -frame.nregs * UNITS_PER_WORD);
5241
5242  if (allocate == 0)
5243    ;
5244  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5245    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5246			       GEN_INT (-allocate), -1);
5247  else
5248    {
5249      /* Only valid for Win32.  */
5250      rtx eax = gen_rtx_REG (SImode, 0);
5251      bool eax_live = ix86_eax_live_at_start_p ();
5252
5253      if (TARGET_64BIT)
5254        abort ();
5255
5256      if (eax_live)
5257	{
5258	  emit_insn (gen_push (eax));
5259	  allocate -= 4;
5260	}
5261
5262      insn = emit_move_insn (eax, GEN_INT (allocate));
5263      RTX_FRAME_RELATED_P (insn) = 1;
5264
5265      insn = emit_insn (gen_allocate_stack_worker (eax));
5266      RTX_FRAME_RELATED_P (insn) = 1;
5267
5268      if (eax_live)
5269	{
5270	  rtx t = plus_constant (stack_pointer_rtx, allocate);
5271	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5272	}
5273    }
5274
5275  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5276    {
5277      if (!frame_pointer_needed || !frame.to_allocate)
5278        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5279      else
5280        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5281				       -frame.nregs * UNITS_PER_WORD);
5282    }
5283
5284  pic_reg_used = false;
5285  if (pic_offset_table_rtx
5286      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5287	  || current_function_profile))
5288    {
5289      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5290
5291      if (alt_pic_reg_used != INVALID_REGNUM)
5292	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5293
5294      pic_reg_used = true;
5295    }
5296
5297  if (pic_reg_used)
5298    {
5299      insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5300
5301      /* Even with accurate pre-reload life analysis, we can wind up
5302	 deleting all references to the pic register after reload.
5303	 Consider if cross-jumping unifies two sides of a branch
5304	 controlled by a comparison vs the only read from a global.
5305	 In which case, allow the set_got to be deleted, though we're
5306	 too late to do anything about the ebx save in the prologue.  */
5307      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5308    }
5309
  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the GOT load isn't deleted.  */
5312  if (current_function_profile)
5313    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5314}
5315
5316/* Emit code to restore saved registers using MOV insns.  First register
5317   is restored from POINTER + OFFSET.  */
5318static void
5319ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5320				  int maybe_eh_return)
5321{
5322  int regno;
5323  rtx base_address = gen_rtx_MEM (Pmode, pointer);
5324
5325  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5326    if (ix86_save_reg (regno, maybe_eh_return))
5327      {
	/* Ensure that adjust_address won't be forced to produce a pointer
	   out of the range allowed by the x86-64 instruction set.  */
5330	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5331	  {
5332	    rtx r11;
5333
5334	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5335	    emit_move_insn (r11, GEN_INT (offset));
5336	    emit_insn (gen_adddi3 (r11, r11, pointer));
5337	    base_address = gen_rtx_MEM (Pmode, r11);
5338	    offset = 0;
5339	  }
5340	emit_move_insn (gen_rtx_REG (Pmode, regno),
5341			adjust_address (base_address, Pmode, offset));
5342	offset += UNITS_PER_WORD;
5343      }
5344}
5345
5346/* Restore function stack, frame, and registers.  */
5347
5348void
5349ix86_expand_epilogue (int style)
5350{
5351  int regno;
5352  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5353  struct ix86_frame frame;
5354  HOST_WIDE_INT offset;
5355
5356  ix86_compute_frame_layout (&frame);
5357
5358  /* Calculate start of saved registers relative to ebp.  Special care
5359     must be taken for the normal return case of a function using
5360     eh_return: the eax and edx registers are marked as saved, but not
5361     restored along this path.  */
5362  offset = frame.nregs;
5363  if (current_function_calls_eh_return && style != 2)
5364    offset -= 2;
5365  offset *= -UNITS_PER_WORD;
5366
  /* If we're only restoring one register and sp is not valid, then
     using a move instruction to restore the register is less work
     than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or discrete
     equivalent), so it is profitable in some other cases as well, especially
     when there are no registers to restore.  We also use this code when
     TARGET_USE_LEAVE and there is exactly one register to pop.  This
     heuristic may need some tuning in the future.  */
5377  if ((!sp_valid && frame.nregs <= 1)
5378      || (TARGET_EPILOGUE_USING_MOVE
5379	  && cfun->machine->use_fast_prologue_epilogue
5380	  && (frame.nregs > 1 || frame.to_allocate))
5381      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5382      || (frame_pointer_needed && TARGET_USE_LEAVE
5383	  && cfun->machine->use_fast_prologue_epilogue
5384	  && frame.nregs == 1)
5385      || current_function_calls_eh_return)
5386    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  The only exception is esp pointing directly
	 to the end of the block of saved registers, where we may simplify
	 the addressing mode.  */
5392
5393      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5394	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5395					  frame.to_allocate, style == 2);
5396      else
5397	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5398					  offset, style == 2);
5399
5400      /* eh_return epilogues need %ecx added to the stack pointer.  */
5401      if (style == 2)
5402	{
5403	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5404
5405	  if (frame_pointer_needed)
5406	    {
5407	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5408	      tmp = plus_constant (tmp, UNITS_PER_WORD);
5409	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5410
5411	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5412	      emit_move_insn (hard_frame_pointer_rtx, tmp);
5413
5414	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5415					 const0_rtx, style);
5416	    }
5417	  else
5418	    {
5419	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5420	      tmp = plus_constant (tmp, (frame.to_allocate
5421                                         + frame.nregs * UNITS_PER_WORD));
5422	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5423	    }
5424	}
5425      else if (!frame_pointer_needed)
5426	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5427				   GEN_INT (frame.to_allocate
5428					    + frame.nregs * UNITS_PER_WORD),
5429				   style);
5430      /* If not an i386, mov & pop is faster than "leave".  */
5431      else if (TARGET_USE_LEAVE || optimize_size
5432	       || !cfun->machine->use_fast_prologue_epilogue)
5433	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5434      else
5435	{
5436	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5437				     hard_frame_pointer_rtx,
5438				     const0_rtx, style);
5439	  if (TARGET_64BIT)
5440	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5441	  else
5442	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5443	}
5444    }
5445  else
5446    {
5447      /* First step is to deallocate the stack frame so that we can
5448	 pop the registers.  */
5449      if (!sp_valid)
5450	{
5451	  if (!frame_pointer_needed)
5452	    abort ();
5453	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5454				     hard_frame_pointer_rtx,
5455				     GEN_INT (offset), style);
5456	}
5457      else if (frame.to_allocate)
5458	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5459				   GEN_INT (frame.to_allocate), style);
5460
5461      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5462	if (ix86_save_reg (regno, false))
5463	  {
5464	    if (TARGET_64BIT)
5465	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5466	    else
5467	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5468	  }
5469      if (frame_pointer_needed)
5470	{
	  /* LEAVE results in shorter dependency chains on CPUs that are
	     able to execute it quickly.  */
5473	  if (TARGET_USE_LEAVE)
5474	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5475	  else if (TARGET_64BIT)
5476	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5477	  else
5478	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5479	}
5480    }
5481
5482  /* Sibcall epilogues don't want a return instruction.  */
5483  if (style == 0)
5484    return;
5485
5486  if (current_function_pops_args && current_function_args_size)
5487    {
5488      rtx popc = GEN_INT (current_function_pops_args);
5489
      /* The i386 can only pop 64K bytes.  If asked to pop more, pop the
	 return address, do an explicit add, and jump indirectly to the
	 caller.  */
5493
5494      if (current_function_pops_args >= 65536)
5495	{
5496	  rtx ecx = gen_rtx_REG (SImode, 2);
5497
	  /* There is no "pascal" calling convention in the 64bit ABI.  */
5499	  if (TARGET_64BIT)
5500	    abort ();
5501
5502	  emit_insn (gen_popsi1 (ecx));
5503	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5504	  emit_jump_insn (gen_return_indirect_internal (ecx));
5505	}
5506      else
5507	emit_jump_insn (gen_return_pop_internal (popc));
5508    }
5509  else
5510    emit_jump_insn (gen_return_internal ());
5511}
5512
/* Undo the function's potential modifications to the PIC register number.  */
5514
5515static void
5516ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5517			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5518{
5519  if (pic_offset_table_rtx)
5520    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5521}
5522
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of a lea
   instruction.  */
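
/* For instance, the address
     (plus (plus (reg B) (mult (reg I) (const_int 4))) (const_int 12))
   decomposes into base B, index I, scale 4 and displacement 12 (an
   illustrative example; the other accepted shapes are handled below).  */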
5527
5528static int
5529ix86_decompose_address (rtx addr, struct ix86_address *out)
5530{
5531  rtx base = NULL_RTX;
5532  rtx index = NULL_RTX;
5533  rtx disp = NULL_RTX;
5534  HOST_WIDE_INT scale = 1;
5535  rtx scale_rtx = NULL_RTX;
5536  int retval = 1;
5537  enum ix86_address_seg seg = SEG_DEFAULT;
5538
5539  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5540    base = addr;
5541  else if (GET_CODE (addr) == PLUS)
5542    {
5543      rtx addends[4], op;
5544      int n = 0, i;
5545
5546      op = addr;
5547      do
5548	{
5549	  if (n >= 4)
5550	    return 0;
5551	  addends[n++] = XEXP (op, 1);
5552	  op = XEXP (op, 0);
5553	}
5554      while (GET_CODE (op) == PLUS);
5555      if (n >= 4)
5556	return 0;
5557      addends[n] = op;
5558
5559      for (i = n; i >= 0; --i)
5560	{
5561	  op = addends[i];
5562	  switch (GET_CODE (op))
5563	    {
5564	    case MULT:
5565	      if (index)
5566		return 0;
5567	      index = XEXP (op, 0);
5568	      scale_rtx = XEXP (op, 1);
5569	      break;
5570
5571	    case UNSPEC:
5572	      if (XINT (op, 1) == UNSPEC_TP
5573	          && TARGET_TLS_DIRECT_SEG_REFS
5574	          && seg == SEG_DEFAULT)
5575		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5576	      else
5577		return 0;
5578	      break;
5579
5580	    case REG:
5581	    case SUBREG:
5582	      if (!base)
5583		base = op;
5584	      else if (!index)
5585		index = op;
5586	      else
5587		return 0;
5588	      break;
5589
5590	    case CONST:
5591	    case CONST_INT:
5592	    case SYMBOL_REF:
5593	    case LABEL_REF:
5594	      if (disp)
5595		return 0;
5596	      disp = op;
5597	      break;
5598
5599	    default:
5600	      return 0;
5601	    }
5602	}
5603    }
5604  else if (GET_CODE (addr) == MULT)
5605    {
5606      index = XEXP (addr, 0);		/* index*scale */
5607      scale_rtx = XEXP (addr, 1);
5608    }
5609  else if (GET_CODE (addr) == ASHIFT)
5610    {
5611      rtx tmp;
5612
5613      /* We're called for lea too, which implements ashift on occasion.  */
5614      index = XEXP (addr, 0);
5615      tmp = XEXP (addr, 1);
5616      if (GET_CODE (tmp) != CONST_INT)
5617	return 0;
5618      scale = INTVAL (tmp);
5619      if ((unsigned HOST_WIDE_INT) scale > 3)
5620	return 0;
5621      scale = 1 << scale;
5622      retval = -1;
5623    }
5624  else
5625    disp = addr;			/* displacement */
5626
5627  /* Extract the integral value of scale.  */
5628  if (scale_rtx)
5629    {
5630      if (GET_CODE (scale_rtx) != CONST_INT)
5631	return 0;
5632      scale = INTVAL (scale_rtx);
5633    }
5634
  /* Allow the arg pointer and stack pointer as index if there is no scaling.  */
5636  if (base && index && scale == 1
5637      && (index == arg_pointer_rtx
5638	  || index == frame_pointer_rtx
5639	  || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5640    {
5641      rtx tmp = base;
5642      base = index;
5643      index = tmp;
5644    }
5645
5646  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
5647  if ((base == hard_frame_pointer_rtx
5648       || base == frame_pointer_rtx
5649       || base == arg_pointer_rtx) && !disp)
5650    disp = const0_rtx;
5651
  /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
     Avoid this by transforming to [%esi+0].  */
5654  if (ix86_tune == PROCESSOR_K6 && !optimize_size
5655      && base && !index && !disp
5656      && REG_P (base)
5657      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5658    disp = const0_rtx;
5659
5660  /* Special case: encode reg+reg instead of reg*2.  */
5661  if (!base && index && scale && scale == 2)
5662    base = index, scale = 1;
5663
5664  /* Special case: scaling cannot be encoded without base or displacement.  */
5665  if (!base && !disp && index && scale != 1)
5666    disp = const0_rtx;
5667
5668  out->base = base;
5669  out->index = index;
5670  out->disp = disp;
5671  out->scale = scale;
5672  out->seg = seg;
5673
5674  return retval;
5675}
5676
/* Return the cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
5682static int
5683ix86_address_cost (rtx x)
5684{
5685  struct ix86_address parts;
5686  int cost = 1;
5687
5688  if (!ix86_decompose_address (x, &parts))
5689    abort ();
5690
5691  /* More complex memory references are better.  */
5692  if (parts.disp && parts.disp != const0_rtx)
5693    cost--;
5694  if (parts.seg != SEG_DEFAULT)
5695    cost--;
5696
5697  /* Attempt to minimize number of registers in the address.  */
5698  if ((parts.base
5699       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5700      || (parts.index
5701	  && (!REG_P (parts.index)
5702	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5703    cost++;
5704
5705  if (parts.base
5706      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5707      && parts.index
5708      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5709      && parts.base != parts.index)
5710    cost++;
5711
  /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoding.  Increase the cost of such
     addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
     to split such addresses or even refuse them altogether.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last cases may be avoidable by explicitly coding the zero
     in the memory address, but I don't have an AMD K6 machine handy to check
     this theory.  */
5726
5727  if (TARGET_K6
5728      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5729	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5730	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5731    cost += 10;
5732
5733  return cost;
5734}
5735
5736/* If X is a machine specific address (i.e. a symbol or label being
5737   referenced as a displacement from the GOT implemented using an
5738   UNSPEC), then return the base term.  Otherwise return X.  */
5739
5740rtx
5741ix86_find_base_term (rtx x)
5742{
5743  rtx term;
5744
5745  if (TARGET_64BIT)
5746    {
5747      if (GET_CODE (x) != CONST)
5748	return x;
5749      term = XEXP (x, 0);
5750      if (GET_CODE (term) == PLUS
5751	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
5752	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5753	term = XEXP (term, 0);
5754      if (GET_CODE (term) != UNSPEC
5755	  || XINT (term, 1) != UNSPEC_GOTPCREL)
5756	return x;
5757
5758      term = XVECEXP (term, 0, 0);
5759
5760      if (GET_CODE (term) != SYMBOL_REF
5761	  && GET_CODE (term) != LABEL_REF)
5762	return x;
5763
5764      return term;
5765    }
5766
5767  term = ix86_delegitimize_address (x);
5768
5769  if (GET_CODE (term) != SYMBOL_REF
5770      && GET_CODE (term) != LABEL_REF)
5771    return x;
5772
5773  return term;
5774}
5775
5776/* Determine if a given RTX is a valid constant.  We already know this
5777   satisfies CONSTANT_P.  */
5778
5779bool
5780legitimate_constant_p (rtx x)
5781{
5782  rtx inner;
5783
5784  switch (GET_CODE (x))
5785    {
5786    case SYMBOL_REF:
5787      /* TLS symbols are not constant.  */
5788      if (tls_symbolic_operand (x, Pmode))
5789	return false;
5790      break;
5791
5792    case CONST:
5793      inner = XEXP (x, 0);
5794
5795      /* Offsets of TLS symbols are never valid.
5796	 Discourage CSE from creating them.  */
5797      if (GET_CODE (inner) == PLUS
5798	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5799	return false;
5800
5801      if (GET_CODE (inner) == PLUS
5802	  || GET_CODE (inner) == MINUS)
5803	{
5804	  if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5805	    return false;
5806	  inner = XEXP (inner, 0);
5807	}
5808
5809      /* Only some unspecs are valid as "constants".  */
5810      if (GET_CODE (inner) == UNSPEC)
5811	switch (XINT (inner, 1))
5812	  {
5813	  case UNSPEC_TPOFF:
5814	  case UNSPEC_NTPOFF:
5815	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5816	  case UNSPEC_DTPOFF:
5817	    return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5818	  default:
5819	    return false;
5820	  }
5821      break;
5822
5823    default:
5824      break;
5825    }
5826
5827  /* Otherwise we handle everything else in the move patterns.  */
5828  return true;
5829}
5830
5831/* Determine if it's legal to put X into the constant pool.  This
5832   is not possible for the address of thread-local symbols, which
5833   is checked above.  */
5834
5835static bool
5836ix86_cannot_force_const_mem (rtx x)
5837{
5838  return !legitimate_constant_p (x);
5839}
5840
5841/* Determine if a given RTX is a valid constant address.  */
5842
5843bool
5844constant_address_p (rtx x)
5845{
5846  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5847}
5848
5849/* Nonzero if the constant value X is a legitimate general operand
5850   when generating PIC code.  It is given that flag_pic is on and
5851   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
5852
5853bool
5854legitimate_pic_operand_p (rtx x)
5855{
5856  rtx inner;
5857
5858  switch (GET_CODE (x))
5859    {
5860    case CONST:
5861      inner = XEXP (x, 0);
5862
5863      /* Only some unspecs are valid as "constants".  */
5864      if (GET_CODE (inner) == UNSPEC)
5865	switch (XINT (inner, 1))
5866	  {
5867	  case UNSPEC_TPOFF:
5868	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5869	  default:
5870	    return false;
5871	  }
5872      /* FALLTHRU */
5873
5874    case SYMBOL_REF:
5875    case LABEL_REF:
5876      return legitimate_pic_address_disp_p (x);
5877
5878    default:
5879      return true;
5880    }
5881}
5882
5883/* Determine if a given CONST RTX is a valid memory displacement
5884   in PIC mode.  */
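
/* For instance, in 32-bit mode a displacement of the form
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is accepted for a
   local symbol, while a bare (symbol_ref "foo") is rejected.  (An
   illustrative sketch; the precise cases are spelled out below.)  */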
5885
5886int
5887legitimate_pic_address_disp_p (rtx disp)
5888{
5889  bool saw_plus;
5890
5891  /* In 64bit mode we can allow direct addresses of symbols and labels
5892     when they are not dynamic symbols.  */
5893  if (TARGET_64BIT)
5894    {
5895      /* TLS references should always be enclosed in UNSPEC.  */
5896      if (tls_symbolic_operand (disp, GET_MODE (disp)))
5897	return 0;
5898      if (GET_CODE (disp) == SYMBOL_REF
5899	  && ix86_cmodel == CM_SMALL_PIC
5900	  && SYMBOL_REF_LOCAL_P (disp))
5901	return 1;
5902      if (GET_CODE (disp) == LABEL_REF)
5903	return 1;
5904      if (GET_CODE (disp) == CONST
5905	  && GET_CODE (XEXP (disp, 0)) == PLUS)
5906	{
5907	  rtx op0 = XEXP (XEXP (disp, 0), 0);
5908	  rtx op1 = XEXP (XEXP (disp, 0), 1);
5909
5910	  /* TLS references should always be enclosed in UNSPEC.  */
5911	  if (tls_symbolic_operand (op0, GET_MODE (op0)))
5912	    return 0;
5913	  if (((GET_CODE (op0) == SYMBOL_REF
5914		&& ix86_cmodel == CM_SMALL_PIC
5915		&& SYMBOL_REF_LOCAL_P (op0))
5916	       || GET_CODE (op0) == LABEL_REF)
5917	      && GET_CODE (op1) == CONST_INT
5918	      && INTVAL (op1) < 16*1024*1024
5919	      && INTVAL (op1) >= -16*1024*1024)
5920	    return 1;
5921	}
5922    }
5923  if (GET_CODE (disp) != CONST)
5924    return 0;
5925  disp = XEXP (disp, 0);
5926
5927  if (TARGET_64BIT)
5928    {
      /* It is unsafe to allow PLUS expressions; they would exceed the
         limited distance allowed for GOT references.  We should not need
         these anyway.  */
5931      if (GET_CODE (disp) != UNSPEC
5932	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
5933	return 0;
5934
5935      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5936	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5937	return 0;
5938      return 1;
5939    }
5940
5941  saw_plus = false;
5942  if (GET_CODE (disp) == PLUS)
5943    {
5944      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5945	return 0;
5946      disp = XEXP (disp, 0);
5947      saw_plus = true;
5948    }
5949
5950  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
5951  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5952    {
5953      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5954          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5955        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5956          {
5957            const char *sym_name = XSTR (XEXP (disp, 1), 0);
5958            if (! strcmp (sym_name, "<pic base>"))
5959              return 1;
5960          }
5961    }
5962
5963  if (GET_CODE (disp) != UNSPEC)
5964    return 0;
5965
5966  switch (XINT (disp, 1))
5967    {
5968    case UNSPEC_GOT:
5969      if (saw_plus)
5970	return false;
5971      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5972    case UNSPEC_GOTOFF:
5973      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5974	  || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5975        return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5976      return false;
5977    case UNSPEC_GOTTPOFF:
5978    case UNSPEC_GOTNTPOFF:
5979    case UNSPEC_INDNTPOFF:
5980      if (saw_plus)
5981	return false;
5982      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5983    case UNSPEC_NTPOFF:
5984      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5985    case UNSPEC_DTPOFF:
5986      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5987    }
5988
5989  return 0;
5990}
5991
5992/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5993   memory address for an instruction.  The MODE argument is the machine mode
5994   for the MEM expression that wants to use this address.
5995
5996   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
5997   convert common non-canonical forms to canonical form so that they will
5998   be recognized.  */
5999
6000int
6001legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6002{
6003  struct ix86_address parts;
6004  rtx base, index, disp;
6005  HOST_WIDE_INT scale;
6006  const char *reason = NULL;
6007  rtx reason_rtx = NULL_RTX;
6008
6009  if (TARGET_DEBUG_ADDR)
6010    {
6011      fprintf (stderr,
6012	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6013	       GET_MODE_NAME (mode), strict);
6014      debug_rtx (addr);
6015    }
6016
6017  if (ix86_decompose_address (addr, &parts) <= 0)
6018    {
6019      reason = "decomposition failed";
6020      goto report_error;
6021    }
6022
6023  base = parts.base;
6024  index = parts.index;
6025  disp = parts.disp;
6026  scale = parts.scale;
6027
  /* Validate the base register.

     Don't allow SUBREGs here; they can lead to spill failures when the base
     is one word out of a two-word structure, which is represented internally
     as a DImode int.  */
6033
6034  if (base)
6035    {
6036      reason_rtx = base;
6037
6038      if (GET_CODE (base) != REG)
6039	{
6040	  reason = "base is not a register";
6041	  goto report_error;
6042	}
6043
6044      if (GET_MODE (base) != Pmode)
6045	{
6046	  reason = "base is not in Pmode";
6047	  goto report_error;
6048	}
6049
6050      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6051	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6052	{
6053	  reason = "base is not valid";
6054	  goto report_error;
6055	}
6056    }
6057
  /* Validate the index register.

     Don't allow SUBREGs here; they can lead to spill failures when the index
     is one word out of a two-word structure, which is represented internally
     as a DImode int.  */
6063
6064  if (index)
6065    {
6066      reason_rtx = index;
6067
6068      if (GET_CODE (index) != REG)
6069	{
6070	  reason = "index is not a register";
6071	  goto report_error;
6072	}
6073
6074      if (GET_MODE (index) != Pmode)
6075	{
6076	  reason = "index is not in Pmode";
6077	  goto report_error;
6078	}
6079
6080      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6081	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6082	{
6083	  reason = "index is not valid";
6084	  goto report_error;
6085	}
6086    }
6087
6088  /* Validate scale factor.  */
6089  if (scale != 1)
6090    {
6091      reason_rtx = GEN_INT (scale);
6092      if (!index)
6093	{
6094	  reason = "scale without index";
6095	  goto report_error;
6096	}
6097
6098      if (scale != 2 && scale != 4 && scale != 8)
6099	{
6100	  reason = "scale is not a valid multiplier";
6101	  goto report_error;
6102	}
6103    }
6104
6105  /* Validate displacement.  */
6106  if (disp)
6107    {
6108      reason_rtx = disp;
6109
6110      if (GET_CODE (disp) == CONST
6111	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6112	switch (XINT (XEXP (disp, 0), 1))
6113	  {
6114	  case UNSPEC_GOT:
6115	  case UNSPEC_GOTOFF:
6116	  case UNSPEC_GOTPCREL:
6117	    if (!flag_pic)
6118	      abort ();
6119	    goto is_legitimate_pic;
6120
6121	  case UNSPEC_GOTTPOFF:
6122	  case UNSPEC_GOTNTPOFF:
6123	  case UNSPEC_INDNTPOFF:
6124	  case UNSPEC_NTPOFF:
6125	  case UNSPEC_DTPOFF:
6126	    break;
6127
6128	  default:
6129	    reason = "invalid address unspec";
6130	    goto report_error;
6131	  }
6132
6133      else if (flag_pic && (SYMBOLIC_CONST (disp)
6134#if TARGET_MACHO
6135			    && !machopic_operand_p (disp)
6136#endif
6137			    ))
6138	{
6139	is_legitimate_pic:
6140	  if (TARGET_64BIT && (index || base))
6141	    {
6142	      /* foo@dtpoff(%rX) is ok.  */
6143	      if (GET_CODE (disp) != CONST
6144		  || GET_CODE (XEXP (disp, 0)) != PLUS
6145		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6146		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6147		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6148		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6149		{
6150		  reason = "non-constant pic memory reference";
6151		  goto report_error;
6152		}
6153	    }
6154	  else if (! legitimate_pic_address_disp_p (disp))
6155	    {
6156	      reason = "displacement is an invalid pic construct";
6157	      goto report_error;
6158	    }
6159
          /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by the "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing the
	     GOT table with a pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by the
	     "addsi3" pattern, which later gets split to lea in the
	     case the output register differs from the input.  While this
	     could be handled by a separate addsi pattern for this case
	     that never results in lea, disabling this test seems to be
	     the easier and correct fix for the crash.  */
6181	}
6182      else if (GET_CODE (disp) != LABEL_REF
6183	       && GET_CODE (disp) != CONST_INT
6184	       && (GET_CODE (disp) != CONST
6185		   || !legitimate_constant_p (disp))
6186	       && (GET_CODE (disp) != SYMBOL_REF
6187		   || !legitimate_constant_p (disp)))
6188	{
6189	  reason = "displacement is not constant";
6190	  goto report_error;
6191	}
6192      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6193	{
6194	  reason = "displacement is out of range";
6195	  goto report_error;
6196	}
6197    }
6198
6199  /* Everything looks valid.  */
6200  if (TARGET_DEBUG_ADDR)
6201    fprintf (stderr, "Success.\n");
6202  return TRUE;
6203
6204 report_error:
6205  if (TARGET_DEBUG_ADDR)
6206    {
6207      fprintf (stderr, "Error: %s\n", reason);
6208      debug_rtx (reason_rtx);
6209    }
6210  return FALSE;
6211}
6212
/* Return a unique alias set for the GOT.  */
6214
6215static HOST_WIDE_INT
6216ix86_GOT_alias_set (void)
6217{
6218  static HOST_WIDE_INT set = -1;
6219  if (set == -1)
6220    set = new_alias_set ();
6221  return set;
6222}
6223
6224/* Return a legitimate reference for ORIG (an address) using the
6225   register REG.  If REG is 0, a new pseudo is generated.
6226
6227   There are two types of references that must be handled:
6228
6229   1. Global data references must load the address from the GOT, via
6230      the PIC reg.  An insn is emitted to do this load, and the reg is
6231      returned.
6232
6233   2. Static data references, constant pool addresses, and code labels
6234      compute the address as an offset from the GOT, whose base is in
6235      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
6236      differentiate them from global data objects.  The returned
6237      address is the PIC reg + an unspec constant.
6238
6239   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6240   reg also appears in the address.  */
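
/* For example, on IA-32 with -fpic a global symbol FOO becomes a load from
   its GOT slot, (mem (plus pic_reg (const (unspec [FOO] GOT)))), while a
   local symbol BAR becomes the link-time constant
   (plus pic_reg (const (unspec [BAR] GOTOFF))).  (Illustrative sketches of
   the RTL built below.)  */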
6241
6242rtx
6243legitimize_pic_address (rtx orig, rtx reg)
6244{
6245  rtx addr = orig;
6246  rtx new = orig;
6247  rtx base;
6248
6249#if TARGET_MACHO
6250  if (reg == 0)
6251    reg = gen_reg_rtx (Pmode);
6252  /* Use the generic Mach-O PIC machinery.  */
6253  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6254#endif
6255
6256  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6257    new = addr;
6258  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6259    {
6260      /* This symbol may be referenced via a displacement from the PIC
6261	 base address (@GOTOFF).  */
6262
6263      if (reload_in_progress)
6264	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6265      if (GET_CODE (addr) == CONST)
6266	addr = XEXP (addr, 0);
6267      if (GET_CODE (addr) == PLUS)
6268	  {
6269            new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6270	    new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6271	  }
6272	else
6273          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6274      new = gen_rtx_CONST (Pmode, new);
6275      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6276
6277      if (reg != 0)
6278	{
6279	  emit_move_insn (reg, new);
6280	  new = reg;
6281	}
6282    }
6283  else if (GET_CODE (addr) == SYMBOL_REF)
6284    {
6285      if (TARGET_64BIT)
6286	{
6287	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6288	  new = gen_rtx_CONST (Pmode, new);
6289	  new = gen_rtx_MEM (Pmode, new);
6290	  RTX_UNCHANGING_P (new) = 1;
6291	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6292
6293	  if (reg == 0)
6294	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly, otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these addresses;
	     instead we CSE addresses from the GOT table, so skip this.  */
6298	  emit_insn (gen_movsi (reg, new));
6299	  new = reg;
6300	}
6301      else
6302	{
6303	  /* This symbol must be referenced via a load from the
6304	     Global Offset Table (@GOT).  */
6305
6306	  if (reload_in_progress)
6307	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6308	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6309	  new = gen_rtx_CONST (Pmode, new);
6310	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6311	  new = gen_rtx_MEM (Pmode, new);
6312	  RTX_UNCHANGING_P (new) = 1;
6313	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6314
6315	  if (reg == 0)
6316	    reg = gen_reg_rtx (Pmode);
6317	  emit_move_insn (reg, new);
6318	  new = reg;
6319	}
6320    }
6321  else
6322    {
6323      if (GET_CODE (addr) == CONST)
6324	{
6325	  addr = XEXP (addr, 0);
6326
	  /* We must match stuff we generated before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
6330	  if (GET_CODE (addr) == UNSPEC
6331	      || (GET_CODE (addr) == PLUS
6332		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6333	    return orig;
6334	  if (GET_CODE (addr) != PLUS)
6335	    abort ();
6336	}
6337      if (GET_CODE (addr) == PLUS)
6338	{
6339	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6340
6341	  /* Check first to see if this is a constant offset from a @GOTOFF
6342	     symbol reference.  */
6343	  if (local_symbolic_operand (op0, Pmode)
6344	      && GET_CODE (op1) == CONST_INT)
6345	    {
6346	      if (!TARGET_64BIT)
6347		{
6348		  if (reload_in_progress)
6349		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6350		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6351					UNSPEC_GOTOFF);
6352		  new = gen_rtx_PLUS (Pmode, new, op1);
6353		  new = gen_rtx_CONST (Pmode, new);
6354		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6355
6356		  if (reg != 0)
6357		    {
6358		      emit_move_insn (reg, new);
6359		      new = reg;
6360		    }
6361		}
6362	      else
6363		{
6364		  if (INTVAL (op1) < -16*1024*1024
6365		      || INTVAL (op1) >= 16*1024*1024)
6366		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6367		}
6368	    }
6369	  else
6370	    {
6371	      base = legitimize_pic_address (XEXP (addr, 0), reg);
6372	      new  = legitimize_pic_address (XEXP (addr, 1),
6373					     base == reg ? NULL_RTX : reg);
6374
6375	      if (GET_CODE (new) == CONST_INT)
6376		new = plus_constant (base, INTVAL (new));
6377	      else
6378		{
6379		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6380		    {
6381		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6382		      new = XEXP (new, 1);
6383		    }
6384		  new = gen_rtx_PLUS (Pmode, base, new);
6385		}
6386	    }
6387	}
6388    }
6389  return new;
6390}
6391
6392/* Load the thread pointer.  If TO_REG is true, force it into a register.  */
6393
6394static rtx
6395get_thread_pointer (int to_reg)
6396{
6397  rtx tp, reg, insn;
6398
6399  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6400  if (!to_reg)
6401    return tp;
6402
6403  reg = gen_reg_rtx (Pmode);
6404  insn = gen_rtx_SET (VOIDmode, reg, tp);
6405  insn = emit_insn (insn);
6406
6407  return reg;
6408}
6409
6410/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
6411   false if we expect this to be used for a memory address and true if
6412   we expect to load the address into a register.  */
6413
6414static rtx
6415legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6416{
6417  rtx dest, base, off, pic;
6418  int type;
6419
6420  switch (model)
6421    {
6422    case TLS_MODEL_GLOBAL_DYNAMIC:
6423      dest = gen_reg_rtx (Pmode);
6424      if (TARGET_64BIT)
6425	{
6426	  rtx rax = gen_rtx_REG (Pmode, 0), insns;
6427
6428	  start_sequence ();
6429	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6430	  insns = get_insns ();
6431	  end_sequence ();
6432
6433	  emit_libcall_block (insns, dest, rax, x);
6434	}
6435      else
6436	emit_insn (gen_tls_global_dynamic_32 (dest, x));
6437      break;
6438
6439    case TLS_MODEL_LOCAL_DYNAMIC:
6440      base = gen_reg_rtx (Pmode);
6441      if (TARGET_64BIT)
6442	{
6443	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6444
6445	  start_sequence ();
6446	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6447	  insns = get_insns ();
6448	  end_sequence ();
6449
6450	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6451	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6452	  emit_libcall_block (insns, base, rax, note);
6453	}
6454      else
6455	emit_insn (gen_tls_local_dynamic_base_32 (base));
6456
6457      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6458      off = gen_rtx_CONST (Pmode, off);
6459
6460      return gen_rtx_PLUS (Pmode, base, off);
6461
6462    case TLS_MODEL_INITIAL_EXEC:
6463      if (TARGET_64BIT)
6464	{
6465	  pic = NULL;
6466	  type = UNSPEC_GOTNTPOFF;
6467	}
6468      else if (flag_pic)
6469	{
6470	  if (reload_in_progress)
6471	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6472	  pic = pic_offset_table_rtx;
6473	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6474	}
6475      else if (!TARGET_GNU_TLS)
6476	{
6477	  pic = gen_reg_rtx (Pmode);
6478	  emit_insn (gen_set_got (pic));
6479	  type = UNSPEC_GOTTPOFF;
6480	}
6481      else
6482	{
6483	  pic = NULL;
6484	  type = UNSPEC_INDNTPOFF;
6485	}
6486
6487      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6488      off = gen_rtx_CONST (Pmode, off);
6489      if (pic)
6490	off = gen_rtx_PLUS (Pmode, pic, off);
6491      off = gen_rtx_MEM (Pmode, off);
6492      RTX_UNCHANGING_P (off) = 1;
6493      set_mem_alias_set (off, ix86_GOT_alias_set ());
6494
6495      if (TARGET_64BIT || TARGET_GNU_TLS)
6496	{
6497          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6498	  off = force_reg (Pmode, off);
6499	  return gen_rtx_PLUS (Pmode, base, off);
6500	}
6501      else
6502	{
6503	  base = get_thread_pointer (true);
6504	  dest = gen_reg_rtx (Pmode);
6505	  emit_insn (gen_subsi3 (dest, base, off));
6506	}
6507      break;
6508
6509    case TLS_MODEL_LOCAL_EXEC:
6510      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6511			    (TARGET_64BIT || TARGET_GNU_TLS)
6512			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6513      off = gen_rtx_CONST (Pmode, off);
6514
6515      if (TARGET_64BIT || TARGET_GNU_TLS)
6516	{
6517	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6518	  return gen_rtx_PLUS (Pmode, base, off);
6519	}
6520      else
6521	{
6522	  base = get_thread_pointer (true);
6523	  dest = gen_reg_rtx (Pmode);
6524	  emit_insn (gen_subsi3 (dest, base, off));
6525	}
6526      break;
6527
6528    default:
6529      abort ();
6530    }
6531
6532  return dest;
6533}
6534
6535/* Try machine-dependent ways of modifying an illegitimate address
6536   to be legitimate.  If we find one, return the new, valid address.
6537   This macro is used in only one place: `memory_address' in explow.c.
6538
6539   OLDX is the address as it was before break_out_memory_refs was called.
6540   In some cases it is useful to look at this to decide what needs to be done.
6541
6542   MODE and WIN are passed so that this macro can use
6543   GO_IF_LEGITIMATE_ADDRESS.
6544
6545   It is always safe for this macro to do nothing.  It exists to recognize
6546   opportunities to optimize the output.
6547
6548   For the 80386, we handle X+REG by loading X into a register R and
6549   using R+REG.  R will go in a general reg and indexing will be used.
6550   However, if REG is a broken-out memory address or multiplication,
6551   nothing needs to be done because REG can certainly go in a general reg.
6552
6553   When -fpic is used, special handling is needed for symbolic references.
6554   See comments by legitimize_pic_address in i386.c for details.  */
6555
6556rtx
6557legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6558{
6559  int changed = 0;
6560  unsigned log;
6561
6562  if (TARGET_DEBUG_ADDR)
6563    {
6564      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6565	       GET_MODE_NAME (mode));
6566      debug_rtx (x);
6567    }
6568
6569  log = tls_symbolic_operand (x, mode);
6570  if (log)
6571    return legitimize_tls_address (x, log, false);
6572
6573  if (flag_pic && SYMBOLIC_CONST (x))
6574    return legitimize_pic_address (x, 0);
6575
  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6577  if (GET_CODE (x) == ASHIFT
6578      && GET_CODE (XEXP (x, 1)) == CONST_INT
6579      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6580    {
6581      changed = 1;
6582      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6583			GEN_INT (1 << log));
6584    }
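  /* E.g., (ashift (reg) (const_int 3)) becomes (mult (reg) (const_int 8)),
     matching the scaled-index forms recognized by the addressing code.  */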
6585
6586  if (GET_CODE (x) == PLUS)
6587    {
6588      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6589
6590      if (GET_CODE (XEXP (x, 0)) == ASHIFT
6591	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6592	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6593	{
6594	  changed = 1;
6595	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
6596				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6597				      GEN_INT (1 << log));
6598	}
6599
6600      if (GET_CODE (XEXP (x, 1)) == ASHIFT
6601	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6602	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6603	{
6604	  changed = 1;
6605	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
6606				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6607				      GEN_INT (1 << log));
6608	}
6609
6610      /* Put multiply first if it isn't already.  */
6611      if (GET_CODE (XEXP (x, 1)) == MULT)
6612	{
6613	  rtx tmp = XEXP (x, 0);
6614	  XEXP (x, 0) = XEXP (x, 1);
6615	  XEXP (x, 1) = tmp;
6616	  changed = 1;
6617	}
6618
6619      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6620	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
6621	 created by virtual register instantiation, register elimination, and
6622	 similar optimizations.  */
6623      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6624	{
6625	  changed = 1;
6626	  x = gen_rtx_PLUS (Pmode,
6627			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
6628					  XEXP (XEXP (x, 1), 0)),
6629			    XEXP (XEXP (x, 1), 1));
6630	}
6631
6632      /* Canonicalize
6633	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6634	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
6635      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6636	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6637	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6638	       && CONSTANT_P (XEXP (x, 1)))
6639	{
6640	  rtx constant;
6641	  rtx other = NULL_RTX;
6642
6643	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6644	    {
6645	      constant = XEXP (x, 1);
6646	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6647	    }
6648	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6649	    {
6650	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6651	      other = XEXP (x, 1);
6652	    }
6653	  else
6654	    constant = 0;
6655
6656	  if (constant)
6657	    {
6658	      changed = 1;
6659	      x = gen_rtx_PLUS (Pmode,
6660				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6661					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
6662				plus_constant (other, INTVAL (constant)));
6663	    }
6664	}
6665
6666      if (changed && legitimate_address_p (mode, x, FALSE))
6667	return x;
6668
6669      if (GET_CODE (XEXP (x, 0)) == MULT)
6670	{
6671	  changed = 1;
6672	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6673	}
6674
6675      if (GET_CODE (XEXP (x, 1)) == MULT)
6676	{
6677	  changed = 1;
6678	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6679	}
6680
6681      if (changed
6682	  && GET_CODE (XEXP (x, 1)) == REG
6683	  && GET_CODE (XEXP (x, 0)) == REG)
6684	return x;
6685
6686      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6687	{
6688	  changed = 1;
6689	  x = legitimize_pic_address (x, 0);
6690	}
6691
6692      if (changed && legitimate_address_p (mode, x, FALSE))
6693	return x;
6694
6695      if (GET_CODE (XEXP (x, 0)) == REG)
6696	{
6697	  rtx temp = gen_reg_rtx (Pmode);
6698	  rtx val  = force_operand (XEXP (x, 1), temp);
6699	  if (val != temp)
6700	    emit_move_insn (temp, val);
6701
6702	  XEXP (x, 1) = temp;
6703	  return x;
6704	}
6705
6706      else if (GET_CODE (XEXP (x, 1)) == REG)
6707	{
6708	  rtx temp = gen_reg_rtx (Pmode);
6709	  rtx val  = force_operand (XEXP (x, 0), temp);
6710	  if (val != temp)
6711	    emit_move_insn (temp, val);
6712
6713	  XEXP (x, 0) = temp;
6714	  return x;
6715	}
6716    }
6717
6718  return x;
6719}
6720
6721/* Print an integer constant expression in assembler syntax.  Addition
6722   and subtraction are the only arithmetic that may appear in these
6723   expressions.  FILE is the stdio stream to write to, X is the rtx, and
6724   CODE is the operand print code from the output string.  */
6725
6726static void
6727output_pic_addr_const (FILE *file, rtx x, int code)
6728{
6729  char buf[256];
6730
6731  switch (GET_CODE (x))
6732    {
6733    case PC:
6734      if (flag_pic)
6735	putc ('.', file);
6736      else
6737	abort ();
6738      break;
6739
6740    case SYMBOL_REF:
6741      assemble_name (file, XSTR (x, 0));
6742      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6743	fputs ("@PLT", file);
6744      break;
6745
6746    case LABEL_REF:
6747      x = XEXP (x, 0);
6748      /* FALLTHRU */
6749    case CODE_LABEL:
6750      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6751      assemble_name (asm_out_file, buf);
6752      break;
6753
6754    case CONST_INT:
6755      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6756      break;
6757
6758    case CONST:
6759      /* This used to output parentheses around the expression,
6760	 but that does not work on the 386 (either ATT or BSD assembler).  */
6761      output_pic_addr_const (file, XEXP (x, 0), code);
6762      break;
6763
6764    case CONST_DOUBLE:
6765      if (GET_MODE (x) == VOIDmode)
6766	{
6767	  /* We can use %d if the number is <32 bits and positive.  */
6768	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6769	    fprintf (file, "0x%lx%08lx",
6770		     (unsigned long) CONST_DOUBLE_HIGH (x),
6771		     (unsigned long) CONST_DOUBLE_LOW (x));
6772	  else
6773	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6774	}
6775      else
6776	/* We can't handle floating point constants;
6777	   PRINT_OPERAND must handle them.  */
6778	output_operand_lossage ("floating constant misused");
6779      break;
6780
6781    case PLUS:
6782      /* Some assemblers need integer constants to appear first.  */
6783      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6784	{
6785	  output_pic_addr_const (file, XEXP (x, 0), code);
6786	  putc ('+', file);
6787	  output_pic_addr_const (file, XEXP (x, 1), code);
6788	}
6789      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6790	{
6791	  output_pic_addr_const (file, XEXP (x, 1), code);
6792	  putc ('+', file);
6793	  output_pic_addr_const (file, XEXP (x, 0), code);
6794	}
6795      else
6796	abort ();
6797      break;
6798
6799    case MINUS:
6800      if (!TARGET_MACHO)
6801	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6802      output_pic_addr_const (file, XEXP (x, 0), code);
6803      putc ('-', file);
6804      output_pic_addr_const (file, XEXP (x, 1), code);
6805      if (!TARGET_MACHO)
6806	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6807      break;
6808
6809     case UNSPEC:
6810       if (XVECLEN (x, 0) != 1)
6811	 abort ();
6812       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6813       switch (XINT (x, 1))
6814	{
6815	case UNSPEC_GOT:
6816	  fputs ("@GOT", file);
6817	  break;
6818	case UNSPEC_GOTOFF:
6819	  fputs ("@GOTOFF", file);
6820	  break;
6821	case UNSPEC_GOTPCREL:
6822	  fputs ("@GOTPCREL(%rip)", file);
6823	  break;
6824	case UNSPEC_GOTTPOFF:
6825	  /* FIXME: This might be @TPOFF in Sun ld too.  */
6826	  fputs ("@GOTTPOFF", file);
6827	  break;
6828	case UNSPEC_TPOFF:
6829	  fputs ("@TPOFF", file);
6830	  break;
6831	case UNSPEC_NTPOFF:
6832	  if (TARGET_64BIT)
6833	    fputs ("@TPOFF", file);
6834	  else
6835	    fputs ("@NTPOFF", file);
6836	  break;
6837	case UNSPEC_DTPOFF:
6838	  fputs ("@DTPOFF", file);
6839	  break;
6840	case UNSPEC_GOTNTPOFF:
6841	  if (TARGET_64BIT)
6842	    fputs ("@GOTTPOFF(%rip)", file);
6843	  else
6844	    fputs ("@GOTNTPOFF", file);
6845	  break;
6846	case UNSPEC_INDNTPOFF:
6847	  fputs ("@INDNTPOFF", file);
6848	  break;
6849	default:
6850	  output_operand_lossage ("invalid UNSPEC as operand");
6851	  break;
6852	}
6853       break;
6854
6855    default:
6856      output_operand_lossage ("invalid expression as operand");
6857    }
6858}
6859
6860/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6861   We need to handle our special PIC relocations.  */
6862
6863void
6864i386_dwarf_output_addr_const (FILE *file, rtx x)
6865{
6866#ifdef ASM_QUAD
6867  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6868#else
6869  if (TARGET_64BIT)
6870    abort ();
6871  fprintf (file, "%s", ASM_LONG);
6872#endif
6873  if (flag_pic)
6874    output_pic_addr_const (file, x, '\0');
6875  else
6876    output_addr_const (file, x);
6877  fputc ('\n', file);
6878}
6879
6880/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6881   We need to emit DTP-relative relocations.  */
6882
6883void
6884i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6885{
6886  fputs (ASM_LONG, file);
6887  output_addr_const (file, x);
6888  fputs ("@DTPOFF", file);
6889  switch (size)
6890    {
6891    case 4:
6892      break;
6893    case 8:
6894      fputs (", 0", file);
6895      break;
6896    default:
6897      abort ();
6898   }
6899}
6900
6901/* In the name of slightly smaller debug output, and to cater to
6902   general assembler lossage, recognize PIC+GOTOFF and turn it back
6903   into a direct symbol reference.  */
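/* For instance, (plus (reg %ebx) (const (unspec [(symbol_ref "x")]
   UNSPEC_GOTOFF))) is turned back into plain (symbol_ref "x") when the
   original rtx was not a MEM.  */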
6904
6905static rtx
6906ix86_delegitimize_address (rtx orig_x)
6907{
6908  rtx x = orig_x, y;
6909
6910  if (GET_CODE (x) == MEM)
6911    x = XEXP (x, 0);
6912
6913  if (TARGET_64BIT)
6914    {
6915      if (GET_CODE (x) != CONST
6916	  || GET_CODE (XEXP (x, 0)) != UNSPEC
6917	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6918	  || GET_CODE (orig_x) != MEM)
6919	return orig_x;
6920      return XVECEXP (XEXP (x, 0), 0, 0);
6921    }
6922
6923  if (GET_CODE (x) != PLUS
6924      || GET_CODE (XEXP (x, 1)) != CONST)
6925    return orig_x;
6926
6927  if (GET_CODE (XEXP (x, 0)) == REG
6928      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6929    /* %ebx + GOT/GOTOFF */
6930    y = NULL;
6931  else if (GET_CODE (XEXP (x, 0)) == PLUS)
6932    {
6933      /* %ebx + %reg * scale + GOT/GOTOFF */
6934      y = XEXP (x, 0);
6935      if (GET_CODE (XEXP (y, 0)) == REG
6936	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6937	y = XEXP (y, 1);
6938      else if (GET_CODE (XEXP (y, 1)) == REG
6939	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6940	y = XEXP (y, 0);
6941      else
6942	return orig_x;
6943      if (GET_CODE (y) != REG
6944	  && GET_CODE (y) != MULT
6945	  && GET_CODE (y) != ASHIFT)
6946	return orig_x;
6947    }
6948  else
6949    return orig_x;
6950
6951  x = XEXP (XEXP (x, 1), 0);
6952  if (GET_CODE (x) == UNSPEC
6953      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6954	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6955    {
6956      if (y)
6957	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6958      return XVECEXP (x, 0, 0);
6959    }
6960
6961  if (GET_CODE (x) == PLUS
6962      && GET_CODE (XEXP (x, 0)) == UNSPEC
6963      && GET_CODE (XEXP (x, 1)) == CONST_INT
6964      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6965	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6966	      && GET_CODE (orig_x) != MEM)))
6967    {
6968      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6969      if (y)
6970	return gen_rtx_PLUS (Pmode, y, x);
6971      return x;
6972    }
6973
6974  return orig_x;
6975}
6976
6977static void
6978put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6979		    int fp, FILE *file)
6980{
6981  const char *suffix;
6982
6983  if (mode == CCFPmode || mode == CCFPUmode)
6984    {
6985      enum rtx_code second_code, bypass_code;
6986      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6987      if (bypass_code != NIL || second_code != NIL)
6988	abort ();
6989      code = ix86_fp_compare_code_to_integer (code);
6990      mode = CCmode;
6991    }
6992  if (reverse)
6993    code = reverse_condition (code);
6994
6995  switch (code)
6996    {
6997    case EQ:
6998      suffix = "e";
6999      break;
7000    case NE:
7001      suffix = "ne";
7002      break;
7003    case GT:
7004      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7005	abort ();
7006      suffix = "g";
7007      break;
7008    case GTU:
7009      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
7010	 Those same assemblers have the same but opposite losage on cmov.  */
7011      if (mode != CCmode)
7012	abort ();
7013      suffix = fp ? "nbe" : "a";
7014      break;
7015    case LT:
7016      if (mode == CCNOmode || mode == CCGOCmode)
7017	suffix = "s";
7018      else if (mode == CCmode || mode == CCGCmode)
7019	suffix = "l";
7020      else
7021	abort ();
7022      break;
7023    case LTU:
7024      if (mode != CCmode)
7025	abort ();
7026      suffix = "b";
7027      break;
7028    case GE:
7029      if (mode == CCNOmode || mode == CCGOCmode)
7030	suffix = "ns";
7031      else if (mode == CCmode || mode == CCGCmode)
7032	suffix = "ge";
7033      else
7034	abort ();
7035      break;
7036    case GEU:
7037      /* ??? As above.  */
7038      if (mode != CCmode)
7039	abort ();
7040      suffix = fp ? "nb" : "ae";
7041      break;
7042    case LE:
7043      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7044	abort ();
7045      suffix = "le";
7046      break;
7047    case LEU:
7048      if (mode != CCmode)
7049	abort ();
7050      suffix = "be";
7051      break;
7052    case UNORDERED:
7053      suffix = fp ? "u" : "p";
7054      break;
7055    case ORDERED:
7056      suffix = fp ? "nu" : "np";
7057      break;
7058    default:
7059      abort ();
7060    }
7061  fputs (suffix, file);
7062}
7063
7064/* Print the name of register X to FILE based on its machine mode and number.
7065   If CODE is 'w', pretend the mode is HImode.
7066   If CODE is 'b', pretend the mode is QImode.
7067   If CODE is 'k', pretend the mode is SImode.
7068   If CODE is 'q', pretend the mode is DImode.
7069   If CODE is 'h', pretend the reg is the `high' byte register.
7070   If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.  */
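/* For example, hard register ax prints as "%al" under code 'b', "%ah" under
   'h', "%ax" under 'w', "%eax" under 'k' and, on 64-bit targets, "%rax"
   under 'q' (AT&T syntax).  */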
7071
7072void
7073print_reg (rtx x, int code, FILE *file)
7074{
7075  if (REGNO (x) == ARG_POINTER_REGNUM
7076      || REGNO (x) == FRAME_POINTER_REGNUM
7077      || REGNO (x) == FLAGS_REG
7078      || REGNO (x) == FPSR_REG)
7079    abort ();
7080
7081  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7082    putc ('%', file);
7083
7084  if (code == 'w' || MMX_REG_P (x))
7085    code = 2;
7086  else if (code == 'b')
7087    code = 1;
7088  else if (code == 'k')
7089    code = 4;
7090  else if (code == 'q')
7091    code = 8;
7092  else if (code == 'y')
7093    code = 3;
7094  else if (code == 'h')
7095    code = 0;
7096  else
7097    code = GET_MODE_SIZE (GET_MODE (x));
7098
7099  /* Irritatingly, AMD extended registers use a different naming convention
7100     from the normal registers.  */
7101  if (REX_INT_REG_P (x))
7102    {
7103      if (!TARGET_64BIT)
7104	abort ();
7105      switch (code)
7106	{
7107	  case 0:
7108	    error ("extended registers have no high halves");
7109	    break;
7110	  case 1:
7111	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7112	    break;
7113	  case 2:
7114	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7115	    break;
7116	  case 4:
7117	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7118	    break;
7119	  case 8:
7120	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7121	    break;
7122	  default:
7123	    error ("unsupported operand size for extended register");
7124	    break;
7125	}
7126      return;
7127    }
7128  switch (code)
7129    {
7130    case 3:
7131      if (STACK_TOP_P (x))
7132	{
7133	  fputs ("st(0)", file);
7134	  break;
7135	}
7136      /* FALLTHRU */
7137    case 8:
7138    case 4:
7139    case 12:
7140      if (! ANY_FP_REG_P (x))
7141	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7142      /* FALLTHRU */
7143    case 16:
7144    case 2:
7145    normal:
7146      fputs (hi_reg_name[REGNO (x)], file);
7147      break;
7148    case 1:
7149      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7150	goto normal;
7151      fputs (qi_reg_name[REGNO (x)], file);
7152      break;
7153    case 0:
7154      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7155	goto normal;
7156      fputs (qi_high_reg_name[REGNO (x)], file);
7157      break;
7158    default:
7159      abort ();
7160    }
7161}
7162
7163/* Locate some local-dynamic symbol still in use by this function
7164   so that we can print its name in some tls_local_dynamic_base
7165   pattern.  */
7166
7167static const char *
7168get_some_local_dynamic_name (void)
7169{
7170  rtx insn;
7171
7172  if (cfun->machine->some_ld_name)
7173    return cfun->machine->some_ld_name;
7174
7175  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7176    if (INSN_P (insn)
7177	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7178      return cfun->machine->some_ld_name;
7179
7180  abort ();
7181}
7182
7183static int
7184get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7185{
7186  rtx x = *px;
7187
7188  if (GET_CODE (x) == SYMBOL_REF
7189      && local_dynamic_symbolic_operand (x, Pmode))
7190    {
7191      cfun->machine->some_ld_name = XSTR (x, 0);
7192      return 1;
7193    }
7194
7195  return 0;
7196}
7197
7198/* Meaning of CODE:
7199   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7200   C -- print opcode suffix for set/cmov insn.
7201   c -- like C, but print reversed condition
7202   F,f -- likewise, but for floating-point.
7203   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7204        otherwise nothing
7205   R -- print the prefix for register names.
7206   z -- print the opcode suffix for the size of the current operand.
7207   * -- print a star (in certain assembler syntax)
7208   A -- print an absolute memory reference.
7209   w -- print the operand as if it's a "word" (HImode) even if it isn't.
7210   s -- print a shift double count, followed by the assembler's argument
7211	delimiter.
7212   b -- print the QImode name of the register for the indicated operand.
7213	%b0 would print %al if operands[0] is reg 0.
7214   w --  likewise, print the HImode name of the register.
7215   k --  likewise, print the SImode name of the register.
7216   q --  likewise, print the DImode name of the register.
7217   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7218   y -- print "st(0)" instead of "st" as a register.
7219   D -- print condition for SSE cmp instruction.
7220   P -- if PIC, print an @PLT suffix.
7221   X -- don't print any sort of PIC '@' suffix for a symbol.
7222   & -- print some in-use local-dynamic symbol name.
7223 */
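/* For example, in the AT&T template "fadd%z2\t%2" a DFmode memory operand
   makes %z2 print 'l', giving "faddl"; under Intel syntax %z prints
   nothing and the MEM printing below supplies "QWORD PTR" instead.  */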
7224
7225void
7226print_operand (FILE *file, rtx x, int code)
7227{
7228  if (code)
7229    {
7230      switch (code)
7231	{
7232	case '*':
7233	  if (ASSEMBLER_DIALECT == ASM_ATT)
7234	    putc ('*', file);
7235	  return;
7236
7237	case '&':
7238	  assemble_name (file, get_some_local_dynamic_name ());
7239	  return;
7240
7241	case 'A':
7242	  if (ASSEMBLER_DIALECT == ASM_ATT)
7243	    putc ('*', file);
7244	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
7245	    {
7246	      /* Intel syntax.  For absolute addresses, registers should not
7247		 be surrounded by brackets.  */
7248	      if (GET_CODE (x) != REG)
7249		{
7250		  putc ('[', file);
7251		  PRINT_OPERAND (file, x, 0);
7252		  putc (']', file);
7253		  return;
7254		}
7255	    }
7256	  else
7257	    abort ();
7258
7259	  PRINT_OPERAND (file, x, 0);
7260	  return;
7261
7262
7263	case 'L':
7264	  if (ASSEMBLER_DIALECT == ASM_ATT)
7265	    putc ('l', file);
7266	  return;
7267
7268	case 'W':
7269	  if (ASSEMBLER_DIALECT == ASM_ATT)
7270	    putc ('w', file);
7271	  return;
7272
7273	case 'B':
7274	  if (ASSEMBLER_DIALECT == ASM_ATT)
7275	    putc ('b', file);
7276	  return;
7277
7278	case 'Q':
7279	  if (ASSEMBLER_DIALECT == ASM_ATT)
7280	    putc ('l', file);
7281	  return;
7282
7283	case 'S':
7284	  if (ASSEMBLER_DIALECT == ASM_ATT)
7285	    putc ('s', file);
7286	  return;
7287
7288	case 'T':
7289	  if (ASSEMBLER_DIALECT == ASM_ATT)
7290	    putc ('t', file);
7291	  return;
7292
7293	case 'z':
7294	  /* 387 opcodes don't get size suffixes if the operands are
7295	     registers.  */
7296	  if (STACK_REG_P (x))
7297	    return;
7298
7299	  /* Likewise if using Intel opcodes.  */
7300	  if (ASSEMBLER_DIALECT == ASM_INTEL)
7301	    return;
7302
7303	  /* Derive the opcode suffix from the size of the operand.  */
7304	  switch (GET_MODE_SIZE (GET_MODE (x)))
7305	    {
7306	    case 2:
7307#ifdef HAVE_GAS_FILDS_FISTS
7308	      putc ('s', file);
7309#endif
7310	      return;
7311
7312	    case 4:
7313	      if (GET_MODE (x) == SFmode)
7314		{
7315		  putc ('s', file);
7316		  return;
7317		}
7318	      else
7319		putc ('l', file);
7320	      return;
7321
7322	    case 12:
7323	    case 16:
7324	      putc ('t', file);
7325	      return;
7326
7327	    case 8:
7328	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7329		{
7330#ifdef GAS_MNEMONICS
7331		  putc ('q', file);
7332#else
7333		  putc ('l', file);
7334		  putc ('l', file);
7335#endif
7336		}
7337	      else
7338	        putc ('l', file);
7339	      return;
7340
7341	    default:
7342	      abort ();
7343	    }
7344
7345	case 'b':
7346	case 'w':
7347	case 'k':
7348	case 'q':
7349	case 'h':
7350	case 'y':
7351	case 'X':
7352	case 'P':
7353	  break;
7354
7355	case 's':
7356	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7357	    {
7358	      PRINT_OPERAND (file, x, 0);
7359	      putc (',', file);
7360	    }
7361	  return;
7362
7363	case 'D':
7364	  /* A little bit of brain damage here.  The SSE compare instructions
7365	     use completely different names for the comparisons than the
7366	     fp conditional moves do.  */
7367	  switch (GET_CODE (x))
7368	    {
7369	    case EQ:
7370	    case UNEQ:
7371	      fputs ("eq", file);
7372	      break;
7373	    case LT:
7374	    case UNLT:
7375	      fputs ("lt", file);
7376	      break;
7377	    case LE:
7378	    case UNLE:
7379	      fputs ("le", file);
7380	      break;
7381	    case UNORDERED:
7382	      fputs ("unord", file);
7383	      break;
7384	    case NE:
7385	    case LTGT:
7386	      fputs ("neq", file);
7387	      break;
7388	    case UNGE:
7389	    case GE:
7390	      fputs ("nlt", file);
7391	      break;
7392	    case UNGT:
7393	    case GT:
7394	      fputs ("nle", file);
7395	      break;
7396	    case ORDERED:
7397	      fputs ("ord", file);
7398	      break;
7399	    default:
7400	      abort ();
7401	      break;
7402	    }
7403	  return;
7404	case 'O':
7405#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7406	  if (ASSEMBLER_DIALECT == ASM_ATT)
7407	    {
7408	      switch (GET_MODE (x))
7409		{
7410		case HImode: putc ('w', file); break;
7411		case SImode:
7412		case SFmode: putc ('l', file); break;
7413		case DImode:
7414		case DFmode: putc ('q', file); break;
7415		default: abort ();
7416		}
7417	      putc ('.', file);
7418	    }
7419#endif
7420	  return;
7421	case 'C':
7422	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7423	  return;
7424	case 'F':
7425#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7426	  if (ASSEMBLER_DIALECT == ASM_ATT)
7427	    putc ('.', file);
7428#endif
7429	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7430	  return;
7431
7432	  /* Like above, but with the condition reversed.  */
7433	case 'c':
7434	  /* Check to see if argument to %c is really a constant
7435	     and not a condition code which needs to be reversed.  */
7436	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7437	    {
7438	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7439	      return;
7440	    }
7441	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7442	  return;
7443	case 'f':
7444#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7445	  if (ASSEMBLER_DIALECT == ASM_ATT)
7446	    putc ('.', file);
7447#endif
7448	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7449	  return;
7450	case '+':
7451	  {
7452	    rtx x;
7453
7454	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7455	      return;
7456
7457	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7458	    if (x)
7459	      {
7460		int pred_val = INTVAL (XEXP (x, 0));
7461
7462		if (pred_val < REG_BR_PROB_BASE * 45 / 100
7463		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
7464		  {
7465		    int taken = pred_val > REG_BR_PROB_BASE / 2;
7466		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
7467
7468		    /* Emit hints only when the default branch prediction
7469		       heuristics would fail.  */
7470		    if (taken != cputaken)
7471		      {
7472			/* We use 3e (DS) prefix for taken branches and
7473			   2e (CS) prefix for not taken branches.  */
7474			if (taken)
7475			  fputs ("ds ; ", file);
7476			else
7477			  fputs ("cs ; ", file);
7478		      }
7479		  }
7480	      }
7481	    return;
7482	  }
7483	default:
7484	    output_operand_lossage ("invalid operand code `%c'", code);
7485	}
7486    }
7487
7488  if (GET_CODE (x) == REG)
7489    print_reg (x, code, file);
7490
7491  else if (GET_CODE (x) == MEM)
7492    {
7493      /* No `byte ptr' prefix for call instructions.  */
7494      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7495	{
7496	  const char * size;
7497	  switch (GET_MODE_SIZE (GET_MODE (x)))
7498	    {
7499	    case 1: size = "BYTE"; break;
7500	    case 2: size = "WORD"; break;
7501	    case 4: size = "DWORD"; break;
7502	    case 8: size = "QWORD"; break;
7503	    case 12: size = "XWORD"; break;
7504	    case 16: size = "XMMWORD"; break;
7505	    default:
7506	      abort ();
7507	    }
7508
7509	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
7510	  if (code == 'b')
7511	    size = "BYTE";
7512	  else if (code == 'w')
7513	    size = "WORD";
7514	  else if (code == 'k')
7515	    size = "DWORD";
7516
7517	  fputs (size, file);
7518	  fputs (" PTR ", file);
7519	}
7520
7521      x = XEXP (x, 0);
7522      /* Avoid (%rip) for call operands.  */
7523      if (CONSTANT_ADDRESS_P (x) && code == 'P'
7524	       && GET_CODE (x) != CONST_INT)
7525	output_addr_const (file, x);
7526      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7527	output_operand_lossage ("invalid constraints for operand");
7528      else
7529	output_address (x);
7530    }
7531
7532  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7533    {
7534      REAL_VALUE_TYPE r;
7535      long l;
7536
7537      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7538      REAL_VALUE_TO_TARGET_SINGLE (r, l);
7539
7540      if (ASSEMBLER_DIALECT == ASM_ATT)
7541	putc ('$', file);
7542      fprintf (file, "0x%08lx", l);
7543    }
7544
7545  /* These float cases don't actually occur as immediate operands.  */
7546  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7547    {
7548      char dstr[30];
7549
7550      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7551      fprintf (file, "%s", dstr);
7552    }
7553
7554  else if (GET_CODE (x) == CONST_DOUBLE
7555	   && GET_MODE (x) == XFmode)
7556    {
7557      char dstr[30];
7558
7559      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7560      fprintf (file, "%s", dstr);
7561    }
7562
7563  else
7564    {
7565      if (code != 'P')
7566	{
7567	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7568	    {
7569	      if (ASSEMBLER_DIALECT == ASM_ATT)
7570		putc ('$', file);
7571	    }
7572	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7573		   || GET_CODE (x) == LABEL_REF)
7574	    {
7575	      if (ASSEMBLER_DIALECT == ASM_ATT)
7576		putc ('$', file);
7577	      else
7578		fputs ("OFFSET FLAT:", file);
7579	    }
7580	}
7581      if (GET_CODE (x) == CONST_INT)
7582	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7583      else if (flag_pic)
7584	output_pic_addr_const (file, x, code);
7585      else
7586	output_addr_const (file, x);
7587    }
7588}
7589
7590/* Print a memory operand whose address is ADDR.  */
7591
7592void
7593print_operand_address (FILE *file, rtx addr)
7594{
7595  struct ix86_address parts;
7596  rtx base, index, disp;
7597  int scale;
7598
7599  if (! ix86_decompose_address (addr, &parts))
7600    abort ();
7601
7602  base = parts.base;
7603  index = parts.index;
7604  disp = parts.disp;
7605  scale = parts.scale;
7606
7607  switch (parts.seg)
7608    {
7609    case SEG_DEFAULT:
7610      break;
7611    case SEG_FS:
7612    case SEG_GS:
7613      if (USER_LABEL_PREFIX[0] == 0)
7614	putc ('%', file);
7615      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7616      break;
7617    default:
7618      abort ();
7619    }
7620
7621  if (!base && !index)
7622    {
7623      /* A displacement-only address requires special attention.  */
7624
7625      if (GET_CODE (disp) == CONST_INT)
7626	{
7627	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7628	    {
7629	      if (USER_LABEL_PREFIX[0] == 0)
7630		putc ('%', file);
7631	      fputs ("ds:", file);
7632	    }
7633	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7634	}
7635      else if (flag_pic)
7636	output_pic_addr_const (file, disp, 0);
7637      else
7638	output_addr_const (file, disp);
7639
7640      /* Use the one byte shorter RIP-relative addressing in 64-bit mode.  */
7641      if (TARGET_64BIT
7642	  && ((GET_CODE (disp) == SYMBOL_REF
7643	       && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7644	      || GET_CODE (disp) == LABEL_REF
7645	      || (GET_CODE (disp) == CONST
7646		  && GET_CODE (XEXP (disp, 0)) == PLUS
7647		  && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7648		      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7649		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7650	fputs ("(%rip)", file);
7651    }
7652  else
7653    {
7654      if (ASSEMBLER_DIALECT == ASM_ATT)
7655	{
7656	  if (disp)
7657	    {
7658	      if (flag_pic)
7659		output_pic_addr_const (file, disp, 0);
7660	      else if (GET_CODE (disp) == LABEL_REF)
7661		output_asm_label (disp);
7662	      else
7663		output_addr_const (file, disp);
7664	    }
7665
7666	  putc ('(', file);
7667	  if (base)
7668	    print_reg (base, 0, file);
7669	  if (index)
7670	    {
7671	      putc (',', file);
7672	      print_reg (index, 0, file);
7673	      if (scale != 1)
7674		fprintf (file, ",%d", scale);
7675	    }
7676	  putc (')', file);
7677	}
7678      else
7679	{
7680	  rtx offset = NULL_RTX;
7681
7682	  if (disp)
7683	    {
7684	      /* Pull out the offset of a symbol; print any symbol itself.  */
7685	      if (GET_CODE (disp) == CONST
7686		  && GET_CODE (XEXP (disp, 0)) == PLUS
7687		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7688		{
7689		  offset = XEXP (XEXP (disp, 0), 1);
7690		  disp = gen_rtx_CONST (VOIDmode,
7691					XEXP (XEXP (disp, 0), 0));
7692		}
7693
7694	      if (flag_pic)
7695		output_pic_addr_const (file, disp, 0);
7696	      else if (GET_CODE (disp) == LABEL_REF)
7697		output_asm_label (disp);
7698	      else if (GET_CODE (disp) == CONST_INT)
7699		offset = disp;
7700	      else
7701		output_addr_const (file, disp);
7702	    }
7703
7704	  putc ('[', file);
7705	  if (base)
7706	    {
7707	      print_reg (base, 0, file);
7708	      if (offset)
7709		{
7710		  if (INTVAL (offset) >= 0)
7711		    putc ('+', file);
7712		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7713		}
7714	    }
7715	  else if (offset)
7716	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7717	  else
7718	    putc ('0', file);
7719
7720	  if (index)
7721	    {
7722	      putc ('+', file);
7723	      print_reg (index, 0, file);
7724	      if (scale != 1)
7725		fprintf (file, "*%d", scale);
7726	    }
7727	  putc (']', file);
7728	}
7729    }
7730}
7731
7732bool
7733output_addr_const_extra (FILE *file, rtx x)
7734{
7735  rtx op;
7736
7737  if (GET_CODE (x) != UNSPEC)
7738    return false;
7739
7740  op = XVECEXP (x, 0, 0);
7741  switch (XINT (x, 1))
7742    {
7743    case UNSPEC_GOTTPOFF:
7744      output_addr_const (file, op);
7745      /* FIXME: This might be @TPOFF in Sun ld.  */
7746      fputs ("@GOTTPOFF", file);
7747      break;
7748    case UNSPEC_TPOFF:
7749      output_addr_const (file, op);
7750      fputs ("@TPOFF", file);
7751      break;
7752    case UNSPEC_NTPOFF:
7753      output_addr_const (file, op);
7754      if (TARGET_64BIT)
7755	fputs ("@TPOFF", file);
7756      else
7757	fputs ("@NTPOFF", file);
7758      break;
7759    case UNSPEC_DTPOFF:
7760      output_addr_const (file, op);
7761      fputs ("@DTPOFF", file);
7762      break;
7763    case UNSPEC_GOTNTPOFF:
7764      output_addr_const (file, op);
7765      if (TARGET_64BIT)
7766	fputs ("@GOTTPOFF(%rip)", file);
7767      else
7768	fputs ("@GOTNTPOFF", file);
7769      break;
7770    case UNSPEC_INDNTPOFF:
7771      output_addr_const (file, op);
7772      fputs ("@INDNTPOFF", file);
7773      break;
7774
7775    default:
7776      return false;
7777    }
7778
7779  return true;
7780}
7781
7782/* Split one or more DImode RTL references into pairs of SImode
7783   references.  The RTL can be REG, offsettable MEM, integer constant, or
7784   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
7785   split and "num" is its length.  lo_half and hi_half are output arrays
7786   that parallel "operands".  */
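/* For example, a pseudo such as (reg:DI 58) splits into the SImode pair
   (subreg:SI (reg:DI 58) 0) and (subreg:SI (reg:DI 58) 4), while a MEM
   splits into two SImode references at offsets 0 and 4.  */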
7787
7788void
7789split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7790{
7791  while (num--)
7792    {
7793      rtx op = operands[num];
7794
7795      /* simplify_subreg refuses to split volatile memory addresses,
7796         but we still have to handle them.  */
7797      if (GET_CODE (op) == MEM)
7798	{
7799	  lo_half[num] = adjust_address (op, SImode, 0);
7800	  hi_half[num] = adjust_address (op, SImode, 4);
7801	}
7802      else
7803	{
7804	  lo_half[num] = simplify_gen_subreg (SImode, op,
7805					      GET_MODE (op) == VOIDmode
7806					      ? DImode : GET_MODE (op), 0);
7807	  hi_half[num] = simplify_gen_subreg (SImode, op,
7808					      GET_MODE (op) == VOIDmode
7809					      ? DImode : GET_MODE (op), 4);
7810	}
7811    }
7812}

7813/* Split one or more TImode RTL references into pairs of DImode
7814   references.  The RTL can be REG, offsettable MEM, integer constant, or
7815   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
7816   split and "num" is its length.  lo_half and hi_half are output arrays
7817   that parallel "operands".  */
7818
7819void
7820split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7821{
7822  while (num--)
7823    {
7824      rtx op = operands[num];
7825
7826      /* simplify_subreg refuses to split volatile memory addresses, but we
7827         still have to handle them.  */
7828      if (GET_CODE (op) == MEM)
7829	{
7830	  lo_half[num] = adjust_address (op, DImode, 0);
7831	  hi_half[num] = adjust_address (op, DImode, 8);
7832	}
7833      else
7834	{
7835	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7836	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7837	}
7838    }
7839}
7840
7841/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7842   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
7843   is the expression of the binary operation.  The output may either be
7844   emitted here, or returned to the caller, like all output_* functions.
7845
7846   There is no guarantee that the operands are the same mode, as they
7847   might be within FLOAT or FLOAT_EXTEND expressions.  */
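/* For example, an SFmode SSE PLUS returns "addss\t{%2, %0|%0, %2}", and a
   387 PLUS whose second operand dies returns a popping "faddp" form.  */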
7848
7849#ifndef SYSV386_COMPAT
7850/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
7851   wants to fix the assemblers because that causes incompatibility
7852   with gcc.  No-one wants to fix gcc because that causes
7853   incompatibility with assemblers...  You can use the option of
7854   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
7855#define SYSV386_COMPAT 1
7856#endif
7857
7858const char *
7859output_387_binary_op (rtx insn, rtx *operands)
7860{
7861  static char buf[30];
7862  const char *p;
7863  const char *ssep;
7864  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7865
7866#ifdef ENABLE_CHECKING
7867  /* Even if we do not want to check the inputs, this documents the input
7868     constraints, which helps in understanding the following code.  */
7869  if (STACK_REG_P (operands[0])
7870      && ((REG_P (operands[1])
7871	   && REGNO (operands[0]) == REGNO (operands[1])
7872	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7873	  || (REG_P (operands[2])
7874	      && REGNO (operands[0]) == REGNO (operands[2])
7875	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7876      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7877    ; /* ok */
7878  else if (!is_sse)
7879    abort ();
7880#endif
7881
7882  switch (GET_CODE (operands[3]))
7883    {
7884    case PLUS:
7885      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7886	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7887	p = "fiadd";
7888      else
7889	p = "fadd";
7890      ssep = "add";
7891      break;
7892
7893    case MINUS:
7894      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7895	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7896	p = "fisub";
7897      else
7898	p = "fsub";
7899      ssep = "sub";
7900      break;
7901
7902    case MULT:
7903      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7904	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7905	p = "fimul";
7906      else
7907	p = "fmul";
7908      ssep = "mul";
7909      break;
7910
7911    case DIV:
7912      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7913	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7914	p = "fidiv";
7915      else
7916	p = "fdiv";
7917      ssep = "div";
7918      break;
7919
7920    default:
7921      abort ();
7922    }
7923
7924  if (is_sse)
7925   {
7926      strcpy (buf, ssep);
7927      if (GET_MODE (operands[0]) == SFmode)
7928	strcat (buf, "ss\t{%2, %0|%0, %2}");
7929      else
7930	strcat (buf, "sd\t{%2, %0|%0, %2}");
7931      return buf;
7932   }
7933  strcpy (buf, p);
7934
7935  switch (GET_CODE (operands[3]))
7936    {
7937    case MULT:
7938    case PLUS:
7939      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7940	{
7941	  rtx temp = operands[2];
7942	  operands[2] = operands[1];
7943	  operands[1] = temp;
7944	}
7945
7946      /* Now we know operands[0] == operands[1].  */
7947
7948      if (GET_CODE (operands[2]) == MEM)
7949	{
7950	  p = "%z2\t%2";
7951	  break;
7952	}
7953
7954      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7955	{
7956	  if (STACK_TOP_P (operands[0]))
7957	    /* How is it that we are storing to a dead operand[2]?
7958	       Well, presumably operands[1] is dead too.  We can't
7959	       store the result to st(0) as st(0) gets popped on this
7960	       instruction.  Instead store to operands[2] (which I
7961	       think has to be st(1)).  st(1) will be popped later.
7962	       gcc <= 2.8.1 didn't have this check and generated
7963	       assembly code that the Unixware assembler rejected.  */
7964	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7965	  else
7966	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7967	  break;
7968	}
7969
7970      if (STACK_TOP_P (operands[0]))
7971	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7972      else
7973	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
7974      break;
7975
7976    case MINUS:
7977    case DIV:
7978      if (GET_CODE (operands[1]) == MEM)
7979	{
7980	  p = "r%z1\t%1";
7981	  break;
7982	}
7983
7984      if (GET_CODE (operands[2]) == MEM)
7985	{
7986	  p = "%z2\t%2";
7987	  break;
7988	}
7989
7990      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7991	{
7992#if SYSV386_COMPAT
7993	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7994	     derived assemblers, confusingly reverse the direction of
7995	     the operation for fsub{r} and fdiv{r} when the
7996	     destination register is not st(0).  The Intel assembler
7997	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
7998	     figure out what the hardware really does.  */
7999	  if (STACK_TOP_P (operands[0]))
8000	    p = "{p\t%0, %2|rp\t%2, %0}";
8001	  else
8002	    p = "{rp\t%2, %0|p\t%0, %2}";
8003#else
8004	  if (STACK_TOP_P (operands[0]))
8005	    /* As above for fmul/fadd, we can't store to st(0).  */
8006	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8007	  else
8008	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8009#endif
8010	  break;
8011	}
8012
8013      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8014	{
8015#if SYSV386_COMPAT
8016	  if (STACK_TOP_P (operands[0]))
8017	    p = "{rp\t%0, %1|p\t%1, %0}";
8018	  else
8019	    p = "{p\t%1, %0|rp\t%0, %1}";
8020#else
8021	  if (STACK_TOP_P (operands[0]))
8022	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
8023	  else
8024	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
8025#endif
8026	  break;
8027	}
8028
8029      if (STACK_TOP_P (operands[0]))
8030	{
8031	  if (STACK_TOP_P (operands[1]))
8032	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8033	  else
8034	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
8035	  break;
8036	}
8037      else if (STACK_TOP_P (operands[1]))
8038	{
8039#if SYSV386_COMPAT
8040	  p = "{\t%1, %0|r\t%0, %1}";
8041#else
8042	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
8043#endif
8044	}
8045      else
8046	{
8047#if SYSV386_COMPAT
8048	  p = "{r\t%2, %0|\t%0, %2}";
8049#else
8050	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8051#endif
8052	}
8053      break;
8054
8055    default:
8056      abort ();
8057    }
8058
8059  strcat (buf, p);
8060  return buf;
8061}
8062
8063/* Output code to initialize control word copies used by
8064   trunc?f?i patterns.  NORMAL is set to the current control word, while
8065   ROUND_DOWN is set to a control word that rounds toward zero.  */
8066void
8067emit_i387_cw_initialization (rtx normal, rtx round_down)
8068{
8069  rtx reg = gen_reg_rtx (HImode);
8070
8071  emit_insn (gen_x86_fnstcw_1 (normal));
8072  emit_move_insn (reg, normal);
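  /* 0xc00 sets the rounding control field (bits 10 and 11) of the i387
     control word to 11: round toward zero, as the truncating conversion
     patterns require.  */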
8073  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8074      && !TARGET_64BIT)
8075    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8076  else
8077    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8078  emit_move_insn (round_down, reg);
8079}
8080
8081/* Output code for INSN to convert a float to a signed int.  OPERANDS
8082   are the insn operands.  The output may be [HSD]Imode and the input
8083   operand may be [SDX]Fmode.  */
8084
8085const char *
8086output_fix_trunc (rtx insn, rtx *operands)
8087{
8088  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8089  int dimode_p = GET_MODE (operands[0]) == DImode;
8090
8091  /* Jump through a hoop or two for DImode, since the hardware has no
8092     non-popping instruction.  We used to do this a different way, but
8093     that was somewhat fragile and broke with post-reload splitters.  */
8094  if (dimode_p && !stack_top_dies)
8095    output_asm_insn ("fld\t%y1", operands);
8096
8097  if (!STACK_TOP_P (operands[1]))
8098    abort ();
8099
8100  if (GET_CODE (operands[0]) != MEM)
8101    abort ();
8102
8103  output_asm_insn ("fldcw\t%3", operands);
8104  if (stack_top_dies || dimode_p)
8105    output_asm_insn ("fistp%z0\t%0", operands);
8106  else
8107    output_asm_insn ("fist%z0\t%0", operands);
8108  output_asm_insn ("fldcw\t%2", operands);
8109
8110  return "";
8111}
8112
8113/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
8114   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
8115   when fucom should be used.  */
8116
8117const char *
8118output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8119{
8120  int stack_top_dies;
8121  rtx cmp_op0 = operands[0];
8122  rtx cmp_op1 = operands[1];
8123  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8124
8125  if (eflags_p == 2)
8126    {
8127      cmp_op0 = cmp_op1;
8128      cmp_op1 = operands[2];
8129    }
8130  if (is_sse)
8131    {
8132      if (GET_MODE (operands[0]) == SFmode)
8133	if (unordered_p)
8134	  return "ucomiss\t{%1, %0|%0, %1}";
8135	else
8136	  return "comiss\t{%1, %0|%0, %1}";
8137      else
8138	if (unordered_p)
8139	  return "ucomisd\t{%1, %0|%0, %1}";
8140	else
8141	  return "comisd\t{%1, %0|%0, %1}";
8142    }
8143
8144  if (! STACK_TOP_P (cmp_op0))
8145    abort ();
8146
8147  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8148
8149  if (STACK_REG_P (cmp_op1)
8150      && stack_top_dies
8151      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8152      && REGNO (cmp_op1) != FIRST_STACK_REG)
8153    {
8154      /* If the top of the 387 stack dies, and the other operand is
8155	 also a stack register that dies, then this must be an
8156	 `fcompp' float compare.  */
8157
8158      if (eflags_p == 1)
8159	{
8160	  /* There is no double popping fcomi variant.  Fortunately,
8161	     eflags is immune from the fstp's cc clobbering.  */
8162	  if (unordered_p)
8163	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8164	  else
8165	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8166	  return "fstp\t%y0";
8167	}
8168      else
8169	{
8170	  if (eflags_p == 2)
8171	    {
8172	      if (unordered_p)
8173		return "fucompp\n\tfnstsw\t%0";
8174	      else
8175		return "fcompp\n\tfnstsw\t%0";
8176	    }
8177	  else
8178	    {
8179	      if (unordered_p)
8180		return "fucompp";
8181	      else
8182		return "fcompp";
8183	    }
8184	}
8185    }
8186  else
8187    {
8188      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
8189
8190      static const char * const alt[24] =
8191      {
8192	"fcom%z1\t%y1",
8193	"fcomp%z1\t%y1",
8194	"fucom%z1\t%y1",
8195	"fucomp%z1\t%y1",
8196
8197	"ficom%z1\t%y1",
8198	"ficomp%z1\t%y1",
8199	NULL,
8200	NULL,
8201
8202	"fcomi\t{%y1, %0|%0, %y1}",
8203	"fcomip\t{%y1, %0|%0, %y1}",
8204	"fucomi\t{%y1, %0|%0, %y1}",
8205	"fucomip\t{%y1, %0|%0, %y1}",
8206
8207	NULL,
8208	NULL,
8209	NULL,
8210	NULL,
8211
8212	"fcom%z2\t%y2\n\tfnstsw\t%0",
8213	"fcomp%z2\t%y2\n\tfnstsw\t%0",
8214	"fucom%z2\t%y2\n\tfnstsw\t%0",
8215	"fucomp%z2\t%y2\n\tfnstsw\t%0",
8216
8217	"ficom%z2\t%y2\n\tfnstsw\t%0",
8218	"ficomp%z2\t%y2\n\tfnstsw\t%0",
8219	NULL,
8220	NULL
8221      };
8222
8223      int mask;
8224      const char *ret;
8225
8226      mask  = eflags_p << 3;
8227      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8228      mask |= unordered_p << 1;
8229      mask |= stack_top_dies;
8230
8231      if (mask >= 24)
8232	abort ();
8233      ret = alt[mask];
8234      if (ret == NULL)
8235	abort ();
8236
8237      return ret;
8238    }
8239}
8240
8241void
8242ix86_output_addr_vec_elt (FILE *file, int value)
8243{
8244  const char *directive = ASM_LONG;
8245
8246  if (TARGET_64BIT)
8247    {
8248#ifdef ASM_QUAD
8249      directive = ASM_QUAD;
8250#else
8251      abort ();
8252#endif
8253    }
8254
8255  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8256}
8257
8258void
8259ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8260{
8261  if (TARGET_64BIT)
8262    fprintf (file, "%s%s%d-%s%d\n",
8263	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
8264  else if (HAVE_AS_GOTOFF_IN_DATA)
8265    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8266#if TARGET_MACHO
8267  else if (TARGET_MACHO)
8268    {
8269      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8270      machopic_output_function_base_name (file);
8271      fprintf(file, "\n");
8272    }
8273#endif
8274  else
8275    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8276		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8277}
8278
8279/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8280   for the target.  */
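/* For example, "xorl %eax, %eax" is three bytes shorter than
   "movl $0, %eax" but clobbers the flags, hence the CLOBBER attached
   below when the xor form is chosen.  */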
8281
8282void
8283ix86_expand_clear (rtx dest)
8284{
8285  rtx tmp;
8286
8287  /* We play register width games, which are only valid after reload.  */
8288  if (!reload_completed)
8289    abort ();
8290
8291  /* Avoid HImode and its attendant prefix byte.  */
8292  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8293    dest = gen_rtx_REG (SImode, REGNO (dest));
8294
8295  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8296
8297  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
8298  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8299    {
8300      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8301      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8302    }
8303
8304  emit_insn (tmp);
8305}
8306
8307/* X is an unchanging MEM.  If it is a constant pool reference, return
8308   the constant pool rtx, else NULL.  */
8309
8310static rtx
8311maybe_get_pool_constant (rtx x)
8312{
8313  x = ix86_delegitimize_address (XEXP (x, 0));
8314
8315  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8316    return get_pool_constant (x);
8317
8318  return NULL_RTX;
8319}
8320
8321void
8322ix86_expand_move (enum machine_mode mode, rtx operands[])
8323{
8324  int strict = (reload_in_progress || reload_completed);
8325  rtx op0, op1;
8326  enum tls_model model;
8327
8328  op0 = operands[0];
8329  op1 = operands[1];
8330
8331  model = tls_symbolic_operand (op1, Pmode);
8332  if (model)
8333    {
8334      op1 = legitimize_tls_address (op1, model, true);
8335      op1 = force_operand (op1, op0);
8336      if (op1 == op0)
8337	return;
8338    }
8339
8340  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8341    {
8342#if TARGET_MACHO
8343      if (MACHOPIC_PURE)
8344	{
8345	  rtx temp = ((reload_in_progress
8346		       || ((op0 && GET_CODE (op0) == REG)
8347			   && mode == Pmode))
8348		      ? op0 : gen_reg_rtx (Pmode));
8349	  op1 = machopic_indirect_data_reference (op1, temp);
8350	  op1 = machopic_legitimize_pic_address (op1, mode,
8351						 temp == op1 ? 0 : temp);
8352	}
8353      else if (MACHOPIC_INDIRECT)
8354	op1 = machopic_indirect_data_reference (op1, 0);
8355      if (op0 == op1)
8356	return;
8357#else
8358      if (GET_CODE (op0) == MEM)
8359	op1 = force_reg (Pmode, op1);
8360      else
8361	{
8362	  rtx temp = op0;
8363	  if (GET_CODE (temp) != REG)
8364	    temp = gen_reg_rtx (Pmode);
8365	  temp = legitimize_pic_address (op1, temp);
8366	  if (temp == op0)
8367	    return;
8368	  op1 = temp;
8369	}
8370#endif /* TARGET_MACHO */
8371    }
8372  else
8373    {
8374      if (GET_CODE (op0) == MEM
8375	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8376	      || !push_operand (op0, mode))
8377	  && GET_CODE (op1) == MEM)
8378	op1 = force_reg (mode, op1);
8379
8380      if (push_operand (op0, mode)
8381	  && ! general_no_elim_operand (op1, mode))
8382	op1 = copy_to_mode_reg (mode, op1);
8383
8384      /* Force large constants in 64-bit compilation into a register
8385	 to get them CSEed.  */
8386      if (TARGET_64BIT && mode == DImode
8387	  && immediate_operand (op1, mode)
8388	  && !x86_64_zero_extended_value (op1)
8389	  && !register_operand (op0, mode)
8390	  && optimize && !reload_completed && !reload_in_progress)
8391	op1 = copy_to_mode_reg (mode, op1);
8392
8393      if (FLOAT_MODE_P (mode))
8394	{
8395	  /* If we are loading a floating point constant to a register,
8396	     force the value to memory now, since we'll get better code
8397	     out of the back end.  */
8398
8399	  if (strict)
8400	    ;
8401	  else if (GET_CODE (op1) == CONST_DOUBLE)
8402	    {
8403	      op1 = validize_mem (force_const_mem (mode, op1));
8404	      if (!register_operand (op0, mode))
8405		{
8406		  rtx temp = gen_reg_rtx (mode);
8407		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8408		  emit_move_insn (op0, temp);
8409		  return;
8410		}
8411	    }
8412	}
8413    }
8414
8415  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8416}
8417
8418void
8419ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8420{
8421  /* Force constants other than zero into memory.  We do not know how
8422     the instructions used to build constants modify the upper 64 bits
8423     of the register; once we have that information, we may be able
8424     to handle some of them more efficiently.  */
8425  if ((reload_in_progress | reload_completed) == 0
8426      && register_operand (operands[0], mode)
8427      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8428    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8429
8430  /* If neither operand is already a register, force operand 1 into one.  */
8431  if (!no_new_pseudos
8432      && !register_operand (operands[0], mode)
8433      && !register_operand (operands[1], mode))
8434    {
8435      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8436      emit_move_insn (operands[0], temp);
8437      return;
8438    }
8439
8440  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8441}
8442
8443/* Attempt to expand a binary operator.  Make the expansion closer to the
8444   actual machine, rather than just general_operand, which would allow 3
8445   separate memory references (one output, two input) in a single insn.  */
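/* For instance, x86 "addl %ebx, %eax" is two-address (%eax is both a source
   and the destination), and at most one operand may live in memory; the
   code below establishes both properties before emitting the SET.  */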
8446
8447void
8448ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8449			     rtx operands[])
8450{
8451  int matching_memory;
8452  rtx src1, src2, dst, op, clob;
8453
8454  dst = operands[0];
8455  src1 = operands[1];
8456  src2 = operands[2];
8457
8458  /* Recognize <var1> = <value> <op> <var1> for commutative operators.  */
8459  if (GET_RTX_CLASS (code) == 'c'
8460      && (rtx_equal_p (dst, src2)
8461	  || immediate_operand (src1, mode)))
8462    {
8463      rtx temp = src1;
8464      src1 = src2;
8465      src2 = temp;
8466    }
8467
8468  /* If the destination is memory, and we do not have matching source
8469     operands, do things in registers.  */
8470  matching_memory = 0;
8471  if (GET_CODE (dst) == MEM)
8472    {
8473      if (rtx_equal_p (dst, src1))
8474	matching_memory = 1;
8475      else if (GET_RTX_CLASS (code) == 'c'
8476	       && rtx_equal_p (dst, src2))
8477	matching_memory = 2;
8478      else
8479	dst = gen_reg_rtx (mode);
8480    }
8481
8482  /* Both source operands cannot be in memory.  */
8483  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8484    {
8485      if (matching_memory != 2)
8486	src2 = force_reg (mode, src2);
8487      else
8488	src1 = force_reg (mode, src1);
8489    }
8490
8491  /* If the operation is not commutative, source 1 cannot be a constant
8492     or non-matching memory.  */
8493  if ((CONSTANT_P (src1)
8494       || (!matching_memory && GET_CODE (src1) == MEM))
8495      && GET_RTX_CLASS (code) != 'c')
8496    src1 = force_reg (mode, src1);
8497
8498  /* If optimizing, copy to regs to improve CSE.  */
8499  if (optimize && ! no_new_pseudos)
8500    {
8501      if (GET_CODE (dst) == MEM)
8502	dst = gen_reg_rtx (mode);
8503      if (GET_CODE (src1) == MEM)
8504	src1 = force_reg (mode, src1);
8505      if (GET_CODE (src2) == MEM)
8506	src2 = force_reg (mode, src2);
8507    }
8508
8509  /* Emit the instruction.  */
8510
8511  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8512  if (reload_in_progress)
8513    {
8514      /* Reload doesn't know about the flags register, and doesn't know that
8515         it doesn't want to clobber it.  We can only do this with PLUS.  */
8516      if (code != PLUS)
8517	abort ();
8518      emit_insn (op);
8519    }
8520  else
8521    {
8522      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8523      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8524    }
8525
8526  /* Fix up the destination if needed.  */
8527  if (dst != operands[0])
8528    emit_move_insn (operands[0], dst);
8529}
8530
8531/* Return TRUE or FALSE depending on whether the binary operator meets the
8532   appropriate constraints.  */
8533
8534int
8535ix86_binary_operator_ok (enum rtx_code code,
8536			 enum machine_mode mode ATTRIBUTE_UNUSED,
8537			 rtx operands[3])
8538{
8539  /* Both source operands cannot be in memory.  */
8540  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8541    return 0;
8542  /* If the operation is not commutative, source 1 cannot be a constant.  */
8543  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8544    return 0;
8545  /* If the destination is memory, we must have a matching source operand.  */
8546  if (GET_CODE (operands[0]) == MEM
8547      && ! (rtx_equal_p (operands[0], operands[1])
8548	    || (GET_RTX_CLASS (code) == 'c'
8549		&& rtx_equal_p (operands[0], operands[2]))))
8550    return 0;
8551  /* If the operation is not commutative and source 1 is memory, we must
8552     have a matching destination.  */
8553  if (GET_CODE (operands[1]) == MEM
8554      && GET_RTX_CLASS (code) != 'c'
8555      && ! rtx_equal_p (operands[0], operands[1]))
8556    return 0;
8557  return 1;
8558}
8559
8560/* Attempt to expand a unary operator.  Make the expansion closer to the
8561   actual machine, rather than just general_operand, which would allow 2
8562   separate memory references (one output, one input) in a single insn.  */
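/* For instance, "negl (%eax)" is a single read-modify-write instruction, so
   a memory source is only allowed when it matches the destination.  */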
8563
8564void
8565ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8566			    rtx operands[])
8567{
8568  int matching_memory;
8569  rtx src, dst, op, clob;
8570
8571  dst = operands[0];
8572  src = operands[1];
8573
8574  /* If the destination is memory, and we do not have matching source
8575     operands, do things in registers.  */
8576  matching_memory = 0;
8577  if (GET_CODE (dst) == MEM)
8578    {
8579      if (rtx_equal_p (dst, src))
8580	matching_memory = 1;
8581      else
8582	dst = gen_reg_rtx (mode);
8583    }
8584
8585  /* When source operand is memory, destination must match.  */
8586  if (!matching_memory && GET_CODE (src) == MEM)
8587    src = force_reg (mode, src);
8588
8589  /* If optimizing, copy to regs to improve CSE.  */
8590  if (optimize && ! no_new_pseudos)
8591    {
8592      if (GET_CODE (dst) == MEM)
8593	dst = gen_reg_rtx (mode);
8594      if (GET_CODE (src) == MEM)
8595	src = force_reg (mode, src);
8596    }
8597
8598  /* Emit the instruction.  */
8599
8600  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8601  if (reload_in_progress || code == NOT)
8602    {
8603      /* Reload doesn't know about the flags register, and doesn't know that
8604         it doesn't want to clobber it.  */
8605      if (code != NOT)
8606        abort ();
8607      emit_insn (op);
8608    }
8609  else
8610    {
8611      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8612      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8613    }
8614
8615  /* Fix up the destination if needed.  */
8616  if (dst != operands[0])
8617    emit_move_insn (operands[0], dst);
8618}
8619
8620/* Return TRUE or FALSE depending on whether the unary operator meets the
8621   appropriate constraints.  */
8622
8623int
8624ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8625			enum machine_mode mode ATTRIBUTE_UNUSED,
8626			rtx operands[2] ATTRIBUTE_UNUSED)
8627{
8628  /* If one of operands is memory, source and destination must match.  */
8629  if ((GET_CODE (operands[0]) == MEM
8630       || GET_CODE (operands[1]) == MEM)
8631      && ! rtx_equal_p (operands[0], operands[1]))
8632    return FALSE;
8633  return TRUE;
8634}
8635
8636/* Return TRUE or FALSE depending on whether the first SET in INSN
8637   has source and destination with matching CC modes, and that the
8638   CC mode is at least as constrained as REQ_MODE.  */
8639
8640int
8641ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8642{
8643  rtx set;
8644  enum machine_mode set_mode;
8645
8646  set = PATTERN (insn);
8647  if (GET_CODE (set) == PARALLEL)
8648    set = XVECEXP (set, 0, 0);
8649  if (GET_CODE (set) != SET)
8650    abort ();
8651  if (GET_CODE (SET_SRC (set)) != COMPARE)
8652    abort ();
8653
8654  set_mode = GET_MODE (SET_DEST (set));
8655  switch (set_mode)
8656    {
8657    case CCNOmode:
8658      if (req_mode != CCNOmode
8659	  && (req_mode != CCmode
8660	      || XEXP (SET_SRC (set), 1) != const0_rtx))
8661	return 0;
8662      break;
8663    case CCmode:
8664      if (req_mode == CCGCmode)
8665	return 0;
8666      /* FALLTHRU */
8667    case CCGCmode:
8668      if (req_mode == CCGOCmode || req_mode == CCNOmode)
8669	return 0;
8670      /* FALLTHRU */
8671    case CCGOCmode:
8672      if (req_mode == CCZmode)
8673	return 0;
8674      /* FALLTHRU */
8675    case CCZmode:
8676      break;
8677
8678    default:
8679      abort ();
8680    }
8681
8682  return (GET_MODE (SET_SRC (set)) == set_mode);
8683}
8684
8685/* Generate insn patterns to do an integer compare of OPERANDS.  */
8686
8687static rtx
8688ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8689{
8690  enum machine_mode cmpmode;
8691  rtx tmp, flags;
8692
8693  cmpmode = SELECT_CC_MODE (code, op0, op1);
8694  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8695
8696  /* This is very simple, but making the interface the same as in the
8697     FP case makes the rest of the code easier.  */
8698  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8699  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8700
8701  /* Return the test that should be put into the flags user, i.e.
8702     the bcc, scc, or cmov instruction.  */
8703  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8704}
8705
8706/* Figure out whether to use ordered or unordered fp comparisons.
8707   Return the appropriate mode to use.  */
8708
8709enum machine_mode
8710ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8711{
8712  /* ??? In order to make all comparisons reversible, we do all comparisons
8713     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
8714     all forms of trapping and nontrapping comparisons, we can make inequality
8715     comparisons trapping again, since it results in better code when using
8716     FCOM based compares.  */
8717  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8718}
8719
8720enum machine_mode
8721ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8722{
8723  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8724    return ix86_fp_compare_mode (code);
8725  switch (code)
8726    {
8727      /* Only zero flag is needed.  */
8728    case EQ:			/* ZF=0 */
8729    case NE:			/* ZF!=0 */
8730      return CCZmode;
8731      /* Codes needing carry flag.  */
8732    case GEU:			/* CF=0 */
8733    case GTU:			/* CF=0 & ZF=0 */
8734    case LTU:			/* CF=1 */
8735    case LEU:			/* CF=1 | ZF=1 */
8736      return CCmode;
8737      /* Codes possibly doable only with sign flag when
8738         comparing against zero.  */
8739    case GE:			/* SF=OF   or   SF=0 */
8740    case LT:			/* SF<>OF  or   SF=1 */
8741      if (op1 == const0_rtx)
8742	return CCGOCmode;
8743      else
8744	/* For other cases Carry flag is not required.  */
8745	return CCGCmode;
8746      /* Codes doable only with sign flag when comparing
8747         against zero, but for which the jump instruction is
8748         missing, so we need to use relational tests against
8749         overflow, which thus needs to be zero.  */
8750    case GT:			/* ZF=0 & SF=OF */
8751    case LE:			/* ZF=1 | SF<>OF */
8752      if (op1 == const0_rtx)
8753	return CCNOmode;
8754      else
8755	return CCGCmode;
8756      /* The strcmp pattern does (use flags), and combine may ask us for the
8757	 proper mode.  */
8758    case USE:
8759      return CCmode;
8760    default:
8761      abort ();
8762    }
8763}
8764
8765/* Return the fixed registers used for condition codes.  */
8766
8767static bool
8768ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8769{
8770  *p1 = FLAGS_REG;
8771  *p2 = FPSR_REG;
8772  return true;
8773}
8774
8775/* If two condition code modes are compatible, return a condition code
8776   mode which is compatible with both.  Otherwise, return
8777   VOIDmode.  */
8778
8779static enum machine_mode
8780ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8781{
8782  if (m1 == m2)
8783    return m1;
8784
8785  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8786    return VOIDmode;
8787
8788  if ((m1 == CCGCmode && m2 == CCGOCmode)
8789      || (m1 == CCGOCmode && m2 == CCGCmode))
8790    return CCGCmode;
8791
8792  switch (m1)
8793    {
8794    default:
8795      abort ();
8796
8797    case CCmode:
8798    case CCGCmode:
8799    case CCGOCmode:
8800    case CCNOmode:
8801    case CCZmode:
8802      switch (m2)
8803	{
8804	default:
8805	  return VOIDmode;
8806
8807	case CCmode:
8808	case CCGCmode:
8809	case CCGOCmode:
8810	case CCNOmode:
8811	case CCZmode:
8812	  return CCmode;
8813	}
8814
8815    case CCFPmode:
8816    case CCFPUmode:
8817      /* These are only compatible with themselves, which we already
8818	 checked above.  */
8819      return VOIDmode;
8820    }
8821}
8822
8823/* Return true if we should use an FCOMI instruction for this fp comparison.  */
8824
8825int
8826ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8827{
8828  enum rtx_code swapped_code = swap_condition (code);
8829  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8830	  || (ix86_fp_comparison_cost (swapped_code)
8831	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
8832}
8833
8834/* Swap, force into registers, or otherwise massage the two operands
8835   to a fp comparison.  The operands are updated in place; the new
8836   comparison code is returned.  */
8837
8838static enum rtx_code
8839ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8840{
8841  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8842  rtx op0 = *pop0, op1 = *pop1;
8843  enum machine_mode op_mode = GET_MODE (op0);
8844  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8845
8846  /* All of the unordered compare instructions only work on registers.
8847     The same is true of the XFmode compare instructions.  The same is
8848     true of the fcomi compare instructions.  */
8849
8850  if (!is_sse
8851      && (fpcmp_mode == CCFPUmode
8852	  || op_mode == XFmode
8853	  || ix86_use_fcomi_compare (code)))
8854    {
8855      op0 = force_reg (op_mode, op0);
8856      op1 = force_reg (op_mode, op1);
8857    }
8858  else
8859    {
8860      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
8861	 things around if they appear profitable, otherwise force op0
8862	 into a register.  */
8863
8864      if (standard_80387_constant_p (op0) == 0
8865	  || (GET_CODE (op0) == MEM
8866	      && ! (standard_80387_constant_p (op1) == 0
8867		    || GET_CODE (op1) == MEM)))
8868	{
8869	  rtx tmp;
8870	  tmp = op0, op0 = op1, op1 = tmp;
8871	  code = swap_condition (code);
8872	}
8873
8874      if (GET_CODE (op0) != REG)
8875	op0 = force_reg (op_mode, op0);
8876
8877      if (CONSTANT_P (op1))
8878	{
8879	  if (standard_80387_constant_p (op1))
8880	    op1 = force_reg (op_mode, op1);
8881	  else
8882	    op1 = validize_mem (force_const_mem (op_mode, op1));
8883	}
8884    }
8885
8886  /* Try to rearrange the comparison to make it cheaper.  */
8887  if (ix86_fp_comparison_cost (code)
8888      > ix86_fp_comparison_cost (swap_condition (code))
8889      && (GET_CODE (op1) == REG || !no_new_pseudos))
8890    {
8891      rtx tmp;
8892      tmp = op0, op0 = op1, op1 = tmp;
8893      code = swap_condition (code);
8894      if (GET_CODE (op0) != REG)
8895	op0 = force_reg (op_mode, op0);
8896    }
8897
8898  *pop0 = op0;
8899  *pop1 = op1;
8900  return code;
8901}
8902
8903/* Convert the comparison codes we use to represent FP comparisons to the
8904   integer codes that will result in a proper branch.  Return UNKNOWN if no
8905   such code is available.  */
8906static enum rtx_code
8907ix86_fp_compare_code_to_integer (enum rtx_code code)
8908{
8909  switch (code)
8910    {
8911    case GT:
8912      return GTU;
8913    case GE:
8914      return GEU;
8915    case ORDERED:
8916    case UNORDERED:
8917      return code;
8919    case UNEQ:
8920      return EQ;
8922    case UNLT:
8923      return LTU;
8925    case UNLE:
8926      return LEU;
8928    case LTGT:
8929      return NE;
8931    default:
8932      return UNKNOWN;
8933    }
8934}
8935
8936/* Split comparison code CODE into comparisons we can do using branch
8937   instructions.  BYPASS_CODE is the comparison code for a branch that will
8938   branch around FIRST_CODE and SECOND_CODE.  If one of the branches is
8939   not required, its value is set to NIL.
8940   We never require more than two branches.  */
8941static void
8942ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8943			  enum rtx_code *first_code,
8944			  enum rtx_code *second_code)
8945{
8946  *first_code = code;
8947  *bypass_code = NIL;
8948  *second_code = NIL;
8949
8950  /* The fcomi comparison sets flags as follows:
8951
8952     cmp    ZF PF CF
8953     >      0  0  0
8954     <      0  0  1
8955     =      1  0  0
8956     un     1  1  1 */
8957
8958  switch (code)
8959    {
8960    case GT:			/* GTU - CF=0 & ZF=0 */
8961    case GE:			/* GEU - CF=0 */
8962    case ORDERED:		/* PF=0 */
8963    case UNORDERED:		/* PF=1 */
8964    case UNEQ:			/* EQ - ZF=1 */
8965    case UNLT:			/* LTU - CF=1 */
8966    case UNLE:			/* LEU - CF=1 | ZF=1 */
8967    case LTGT:			/* EQ - ZF=0 */
8968      break;
8969    case LT:			/* LTU - CF=1 - fails on unordered */
8970      *first_code = UNLT;
8971      *bypass_code = UNORDERED;
8972      break;
8973    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
8974      *first_code = UNLE;
8975      *bypass_code = UNORDERED;
8976      break;
8977    case EQ:			/* EQ - ZF=1 - fails on unordered */
8978      *first_code = UNEQ;
8979      *bypass_code = UNORDERED;
8980      break;
8981    case NE:			/* NE - ZF=0 - fails on unordered */
8982      *first_code = LTGT;
8983      *second_code = UNORDERED;
8984      break;
8985    case UNGE:			/* GEU - CF=0 - fails on unordered */
8986      *first_code = GE;
8987      *second_code = UNORDERED;
8988      break;
8989    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
8990      *first_code = GT;
8991      *second_code = UNORDERED;
8992      break;
8993    default:
8994      abort ();
8995    }
8996  if (!TARGET_IEEE_FP)
8997    {
8998      *second_code = NIL;
8999      *bypass_code = NIL;
9000    }
9001}
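/* (Illustration, a sketch of the resulting assembly: an IEEE "branch if
   a < b" gets first_code = UNLT with bypass_code = UNORDERED, roughly

	fucomi	%st(1), %st
	jp	1f		# bypass taken when operands are unordered
	jb	target		# UNLT: CF=1
   1:

   while NE gets first_code = LTGT and second_code = UNORDERED, i.e. two
   branches to the target rather than one around it.)  */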
9002
9003/* Return the cost of a comparison done using fcom + arithmetic operations
9004   on AX.  All following functions use the number of instructions as the cost
9005   metric.  In the future this should be tweaked to compute bytes for
9006   optimize_size and take into account instruction performance per CPU.  */
9007static int
9008ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9009{
9010  if (!TARGET_IEEE_FP)
9011    return 4;
9012  /* The cost of code output by ix86_expand_fp_compare.  */
9013  switch (code)
9014    {
9015    case UNLE:
9016    case UNLT:
9017    case LTGT:
9018    case GT:
9019    case GE:
9020    case UNORDERED:
9021    case ORDERED:
9022    case UNEQ:
9023      return 4;
9025    case LT:
9026    case NE:
9027    case EQ:
9028    case UNGE:
9029      return 5;
9031    case LE:
9032    case UNGT:
9033      return 6;
9035    default:
9036      abort ();
9037    }
9038}
9039
9040/* Return cost of comparison done using fcomi operation.
9041   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
9042static int
9043ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9044{
9045  enum rtx_code bypass_code, first_code, second_code;
9046  /* Return arbitrarily high cost when instruction is not supported - this
9047     prevents gcc from using it.  */
9048  if (!TARGET_CMOVE)
9049    return 1024;
9050  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9051  return (bypass_code != NIL || second_code != NIL) + 2;
9052}
9053
9054/* Return cost of comparison done using sahf operation.
9055   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
9056static int
9057ix86_fp_comparison_sahf_cost (enum rtx_code code)
9058{
9059  enum rtx_code bypass_code, first_code, second_code;
9060  /* Return arbitrarily high cost when instruction is not preferred - this
9061     prevents gcc from using it.  */
9062  if (!TARGET_USE_SAHF && !optimize_size)
9063    return 1024;
9064  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9065  return (bypass_code != NIL || second_code != NIL) + 3;
9066}
9067
9068/* Compute cost of the comparison done using any method.
9069   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
9070static int
9071ix86_fp_comparison_cost (enum rtx_code code)
9072{
9073  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9074  int min;
9075
9076  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9077  sahf_cost = ix86_fp_comparison_sahf_cost (code);
9078
9079  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9080  if (min > sahf_cost)
9081    min = sahf_cost;
9082  if (min > fcomi_cost)
9083    min = fcomi_cost;
9084  return min;
9085}
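/* (A worked example: for an IEEE LT compare, the arithmetic sequence costs
   5, fcomi costs 1 + 2 = 3 -- one extra jump for the UNORDERED bypass --
   and sahf costs 1 + 3 = 4, so the fcomi variant wins when TARGET_CMOVE
   makes it available.)  */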
9086
9087/* Generate insn patterns to do a floating point compare of OPERANDS.  */
9088
9089static rtx
9090ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9091			rtx *second_test, rtx *bypass_test)
9092{
9093  enum machine_mode fpcmp_mode, intcmp_mode;
9094  rtx tmp, tmp2;
9095  int cost = ix86_fp_comparison_cost (code);
9096  enum rtx_code bypass_code, first_code, second_code;
9097
9098  fpcmp_mode = ix86_fp_compare_mode (code);
9099  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9100
9101  if (second_test)
9102    *second_test = NULL_RTX;
9103  if (bypass_test)
9104    *bypass_test = NULL_RTX;
9105
9106  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9107
9108  /* Do fcomi/sahf based test when profitable.  */
9109  if ((bypass_code == NIL || bypass_test)
9110      && (second_code == NIL || second_test)
9111      && ix86_fp_comparison_arithmetics_cost (code) > cost)
9112    {
9113      if (TARGET_CMOVE)
9114	{
9115	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9116	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9117			     tmp);
9118	  emit_insn (tmp);
9119	}
9120      else
9121	{
9122	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9123	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9124	  if (!scratch)
9125	    scratch = gen_reg_rtx (HImode);
9126	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9127	  emit_insn (gen_x86_sahf_1 (scratch));
9128	}
9129
9130      /* The FP codes work out to act like unsigned.  */
9131      intcmp_mode = fpcmp_mode;
9132      code = first_code;
9133      if (bypass_code != NIL)
9134	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9135				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
9136				       const0_rtx);
9137      if (second_code != NIL)
9138	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9139				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
9140				       const0_rtx);
9141    }
9142  else
9143    {
9144      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
9145      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9146      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9147      if (!scratch)
9148	scratch = gen_reg_rtx (HImode);
9149      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9150
9151      /* In the unordered case, we have to check C2 for NaN's, which
9152	 doesn't happen to work out to anything nice combination-wise.
9153	 So do some bit twiddling on the value we've got in AH to come
9154	 up with an appropriate set of condition codes.  */
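      /* (After fnstsw the FPU condition bits sit in AH: C0 = 0x01,
	 C2 = 0x04, C3 = 0x40.  An unordered result sets all three, which
	 is where the 0x45 masks below come from.)  */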
9155
9156      intcmp_mode = CCNOmode;
9157      switch (code)
9158	{
9159	case GT:
9160	case UNGT:
9161	  if (code == GT || !TARGET_IEEE_FP)
9162	    {
9163	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9164	      code = EQ;
9165	    }
9166	  else
9167	    {
9168	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9169	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9170	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9171	      intcmp_mode = CCmode;
9172	      code = GEU;
9173	    }
9174	  break;
9175	case LT:
9176	case UNLT:
9177	  if (code == LT && TARGET_IEEE_FP)
9178	    {
9179	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9180	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9181	      intcmp_mode = CCmode;
9182	      code = EQ;
9183	    }
9184	  else
9185	    {
9186	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9187	      code = NE;
9188	    }
9189	  break;
9190	case GE:
9191	case UNGE:
9192	  if (code == GE || !TARGET_IEEE_FP)
9193	    {
9194	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9195	      code = EQ;
9196	    }
9197	  else
9198	    {
9199	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9200	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9201					     GEN_INT (0x01)));
9202	      code = NE;
9203	    }
9204	  break;
9205	case LE:
9206	case UNLE:
9207	  if (code == LE && TARGET_IEEE_FP)
9208	    {
9209	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9210	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9211	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9212	      intcmp_mode = CCmode;
9213	      code = LTU;
9214	    }
9215	  else
9216	    {
9217	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9218	      code = NE;
9219	    }
9220	  break;
9221	case EQ:
9222	case UNEQ:
9223	  if (code == EQ && TARGET_IEEE_FP)
9224	    {
9225	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9226	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9227	      intcmp_mode = CCmode;
9228	      code = EQ;
9229	    }
9230	  else
9231	    {
9232	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9233	      code = NE;
9234	      break;
9235	    }
9236	  break;
9237	case NE:
9238	case LTGT:
9239	  if (code == NE && TARGET_IEEE_FP)
9240	    {
9241	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9242	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9243					     GEN_INT (0x40)));
9244	      code = NE;
9245	    }
9246	  else
9247	    {
9248	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9249	      code = EQ;
9250	    }
9251	  break;
9252
9253	case UNORDERED:
9254	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9255	  code = NE;
9256	  break;
9257	case ORDERED:
9258	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9259	  code = EQ;
9260	  break;
9261
9262	default:
9263	  abort ();
9264	}
9265    }
9266
9267  /* Return the test that should be put into the flags user, i.e.
9268     the bcc, scc, or cmov instruction.  */
9269  return gen_rtx_fmt_ee (code, VOIDmode,
9270			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9271			 const0_rtx);
9272}
9273
9274rtx
9275ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9276{
9277  rtx op0, op1, ret;
9278  op0 = ix86_compare_op0;
9279  op1 = ix86_compare_op1;
9280
9281  if (second_test)
9282    *second_test = NULL_RTX;
9283  if (bypass_test)
9284    *bypass_test = NULL_RTX;
9285
9286  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9287    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9288				  second_test, bypass_test);
9289  else
9290    ret = ix86_expand_int_compare (code, op0, op1);
9291
9292  return ret;
9293}
9294
9295/* Return true if the CODE will result in a nontrivial jump sequence.  */
9296bool
9297ix86_fp_jump_nontrivial_p (enum rtx_code code)
9298{
9299  enum rtx_code bypass_code, first_code, second_code;
9300  if (!TARGET_CMOVE)
9301    return true;
9302  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9303  return bypass_code != NIL || second_code != NIL;
9304}
9305
9306void
9307ix86_expand_branch (enum rtx_code code, rtx label)
9308{
9309  rtx tmp;
9310
9311  switch (GET_MODE (ix86_compare_op0))
9312    {
9313    case QImode:
9314    case HImode:
9315    case SImode:
9316      simple:
9317      tmp = ix86_expand_compare (code, NULL, NULL);
9318      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9319				  gen_rtx_LABEL_REF (VOIDmode, label),
9320				  pc_rtx);
9321      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9322      return;
9323
9324    case SFmode:
9325    case DFmode:
9326    case XFmode:
9327      {
9328	rtvec vec;
9329	int use_fcomi;
9330	enum rtx_code bypass_code, first_code, second_code;
9331
9332	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9333					     &ix86_compare_op1);
9334
9335	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9336
9337	/* Check whether we will use the natural sequence with one jump.  If
9338	   so, we can expand the jump early.  Otherwise delay expansion by
9339	   creating a compound insn so as not to confuse the optimizers.  */
9340	if (bypass_code == NIL && second_code == NIL
9341	    && TARGET_CMOVE)
9342	  {
9343	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9344				  gen_rtx_LABEL_REF (VOIDmode, label),
9345				  pc_rtx, NULL_RTX);
9346	  }
9347	else
9348	  {
9349	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
9350				  ix86_compare_op0, ix86_compare_op1);
9351	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9352					gen_rtx_LABEL_REF (VOIDmode, label),
9353					pc_rtx);
9354	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9355
9356	    use_fcomi = ix86_use_fcomi_compare (code);
9357	    vec = rtvec_alloc (3 + !use_fcomi);
9358	    RTVEC_ELT (vec, 0) = tmp;
9359	    RTVEC_ELT (vec, 1)
9360	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9361	    RTVEC_ELT (vec, 2)
9362	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9363	    if (! use_fcomi)
9364	      RTVEC_ELT (vec, 3)
9365		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9366
9367	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9368	  }
9369	return;
9370      }
9371
9372    case DImode:
9373      if (TARGET_64BIT)
9374	goto simple;
9375      /* Expand DImode branch into multiple compare+branch.  */
9376      {
9377	rtx lo[2], hi[2], label2;
9378	enum rtx_code code1, code2, code3;
9379
9380	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9381	  {
9382	    tmp = ix86_compare_op0;
9383	    ix86_compare_op0 = ix86_compare_op1;
9384	    ix86_compare_op1 = tmp;
9385	    code = swap_condition (code);
9386	  }
9387	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9388	split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9389
9390	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9391	   avoid two branches.  This costs one extra insn, so disable when
9392	   optimizing for size.  */
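	/* (A sketch: "if (a == b)" then becomes

		movl	lo(a), %eax
		xorl	lo(b), %eax
		movl	hi(a), %edx
		xorl	hi(b), %edx
		orl	%edx, %eax
		je	label

	   and the final orl sets ZF itself, so a single branch suffices.)  */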
9393
9394	if ((code == EQ || code == NE)
9395	    && (!optimize_size
9396	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
9397	  {
9398	    rtx xor0, xor1;
9399
9400	    xor1 = hi[0];
9401	    if (hi[1] != const0_rtx)
9402	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9403				   NULL_RTX, 0, OPTAB_WIDEN);
9404
9405	    xor0 = lo[0];
9406	    if (lo[1] != const0_rtx)
9407	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9408				   NULL_RTX, 0, OPTAB_WIDEN);
9409
9410	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9411				NULL_RTX, 0, OPTAB_WIDEN);
9412
9413	    ix86_compare_op0 = tmp;
9414	    ix86_compare_op1 = const0_rtx;
9415	    ix86_expand_branch (code, label);
9416	    return;
9417	  }
9418
9419	/* Otherwise, if we are doing a less-than or greater-or-equal-than
9420	   comparison, op1 is a constant, and the low word is zero, then we
9421	   can just examine the high word.  */
9422
9423	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9424	  switch (code)
9425	    {
9426	    case LT: case LTU: case GE: case GEU:
9427	      ix86_compare_op0 = hi[0];
9428	      ix86_compare_op1 = hi[1];
9429	      ix86_expand_branch (code, label);
9430	      return;
9431	    default:
9432	      break;
9433	    }
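	/* (E.g. a signed DImode "a < 0" reduces to testing only the high
	   word against zero.)  */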
9434
9435	/* Otherwise, we need two or three jumps.  */
9436
9437	label2 = gen_label_rtx ();
9438
9439	code1 = code;
9440	code2 = swap_condition (code);
9441	code3 = unsigned_condition (code);
9442
9443	switch (code)
9444	  {
9445	  case LT: case GT: case LTU: case GTU:
9446	    break;
9447
9448	  case LE:   code1 = LT;  code2 = GT;  break;
9449	  case GE:   code1 = GT;  code2 = LT;  break;
9450	  case LEU:  code1 = LTU; code2 = GTU; break;
9451	  case GEU:  code1 = GTU; code2 = LTU; break;
9452
9453	  case EQ:   code1 = NIL; code2 = NE;  break;
9454	  case NE:   code2 = NIL; break;
9455
9456	  default:
9457	    abort ();
9458	  }
9459
9460	/*
9461	 * a < b =>
9462	 *    if (hi(a) < hi(b)) goto true;
9463	 *    if (hi(a) > hi(b)) goto false;
9464	 *    if (lo(a) < lo(b)) goto true;
9465	 *  false:
9466	 */
9467
9468	ix86_compare_op0 = hi[0];
9469	ix86_compare_op1 = hi[1];
9470
9471	if (code1 != NIL)
9472	  ix86_expand_branch (code1, label);
9473	if (code2 != NIL)
9474	  ix86_expand_branch (code2, label2);
9475
9476	ix86_compare_op0 = lo[0];
9477	ix86_compare_op1 = lo[1];
9478	ix86_expand_branch (code3, label);
9479
9480	if (code2 != NIL)
9481	  emit_label (label2);
9482	return;
9483      }
9484
9485    default:
9486      abort ();
9487    }
9488}
9489
9490/* Split branch based on floating point condition.  */
9491void
9492ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9493		      rtx target1, rtx target2, rtx tmp)
9494{
9495  rtx second, bypass;
9496  rtx label = NULL_RTX;
9497  rtx condition;
9498  int bypass_probability = -1, second_probability = -1, probability = -1;
9499  rtx i;
9500
9501  if (target2 != pc_rtx)
9502    {
9503      rtx tmp = target2;
9504      code = reverse_condition_maybe_unordered (code);
9505      target2 = target1;
9506      target1 = tmp;
9507    }
9508
9509  condition = ix86_expand_fp_compare (code, op1, op2,
9510				      tmp, &second, &bypass);
9511
9512  if (split_branch_probability >= 0)
9513    {
9514      /* Distribute the probabilities across the jumps.
9515	 Assume that BYPASS and SECOND always test
9516	 for UNORDERED.  */
9517      probability = split_branch_probability;
9518
9519      /* A value of 1 is low enough that the probability need not
9520	 be updated.  Later we may run some experiments and see
9521	 if unordered values are more frequent in practice.  */
9522      if (bypass)
9523	bypass_probability = 1;
9524      if (second)
9525	second_probability = 1;
9526    }
9527  if (bypass != NULL_RTX)
9528    {
9529      label = gen_label_rtx ();
9530      i = emit_jump_insn (gen_rtx_SET
9531			  (VOIDmode, pc_rtx,
9532			   gen_rtx_IF_THEN_ELSE (VOIDmode,
9533						 bypass,
9534						 gen_rtx_LABEL_REF (VOIDmode,
9535								    label),
9536						 pc_rtx)));
9537      if (bypass_probability >= 0)
9538	REG_NOTES (i)
9539	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
9540			       GEN_INT (bypass_probability),
9541			       REG_NOTES (i));
9542    }
9543  i = emit_jump_insn (gen_rtx_SET
9544		      (VOIDmode, pc_rtx,
9545		       gen_rtx_IF_THEN_ELSE (VOIDmode,
9546					     condition, target1, target2)));
9547  if (probability >= 0)
9548    REG_NOTES (i)
9549      = gen_rtx_EXPR_LIST (REG_BR_PROB,
9550			   GEN_INT (probability),
9551			   REG_NOTES (i));
9552  if (second != NULL_RTX)
9553    {
9554      i = emit_jump_insn (gen_rtx_SET
9555			  (VOIDmode, pc_rtx,
9556			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9557						 target2)));
9558      if (second_probability >= 0)
9559	REG_NOTES (i)
9560	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
9561			       GEN_INT (second_probability),
9562			       REG_NOTES (i));
9563    }
9564  if (label != NULL_RTX)
9565    emit_label (label);
9566}
9567
9568int
9569ix86_expand_setcc (enum rtx_code code, rtx dest)
9570{
9571  rtx ret, tmp, tmpreg, equiv;
9572  rtx second_test, bypass_test;
9573
9574  if (GET_MODE (ix86_compare_op0) == DImode
9575      && !TARGET_64BIT)
9576    return 0; /* FAIL */
9577
9578  if (GET_MODE (dest) != QImode)
9579    abort ();
9580
9581  ret = ix86_expand_compare (code, &second_test, &bypass_test);
9582  PUT_MODE (ret, QImode);
9583
9584  tmp = dest;
9585  tmpreg = dest;
9586
9587  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9588  if (bypass_test || second_test)
9589    {
9590      rtx test = second_test;
9591      int bypass = 0;
9592      rtx tmp2 = gen_reg_rtx (QImode);
9593      if (bypass_test)
9594	{
9595	  if (second_test)
9596	    abort ();
9597	  test = bypass_test;
9598	  bypass = 1;
9599	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9600	}
9601      PUT_MODE (test, QImode);
9602      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9603
9604      if (bypass)
9605	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9606      else
9607	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9608    }
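  /* (E.g. an IEEE FP "a == b" arrives here as first_code UNEQ with
     bypass_code UNORDERED; the code above then ANDs setcc (UNEQ) with the
     reversed bypass setcc (ORDERED), so NaN operands yield false.)  */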
9609
9610  /* Attach a REG_EQUAL note describing the comparison result.  */
9611  equiv = simplify_gen_relational (code, QImode,
9612				   GET_MODE (ix86_compare_op0),
9613				   ix86_compare_op0, ix86_compare_op1);
9614  set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9615
9616  return 1; /* DONE */
9617}
9618
9619/* Expand comparison setting or clearing carry flag.  Return true when
9620   successful and set pop for the operation.  */
9621static bool
9622ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9623{
9624  enum machine_mode mode =
9625    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9626
9627  /* Do not handle DImode compares that go through a special path.  Also we
9628     cannot deal with FP compares yet; this could be added.  */
9629  if ((mode == DImode && !TARGET_64BIT))
9630    return false;
9631  if (FLOAT_MODE_P (mode))
9632    {
9633      rtx second_test = NULL, bypass_test = NULL;
9634      rtx compare_op, compare_seq;
9635
9636      /* Shortcut: the following common codes never translate into carry flag compares.  */
9637      if (code == EQ || code == NE || code == UNEQ || code == LTGT
9638	  || code == ORDERED || code == UNORDERED)
9639	return false;
9640
9641      /* These comparisons require the zero flag; swap the operands so they don't.  */
9642      if ((code == GT || code == UNLE || code == LE || code == UNGT)
9643	  && !TARGET_IEEE_FP)
9644	{
9645	  rtx tmp = op0;
9646	  op0 = op1;
9647	  op1 = tmp;
9648	  code = swap_condition (code);
9649	}
9650
9651      /* Try to expand the comparison and verify that we end up with a carry
9652	 flag based comparison.  This fails to be true only when we decide to
9653	 expand the comparison using arithmetic, which is not a common scenario.  */
9654      start_sequence ();
9655      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9656					   &second_test, &bypass_test);
9657      compare_seq = get_insns ();
9658      end_sequence ();
9659
9660      if (second_test || bypass_test)
9661	return false;
9662      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9663	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9664        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9665      else
9666	code = GET_CODE (compare_op);
9667      if (code != LTU && code != GEU)
9668	return false;
9669      emit_insn (compare_seq);
9670      *pop = compare_op;
9671      return true;
9672    }
9673  if (!INTEGRAL_MODE_P (mode))
9674    return false;
9675  switch (code)
9676    {
9677    case LTU:
9678    case GEU:
9679      break;
9680
9681    /* Convert a==0 into (unsigned)a<1.  */
9682    case EQ:
9683    case NE:
9684      if (op1 != const0_rtx)
9685	return false;
9686      op1 = const1_rtx;
9687      code = (code == EQ ? LTU : GEU);
9688      break;
9689
9690    /* Convert a>b into b<a or a>=b+1.  */
9691    case GTU:
9692    case LEU:
9693      if (GET_CODE (op1) == CONST_INT)
9694	{
9695	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9696	  /* Bail out on overflow.  We could still swap the operands, but that
9697	     would force loading the constant into a register.  */
9698	  if (op1 == const0_rtx
9699	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9700	    return false;
9701	  code = (code == GTU ? GEU : LTU);
9702	}
9703      else
9704	{
9705	  rtx tmp = op1;
9706	  op1 = op0;
9707	  op0 = tmp;
9708	  code = (code == GTU ? LTU : GEU);
9709	}
9710      break;
9711
9712    /* Convert a>=0 into (unsigned)a<0x80000000.  */
9713    case LT:
9714    case GE:
9715      if (mode == DImode || op1 != const0_rtx)
9716	return false;
9717      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9718      code = (code == LT ? GEU : LTU);
9719      break;
9720    case LE:
9721    case GT:
9722      if (mode == DImode || op1 != constm1_rtx)
9723	return false;
9724      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9725      code = (code == LE ? GEU : LTU);
9726      break;
9727
9728    default:
9729      return false;
9730    }
9731  /* Swapping operands may cause constant to appear as first operand.  */
9732  /* Swapping operands may cause a constant to appear as the first operand.  */
9733    {
9734      if (no_new_pseudos)
9735	return false;
9736      op0 = force_reg (mode, op0);
9737    }
9738  ix86_compare_op0 = op0;
9739  ix86_compare_op1 = op1;
9740  *pop = ix86_expand_compare (code, NULL, NULL);
9741  if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9742    abort ();
9743  return true;
9744}
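/* (For instance, "a == 0" is rewritten above as the unsigned "a < 1":
   "cmpl $1, a" sets the carry flag exactly when a is zero, and the
   resulting LTU test can feed an sbb or adc directly.)  */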
9745
9746int
9747ix86_expand_int_movcc (rtx operands[])
9748{
9749  enum rtx_code code = GET_CODE (operands[1]), compare_code;
9750  rtx compare_seq, compare_op;
9751  rtx second_test, bypass_test;
9752  enum machine_mode mode = GET_MODE (operands[0]);
9753  bool sign_bit_compare_p = false;
9754
9755  start_sequence ();
9756  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9757  compare_seq = get_insns ();
9758  end_sequence ();
9759
9760  compare_code = GET_CODE (compare_op);
9761
9762  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9763      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9764    sign_bit_compare_p = true;
9765
9766  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9767     HImode insns, we'd be swallowed in word prefix ops.  */
9768
9769  if ((mode != HImode || TARGET_FAST_PREFIX)
9770      && (mode != DImode || TARGET_64BIT)
9771      && GET_CODE (operands[2]) == CONST_INT
9772      && GET_CODE (operands[3]) == CONST_INT)
9773    {
9774      rtx out = operands[0];
9775      HOST_WIDE_INT ct = INTVAL (operands[2]);
9776      HOST_WIDE_INT cf = INTVAL (operands[3]);
9777      HOST_WIDE_INT diff;
9778
9779      diff = ct - cf;
9780      /* Sign bit compares are better done using shifts than by using
9781	 sbb.  */
9782      if (sign_bit_compare_p
9783	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9784					     ix86_compare_op1, &compare_op))
9785	{
9786	  /* Detect overlap between destination and compare sources.  */
9787	  rtx tmp = out;
9788
9789          if (!sign_bit_compare_p)
9790	    {
9791	      bool fpcmp = false;
9792
9793	      compare_code = GET_CODE (compare_op);
9794
9795	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9796		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9797		{
9798		  fpcmp = true;
9799		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
9800		}
9801
9802	      /* To simplify the rest of the code, restrict to the GEU case.  */
9803	      if (compare_code == LTU)
9804		{
9805		  HOST_WIDE_INT tmp = ct;
9806		  ct = cf;
9807		  cf = tmp;
9808		  compare_code = reverse_condition (compare_code);
9809		  code = reverse_condition (code);
9810		}
9811	      else
9812		{
9813		  if (fpcmp)
9814		    PUT_CODE (compare_op,
9815			      reverse_condition_maybe_unordered
9816			        (GET_CODE (compare_op)));
9817		  else
9818		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9819		}
9820	      diff = ct - cf;
9821
9822	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9823		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
9824		tmp = gen_reg_rtx (mode);
9825
9826	      if (mode == DImode)
9827		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9828	      else
9829		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9830	    }
9831	  else
9832	    {
9833	      if (code == GT || code == GE)
9834		code = reverse_condition (code);
9835	      else
9836		{
9837		  HOST_WIDE_INT tmp = ct;
9838		  ct = cf;
9839		  cf = tmp;
9840		  diff = ct - cf;
9841		}
9842	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9843				     ix86_compare_op1, VOIDmode, 0, -1);
9844	    }
9845
9846	  if (diff == 1)
9847	    {
9848	      /*
9849	       * cmpl op0,op1
9850	       * sbbl dest,dest
9851	       * [addl dest, ct]
9852	       *
9853	       * Size 5 - 8.
9854	       */
9855	      if (ct)
9856		tmp = expand_simple_binop (mode, PLUS,
9857					   tmp, GEN_INT (ct),
9858					   copy_rtx (tmp), 1, OPTAB_DIRECT);
9859	    }
9860	  else if (cf == -1)
9861	    {
9862	      /*
9863	       * cmpl op0,op1
9864	       * sbbl dest,dest
9865	       * orl $ct, dest
9866	       *
9867	       * Size 8.
9868	       */
9869	      tmp = expand_simple_binop (mode, IOR,
9870					 tmp, GEN_INT (ct),
9871					 copy_rtx (tmp), 1, OPTAB_DIRECT);
9872	    }
9873	  else if (diff == -1 && ct)
9874	    {
9875	      /*
9876	       * cmpl op0,op1
9877	       * sbbl dest,dest
9878	       * notl dest
9879	       * [addl dest, cf]
9880	       *
9881	       * Size 8 - 11.
9882	       */
9883	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9884	      if (cf)
9885		tmp = expand_simple_binop (mode, PLUS,
9886					   copy_rtx (tmp), GEN_INT (cf),
9887					   copy_rtx (tmp), 1, OPTAB_DIRECT);
9888	    }
9889	  else
9890	    {
9891	      /*
9892	       * cmpl op0,op1
9893	       * sbbl dest,dest
9894	       * [notl dest]
9895	       * andl cf - ct, dest
9896	       * [addl dest, ct]
9897	       *
9898	       * Size 8 - 11.
9899	       */
9900
9901	      if (cf == 0)
9902		{
9903		  cf = ct;
9904		  ct = 0;
9905		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9906		}
9907
9908	      tmp = expand_simple_binop (mode, AND,
9909					 copy_rtx (tmp),
9910					 gen_int_mode (cf - ct, mode),
9911					 copy_rtx (tmp), 1, OPTAB_DIRECT);
9912	      if (ct)
9913		tmp = expand_simple_binop (mode, PLUS,
9914					   copy_rtx (tmp), GEN_INT (ct),
9915					   copy_rtx (tmp), 1, OPTAB_DIRECT);
9916	    }
9917
9918	  if (!rtx_equal_p (tmp, out))
9919	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9920
9921	  return 1; /* DONE */
9922	}
9923
9924      if (diff < 0)
9925	{
9926	  HOST_WIDE_INT tmp;
9927	  tmp = ct, ct = cf, cf = tmp;
9928	  diff = -diff;
9929	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9930	    {
9931	      /* We may be reversing an unordered compare to a normal compare,
9932		 which is not valid in general (we may convert a non-trapping
9933		 condition to a trapping one); however, on i386 we currently
9934		 emit all comparisons unordered.  */
9935	      compare_code = reverse_condition_maybe_unordered (compare_code);
9936	      code = reverse_condition_maybe_unordered (code);
9937	    }
9938	  else
9939	    {
9940	      compare_code = reverse_condition (compare_code);
9941	      code = reverse_condition (code);
9942	    }
9943	}
9944
9945      compare_code = NIL;
9946      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9947	  && GET_CODE (ix86_compare_op1) == CONST_INT)
9948	{
9949	  if (ix86_compare_op1 == const0_rtx
9950	      && (code == LT || code == GE))
9951	    compare_code = code;
9952	  else if (ix86_compare_op1 == constm1_rtx)
9953	    {
9954	      if (code == LE)
9955		compare_code = LT;
9956	      else if (code == GT)
9957		compare_code = GE;
9958	    }
9959	}
9960
9961      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
9962      if (compare_code != NIL
9963	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9964	  && (cf == -1 || ct == -1))
9965	{
9966	  /* If the lea code below could be used, optimize only
9967	     if it results in a 2-insn sequence.  */
9968
9969	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9970		 || diff == 3 || diff == 5 || diff == 9)
9971	      || (compare_code == LT && ct == -1)
9972	      || (compare_code == GE && cf == -1))
9973	    {
9974	      /*
9975	       * notl op1	(if necessary)
9976	       * sarl $31, op1
9977	       * orl cf, op1
9978	       */
9979	      if (ct != -1)
9980		{
9981		  cf = ct;
9982		  ct = -1;
9983		  code = reverse_condition (code);
9984		}
9985
9986	      out = emit_store_flag (out, code, ix86_compare_op0,
9987				     ix86_compare_op1, VOIDmode, 0, -1);
9988
9989	      out = expand_simple_binop (mode, IOR,
9990					 out, GEN_INT (cf),
9991					 out, 1, OPTAB_DIRECT);
9992	      if (out != operands[0])
9993		emit_move_insn (operands[0], out);
9994
9995	      return 1; /* DONE */
9996	    }
9997	}
9998
10000      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10001	   || diff == 3 || diff == 5 || diff == 9)
10002	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10003	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10004	{
10005	  /*
10006	   * xorl dest,dest
10007	   * cmpl op1,op2
10008	   * setcc dest
10009	   * lea cf(dest*(ct-cf)),dest
10010	   *
10011	   * Size 14.
10012	   *
10013	   * This also catches the degenerate setcc-only case.
10014	   */
10015
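	  /* (For instance, ct = 5 and cf = 2 give diff = 3, ending in
	     "leal 2(%eax,%eax,2), %eax", i.e. dest = dest * 3 + 2.)  */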
10016	  rtx tmp;
10017	  int nops;
10018
10019	  out = emit_store_flag (out, code, ix86_compare_op0,
10020				 ix86_compare_op1, VOIDmode, 0, 1);
10021
10022	  nops = 0;
10023	  /* On x86_64 the lea instruction operates on Pmode, so we need
10024	     to do the arithmetic in the proper mode to match.  */
10025	  if (diff == 1)
10026	    tmp = copy_rtx (out);
10027	  else
10028	    {
10029	      rtx out1;
10030	      out1 = copy_rtx (out);
10031	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10032	      nops++;
10033	      if (diff & 1)
10034		{
10035		  tmp = gen_rtx_PLUS (mode, tmp, out1);
10036		  nops++;
10037		}
10038	    }
10039	  if (cf != 0)
10040	    {
10041	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10042	      nops++;
10043	    }
10044	  if (!rtx_equal_p (tmp, out))
10045	    {
10046	      if (nops == 1)
10047		out = force_operand (tmp, copy_rtx (out));
10048	      else
10049		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10050	    }
10051	  if (!rtx_equal_p (out, operands[0]))
10052	    emit_move_insn (operands[0], copy_rtx (out));
10053
10054	  return 1; /* DONE */
10055	}
10056
10057      /*
10058       * General case:			Jumpful:
10059       *   xorl dest,dest		cmpl op1, op2
10060       *   cmpl op1, op2		movl ct, dest
10061       *   setcc dest			jcc 1f
10062       *   decl dest			movl cf, dest
10063       *   andl (cf-ct),dest		1:
10064       *   addl ct,dest
10065       *
10066       * Size 20.			Size 14.
10067       *
10068       * This is reasonably steep, but branch mispredict costs are
10069       * high on modern cpus, so consider failing only if optimizing
10070       * for space.
10071       */
10072
10073      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10074	  && BRANCH_COST >= 2)
10075	{
10076	  if (cf == 0)
10077	    {
10078	      cf = ct;
10079	      ct = 0;
10080	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10081		/* We may be reversing an unordered compare to a normal
10082		   compare, which is not valid in general (we may convert a
10083		   non-trapping condition to a trapping one); however, on
10084		   i386 we currently emit all comparisons unordered.  */
10085		code = reverse_condition_maybe_unordered (code);
10086	      else
10087		{
10088		  code = reverse_condition (code);
10089		  if (compare_code != NIL)
10090		    compare_code = reverse_condition (compare_code);
10091		}
10092	    }
10093
10094	  if (compare_code != NIL)
10095	    {
10096	      /* notl op1	(if needed)
10097		 sarl $31, op1
10098		 andl (cf-ct), op1
10099		 addl ct, op1
10100
10101		 For x < 0 (resp. x <= -1) there will be no notl,
10102		 so if possible swap the constants to get rid of the
10103		 complement.
10104		 True/false will be -1/0 while code below (store flag
10105		 followed by decrement) is 0/-1, so the constants need
10106		 to be exchanged once more.  */
10107
10108	      if (compare_code == GE || !cf)
10109		{
10110		  code = reverse_condition (code);
10111		  compare_code = LT;
10112		}
10113	      else
10114		{
10115		  HOST_WIDE_INT tmp = cf;
10116		  cf = ct;
10117		  ct = tmp;
10118		}
10119
10120	      out = emit_store_flag (out, code, ix86_compare_op0,
10121				     ix86_compare_op1, VOIDmode, 0, -1);
10122	    }
10123	  else
10124	    {
10125	      out = emit_store_flag (out, code, ix86_compare_op0,
10126				     ix86_compare_op1, VOIDmode, 0, 1);
10127
10128	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10129					 copy_rtx (out), 1, OPTAB_DIRECT);
10130	    }
10131
10132	  out = expand_simple_binop (mode, AND, copy_rtx (out),
10133				     gen_int_mode (cf - ct, mode),
10134				     copy_rtx (out), 1, OPTAB_DIRECT);
10135	  if (ct)
10136	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10137				       copy_rtx (out), 1, OPTAB_DIRECT);
10138	  if (!rtx_equal_p (out, operands[0]))
10139	    emit_move_insn (operands[0], copy_rtx (out));
10140
10141	  return 1; /* DONE */
10142	}
10143    }
10144
10145  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10146    {
10147      /* Try a few things more with specific constants and a variable.  */
10148      /* Try a few more things with specific constants and a variable.  */
10149      optab op;
10150      rtx var, orig_out, out, tmp;
10151
10152      if (BRANCH_COST <= 2)
10153	return 0; /* FAIL */
10154
10155      /* If one of the two operands is an interesting constant, load 0 or -1
10156	 below and mask the variable in with a logical operation.  */
10157
10158      if (GET_CODE (operands[2]) == CONST_INT)
10159	{
10160	  var = operands[3];
10161	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10162	    operands[3] = constm1_rtx, op = and_optab;
10163	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10164	    operands[3] = const0_rtx, op = ior_optab;
10165	  else
10166	    return 0; /* FAIL */
10167	}
10168      else if (GET_CODE (operands[3]) == CONST_INT)
10169	{
10170	  var = operands[2];
10171	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10172	    operands[2] = constm1_rtx, op = and_optab;
10173	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10174	    operands[2] = const0_rtx, op = ior_optab;
10175	  else
10176	    return 0; /* FAIL */
10177	}
10178      else
10179        return 0; /* FAIL */
10180
10181      orig_out = operands[0];
10182      tmp = gen_reg_rtx (mode);
10183      operands[0] = tmp;
10184
10185      /* Recurse to get the constant loaded.  */
10186      if (ix86_expand_int_movcc (operands) == 0)
10187        return 0; /* FAIL */
10188
10189      /* Mask in the interesting variable.  */
10190      out = expand_binop (mode, op, var, tmp, orig_out, 0,
10191			  OPTAB_WIDEN);
10192      if (!rtx_equal_p (out, orig_out))
10193	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10194
10195      return 1; /* DONE */
10196    }
10197
10198  /*
10199   * For comparison with above,
10200   *
10201   * movl cf,dest
10202   * movl ct,tmp
10203   * cmpl op1,op2
10204   * cmovcc tmp,dest
10205   *
10206   * Size 15.
10207   */
10208
10209  if (! nonimmediate_operand (operands[2], mode))
10210    operands[2] = force_reg (mode, operands[2]);
10211  if (! nonimmediate_operand (operands[3], mode))
10212    operands[3] = force_reg (mode, operands[3]);
10213
10214  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10215    {
10216      rtx tmp = gen_reg_rtx (mode);
10217      emit_move_insn (tmp, operands[3]);
10218      operands[3] = tmp;
10219    }
10220  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10221    {
10222      rtx tmp = gen_reg_rtx (mode);
10223      emit_move_insn (tmp, operands[2]);
10224      operands[2] = tmp;
10225    }
10226
10227  if (! register_operand (operands[2], VOIDmode)
10228      && (mode == QImode
10229          || ! register_operand (operands[3], VOIDmode)))
10230    operands[2] = force_reg (mode, operands[2]);
10231
10232  if (mode == QImode
10233      && ! register_operand (operands[3], VOIDmode))
10234    operands[3] = force_reg (mode, operands[3]);
10235
10236  emit_insn (compare_seq);
10237  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10238			  gen_rtx_IF_THEN_ELSE (mode,
10239						compare_op, operands[2],
10240						operands[3])));
10241  if (bypass_test)
10242    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10243			    gen_rtx_IF_THEN_ELSE (mode,
10244				  bypass_test,
10245				  copy_rtx (operands[3]),
10246				  copy_rtx (operands[0]))));
10247  if (second_test)
10248    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10249			    gen_rtx_IF_THEN_ELSE (mode,
10250				  second_test,
10251				  copy_rtx (operands[2]),
10252				  copy_rtx (operands[0]))));
10253
10254  return 1; /* DONE */
10255}
10256
10257int
10258ix86_expand_fp_movcc (rtx operands[])
10259{
10260  enum rtx_code code;
10261  rtx tmp;
10262  rtx compare_op, second_test, bypass_test;
10263
10264  /* For SF/DFmode conditional moves based on comparisons
10265     in the same mode, we may want to use SSE min/max instructions.  */
10266  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10267       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10268      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10269      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
10270      && (!TARGET_IEEE_FP
10271	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10272      /* We may be called from the post-reload splitter.  */
10273      && (!REG_P (operands[0])
10274	  || SSE_REG_P (operands[0])
10275	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10276    {
10277      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10278      code = GET_CODE (operands[1]);
10279
10280      /* See if we have a (cross) match between the comparison operands
10281         and the conditional move operands.  */
10282      if (rtx_equal_p (operands[2], op1))
10283	{
10284	  rtx tmp = op0;
10285	  op0 = op1;
10286	  op1 = tmp;
10287	  code = reverse_condition_maybe_unordered (code);
10288	}
10289      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10290	{
10291	  /* Check for min operation.  */
10292	  if (code == LT || code == UNLE)
10293	    {
10294	       if (code == UNLE)
10295		{
10296		  rtx tmp = op0;
10297		  op0 = op1;
10298		  op1 = tmp;
10299		}
10300	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10301	       if (memory_operand (op0, VOIDmode))
10302		 op0 = force_reg (GET_MODE (operands[0]), op0);
10303	       if (GET_MODE (operands[0]) == SFmode)
10304		 emit_insn (gen_minsf3 (operands[0], op0, op1));
10305	       else
10306		 emit_insn (gen_mindf3 (operands[0], op0, op1));
10307	       return 1;
10308	    }
10309	  /* Check for max operation.  */
10310	  if (code == GT || code == UNGE)
10311	    {
10312	       if (code == UNGE)
10313		{
10314		  rtx tmp = op0;
10315		  op0 = op1;
10316		  op1 = tmp;
10317		}
10318	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10319	       if (memory_operand (op0, VOIDmode))
10320		 op0 = force_reg (GET_MODE (operands[0]), op0);
10321	       if (GET_MODE (operands[0]) == SFmode)
10322		 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10323	       else
10324		 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10325	       return 1;
10326	    }
10327	}
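      /* (I.e. with SSE math "x = a < b ? a : b" compiles to a single
	 minss (SFmode) or minsd (DFmode), and the GT/UNGE cases above to
	 maxss/maxsd.)  */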
10328      /* Arrange for the condition to be an sse_comparison_operator.  When
10329	 in non-IEEE mode, try to canonicalize the destination operand
10330	 to be first in the comparison - this helps reload to avoid extra
10331	 moves.  */
10332      if (!sse_comparison_operator (operands[1], VOIDmode)
10333	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10334	{
10335	  rtx tmp = ix86_compare_op0;
10336	  ix86_compare_op0 = ix86_compare_op1;
10337	  ix86_compare_op1 = tmp;
10338	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10339					VOIDmode, ix86_compare_op0,
10340					ix86_compare_op1);
10341	}
10342      /* Similarly, try to arrange for the result to be the first operand of
10343	 the conditional move.  We also don't support the NE comparison on
10344	 SSE, so try to avoid it.  */
10345      if ((rtx_equal_p (operands[0], operands[3])
10346	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10347	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10348	{
10349	  rtx tmp = operands[2];
10350	  operands[2] = operands[3];
10351	  operands[3] = tmp;
10352	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10353					  (GET_CODE (operands[1])),
10354					VOIDmode, ix86_compare_op0,
10355					ix86_compare_op1);
10356	}
10357      if (GET_MODE (operands[0]) == SFmode)
10358	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10359				    operands[2], operands[3],
10360				    ix86_compare_op0, ix86_compare_op1));
10361      else
10362	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10363				    operands[2], operands[3],
10364				    ix86_compare_op0, ix86_compare_op1));
10365      return 1;
10366    }
10367
10368  /* The floating point conditional move instructions don't directly
10369     support conditions resulting from a signed integer comparison.  */
10370
10371  code = GET_CODE (operands[1]);
10372  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10373
10377  if (!fcmov_comparison_operator (compare_op, VOIDmode))
10378    {
10379      if (second_test != NULL || bypass_test != NULL)
10380	abort ();
10381      tmp = gen_reg_rtx (QImode);
10382      ix86_expand_setcc (code, tmp);
10383      code = NE;
10384      ix86_compare_op0 = tmp;
10385      ix86_compare_op1 = const0_rtx;
10386      compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
10387    }
10388  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10389    {
10390      tmp = gen_reg_rtx (GET_MODE (operands[0]));
10391      emit_move_insn (tmp, operands[3]);
10392      operands[3] = tmp;
10393    }
10394  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10395    {
10396      tmp = gen_reg_rtx (GET_MODE (operands[0]));
10397      emit_move_insn (tmp, operands[2]);
10398      operands[2] = tmp;
10399    }
10400
10401  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10402			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10403				compare_op,
10404				operands[2],
10405				operands[3])));
10406  if (bypass_test)
10407    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10408			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10409				  bypass_test,
10410				  operands[3],
10411				  operands[0])));
10412  if (second_test)
10413    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10414			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10415				  second_test,
10416				  operands[2],
10417				  operands[0])));
10418
10419  return 1;
10420}
10421
10422/* Expand conditional increment or decrement using adc/sbb instructions.
10423   The default case using setcc followed by the conditional move can be
10424   done by generic code.  */
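/* (For example, an unsigned "x = x + (a < b)" can come out as

	cmpl	b, a
	adcl	$0, x

   a sketch; the sbb forms cover the decrement and reversed-carry cases.)  */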
10425int
10426ix86_expand_int_addcc (rtx operands[])
10427{
10428  enum rtx_code code = GET_CODE (operands[1]);
10429  rtx compare_op;
10430  rtx val = const0_rtx;
10431  bool fpcmp = false;
10432  enum machine_mode mode = GET_MODE (operands[0]);
10433
10434  if (operands[3] != const1_rtx
10435      && operands[3] != constm1_rtx)
10436    return 0;
10437  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10438				       ix86_compare_op1, &compare_op))
10439     return 0;
10440  code = GET_CODE (compare_op);
10441
10442  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10443      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10444    {
10445      fpcmp = true;
10446      code = ix86_fp_compare_code_to_integer (code);
10447    }
10448
10449  if (code != LTU)
10450    {
10451      val = constm1_rtx;
10452      if (fpcmp)
10453	PUT_CODE (compare_op,
10454		  reverse_condition_maybe_unordered
10455		    (GET_CODE (compare_op)));
10456      else
10457	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10458    }
10459  PUT_MODE (compare_op, mode);
10460
10461  /* Construct either adc or sbb insn.  */
10462  if ((code == LTU) == (operands[3] == constm1_rtx))
10463    {
10464      switch (GET_MODE (operands[0]))
10465	{
10466	  case QImode:
10467            emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10468	    break;
10469	  case HImode:
10470            emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10471	    break;
10472	  case SImode:
10473            emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10474	    break;
10475	  case DImode:
10476            emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10477	    break;
10478	  default:
10479	    abort ();
10480	}
10481    }
10482  else
10483    {
10484      switch (GET_MODE (operands[0]))
10485	{
10486	  case QImode:
10487            emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10488	    break;
10489	  case HImode:
10490            emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10491	    break;
10492	  case SImode:
10493            emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10494	    break;
10495	  case DImode:
10496            emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10497	    break;
10498	  default:
10499	    abort ();
10500	}
10501    }
10502  return 1; /* DONE */
10503}
10504
10505
10506/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
10507   works for floating point parameters and nonoffsettable memories.
10508   For pushes, it returns just stack offsets; the values will be saved
10509   in the right order.  At most three parts are generated.  */
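/* For instance, a 32-bit target splits an XFmode value into three SImode
   parts, while a 64-bit target splits it into a DImode part and an
   SImode part.  */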
10510
10511static int
10512ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10513{
10514  int size;
10515
10516  if (!TARGET_64BIT)
10517    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10518  else
10519    size = (GET_MODE_SIZE (mode) + 4) / 8;
10520
10521  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10522    abort ();
10523  if (size < 2 || size > 3)
10524    abort ();
10525
10526  /* Optimize constant pool reference to immediates.  This is used by fp
10527     moves, that force all constants to memory to allow combining.  */
10528  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10529    {
10530      rtx tmp = maybe_get_pool_constant (operand);
10531      if (tmp)
10532	operand = tmp;
10533    }
10534
10535  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10536    {
10537      /* The only non-offsettable memories we handle are pushes.  */
10538      if (! push_operand (operand, VOIDmode))
10539	abort ();
10540
10541      operand = copy_rtx (operand);
10542      PUT_MODE (operand, Pmode);
10543      parts[0] = parts[1] = parts[2] = operand;
10544    }
10545  else if (!TARGET_64BIT)
10546    {
10547      if (mode == DImode)
10548	split_di (&operand, 1, &parts[0], &parts[1]);
10549      else
10550	{
10551	  if (REG_P (operand))
10552	    {
10553	      if (!reload_completed)
10554		abort ();
10555	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10556	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10557	      if (size == 3)
10558		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10559	    }
10560	  else if (offsettable_memref_p (operand))
10561	    {
10562	      operand = adjust_address (operand, SImode, 0);
10563	      parts[0] = operand;
10564	      parts[1] = adjust_address (operand, SImode, 4);
10565	      if (size == 3)
10566		parts[2] = adjust_address (operand, SImode, 8);
10567	    }
10568	  else if (GET_CODE (operand) == CONST_DOUBLE)
10569	    {
10570	      REAL_VALUE_TYPE r;
10571	      long l[4];
10572
10573	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10574	      switch (mode)
10575		{
10576		case XFmode:
10577		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10578		  parts[2] = gen_int_mode (l[2], SImode);
10579		  break;
10580		case DFmode:
10581		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10582		  break;
10583		default:
10584		  abort ();
10585		}
10586	      parts[1] = gen_int_mode (l[1], SImode);
10587	      parts[0] = gen_int_mode (l[0], SImode);
10588	    }
10589	  else
10590	    abort ();
10591	}
10592    }
10593  else
10594    {
10595      if (mode == TImode)
10596	split_ti (&operand, 1, &parts[0], &parts[1]);
10597      if (mode == XFmode || mode == TFmode)
10598	{
10599	  enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
10600	  if (REG_P (operand))
10601	    {
10602	      if (!reload_completed)
10603		abort ();
10604	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10605	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10606	    }
10607	  else if (offsettable_memref_p (operand))
10608	    {
10609	      operand = adjust_address (operand, DImode, 0);
10610	      parts[0] = operand;
10611	      parts[1] = adjust_address (operand, upper_mode, 8);
10612	    }
10613	  else if (GET_CODE (operand) == CONST_DOUBLE)
10614	    {
10615	      REAL_VALUE_TYPE r;
10616	      long l[3];
10617
10618	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10619	      real_to_target (l, &r, mode);
10620	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
10621	      if (HOST_BITS_PER_WIDE_INT >= 64)
10622	        parts[0]
10623		  = gen_int_mode
10624		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10625		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10626		       DImode);
10627	      else
10628	        parts[0] = immed_double_const (l[0], l[1], DImode);
10629	      if (upper_mode == SImode)
10630	        parts[1] = gen_int_mode (l[2], SImode);
10631	      else if (HOST_BITS_PER_WIDE_INT >= 64)
10632	        parts[1]
10633		  = gen_int_mode
10634		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10635		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10636		       DImode);
10637	      else
10638	        parts[1] = immed_double_const (l[2], l[3], DImode);
10639	    }
10640	  else
10641	    abort ();
10642	}
10643    }
10644
10645  return size;
10646}
10647
10648/* Emit insns to perform a move or push of DI, DF, and XF values.
10649   Operands 2-4 contain the input values in the correct order;
10650   operands 5-7 contain the output values.  */
10652
10653void
10654ix86_split_long_move (rtx operands[])
10655{
10656  rtx part[2][3];
10657  int nparts;
10658  int push = 0;
10659  int collisions = 0;
10660  enum machine_mode mode = GET_MODE (operands[0]);
10661
10662  /* The DFmode expanders may ask us to move a double.
10663     For a 64-bit target this is a single move.  By hiding that fact
10664     here we simplify the i386.md splitters.  */
10665  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10666    {
10667      /* Optimize constant pool references into immediates.  This is used by
10668	 fp moves, which force all constants to memory to allow combining.  */
10669
10670      if (GET_CODE (operands[1]) == MEM
10671	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10672	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10673	operands[1] = get_pool_constant (XEXP (operands[1], 0));
10674      if (push_operand (operands[0], VOIDmode))
10675	{
10676	  operands[0] = copy_rtx (operands[0]);
10677	  PUT_MODE (operands[0], Pmode);
10678	}
10679      else
10680        operands[0] = gen_lowpart (DImode, operands[0]);
10681      operands[1] = gen_lowpart (DImode, operands[1]);
10682      emit_move_insn (operands[0], operands[1]);
10683      return;
10684    }
10685
10686  /* The only non-offsettable memory we handle is push.  */
10687  if (push_operand (operands[0], VOIDmode))
10688    push = 1;
10689  else if (GET_CODE (operands[0]) == MEM
10690	   && ! offsettable_memref_p (operands[0]))
10691    abort ();
10692
10693  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10694  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10695
10696  /* When emitting a push, take care of source operands on the stack.  */
10697  if (push && GET_CODE (operands[1]) == MEM
10698      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10699    {
10700      if (nparts == 3)
10701	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10702				     XEXP (part[1][2], 0));
10703      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10704				   XEXP (part[1][1], 0));
10705    }
10706
10707  /* We need to do the copy in the right order in case an address register
10708     of the source overlaps the destination.  */
10709  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10710    {
10711      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10712	collisions++;
10713      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10714	collisions++;
10715      if (nparts == 3
10716	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10717	collisions++;
10718
10719      /* A collision in the middle part can be handled by reordering.  */
10720      if (collisions == 1 && nparts == 3
10721	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10722	{
10723	  rtx tmp;
10724	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10725	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10726	}
10727
10728      /* If there are more collisions, we can't handle them by reordering.
10729	 Do an lea to the last part and use only one colliding move.  */
10730      else if (collisions > 1)
10731	{
10732	  rtx base;
10733
10734	  collisions = 1;
10735
10736	  base = part[0][nparts - 1];
10737
10738	  /* Handle the case when the last part isn't valid for lea.
10739	     This happens in 64-bit mode when storing the 12-byte XFmode.  */
10740	  if (GET_MODE (base) != Pmode)
10741	    base = gen_rtx_REG (Pmode, REGNO (base));
10742
10743	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10744	  part[1][0] = replace_equiv_address (part[1][0], base);
10745	  part[1][1] = replace_equiv_address (part[1][1],
10746				      plus_constant (base, UNITS_PER_WORD));
10747	  if (nparts == 3)
10748	    part[1][2] = replace_equiv_address (part[1][2],
10749				      plus_constant (base, 8));
10750	}
10751    }
10752
10753  if (push)
10754    {
10755      if (!TARGET_64BIT)
10756	{
10757	  if (nparts == 3)
10758	    {
10759	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10760                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10761	      emit_move_insn (part[0][2], part[1][2]);
10762	    }
10763	}
10764      else
10765	{
10766	  /* In 64-bit mode we don't have a 32-bit push available.  If this is
10767	     a register, that is OK - we just use the larger counterpart.  We
10768	     also retype memories - these come from an attempt to avoid a REX
10769	     prefix when moving the second half of a TFmode value.  */
10770	  if (GET_MODE (part[1][1]) == SImode)
10771	    {
10772	      if (GET_CODE (part[1][1]) == MEM)
10773		part[1][1] = adjust_address (part[1][1], DImode, 0);
10774	      else if (REG_P (part[1][1]))
10775		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10776	      else
10777		abort ();
10778	      if (GET_MODE (part[1][0]) == SImode)
10779		part[1][0] = part[1][1];
10780	    }
10781	}
10782      emit_move_insn (part[0][1], part[1][1]);
10783      emit_move_insn (part[0][0], part[1][0]);
10784      return;
10785    }
10786
10787  /* Choose an order that does not overwrite the source before it is copied.  */
10788  if ((REG_P (part[0][0])
10789       && REG_P (part[1][1])
10790       && (REGNO (part[0][0]) == REGNO (part[1][1])
10791	   || (nparts == 3
10792	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
10793      || (collisions > 0
10794	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10795    {
10796      if (nparts == 3)
10797	{
10798	  operands[2] = part[0][2];
10799	  operands[3] = part[0][1];
10800	  operands[4] = part[0][0];
10801	  operands[5] = part[1][2];
10802	  operands[6] = part[1][1];
10803	  operands[7] = part[1][0];
10804	}
10805      else
10806	{
10807	  operands[2] = part[0][1];
10808	  operands[3] = part[0][0];
10809	  operands[5] = part[1][1];
10810	  operands[6] = part[1][0];
10811	}
10812    }
10813  else
10814    {
10815      if (nparts == 3)
10816	{
10817	  operands[2] = part[0][0];
10818	  operands[3] = part[0][1];
10819	  operands[4] = part[0][2];
10820	  operands[5] = part[1][0];
10821	  operands[6] = part[1][1];
10822	  operands[7] = part[1][2];
10823	}
10824      else
10825	{
10826	  operands[2] = part[0][0];
10827	  operands[3] = part[0][1];
10828	  operands[5] = part[1][0];
10829	  operands[6] = part[1][1];
10830	}
10831    }
10832  emit_move_insn (operands[2], operands[5]);
10833  emit_move_insn (operands[3], operands[6]);
10834  if (nparts == 3)
10835    emit_move_insn (operands[4], operands[7]);
10836
10837  return;
10838}
10839
10840void
10841ix86_split_ashldi (rtx *operands, rtx scratch)
10842{
10843  rtx low[2], high[2];
10844  int count;
10845
10846  if (GET_CODE (operands[2]) == CONST_INT)
10847    {
10848      split_di (operands, 2, low, high);
10849      count = INTVAL (operands[2]) & 63;
10850
10851      if (count >= 32)
10852	{
10853	  emit_move_insn (high[0], low[1]);
10854	  emit_move_insn (low[0], const0_rtx);
10855
10856	  if (count > 32)
10857	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10858	}
10859      else
10860	{
10861	  if (!rtx_equal_p (operands[0], operands[1]))
10862	    emit_move_insn (operands[0], operands[1]);
10863	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10864	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10865	}
10866    }
10867  else
10868    {
10869      if (!rtx_equal_p (operands[0], operands[1]))
10870	emit_move_insn (operands[0], operands[1]);
10871
10872      split_di (operands, 1, low, high);
10873
10874      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10875      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10876
10877      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10878	{
10879	  if (! no_new_pseudos)
10880	    scratch = force_reg (SImode, const0_rtx);
10881	  else
10882	    emit_move_insn (scratch, const0_rtx);
10883
10884	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10885					  scratch));
10886	}
10887      else
10888	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10889    }
10890}
10891
10892void
10893ix86_split_ashrdi (rtx *operands, rtx scratch)
10894{
10895  rtx low[2], high[2];
10896  int count;
10897
10898  if (GET_CODE (operands[2]) == CONST_INT)
10899    {
10900      split_di (operands, 2, low, high);
10901      count = INTVAL (operands[2]) & 63;
10902
10903      if (count >= 32)
10904	{
10905	  emit_move_insn (low[0], high[1]);
10906
10907	  if (! reload_completed)
10908	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10909	  else
10910	    {
10911	      emit_move_insn (high[0], low[0]);
10912	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10913	    }
10914
10915	  if (count > 32)
10916	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10917	}
10918      else
10919	{
10920	  if (!rtx_equal_p (operands[0], operands[1]))
10921	    emit_move_insn (operands[0], operands[1]);
10922	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10923	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10924	}
10925    }
10926  else
10927    {
10928      if (!rtx_equal_p (operands[0], operands[1]))
10929	emit_move_insn (operands[0], operands[1]);
10930
10931      split_di (operands, 1, low, high);
10932
10933      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10934      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10935
10936      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10937	{
10938	  if (! no_new_pseudos)
10939	    scratch = gen_reg_rtx (SImode);
10940	  emit_move_insn (scratch, high[0]);
10941	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10942	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10943					  scratch));
10944	}
10945      else
10946	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10947    }
10948}
10949
10950void
10951ix86_split_lshrdi (rtx *operands, rtx scratch)
10952{
10953  rtx low[2], high[2];
10954  int count;
10955
10956  if (GET_CODE (operands[2]) == CONST_INT)
10957    {
10958      split_di (operands, 2, low, high);
10959      count = INTVAL (operands[2]) & 63;
10960
10961      if (count >= 32)
10962	{
10963	  emit_move_insn (low[0], high[1]);
10964	  emit_move_insn (high[0], const0_rtx);
10965
10966	  if (count > 32)
10967	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10968	}
10969      else
10970	{
10971	  if (!rtx_equal_p (operands[0], operands[1]))
10972	    emit_move_insn (operands[0], operands[1]);
10973	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10974	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10975	}
10976    }
10977  else
10978    {
10979      if (!rtx_equal_p (operands[0], operands[1]))
10980	emit_move_insn (operands[0], operands[1]);
10981
10982      split_di (operands, 1, low, high);
10983
10984      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10985      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10986
10987      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
10988      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10989	{
10990	  if (! no_new_pseudos)
10991	    scratch = force_reg (SImode, const0_rtx);
10992	  else
10993	    emit_move_insn (scratch, const0_rtx);
10994
10995	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10996					  scratch));
10997	}
10998      else
10999	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11000    }
11001}
11002
11003/* Helper function for the string operations below.  Test whether the VALUE
11004   bits of VARIABLE are clear; if so, jump to the returned label.  */
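/* A typical use emits the unaligned fixup between the test and the label:
       label = ix86_expand_aligntest (destreg, 1);
       ...copy or clear a single byte...
       emit_label (label);
   so the fixup runs only when the low bit of the address is set.  */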
11005static rtx
11006ix86_expand_aligntest (rtx variable, int value)
11007{
11008  rtx label = gen_label_rtx ();
11009  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11010  if (GET_MODE (variable) == DImode)
11011    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11012  else
11013    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11014  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11015			   1, label);
11016  return label;
11017}
11018
11019/* Decrease COUNTREG by VALUE.  */
11020static void
11021ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11022{
11023  if (GET_MODE (countreg) == DImode)
11024    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11025  else
11026    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11027}
11028
11029/* Zero extend a possibly SImode EXP to a Pmode register.  */
11030rtx
11031ix86_zero_extend_to_Pmode (rtx exp)
11032{
11033  rtx r;
11034  if (GET_MODE (exp) == VOIDmode)
11035    return force_reg (Pmode, exp);
11036  if (GET_MODE (exp) == Pmode)
11037    return copy_to_mode_reg (Pmode, exp);
11038  r = gen_reg_rtx (Pmode);
11039  emit_insn (gen_zero_extendsidi2 (r, exp));
11040  return r;
11041}
11042
11043/* Expand string move (memcpy) operation.  Use i386 string operations when
11044   profitable.  ix86_expand_clrstr contains similar code.  */
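/* Roughly, the expander below picks between three strategies: a bare
   rep ; movsb when optimizing for size, rep ; movsl plus a few trailing
   moves for constant counts that are small or suitably aligned, and a
   generic sequence that aligns the destination first.  */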
11045int
11046ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11047{
11048  rtx srcreg, destreg, countreg, srcexp, destexp;
11049  enum machine_mode counter_mode;
11050  HOST_WIDE_INT align = 0;
11051  unsigned HOST_WIDE_INT count = 0;
11052
11053  if (GET_CODE (align_exp) == CONST_INT)
11054    align = INTVAL (align_exp);
11055
11056  /* Can't use any of this if the user has appropriated esi or edi.  */
11057  if (global_regs[4] || global_regs[5])
11058    return 0;
11059
11060  /* This simple hack avoids all of the alignment code below and simplifies it.  */
11061  if (!TARGET_ALIGN_STRINGOPS)
11062    align = 64;
11063
11064  if (GET_CODE (count_exp) == CONST_INT)
11065    {
11066      count = INTVAL (count_exp);
11067      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11068	return 0;
11069    }
11070
11071  /* Figure out the proper mode for the counter.  For 32-bit targets it is
11072     always SImode; for 64-bit targets use SImode when possible, otherwise
11073     DImode.  COUNT holds the byte count when known at compile time.  */
11074  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11075      || x86_64_zero_extended_value (count_exp))
11076    counter_mode = SImode;
11077  else
11078    counter_mode = DImode;
11079
11080  if (counter_mode != SImode && counter_mode != DImode)
11081    abort ();
11082
11083  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11084  if (destreg != XEXP (dst, 0))
11085    dst = replace_equiv_address_nv (dst, destreg);
11086  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11087  if (srcreg != XEXP (src, 0))
11088    src = replace_equiv_address_nv (src, srcreg);
11089
11090  /* When optimizing for size, emit a simple rep ; movsb instruction for
11091     counts not divisible by 4.  */
11092
11093  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11094    {
11095      emit_insn (gen_cld ());
11096      countreg = ix86_zero_extend_to_Pmode (count_exp);
11097      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11098      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11099      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11100			      destexp, srcexp));
11101    }
11102
11103  /* For constant aligned (or small unaligned) copies use rep movsl
11104     followed by code copying the rest.  For PentiumPro ensure 8-byte
11105     alignment to allow rep movsl acceleration.  */
11106
11107  else if (count != 0
11108	   && (align >= 8
11109	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11110	       || optimize_size || count < (unsigned int) 64))
11111    {
11112      unsigned HOST_WIDE_INT offset = 0;
11113      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11114      rtx srcmem, dstmem;
11115
11116      emit_insn (gen_cld ());
11117      if (count & ~(size - 1))
11118	{
11119	  countreg = copy_to_mode_reg (counter_mode,
11120				       GEN_INT ((count >> (size == 4 ? 2 : 3))
11121						& (TARGET_64BIT ? -1 : 0x3fffffff)));
11122	  countreg = ix86_zero_extend_to_Pmode (countreg);
11123
11124	  destexp = gen_rtx_ASHIFT (Pmode, countreg,
11125				    GEN_INT (size == 4 ? 2 : 3));
11126	  srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11127	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11128
11129	  emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11130				  countreg, destexp, srcexp));
11131	  offset = count & ~(size - 1);
11132	}
11133      if (size == 8 && (count & 0x04))
11134	{
11135	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11136						 offset);
11137	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11138						 offset);
11139	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11140	  offset += 4;
11141	}
11142      if (count & 0x02)
11143	{
11144	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11145						 offset);
11146	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11147						 offset);
11148	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11149	  offset += 2;
11150	}
11151      if (count & 0x01)
11152	{
11153	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11154						 offset);
11155	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11156						 offset);
11157	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11158	}
11159    }
11160  /* The generic code based on the glibc implementation:
11161     - align destination to 4 bytes (8-byte alignment is used for PentiumPro,
11162     allowing accelerated copying there)
11163     - copy the data using rep movsl
11164     - copy the rest.  */
11165  else
11166    {
11167      rtx countreg2;
11168      rtx label = NULL;
11169      rtx srcmem, dstmem;
11170      int desired_alignment = (TARGET_PENTIUMPRO
11171			       && (count == 0 || count >= (unsigned int) 260)
11172			       ? 8 : UNITS_PER_WORD);
11173      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
11174      dst = change_address (dst, BLKmode, destreg);
11175      src = change_address (src, BLKmode, srcreg);
11176
11177      /* In case we don't know anything about the alignment, default to the
11178         library version, since it is usually equally fast and results in
11179         shorter code.
11180
11181	 Also emit the call when we know that the count is large and the call
11182	 overhead will not be important.  */
11183      if (!TARGET_INLINE_ALL_STRINGOPS
11184	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11185	return 0;
11186
11187      if (TARGET_SINGLE_STRINGOP)
11188	emit_insn (gen_cld ());
11189
11190      countreg2 = gen_reg_rtx (Pmode);
11191      countreg = copy_to_mode_reg (counter_mode, count_exp);
11192
11193      /* We don't use loops to align the destination or to copy parts
11194         smaller than 4 bytes, because gcc is able to optimize such code
11195         better (in case the destination or the count really is aligned,
11196         gcc is often able to predict the branches) and it is friendlier
11197         to the hardware branch predictors.
11198
11199         Using loops is beneficial for the generic case, because we can
11200         handle small counts using them.  Many CPUs (such as Athlon)
11201         have large REP prefix setup costs.
11202
11203         This is quite costly.  Maybe we can revisit this decision later
11204         or add some customizability to this code.  */
11205
11206      if (count == 0 && align < desired_alignment)
11207	{
11208	  label = gen_label_rtx ();
11209	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11210				   LEU, 0, counter_mode, 1, label);
11211	}
11212      if (align <= 1)
11213	{
11214	  rtx label = ix86_expand_aligntest (destreg, 1);
11215	  srcmem = change_address (src, QImode, srcreg);
11216	  dstmem = change_address (dst, QImode, destreg);
11217	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11218	  ix86_adjust_counter (countreg, 1);
11219	  emit_label (label);
11220	  LABEL_NUSES (label) = 1;
11221	}
11222      if (align <= 2)
11223	{
11224	  rtx label = ix86_expand_aligntest (destreg, 2);
11225	  srcmem = change_address (src, HImode, srcreg);
11226	  dstmem = change_address (dst, HImode, destreg);
11227	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11228	  ix86_adjust_counter (countreg, 2);
11229	  emit_label (label);
11230	  LABEL_NUSES (label) = 1;
11231	}
11232      if (align <= 4 && desired_alignment > 4)
11233	{
11234	  rtx label = ix86_expand_aligntest (destreg, 4);
11235	  srcmem = change_address (src, SImode, srcreg);
11236	  dstmem = change_address (dst, SImode, destreg);
11237	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11238	  ix86_adjust_counter (countreg, 4);
11239	  emit_label (label);
11240	  LABEL_NUSES (label) = 1;
11241	}
11242
11243      if (label && desired_alignment > 4 && !TARGET_64BIT)
11244	{
11245	  emit_label (label);
11246	  LABEL_NUSES (label) = 1;
11247	  label = NULL_RTX;
11248	}
11249      if (!TARGET_SINGLE_STRINGOP)
11250	emit_insn (gen_cld ());
11251      if (TARGET_64BIT)
11252	{
11253	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11254				  GEN_INT (3)));
11255	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11256	}
11257      else
11258	{
11259	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11260	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11261	}
11262      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11263      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11264      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11265			      countreg2, destexp, srcexp));
11266
11267      if (label)
11268	{
11269	  emit_label (label);
11270	  LABEL_NUSES (label) = 1;
11271	}
11272      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11273	{
11274	  srcmem = change_address (src, SImode, srcreg);
11275	  dstmem = change_address (dst, SImode, destreg);
11276	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11277	}
11278      if ((align <= 4 || count == 0) && TARGET_64BIT)
11279	{
11280	  rtx label = ix86_expand_aligntest (countreg, 4);
11281	  srcmem = change_address (src, SImode, srcreg);
11282	  dstmem = change_address (dst, SImode, destreg);
11283	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11284	  emit_label (label);
11285	  LABEL_NUSES (label) = 1;
11286	}
11287      if (align > 2 && count != 0 && (count & 2))
11288	{
11289	  srcmem = change_address (src, HImode, srcreg);
11290	  dstmem = change_address (dst, HImode, destreg);
11291	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11292	}
11293      if (align <= 2 || count == 0)
11294	{
11295	  rtx label = ix86_expand_aligntest (countreg, 2);
11296	  srcmem = change_address (src, HImode, srcreg);
11297	  dstmem = change_address (dst, HImode, destreg);
11298	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11299	  emit_label (label);
11300	  LABEL_NUSES (label) = 1;
11301	}
11302      if (align > 1 && count != 0 && (count & 1))
11303	{
11304	  srcmem = change_address (src, QImode, srcreg);
11305	  dstmem = change_address (dst, QImode, destreg);
11306	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11307	}
11308      if (align <= 1 || count == 0)
11309	{
11310	  rtx label = ix86_expand_aligntest (countreg, 1);
11311	  srcmem = change_address (src, QImode, srcreg);
11312	  dstmem = change_address (dst, QImode, destreg);
11313	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11314	  emit_label (label);
11315	  LABEL_NUSES (label) = 1;
11316	}
11317    }
11318
11319  return 1;
11320}
11321
11322/* Expand string clear operation (bzero).  Use i386 string operations when
11323   profitable.  ix86_expand_movstr contains similar code.  */
11324int
11325ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11326{
11327  rtx destreg, zeroreg, countreg, destexp;
11328  enum machine_mode counter_mode;
11329  HOST_WIDE_INT align = 0;
11330  unsigned HOST_WIDE_INT count = 0;
11331
11332  if (GET_CODE (align_exp) == CONST_INT)
11333    align = INTVAL (align_exp);
11334
11335  /* Can't use any of this if the user has appropriated edi.  */
11336  if (global_regs[5])
11337    return 0;
11338
11339  /* This simple hack avoids all of the alignment code below and simplifies it.  */
11340  if (!TARGET_ALIGN_STRINGOPS)
11341    align = 32;
11342
11343  if (GET_CODE (count_exp) == CONST_INT)
11344    {
11345      count = INTVAL (count_exp);
11346      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11347	return 0;
11348    }
11349  /* Figure out the proper mode for the counter.  For 32-bit targets it is
11350     always SImode; for 64-bit targets use SImode when possible, otherwise
11351     DImode.  COUNT holds the byte count when known at compile time.  */
11352  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11353      || x86_64_zero_extended_value (count_exp))
11354    counter_mode = SImode;
11355  else
11356    counter_mode = DImode;
11357
11358  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11359  if (destreg != XEXP (dst, 0))
11360    dst = replace_equiv_address_nv (dst, destreg);
11361
11362  emit_insn (gen_cld ());
11363
11364  /* When optimizing for size, emit a simple rep ; stosb instruction for
11365     counts not divisible by 4.  */
11366
11367  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11368    {
11369      countreg = ix86_zero_extend_to_Pmode (count_exp);
11370      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11371      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11372      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11373    }
11374  else if (count != 0
11375	   && (align >= 8
11376	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11377	       || optimize_size || count < (unsigned int) 64))
11378    {
11379      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11380      unsigned HOST_WIDE_INT offset = 0;
11381
11382      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11383      if (count & ~(size - 1))
11384	{
11385	  countreg = copy_to_mode_reg (counter_mode,
11386				       GEN_INT ((count >> (size == 4 ? 2 : 3))
11387						& (TARGET_64BIT ? -1 : 0x3fffffff)));
11388	  countreg = ix86_zero_extend_to_Pmode (countreg);
11389	  destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11390	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11391	  emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11392	  offset = count & ~(size - 1);
11393	}
11394      if (size == 8 && (count & 0x04))
11395	{
11396	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11397						  offset);
11398	  emit_insn (gen_strset (destreg, mem,
11399				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11400	  offset += 4;
11401	}
11402      if (count & 0x02)
11403	{
11404	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11405						  offset);
11406	  emit_insn (gen_strset (destreg, mem,
11407				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11408	  offset += 2;
11409	}
11410      if (count & 0x01)
11411	{
11412	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11413						  offset);
11414	  emit_insn (gen_strset (destreg, mem,
11415				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11416	}
11417    }
11418  else
11419    {
11420      rtx countreg2;
11421      rtx label = NULL;
11422      /* Compute desired alignment of the string operation.  */
11423      int desired_alignment = (TARGET_PENTIUMPRO
11424			       && (count == 0 || count >= (unsigned int) 260)
11425			       ? 8 : UNITS_PER_WORD);
11426
11427      /* In case we don't know anything about the alignment, default to the
11428         library version, since it is usually equally fast and results in
11429         shorter code.
11430
11431	 Also emit the call when we know that the count is large and the call
11432	 overhead will not be important.  */
11433      if (!TARGET_INLINE_ALL_STRINGOPS
11434	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11435	return 0;
11436
11437      if (TARGET_SINGLE_STRINGOP)
11438	emit_insn (gen_cld ());
11439
11440      countreg2 = gen_reg_rtx (Pmode);
11441      countreg = copy_to_mode_reg (counter_mode, count_exp);
11442      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11443      /* Get rid of MEM_OFFSET, it won't be accurate.  */
11444      dst = change_address (dst, BLKmode, destreg);
11445
11446      if (count == 0 && align < desired_alignment)
11447	{
11448	  label = gen_label_rtx ();
11449	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11450				   LEU, 0, counter_mode, 1, label);
11451	}
11452      if (align <= 1)
11453	{
11454	  rtx label = ix86_expand_aligntest (destreg, 1);
11455	  emit_insn (gen_strset (destreg, dst,
11456				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11457	  ix86_adjust_counter (countreg, 1);
11458	  emit_label (label);
11459	  LABEL_NUSES (label) = 1;
11460	}
11461      if (align <= 2)
11462	{
11463	  rtx label = ix86_expand_aligntest (destreg, 2);
11464	  emit_insn (gen_strset (destreg, dst,
11465				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11466	  ix86_adjust_counter (countreg, 2);
11467	  emit_label (label);
11468	  LABEL_NUSES (label) = 1;
11469	}
11470      if (align <= 4 && desired_alignment > 4)
11471	{
11472	  rtx label = ix86_expand_aligntest (destreg, 4);
11473	  emit_insn (gen_strset (destreg, dst,
11474				 (TARGET_64BIT
11475				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11476				  : zeroreg)));
11477	  ix86_adjust_counter (countreg, 4);
11478	  emit_label (label);
11479	  LABEL_NUSES (label) = 1;
11480	}
11481
11482      if (label && desired_alignment > 4 && !TARGET_64BIT)
11483	{
11484	  emit_label (label);
11485	  LABEL_NUSES (label) = 1;
11486	  label = NULL_RTX;
11487	}
11488
11489      if (!TARGET_SINGLE_STRINGOP)
11490	emit_insn (gen_cld ());
11491      if (TARGET_64BIT)
11492	{
11493	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11494				  GEN_INT (3)));
11495	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11496	}
11497      else
11498	{
11499	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11500	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11501	}
11502      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11503      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11504
11505      if (label)
11506	{
11507	  emit_label (label);
11508	  LABEL_NUSES (label) = 1;
11509	}
11510
11511      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11512	emit_insn (gen_strset (destreg, dst,
11513			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
11514      if (TARGET_64BIT && (align <= 4 || count == 0))
11515	{
11516	  rtx label = ix86_expand_aligntest (countreg, 4);
11517	  emit_insn (gen_strset (destreg, dst,
11518				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11519	  emit_label (label);
11520	  LABEL_NUSES (label) = 1;
11521	}
11522      if (align > 2 && count != 0 && (count & 2))
11523	emit_insn (gen_strset (destreg, dst,
11524			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
11525      if (align <= 2 || count == 0)
11526	{
11527	  rtx label = ix86_expand_aligntest (countreg, 2);
11528	  emit_insn (gen_strset (destreg, dst,
11529				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11530	  emit_label (label);
11531	  LABEL_NUSES (label) = 1;
11532	}
11533      if (align > 1 && count != 0 && (count & 1))
11534	emit_insn (gen_strset (destreg, dst,
11535			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
11536      if (align <= 1 || count == 0)
11537	{
11538	  rtx label = ix86_expand_aligntest (countreg, 1);
11539	  emit_insn (gen_strset (destreg, dst,
11540				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11541	  emit_label (label);
11542	  LABEL_NUSES (label) = 1;
11543	}
11544    }
11545  return 1;
11546}
11547
11548/* Expand strlen.  */
11549int
11550ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11551{
11552  rtx addr, scratch1, scratch2, scratch3, scratch4;
11553
11554  /* The generic case of the strlen expander is long.  Avoid expanding it
11555     unless TARGET_INLINE_ALL_STRINGOPS.  */
11556
11557  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11558      && !TARGET_INLINE_ALL_STRINGOPS
11559      && !optimize_size
11560      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11561    return 0;
11562
11563  addr = force_reg (Pmode, XEXP (src, 0));
11564  scratch1 = gen_reg_rtx (Pmode);
11565
11566  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11567      && !optimize_size)
11568    {
11569      /* Well it seems that some optimizer does not combine a call like
11570         foo(strlen(bar), strlen(bar));
11571         when the move and the subtraction are done here.  It does calculate
11572         the length just once when these instructions are done inside of
11573         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
11574         often used and I use one fewer register for the lifetime of
11575         output_strlen_unroll() this is better.  */
11576
11577      emit_move_insn (out, addr);
11578
11579      ix86_expand_strlensi_unroll_1 (out, src, align);
11580
11581      /* strlensi_unroll_1 returns the address of the zero at the end of
11582         the string, like memchr(), so compute the length by subtracting
11583         the start address.  */
11584      if (TARGET_64BIT)
11585	emit_insn (gen_subdi3 (out, out, addr));
11586      else
11587	emit_insn (gen_subsi3 (out, out, addr));
11588    }
11589  else
11590    {
11591      rtx unspec;
11592      scratch2 = gen_reg_rtx (Pmode);
11593      scratch3 = gen_reg_rtx (Pmode);
11594      scratch4 = force_reg (Pmode, constm1_rtx);
11595
11596      emit_move_insn (scratch3, addr);
11597      eoschar = force_reg (QImode, eoschar);
11598
11599      emit_insn (gen_cld ());
11600      src = replace_equiv_address_nv (src, scratch3);
11601
11602      /* If .md starts supporting :P, this can be done in .md.  */
11603      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11604						 scratch4), UNSPEC_SCAS);
11605      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11606      if (TARGET_64BIT)
11607	{
11608	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11609	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11610	}
11611      else
11612	{
11613	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11614	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11615	}
11616    }
11617  return 1;
11618}
11619
11620/* Expand the appropriate insns for doing strlen if not just doing
11621   repnz; scasb
11622
11623   out = result, initialized with the start address
11624   align_rtx = alignment of the address.
11625   scratch = scratch register, initialized with the start address when
11626	not aligned, otherwise undefined
11627
11628   This is just the body. It needs the initializations mentioned above and
11629   some address computing at the end.  These things are done in i386.md.  */
11630
11631static void
11632ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11633{
11634  int align;
11635  rtx tmp;
11636  rtx align_2_label = NULL_RTX;
11637  rtx align_3_label = NULL_RTX;
11638  rtx align_4_label = gen_label_rtx ();
11639  rtx end_0_label = gen_label_rtx ();
11640  rtx mem;
11641  rtx tmpreg = gen_reg_rtx (SImode);
11642  rtx scratch = gen_reg_rtx (SImode);
11643  rtx cmp;
11644
11645  align = 0;
11646  if (GET_CODE (align_rtx) == CONST_INT)
11647    align = INTVAL (align_rtx);
11648
11649  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
11650
11651  /* Is there a known alignment and is it less than 4?  */
11652  if (align < 4)
11653    {
11654      rtx scratch1 = gen_reg_rtx (Pmode);
11655      emit_move_insn (scratch1, out);
11656      /* Is there a known alignment and is it not 2? */
11657      if (align != 2)
11658	{
11659	  align_3_label = gen_label_rtx (); /* Label when address is 3 mod 4 */
11660	  align_2_label = gen_label_rtx (); /* Label when address is 2 mod 4 */
11661
11662	  /* Leave just the two lower bits.  */
11663	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11664				    NULL_RTX, 0, OPTAB_WIDEN);
11665
11666	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11667				   Pmode, 1, align_4_label);
11668	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11669				   Pmode, 1, align_2_label);
11670	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11671				   Pmode, 1, align_3_label);
11672	}
11673      else
11674        {
11675	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
11676	     check whether it is aligned to 4 bytes.  */
11677
11678	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11679				    NULL_RTX, 0, OPTAB_WIDEN);
11680
11681	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11682				   Pmode, 1, align_4_label);
11683        }
11684
11685      mem = change_address (src, QImode, out);
11686
11687      /* Now compare the bytes.  */
11688
11689      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
11690      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11691			       QImode, 1, end_0_label);
11692
11693      /* Increment the address.  */
11694      if (TARGET_64BIT)
11695	emit_insn (gen_adddi3 (out, out, const1_rtx));
11696      else
11697	emit_insn (gen_addsi3 (out, out, const1_rtx));
11698
11699      /* Not needed with an alignment of 2.  */
11700      if (align != 2)
11701	{
11702	  emit_label (align_2_label);
11703
11704	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11705				   end_0_label);
11706
11707	  if (TARGET_64BIT)
11708	    emit_insn (gen_adddi3 (out, out, const1_rtx));
11709	  else
11710	    emit_insn (gen_addsi3 (out, out, const1_rtx));
11711
11712	  emit_label (align_3_label);
11713	}
11714
11715      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11716			       end_0_label);
11717
11718      if (TARGET_64BIT)
11719	emit_insn (gen_adddi3 (out, out, const1_rtx));
11720      else
11721	emit_insn (gen_addsi3 (out, out, const1_rtx));
11722    }
11723
11724  /* Generate a loop to check 4 bytes at a time.  It is not a good idea
11725     to align this loop; that only enlarges the program and does not
11726     speed it up.  */
11727  emit_label (align_4_label);
11728
11729  mem = change_address (src, SImode, out);
11730  emit_move_insn (scratch, mem);
11731  if (TARGET_64BIT)
11732    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11733  else
11734    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11735
11736  /* This formula yields a nonzero result iff one of the bytes is zero.
11737     This saves three branches inside the loop and many cycles.  */
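  /* Concretely, the code below computes
	(scratch - 0x01010101) & ~scratch & 0x80808080.
     Subtracting 1 from every byte borrows into bit 7 exactly when the
     byte is zero (or when a borrow ripples in from a lower zero byte),
     and the ~scratch term masks out bytes whose top bit was set anyway;
     e.g. scratch = 0x41004141 yields 0x00800000, marking the zero byte.  */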
11738
11739  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11740  emit_insn (gen_one_cmplsi2 (scratch, scratch));
11741  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11742  emit_insn (gen_andsi3 (tmpreg, tmpreg,
11743			 gen_int_mode (0x80808080, SImode)));
11744  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11745			   align_4_label);
11746
11747  if (TARGET_CMOVE)
11748    {
11749       rtx reg = gen_reg_rtx (SImode);
11750       rtx reg2 = gen_reg_rtx (Pmode);
11751       emit_move_insn (reg, tmpreg);
11752       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11753
11754       /* If zero is not in the first two bytes, move two bytes forward.  */
11755       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11756       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11757       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11758       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11759			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
11760						     reg,
11761						     tmpreg)));
11762       /* Emit the lea manually to avoid clobbering the flags.  */
11763       emit_insn (gen_rtx_SET (SImode, reg2,
11764			       gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11765
11766       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11767       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11768       emit_insn (gen_rtx_SET (VOIDmode, out,
11769			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11770						     reg2,
11771						     out)));
11772
11773    }
11774  else
11775    {
11776       rtx end_2_label = gen_label_rtx ();
11777       /* Is zero in the first two bytes? */
11778
11779       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11780       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11781       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11782       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11783                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11784                            pc_rtx);
11785       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11786       JUMP_LABEL (tmp) = end_2_label;
11787
11788       /* Not in the first two.  Move two bytes forward.  */
11789       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11790       if (TARGET_64BIT)
11791	 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11792       else
11793	 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11794
11795       emit_label (end_2_label);
11796
11797    }
11798
11799  /* Avoid a branch when fixing up the final byte position.  */
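  /* The low byte of TMPREG now has 0x80 set iff the zero was the first
     byte of the remaining pair.  Adding the register to itself moves
     that bit into the carry flag, so the sbb below subtracts either 4
     or 3 from OUT, leaving it pointing exactly at the zero byte.  */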
11800  tmpreg = gen_lowpart (QImode, tmpreg);
11801  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11802  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11803  if (TARGET_64BIT)
11804    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11805  else
11806    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11807
11808  emit_label (end_0_label);
11809}
11810
11811void
11812ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11813		  rtx callarg2 ATTRIBUTE_UNUSED,
11814		  rtx pop, int sibcall)
11815{
11816  rtx use = NULL, call;
11817
11818  if (pop == const0_rtx)
11819    pop = NULL;
11820  if (TARGET_64BIT && pop)
11821    abort ();
11822
11823#if TARGET_MACHO
11824  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11825    fnaddr = machopic_indirect_call_target (fnaddr);
11826#else
11827  /* Static functions and indirect calls don't need the pic register.  */
11828  if (! TARGET_64BIT && flag_pic
11829      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11830      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11831    use_reg (&use, pic_offset_table_rtx);
11832
11833  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11834    {
11835      rtx al = gen_rtx_REG (QImode, 0);
11836      emit_move_insn (al, callarg2);
11837      use_reg (&use, al);
11838    }
11839#endif /* TARGET_MACHO */
11840
11841  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11842    {
11843      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11844      fnaddr = gen_rtx_MEM (QImode, fnaddr);
11845    }
11846  if (sibcall && TARGET_64BIT
11847      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11848    {
11849      rtx addr;
11850      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11851      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11852      emit_move_insn (fnaddr, addr);
11853      fnaddr = gen_rtx_MEM (QImode, fnaddr);
11854    }
11855
11856  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11857  if (retval)
11858    call = gen_rtx_SET (VOIDmode, retval, call);
11859  if (pop)
11860    {
11861      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11862      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11863      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11864    }
11865
11866  call = emit_call_insn (call);
11867  if (use)
11868    CALL_INSN_FUNCTION_USAGE (call) = use;
11869}
11870
11871
11872/* Clear stack slot assignments remembered from previous functions.
11873   This is called from INIT_EXPANDERS once before RTL is emitted for each
11874   function.  */
11875
11876static struct machine_function *
11877ix86_init_machine_status (void)
11878{
11879  struct machine_function *f;
11880
11881  f = ggc_alloc_cleared (sizeof (struct machine_function));
11882  f->use_fast_prologue_epilogue_nregs = -1;
11883
11884  return f;
11885}
11886
11887/* Return a MEM corresponding to a stack slot with mode MODE.
11888   Allocate a new slot if necessary.
11889
11890   The RTL for a function can have several slots available: N is
11891   which slot to use.  */
11892
11893rtx
11894assign_386_stack_local (enum machine_mode mode, int n)
11895{
11896  struct stack_local_entry *s;
11897
11898  if (n < 0 || n >= MAX_386_STACK_LOCALS)
11899    abort ();
11900
11901  for (s = ix86_stack_locals; s; s = s->next)
11902    if (s->mode == mode && s->n == n)
11903      return s->rtl;
11904
11905  s = (struct stack_local_entry *)
11906    ggc_alloc (sizeof (struct stack_local_entry));
11907  s->n = n;
11908  s->mode = mode;
11909  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11910
11911  s->next = ix86_stack_locals;
11912  ix86_stack_locals = s;
11913  return s->rtl;
11914}
11915
11916/* Construct the SYMBOL_REF for the tls_get_addr function.  */
11917
11918static GTY(()) rtx ix86_tls_symbol;
11919rtx
11920ix86_tls_get_addr (void)
11921{
11922
11923  if (!ix86_tls_symbol)
11924    {
11925      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11926					    (TARGET_GNU_TLS && !TARGET_64BIT)
11927					    ? "___tls_get_addr"
11928					    : "__tls_get_addr");
11929    }
11930
11931  return ix86_tls_symbol;
11932}
11933
11934/* Calculate the length of the memory address in the instruction
11935   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
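/* For example, 8(%ebp) needs only a one-byte displacement and a bare
   symbolic address needs a four-byte one, while indexed (or esp-based)
   forms pay one extra byte for the sib.  */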
11936
11937static int
11938memory_address_length (rtx addr)
11939{
11940  struct ix86_address parts;
11941  rtx base, index, disp;
11942  int len;
11943
11944  if (GET_CODE (addr) == PRE_DEC
11945      || GET_CODE (addr) == POST_INC
11946      || GET_CODE (addr) == PRE_MODIFY
11947      || GET_CODE (addr) == POST_MODIFY)
11948    return 0;
11949
11950  if (! ix86_decompose_address (addr, &parts))
11951    abort ();
11952
11953  base = parts.base;
11954  index = parts.index;
11955  disp = parts.disp;
11956  len = 0;
11957
11958  /* Rule of thumb:
11959       - esp as the base always wants an index,
11960       - ebp as the base always wants a displacement.  */
11961
11962  /* Register Indirect.  */
11963  if (base && !index && !disp)
11964    {
11965      /* esp (for its index) and ebp (for its displacement) need
11966	 the two-byte modrm form.  */
11967      if (addr == stack_pointer_rtx
11968	  || addr == arg_pointer_rtx
11969	  || addr == frame_pointer_rtx
11970	  || addr == hard_frame_pointer_rtx)
11971	len = 1;
11972    }
11973
11974  /* Direct Addressing.  */
11975  else if (disp && !base && !index)
11976    len = 4;
11977
11978  else
11979    {
11980      /* Find the length of the displacement constant.  */
11981      if (disp)
11982	{
11983	  if (GET_CODE (disp) == CONST_INT
11984	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11985	      && base)
11986	    len = 1;
11987	  else
11988	    len = 4;
11989	}
11990      /* ebp always wants a displacement.  */
11991      else if (base == hard_frame_pointer_rtx)
11992        len = 1;
11993
11994      /* An index requires the two-byte modrm form....  */
11995      if (index
11996	  /* ...like esp, which always wants an index.  */
11997	  || base == stack_pointer_rtx
11998	  || base == arg_pointer_rtx
11999	  || base == frame_pointer_rtx)
12000	len += 1;
12001    }
12002
12003  return len;
12004}
12005
12006/* Compute default value for "length_immediate" attribute.  When SHORTFORM
12007   is set, expect that the insn has an 8-bit immediate alternative.  */
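/* E.g. "addl $4, %eax" can use the sign-extended 8-bit immediate form, so
   its immediate contributes one byte, whereas "addl $1000, %eax" needs the
   full 4-byte immediate.  */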
12008int
12009ix86_attr_length_immediate_default (rtx insn, int shortform)
12010{
12011  int len = 0;
12012  int i;
12013  extract_insn_cached (insn);
12014  for (i = recog_data.n_operands - 1; i >= 0; --i)
12015    if (CONSTANT_P (recog_data.operand[i]))
12016      {
12017	if (len)
12018	  abort ();
12019	if (shortform
12020	    && GET_CODE (recog_data.operand[i]) == CONST_INT
12021	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12022	  len = 1;
12023	else
12024	  {
12025	    switch (get_attr_mode (insn))
12026	      {
12027		case MODE_QI:
12028		  len += 1;
12029		  break;
12030		case MODE_HI:
12031		  len += 2;
12032		  break;
12033		case MODE_SI:
12034		  len += 4;
12035		  break;
12036		/* DImode immediates are encoded as 32-bit sign-extended values.  */
12037		case MODE_DI:
12038		  len += 4;
12039		  break;
12040		default:
12041		  fatal_insn ("unknown insn mode", insn);
12042	      }
12043	  }
12044      }
12045  return len;
12046}
12047/* Compute default value for "length_address" attribute.  */
12048int
12049ix86_attr_length_address_default (rtx insn)
12050{
12051  int i;
12052
12053  if (get_attr_type (insn) == TYPE_LEA)
12054    {
12055      rtx set = PATTERN (insn);
12056      if (GET_CODE (set) == SET)
12057	;
12058      else if (GET_CODE (set) == PARALLEL
12059	       && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12060	set = XVECEXP (set, 0, 0);
12061      else
12062	{
12063#ifdef ENABLE_CHECKING
12064	  abort ();
12065#endif
12066	  return 0;
12067	}
12068
12069      return memory_address_length (SET_SRC (set));
12070    }
12071
12072  extract_insn_cached (insn);
12073  for (i = recog_data.n_operands - 1; i >= 0; --i)
12074    if (GET_CODE (recog_data.operand[i]) == MEM)
12075      {
12076	return memory_address_length (XEXP (recog_data.operand[i], 0));
12078      }
12079  return 0;
12080}
12081
12082/* Return the maximum number of instructions a cpu can issue.  */
12083
12084static int
12085ix86_issue_rate (void)
12086{
12087  switch (ix86_tune)
12088    {
12089    case PROCESSOR_PENTIUM:
12090    case PROCESSOR_K6:
12091      return 2;
12092
12093    case PROCESSOR_PENTIUMPRO:
12094    case PROCESSOR_PENTIUM4:
12095    case PROCESSOR_ATHLON:
12096    case PROCESSOR_K8:
12097      return 3;
12098
12099    default:
12100      return 1;
12101    }
12102}
12103
12104/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12105   by DEP_INSN and nothing else set by DEP_INSN.  */
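/* On Pentium, for instance, a compare pairs with a dependent conditional
   jump, which is why ix86_adjust_cost treats such a dependence as free
   when this predicate holds.  */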
12106
12107static int
12108ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12109{
12110  rtx set, set2;
12111
12112  /* Simplify the test for uninteresting insns.  */
12113  if (insn_type != TYPE_SETCC
12114      && insn_type != TYPE_ICMOV
12115      && insn_type != TYPE_FCMOV
12116      && insn_type != TYPE_IBR)
12117    return 0;
12118
12119  if ((set = single_set (dep_insn)) != 0)
12120    {
12121      set = SET_DEST (set);
12122      set2 = NULL_RTX;
12123    }
12124  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12125	   && XVECLEN (PATTERN (dep_insn), 0) == 2
12126	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12127	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12128    {
12129      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12130      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12131    }
12132  else
12133    return 0;
12134
12135  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12136    return 0;
12137
12138  /* This test is true if the dependent insn reads the flags but
12139     not any other potentially set register.  */
12140  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12141    return 0;
12142
12143  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12144    return 0;
12145
12146  return 1;
12147}
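
/* The canonical case ix86_flags_dependant recognizes is a compare feeding
   a conditional branch:

	cmpl %eax, %ebx		; writes the flags and nothing else
	jge  .L2		; reads only the flags

   which lets the Pentium issue the two together for free (see the ???
   note in ix86_adjust_cost below).  */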
12148
12149/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12150   address with operands set by DEP_INSN.  */
12151
12152static int
12153ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12154{
12155  rtx addr;
12156
12157  if (insn_type == TYPE_LEA
12158      && TARGET_PENTIUM)
12159    {
12160      addr = PATTERN (insn);
12161      if (GET_CODE (addr) == SET)
12162	;
12163      else if (GET_CODE (addr) == PARALLEL
12164	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12165	addr = XVECEXP (addr, 0, 0);
12166      else
12167	abort ();
12168      addr = SET_SRC (addr);
12169    }
12170  else
12171    {
12172      int i;
12173      extract_insn_cached (insn);
12174      for (i = recog_data.n_operands - 1; i >= 0; --i)
12175	if (GET_CODE (recog_data.operand[i]) == MEM)
12176	  {
12177	    addr = XEXP (recog_data.operand[i], 0);
12178	    goto found;
12179	  }
12180      return 0;
12181    found:;
12182    }
12183
12184  return modified_in_p (addr, dep_insn);
12185}
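
/* A classic Pentium AGI that ix86_agi_dependant detects:

	movl %esp, %ebp
	movl 4(%ebp), %eax	; the address needs %ebp a cycle early

   The load's address uses a register written by the directly preceding
   insn, so ix86_adjust_cost charges the extra cycle for it.  */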
12186
12187static int
12188ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12189{
12190  enum attr_type insn_type, dep_insn_type;
12191  enum attr_memory memory, dep_memory;
12192  rtx set, set2;
12193  int dep_insn_code_number;
12194
12195  /* Anti and output dependencies have zero cost on all CPUs.  */
12196  if (REG_NOTE_KIND (link) != 0)
12197    return 0;
12198
12199  dep_insn_code_number = recog_memoized (dep_insn);
12200
12201  /* If we can't recognize the insns, we can't really do anything.  */
12202  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12203    return cost;
12204
12205  insn_type = get_attr_type (insn);
12206  dep_insn_type = get_attr_type (dep_insn);
12207
12208  switch (ix86_tune)
12209    {
12210    case PROCESSOR_PENTIUM:
12211      /* Address Generation Interlock adds a cycle of latency.  */
12212      if (ix86_agi_dependant (insn, dep_insn, insn_type))
12213	cost += 1;
12214
12215      /* ??? Compares pair with jump/setcc.  */
12216      if (ix86_flags_dependant (insn, dep_insn, insn_type))
12217	cost = 0;
12218
12219      /* Floating point stores require the value to be ready one cycle
	 earlier.  */
12220      if (insn_type == TYPE_FMOV
12221	  && get_attr_memory (insn) == MEMORY_STORE
12222	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
12223	cost += 1;
12224      break;
12225
12226    case PROCESSOR_PENTIUMPRO:
12227      memory = get_attr_memory (insn);
12228      dep_memory = get_attr_memory (dep_insn);
12229
12230      /* Since we can't represent delayed latencies of load+operation,
12231	 increase the cost here for non-imov insns.  */
12232      if (dep_insn_type != TYPE_IMOV
12233          && dep_insn_type != TYPE_FMOV
12234          && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12235	cost += 1;
12236
12237      /* INT->FP conversion is expensive.  */
12238      if (get_attr_fp_int_src (dep_insn))
12239	cost += 5;
12240
12241      /* There is one cycle extra latency between an FP op and a store.  */
12242      if (insn_type == TYPE_FMOV
12243	  && (set = single_set (dep_insn)) != NULL_RTX
12244	  && (set2 = single_set (insn)) != NULL_RTX
12245	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12246	  && GET_CODE (SET_DEST (set2)) == MEM)
12247	cost += 1;
12248
12249      /* Model the reorder buffer's ability to hide the latency of a load
12250	 by executing it in parallel with the previous instruction, when that
12251	 instruction is not needed to compute the address.  */
12252      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12253	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
12254	{
12255	  /* Claim that moves take one cycle, as the core can issue one load
12256	     at a time and the next load can start a cycle later.  */
12257	  if (dep_insn_type == TYPE_IMOV
12258	      || dep_insn_type == TYPE_FMOV)
12259	    cost = 1;
12260	  else if (cost > 1)
12261	    cost--;
12262	}
12263      break;
12264
12265    case PROCESSOR_K6:
12266      memory = get_attr_memory (insn);
12267      dep_memory = get_attr_memory (dep_insn);
12268      /* The esp dependency is resolved before the instruction is really
12269         finished.  */
12270      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12271	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12272	return 1;
12273
12274      /* Since we can't represent delayed latencies of load+operation,
12275	 increase the cost here for non-imov insns.  */
12276      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12277	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12278
12279      /* INT->FP conversion is expensive.  */
12280      if (get_attr_fp_int_src (dep_insn))
12281	cost += 5;
12282
12283      /* Model the reorder buffer's ability to hide the latency of a load
12284	 by executing it in parallel with the previous instruction, when that
12285	 instruction is not needed to compute the address.  */
12286      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12287	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
12288	{
12289	  /* Claim that moves take one cycle, as the core can issue one load
12290	     at a time and the next load can start a cycle later.  */
12291	  if (dep_insn_type == TYPE_IMOV
12292	      || dep_insn_type == TYPE_FMOV)
12293	    cost = 1;
12294	  else if (cost > 2)
12295	    cost -= 2;
12296	  else
12297	    cost = 1;
12298	}
12299      break;
12300
12301    case PROCESSOR_ATHLON:
12302    case PROCESSOR_K8:
12303      memory = get_attr_memory (insn);
12304      dep_memory = get_attr_memory (dep_insn);
12305
12306      /* Model the reorder buffer's ability to hide the latency of a load
12307	 by executing it in parallel with the previous instruction, when that
12308	 instruction is not needed to compute the address.  */
12309      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12310	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
12311	{
12312	  enum attr_unit unit = get_attr_unit (insn);
12313	  int loadcost = 3;
12314
12315	  /* Because the integer and floating point units differ in the length
12316	     of their pipeline preparation stages, memory operands are cheaper
12317	     for floating point.
12318
12319	     ??? For the Athlon the difference is most probably 2.  */
12320	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12321	    loadcost = 3;
12322	  else
12323	    loadcost = TARGET_ATHLON ? 2 : 0;
12324
12325	  if (cost >= loadcost)
12326	    cost -= loadcost;
12327	  else
12328	    cost = 0;
12329	}
12330
12331    default:
12332      break;
12333    }
12334
12335  return cost;
12336}
12337
12338static union
12339{
12340  struct ppro_sched_data
12341  {
12342    rtx decode[3];
12343    int issued_this_cycle;
12344  } ppro;
12345} ix86_sched_data;
12346
12347static enum attr_ppro_uops
12348ix86_safe_ppro_uops (rtx insn)
12349{
12350  if (recog_memoized (insn) >= 0)
12351    return get_attr_ppro_uops (insn);
12352  else
12353    return PPRO_UOPS_MANY;
12354}
12355
12356static void
12357ix86_dump_ppro_packet (FILE *dump)
12358{
12359  if (ix86_sched_data.ppro.decode[0])
12360    {
12361      fprintf (dump, "PPRO packet: %d",
12362	       INSN_UID (ix86_sched_data.ppro.decode[0]));
12363      if (ix86_sched_data.ppro.decode[1])
12364	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12365      if (ix86_sched_data.ppro.decode[2])
12366	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12367      fputc ('\n', dump);
12368    }
12369}
12370
12371/* We're beginning a new block.  Initialize data structures as necessary.  */
12372
12373static void
12374ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12375		 int sched_verbose ATTRIBUTE_UNUSED,
12376		 int veclen ATTRIBUTE_UNUSED)
12377{
12378  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12379}
12380
12381/* Shift INSN to SLOT, and shift everything else down.  */
12382
12383static void
12384ix86_reorder_insn (rtx *insnp, rtx *slot)
12385{
12386  if (insnp != slot)
12387    {
12388      rtx insn = *insnp;
12389      do
12390	insnp[0] = insnp[1];
12391      while (++insnp != slot);
12392      *insnp = insn;
12393    }
12394}
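
/* E.g. ix86_reorder_insn with the ready vector {A, B, C, D}, insnp = &B
   and slot = &D yields {A, C, D, B}: B lands in D's slot and everything
   in between shifts down one position.  */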
12395
12396static void
12397ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12398{
12399  rtx decode[3];
12400  enum attr_ppro_uops cur_uops;
12401  int issued_this_cycle;
12402  rtx *insnp;
12403  int i;
12404
12405  /* At this point .ppro.decode contains the state of the three
12406     decoders from last "cycle".  That is, those insns that were
12407     actually independent.  But here we're scheduling for the
12408     decoder, and we may find things that are decodable in the
12409     same cycle.  */
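
  /* The P6 decoders follow a 4-1-1 template: decoder 0 accepts insns of
     up to four uops while decoders 1 and 2 take single-uop insns only,
     which is what the PPRO_UOPS_ONE/FEW/MANY classification mirrors.  */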
12410
12411  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12412  issued_this_cycle = 0;
12413
12414  insnp = e_ready;
12415  cur_uops = ix86_safe_ppro_uops (*insnp);
12416
12417  /* If the decoders are empty, and we've a complex insn at the
12418     head of the priority queue, let it issue without complaint.  */
12419  if (decode[0] == NULL)
12420    {
12421      if (cur_uops == PPRO_UOPS_MANY)
12422	{
12423	  decode[0] = *insnp;
12424	  goto ppro_done;
12425	}
12426
12427      /* Otherwise, search for a 2-4 uop insn to issue.  */
12428      while (cur_uops != PPRO_UOPS_FEW)
12429	{
12430	  if (insnp == ready)
12431	    break;
12432	  cur_uops = ix86_safe_ppro_uops (*--insnp);
12433	}
12434
12435      /* If so, move it to the head of the line.  */
12436      if (cur_uops == PPRO_UOPS_FEW)
12437	ix86_reorder_insn (insnp, e_ready);
12438
12439      /* Issue the head of the queue.  */
12440      issued_this_cycle = 1;
12441      decode[0] = *e_ready--;
12442    }
12443
12444  /* Look for simple insns to fill in the other two slots.  */
12445  for (i = 1; i < 3; ++i)
12446    if (decode[i] == NULL)
12447      {
12448	if (ready > e_ready)
12449	  goto ppro_done;
12450
12451	insnp = e_ready;
12452	cur_uops = ix86_safe_ppro_uops (*insnp);
12453	while (cur_uops != PPRO_UOPS_ONE)
12454	  {
12455	    if (insnp == ready)
12456	      break;
12457	    cur_uops = ix86_safe_ppro_uops (*--insnp);
12458	  }
12459
12460	/* Found one.  Move it to the head of the queue and issue it.  */
12461	if (cur_uops == PPRO_UOPS_ONE)
12462	  {
12463	    ix86_reorder_insn (insnp, e_ready);
12464	    decode[i] = *e_ready--;
12465	    issued_this_cycle++;
12466	    continue;
12467	  }
12468
12469	/* ??? Didn't find one.  Ideally, here we would do a lazy split
12470	   of 2-uop insns, issue one and queue the other.  */
12471      }
12472
12473 ppro_done:
12474  if (issued_this_cycle == 0)
12475    issued_this_cycle = 1;
12476  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12477}
12478
12479/* We are about to begin issuing insns for this clock cycle.
12480   Override the default sort algorithm to better slot instructions.  */
12481static int
12482ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12483		    int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12484		    int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12485{
12486  int n_ready = *n_readyp;
12487  rtx *e_ready = ready + n_ready - 1;
12488
12489  /* Make sure to go ahead and initialize key items in
12490     ix86_sched_data if we are not going to bother trying to
12491     reorder the ready queue.  */
12492  if (n_ready < 2)
12493    {
12494      ix86_sched_data.ppro.issued_this_cycle = 1;
12495      goto out;
12496    }
12497
12498  switch (ix86_tune)
12499    {
12500    default:
12501      break;
12502
12503    case PROCESSOR_PENTIUMPRO:
12504      ix86_sched_reorder_ppro (ready, e_ready);
12505      break;
12506    }
12507
12508out:
12509  return ix86_issue_rate ();
12510}
12511
12512/* We are about to issue INSN.  Return the number of insns left on the
12513   ready queue that can be issued this cycle.  */
12514
12515static int
12516ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12517		     int can_issue_more)
12518{
12519  int i;
12520  switch (ix86_tune)
12521    {
12522    default:
12523      return can_issue_more - 1;
12524
12525    case PROCESSOR_PENTIUMPRO:
12526      {
12527	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12528
12529	if (uops == PPRO_UOPS_MANY)
12530	  {
12531	    if (sched_verbose)
12532	      ix86_dump_ppro_packet (dump);
12533	    ix86_sched_data.ppro.decode[0] = insn;
12534	    ix86_sched_data.ppro.decode[1] = NULL;
12535	    ix86_sched_data.ppro.decode[2] = NULL;
12536	    if (sched_verbose)
12537	      ix86_dump_ppro_packet (dump);
12538	    ix86_sched_data.ppro.decode[0] = NULL;
12539	  }
12540	else if (uops == PPRO_UOPS_FEW)
12541	  {
12542	    if (sched_verbose)
12543	      ix86_dump_ppro_packet (dump);
12544	    ix86_sched_data.ppro.decode[0] = insn;
12545	    ix86_sched_data.ppro.decode[1] = NULL;
12546	    ix86_sched_data.ppro.decode[2] = NULL;
12547	  }
12548	else
12549	  {
12550	    for (i = 0; i < 3; ++i)
12551	      if (ix86_sched_data.ppro.decode[i] == NULL)
12552		{
12553		  ix86_sched_data.ppro.decode[i] = insn;
12554		  break;
12555		}
12556	    if (i == 3)
12557	      abort ();
12558	    if (i == 2)
12559	      {
12560	        if (sched_verbose)
12561	          ix86_dump_ppro_packet (dump);
12562		ix86_sched_data.ppro.decode[0] = NULL;
12563		ix86_sched_data.ppro.decode[1] = NULL;
12564		ix86_sched_data.ppro.decode[2] = NULL;
12565	      }
12566	  }
12567      }
12568      return --ix86_sched_data.ppro.issued_this_cycle;
12569    }
12570}
12571
12572static int
12573ia32_use_dfa_pipeline_interface (void)
12574{
12575  if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12576    return 1;
12577  return 0;
12578}
12579
12580/* How many alternative schedules to try.  This should be as wide as the
12581   scheduling freedom in the DFA, but no wider.  Making this value too
12582   large results in extra work for the scheduler.  */
12583
12584static int
12585ia32_multipass_dfa_lookahead (void)
12586{
12587  if (ix86_tune == PROCESSOR_PENTIUM)
12588    return 2;
12589  else
12590    return 0;
12591}
12592
12593
12594/* Compute the alignment given to a constant that is being placed in memory.
12595   EXP is the constant and ALIGN is the alignment that the object would
12596   ordinarily have.
12597   The value of this function is used instead of that alignment to align
12598   the object.  */
12599
12600int
12601ix86_constant_alignment (tree exp, int align)
12602{
12603  if (TREE_CODE (exp) == REAL_CST)
12604    {
12605      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12606	return 64;
12607      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12608	return 128;
12609    }
12610  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12611	   && !TARGET_NO_ALIGN_LONG_STRINGS
12612	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12613    return BITS_PER_WORD;
12614
12615  return align;
12616}
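
/* For instance, a DFmode constant that would only get 32-bit alignment is
   pushed up to a 64-bit boundary by the function above, so FP loads of it
   stay naturally aligned; string constants of 31 bytes or more get word
   alignment (unless optimizing for size or explicitly disabled),
   presumably so the string expanders can use aligned word accesses.  */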
12617
12618/* Compute the alignment for a static variable.
12619   TYPE is the data type, and ALIGN is the alignment that
12620   the object would ordinarily have.  The value of this function is used
12621   instead of that alignment to align the object.  */
12622
12623int
12624ix86_data_alignment (tree type, int align)
12625{
12626  if (AGGREGATE_TYPE_P (type)
12627       && TYPE_SIZE (type)
12628       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12629       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12630	   || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12631    return 256;
12632
12633  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12634     to a 16-byte boundary.  */
12635  if (TARGET_64BIT)
12636    {
12637      if (AGGREGATE_TYPE_P (type)
12638	   && TYPE_SIZE (type)
12639	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12640	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12641	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12642	return 128;
12643    }
12644
12645  if (TREE_CODE (type) == ARRAY_TYPE)
12646    {
12647      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12648	return 64;
12649      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12650	return 128;
12651    }
12652  else if (TREE_CODE (type) == COMPLEX_TYPE)
12653    {
12655      if (TYPE_MODE (type) == DCmode && align < 64)
12656	return 64;
12657      if (TYPE_MODE (type) == XCmode && align < 128)
12658	return 128;
12659    }
12660  else if ((TREE_CODE (type) == RECORD_TYPE
12661	    || TREE_CODE (type) == UNION_TYPE
12662	    || TREE_CODE (type) == QUAL_UNION_TYPE)
12663	   && TYPE_FIELDS (type))
12664    {
12665      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12666	return 64;
12667      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12668	return 128;
12669    }
12670  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12671	   || TREE_CODE (type) == INTEGER_TYPE)
12672    {
12673      if (TYPE_MODE (type) == DFmode && align < 64)
12674	return 64;
12675      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12676	return 128;
12677    }
12678
12679  return align;
12680}
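
/* Note that the 256-bit case above applies only to static storage;
   ix86_local_alignment below caps the same aggregates at 128 bits,
   presumably because over-aligning the stack is comparatively costly.  */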
12681
12682/* Compute the alignment for a local variable.
12683   TYPE is the data type, and ALIGN is the alignment that
12684   the object would ordinarily have.  The value of this function is used
12685   instead of that alignment to align the object.  */
12686
12687int
12688ix86_local_alignment (tree type, int align)
12689{
12690  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12691     to a 16-byte boundary.  */
12692  if (TARGET_64BIT)
12693    {
12694      if (AGGREGATE_TYPE_P (type)
12695	   && TYPE_SIZE (type)
12696	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12697	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12698	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12699	return 128;
12700    }
12701  if (TREE_CODE (type) == ARRAY_TYPE)
12702    {
12703      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12704	return 64;
12705      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12706	return 128;
12707    }
12708  else if (TREE_CODE (type) == COMPLEX_TYPE)
12709    {
12710      if (TYPE_MODE (type) == DCmode && align < 64)
12711	return 64;
12712      if (TYPE_MODE (type) == XCmode && align < 128)
12713	return 128;
12714    }
12715  else if ((TREE_CODE (type) == RECORD_TYPE
12716	    || TREE_CODE (type) == UNION_TYPE
12717	    || TREE_CODE (type) == QUAL_UNION_TYPE)
12718	   && TYPE_FIELDS (type))
12719    {
12720      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12721	return 64;
12722      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12723	return 128;
12724    }
12725  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12726	   || TREE_CODE (type) == INTEGER_TYPE)
12727    {
12729      if (TYPE_MODE (type) == DFmode && align < 64)
12730	return 64;
12731      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12732	return 128;
12733    }
12734  return align;
12735}
12736
12737/* Emit RTL insns to initialize the variable parts of a trampoline.
12738   FNADDR is an RTX for the address of the function's pure code.
12739   CXT is an RTX for the static chain value for the function.  */
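/* Byte for byte, the code emitted below is:

   ia32:	b9 <cxt:4>		movl   $cxt, %ecx
		e9 <disp:4>		jmp    fnaddr	; disp = fnaddr - (tramp + 10)

   x86-64:	49 bb <fnaddr:8>	movabs $fnaddr, %r11
		49 ba <cxt:8>		movabs $cxt, %r10
		49 ff e3		jmp    *%r11

   with 41 bb <fnaddr:4> (a movl into %r11d) replacing the first movabs
   when FNADDR is a zero-extended 32-bit value.  */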
12740void
12741x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12742{
12743  if (!TARGET_64BIT)
12744    {
12745      /* Compute offset from the end of the jmp to the target function.  */
12746      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12747			       plus_constant (tramp, 10),
12748			       NULL_RTX, 1, OPTAB_DIRECT);
12749      emit_move_insn (gen_rtx_MEM (QImode, tramp),
12750		      gen_int_mode (0xb9, QImode));
12751      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12752      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12753		      gen_int_mode (0xe9, QImode));
12754      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12755    }
12756  else
12757    {
12758      int offset = 0;
12759      /* Try to load the address with the shorter movl instead of movabs.
12760         We may want to support movq for kernel mode, but the kernel does
12761         not use trampolines at the moment.  */
12762      if (x86_64_zero_extended_value (fnaddr))
12763	{
12764	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
12765	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12766			  gen_int_mode (0xbb41, HImode));
12767	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12768			  gen_lowpart (SImode, fnaddr));
12769	  offset += 6;
12770	}
12771      else
12772	{
12773	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12774			  gen_int_mode (0xbb49, HImode));
12775	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12776			  fnaddr);
12777	  offset += 10;
12778	}
12779      /* Load static chain using movabs to r10.  */
12780      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12781		      gen_int_mode (0xba49, HImode));
12782      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12783		      cxt);
12784      offset += 10;
12785      /* Jump to r11.  */
12786      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12787		      gen_int_mode (0xff49, HImode));
12788      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12789		      gen_int_mode (0xe3, QImode));
12790      offset += 3;
12791      if (offset > TRAMPOLINE_SIZE)
12792	abort ();
12793    }
12794
12795#ifdef ENABLE_EXECUTE_STACK
12796  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12797		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12798#endif
12799}
12800
12801#define def_builtin(MASK, NAME, TYPE, CODE)			\
12802do {								\
12803  if ((MASK) & target_flags					\
12804      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
12805    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
12806		      NULL, NULL_TREE);				\
12807} while (0)
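
/* For example, a hypothetical

     def_builtin (MASK_SSE, "__builtin_ia32_rcpps",
		  v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);

   registers the builtin only when -msse is in effect; including MASK_64BIT
   in the mask would additionally restrict it to 64-bit targets.  */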
12808
12809struct builtin_description
12810{
12811  const unsigned int mask;
12812  const enum insn_code icode;
12813  const char *const name;
12814  const enum ix86_builtins code;
12815  const enum rtx_code comparison;
12816  const unsigned int flag;
12817};
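
/* In the tables below, MASK gives the -m flags the builtin requires,
   ICODE the insn pattern used to expand it, and NAME may be 0 for
   builtins registered by hand in ix86_init_mmx_sse_builtins.  For the
   comparison entries, COMPARISON is the rtx code to emit and a nonzero
   FLAG marks the variants (cmpgt/cmpge expressed via LT/LE) whose
   operands must be swapped at expansion time.  */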
12818
12819static const struct builtin_description bdesc_comi[] =
12820{
12821  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12822  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12823  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12824  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12825  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12826  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12827  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12828  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12829  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12830  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12831  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12832  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12833  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12834  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12835  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12836  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12837  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12838  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12839  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12840  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12841  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12842  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12843  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12844  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12845};
12846
12847static const struct builtin_description bdesc_2arg[] =
12848{
12849  /* SSE */
12850  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12851  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12852  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12853  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12854  { MASK_SSE, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12855  { MASK_SSE, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12856  { MASK_SSE, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12857  { MASK_SSE, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12858
12859  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12860  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12861  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12862  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12863  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12864  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12865  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12866  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12867  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12868  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12869  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12870  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12871  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12872  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12873  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12874  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12875  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12876  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12877  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12878  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12879
12880  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12881  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12882  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12883  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12884
12885  { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12886  { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12887  { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12888  { MASK_SSE, CODE_FOR_sse_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12889
12890  { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12891  { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12892  { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12893  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12894  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12895
12896  /* MMX */
12897  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12898  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12899  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12900  { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12901  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12902  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12903  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12904  { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12905
12906  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12907  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12908  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12909  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12910  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12911  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12912  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12913  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12914
12915  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12916  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12917  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12918
12919  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12920  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12921  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12922  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12923
12924  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12925  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12926
12927  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12928  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12929  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12930  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12931  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12932  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12933
12934  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12935  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12936  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12937  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12938
12939  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12940  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12941  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12942  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12943  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12944  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12945
12946  /* Special.  */
12947  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12948  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12949  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12950
12951  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12952  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12953  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12954
12955  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12956  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12957  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12958  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12959  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12960  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12961
12962  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12963  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12964  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12965  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12966  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12967  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12968
12969  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12970  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12971  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12972  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12973
12974  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12975  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12976
12977  /* SSE2 */
12978  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12979  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12980  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12981  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12982  { MASK_SSE2, CODE_FOR_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12983  { MASK_SSE2, CODE_FOR_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12984  { MASK_SSE2, CODE_FOR_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12985  { MASK_SSE2, CODE_FOR_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12986
12987  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12988  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12989  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12990  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12991  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12992  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12993  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12994  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12995  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12996  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12997  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12998  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12999  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13000  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13001  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13002  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13003  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
13004  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
13005  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
13006  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
13007
13008  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13009  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13010  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13011  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13012
13013  { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13014  { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13015  { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13016  { MASK_SSE2, CODE_FOR_sse2_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13017
13018  { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13019  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13020  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13021
13022  /* SSE2 MMX */
13023  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13024  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13025  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13026  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13027  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13028  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13029  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13030  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13031
13032  { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13033  { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13034  { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13035  { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13036  { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13037  { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13038  { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13039  { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13040
13041  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13042  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13043  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13044  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13045
13046  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13047  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13048  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13049  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13050
13051  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13052  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13053
13054  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13055  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13056  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13057  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13058  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13059  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13060
13061  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13062  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13063  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13064  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13065
13066  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13067  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13068  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13069  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13070  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13071  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13072  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13073  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13074
13075  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13076  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13077  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13078
13079  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13080  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13081
13082  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13083  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13084  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13085  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13086  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13087  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13088
13089  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13090  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13091  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13092  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13093  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13094  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13095
13096  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13097  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13098  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13099  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13100
13101  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13102
13103  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13104  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13105  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13106  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13107
13108  /* SSE3 */
13109  { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13110  { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13111  { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13112  { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13113  { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13114  { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13115};
13116
13117static const struct builtin_description bdesc_1arg[] =
13118{
13119  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13120  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13121
13122  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13123  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13124  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13125
13126  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13127  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13128  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13129  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13130  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13131  { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13132
13133  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13134  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13135  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13136  { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13137
13138  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13139
13140  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13141  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13142
13143  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13144  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13145  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13146  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13147  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13148
13149  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13150
13151  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13152  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13153  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13154  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13155
13156  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13157  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13158  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13159
13160  { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13161
13162  /* SSE3 */
13163  { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13164  { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13165  { MASK_SSE3, CODE_FOR_movddup,  0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13166};
13167
13168void
13169ix86_init_builtins (void)
13170{
13171  if (TARGET_MMX)
13172    ix86_init_mmx_sse_builtins ();
13173}
13174
13175/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
13176   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
13177   builtins.  */
13178static void
13179ix86_init_mmx_sse_builtins (void)
13180{
13181  const struct builtin_description * d;
13182  size_t i;
13183
13184  tree pchar_type_node = build_pointer_type (char_type_node);
13185  tree pcchar_type_node = build_pointer_type (
13186			     build_type_variant (char_type_node, 1, 0));
13187  tree pfloat_type_node = build_pointer_type (float_type_node);
13188  tree pcfloat_type_node = build_pointer_type (
13189			     build_type_variant (float_type_node, 1, 0));
13190  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13191  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13192  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13193
13194  /* Comparisons.  */
13195  tree int_ftype_v4sf_v4sf
13196    = build_function_type_list (integer_type_node,
13197				V4SF_type_node, V4SF_type_node, NULL_TREE);
13198  tree v4si_ftype_v4sf_v4sf
13199    = build_function_type_list (V4SI_type_node,
13200				V4SF_type_node, V4SF_type_node, NULL_TREE);
13201  /* MMX/SSE/integer conversions.  */
13202  tree int_ftype_v4sf
13203    = build_function_type_list (integer_type_node,
13204				V4SF_type_node, NULL_TREE);
13205  tree int64_ftype_v4sf
13206    = build_function_type_list (long_long_integer_type_node,
13207				V4SF_type_node, NULL_TREE);
13208  tree int_ftype_v8qi
13209    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13210  tree v4sf_ftype_v4sf_int
13211    = build_function_type_list (V4SF_type_node,
13212				V4SF_type_node, integer_type_node, NULL_TREE);
13213  tree v4sf_ftype_v4sf_int64
13214    = build_function_type_list (V4SF_type_node,
13215				V4SF_type_node, long_long_integer_type_node,
13216				NULL_TREE);
13217  tree v4sf_ftype_v4sf_v2si
13218    = build_function_type_list (V4SF_type_node,
13219				V4SF_type_node, V2SI_type_node, NULL_TREE);
13220  tree int_ftype_v4hi_int
13221    = build_function_type_list (integer_type_node,
13222				V4HI_type_node, integer_type_node, NULL_TREE);
13223  tree v4hi_ftype_v4hi_int_int
13224    = build_function_type_list (V4HI_type_node, V4HI_type_node,
13225				integer_type_node, integer_type_node,
13226				NULL_TREE);
13227  /* Miscellaneous.  */
13228  tree v8qi_ftype_v4hi_v4hi
13229    = build_function_type_list (V8QI_type_node,
13230				V4HI_type_node, V4HI_type_node, NULL_TREE);
13231  tree v4hi_ftype_v2si_v2si
13232    = build_function_type_list (V4HI_type_node,
13233				V2SI_type_node, V2SI_type_node, NULL_TREE);
13234  tree v4sf_ftype_v4sf_v4sf_int
13235    = build_function_type_list (V4SF_type_node,
13236				V4SF_type_node, V4SF_type_node,
13237				integer_type_node, NULL_TREE);
13238  tree v2si_ftype_v4hi_v4hi
13239    = build_function_type_list (V2SI_type_node,
13240				V4HI_type_node, V4HI_type_node, NULL_TREE);
13241  tree v4hi_ftype_v4hi_int
13242    = build_function_type_list (V4HI_type_node,
13243				V4HI_type_node, integer_type_node, NULL_TREE);
13244  tree v4hi_ftype_v4hi_di
13245    = build_function_type_list (V4HI_type_node,
13246				V4HI_type_node, long_long_unsigned_type_node,
13247				NULL_TREE);
13248  tree v2si_ftype_v2si_di
13249    = build_function_type_list (V2SI_type_node,
13250				V2SI_type_node, long_long_unsigned_type_node,
13251				NULL_TREE);
13252  tree void_ftype_void
13253    = build_function_type (void_type_node, void_list_node);
13254  tree void_ftype_unsigned
13255    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13256  tree void_ftype_unsigned_unsigned
13257    = build_function_type_list (void_type_node, unsigned_type_node,
13258				unsigned_type_node, NULL_TREE);
13259  tree void_ftype_pcvoid_unsigned_unsigned
13260    = build_function_type_list (void_type_node, const_ptr_type_node,
13261				unsigned_type_node, unsigned_type_node,
13262				NULL_TREE);
13263  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, void_list_node);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
                                V8QI_type_node, V8QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pcint_type_node = build_pointer_type (
                             build_type_variant (integer_type_node, 1, 0));
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
                                build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree ti_ftype_void
    = build_function_type (intTI_type_node, void_list_node);
  tree v2di_ftype_void
    = build_function_type (V2DI_type_node, void_list_node);
  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
                                intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v2di_ftype_di
    = build_function_type_list (V2DI_type_node,
                                long_long_unsigned_type_node, NULL_TREE);
  tree di_ftype_v2di
    = build_function_type_list (long_long_unsigned_type_node,
                                V2DI_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v2df_ftype_v2df_pv2si
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v2df
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_double
    = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
  tree v2df_ftype_double_double
    = build_function_type_list (V2DF_type_node,
                                double_type_node, double_type_node, NULL_TREE);
  tree int_ftype_v8hi_int
    = build_function_type_list (integer_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node,
                                integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V16QI_type_node, NULL_TREE);
  tree v4si_ftype_pcint
    = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
  tree void_ftype_pcint_v4si
    = build_function_type_list (void_type_node,
                                pcint_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);

  tree float80_type;
  tree float128_type;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 96;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  float128_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type) = 128;
  layout_type (float128_type);
  (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");

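  /* Illustrative example (not part of the original source): with the
     registrations above in place, user code on IA-32 can name the extended
     types directly, e.g.

       __float80 ext;        the 80-bit extended format registered above
       __float128 quad;      the 128-bit format laid out above

     assuming the front end exposes the registered builtin type names.  */
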
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          abort ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
          || d->icode == CODE_FOR_maskncmpv4sf3
          || d->icode == CODE_FOR_vmmaskcmpv4sf3
          || d->icode == CODE_FOR_vmmaskncmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
          || d->icode == CODE_FOR_maskncmpv2df3
          || d->icode == CODE_FOR_vmmaskcmpv2df3
          || d->icode == CODE_FOR_vmmaskncmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }

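  /* Illustrative example (not part of the original source): the loop above
     gives each two-operand builtin the function type matching its operand
     mode, so a V4HImode entry such as __builtin_ia32_paddw is registered
     with type v4hi_ftype_v4hi_v4hi and can be used as

       __v4hi sum = __builtin_ia32_paddw (a, b);

     where __v4hi is the vector typedef the intrinsic headers provide.  */
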
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

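  /* Illustrative example (not part of the original source): the comi/ucomi
     builtins registered above compare only the low elements and return a
     scalar flag, e.g.

       int eq = __builtin_ia32_comieq (x, y);   with x, y of type __v4sf

     yielding 0 or 1 rather than a full-width element mask.  */
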
  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

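  /* Illustrative example (not part of the original source):
     __builtin_ia32_shufps takes an immediate selector,

       __v4sf r = __builtin_ia32_shufps (a, b, 0x1b);

     where 0x1b encodes the element selection two bits per lane; the
     expander in ix86_expand_builtin rejects a non-constant mask.  */
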
  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
               void_ftype_pcvoid_unsigned_unsigned,
               IX86_BUILTIN_MONITOR);
  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
               void_ftype_unsigned_unsigned,
               IX86_BUILTIN_MWAIT);
  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
               v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
  def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
               v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
  def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
               v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
}

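/* Illustrative example (not part of the original source): the SSE3 builtins
   registered at the end of ix86_init_mmx_sse_builtins map directly onto the
   MONITOR/MWAIT instructions, e.g.

     __builtin_ia32_monitor (addr, 0, 0);
     __builtin_ia32_mwait (0, 0);

   with addr a pointer to the address range being watched.  */
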
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
                                : gen_rtx_SUBREG (V4SFmode, x, 0),
                                CONST0_RTX (V4SFmode)));
  return x;
}
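
/* Illustrative note (not part of the original source): if erroneous user
   code makes expand_expr return const0_rtx where a V4SF operand was
   expected, safe_vector_operand substitutes a fresh register cleared via
   the SSE clear pattern, so the expanders below can proceed instead of
   crashing on a scalar constant.  */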

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
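
/* Illustrative example (not part of the original source): a call such as
   __builtin_ia32_paddw (a, b) reaches this helper with the icode of the
   corresponding add pattern; both operands are forced into registers of
   the pattern's operand modes when the predicates demand it, and the insn
   is emitted with a fresh register as the result.  */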

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
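
/* Illustrative example (not part of the original source): a store builtin
   such as __builtin_ia32_storeaps (p, v) arrives here with the address in
   op0 and the vector in op1; the address is wrapped in a MEM of the store
   mode and the vector is copied into a register before the move pattern
   is emitted.  */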

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
                          rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
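
/* Illustrative note (not part of the original source): load builtins reuse
   this helper with do_load == 1, so __builtin_ia32_loadups (p) turns the
   pointer into a MEM operand, while register unops such as the sqrtps
   expansion pass do_load == 0.  */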

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
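
/* Illustrative note (not part of the original source): these scalar insns
   only replace the low element, so the same value is used for both inputs;
   __builtin_ia32_sqrtss (v) thus computes the square root of v[0] while
   the upper three elements of v pass through unchanged.  */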

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
                         rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
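
/* Illustrative example (not part of the original source): SSE encodes only
   one direction of the ordered packed compares, so a builtin such as
   __builtin_ia32_cmpgtps is described with d->flag set and is expanded
   here as the LT compare with the operands swapped; the result is still a
   full-width element mask.  */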

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
                      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
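
/* Illustrative example (not part of the original source): for
   __builtin_ia32_comieq (x, y) the comi pattern compares the low elements
   and sets the flags; the code above then stores the predicate into the
   low byte of a zeroed SImode register via a STRICT_LOW_PART set, and that
   register is returned as the int result.  */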
14081
14082/* Expand an expression EXP that calls a built-in function,
14083   with result going to TARGET if that's convenient
14084   (and in mode MODE if that's convenient).
14085   SUBTARGET may be used as the target for computing one of EXP's operands.
14086   IGNORE is nonzero if the value is to be ignored.  */
14087
14088rtx
14089ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14090		     enum machine_mode mode ATTRIBUTE_UNUSED,
14091		     int ignore ATTRIBUTE_UNUSED)
14092{
14093  const struct builtin_description *d;
14094  size_t i;
14095  enum insn_code icode;
14096  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14097  tree arglist = TREE_OPERAND (exp, 1);
14098  tree arg0, arg1, arg2;
14099  rtx op0, op1, op2, pat;
14100  enum machine_mode tmode, mode0, mode1, mode2;
14101  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14102
14103  switch (fcode)
14104    {
14105    case IX86_BUILTIN_EMMS:
14106      emit_insn (gen_emms ());
14107      return 0;
14108
14109    case IX86_BUILTIN_SFENCE:
14110      emit_insn (gen_sfence ());
14111      return 0;
14112
14113    case IX86_BUILTIN_PEXTRW:
14114    case IX86_BUILTIN_PEXTRW128:
14115      icode = (fcode == IX86_BUILTIN_PEXTRW
14116	       ? CODE_FOR_mmx_pextrw
14117	       : CODE_FOR_sse2_pextrw);
14118      arg0 = TREE_VALUE (arglist);
14119      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14120      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14121      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14122      tmode = insn_data[icode].operand[0].mode;
14123      mode0 = insn_data[icode].operand[1].mode;
14124      mode1 = insn_data[icode].operand[2].mode;
14125
14126      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14127	op0 = copy_to_mode_reg (mode0, op0);
14128      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14129	{
14130	  error ("selector must be an integer constant in the range 0..%i",
14131		  fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14132	  return gen_reg_rtx (tmode);
14133	}
14134      if (target == 0
14135	  || GET_MODE (target) != tmode
14136	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14137	target = gen_reg_rtx (tmode);
14138      pat = GEN_FCN (icode) (target, op0, op1);
14139      if (! pat)
14140	return 0;
14141      emit_insn (pat);
14142      return target;
14143
14144    case IX86_BUILTIN_PINSRW:
14145    case IX86_BUILTIN_PINSRW128:
14146      icode = (fcode == IX86_BUILTIN_PINSRW
14147	       ? CODE_FOR_mmx_pinsrw
14148	       : CODE_FOR_sse2_pinsrw);
14149      arg0 = TREE_VALUE (arglist);
14150      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14151      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14152      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14153      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14154      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14155      tmode = insn_data[icode].operand[0].mode;
14156      mode0 = insn_data[icode].operand[1].mode;
14157      mode1 = insn_data[icode].operand[2].mode;
14158      mode2 = insn_data[icode].operand[3].mode;
14159
14160      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14161	op0 = copy_to_mode_reg (mode0, op0);
14162      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14163	op1 = copy_to_mode_reg (mode1, op1);
14164      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14165	{
14166	  error ("selector must be an integer constant in the range 0..%i",
14167		  fcode == IX86_BUILTIN_PINSRW ? 15:255);
14168	  return const0_rtx;
14169	}
14170      if (target == 0
14171	  || GET_MODE (target) != tmode
14172	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14173	target = gen_reg_rtx (tmode);
14174      pat = GEN_FCN (icode) (target, op0, op1, op2);
14175      if (! pat)
14176	return 0;
14177      emit_insn (pat);
14178      return target;
14179
14180    case IX86_BUILTIN_MASKMOVQ:
14181    case IX86_BUILTIN_MASKMOVDQU:
14182      icode = (fcode == IX86_BUILTIN_MASKMOVQ
14183	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14184	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14185		  : CODE_FOR_sse2_maskmovdqu));
14186      /* Note the arg order is different from the operand order.  */
14187      arg1 = TREE_VALUE (arglist);
14188      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14189      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14190      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14191      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14192      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14193      mode0 = insn_data[icode].operand[0].mode;
14194      mode1 = insn_data[icode].operand[1].mode;
14195      mode2 = insn_data[icode].operand[2].mode;
14196
14197      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14198	op0 = copy_to_mode_reg (mode0, op0);
14199      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14200	op1 = copy_to_mode_reg (mode1, op1);
14201      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14202	op2 = copy_to_mode_reg (mode2, op2);
14203      pat = GEN_FCN (icode) (op0, op1, op2);
14204      if (! pat)
14205	return 0;
14206      emit_insn (pat);
14207      return 0;
14208
14209    case IX86_BUILTIN_SQRTSS:
14210      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14211    case IX86_BUILTIN_RSQRTSS:
14212      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14213    case IX86_BUILTIN_RCPSS:
14214      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14215
14216    case IX86_BUILTIN_LOADAPS:
14217      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14218
14219    case IX86_BUILTIN_LOADUPS:
14220      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14221
14222    case IX86_BUILTIN_STOREAPS:
14223      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14224
14225    case IX86_BUILTIN_STOREUPS:
14226      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14227
14228    case IX86_BUILTIN_LOADSS:
14229      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14230
14231    case IX86_BUILTIN_STORESS:
14232      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14233
14234    case IX86_BUILTIN_LOADHPS:
14235    case IX86_BUILTIN_LOADLPS:
14236    case IX86_BUILTIN_LOADHPD:
14237    case IX86_BUILTIN_LOADLPD:
14238      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14239	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14240	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14241	       : CODE_FOR_sse2_movsd);
14242      arg0 = TREE_VALUE (arglist);
14243      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14244      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14245      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14246      tmode = insn_data[icode].operand[0].mode;
14247      mode0 = insn_data[icode].operand[1].mode;
14248      mode1 = insn_data[icode].operand[2].mode;
14249
14250      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14251	op0 = copy_to_mode_reg (mode0, op0);
14252      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14253      if (target == 0
14254	  || GET_MODE (target) != tmode
14255	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14256	target = gen_reg_rtx (tmode);
14257      pat = GEN_FCN (icode) (target, op0, op1);
14258      if (! pat)
14259	return 0;
14260      emit_insn (pat);
14261      return target;
14262
14263    case IX86_BUILTIN_STOREHPS:
14264    case IX86_BUILTIN_STORELPS:
14265    case IX86_BUILTIN_STOREHPD:
14266    case IX86_BUILTIN_STORELPD:
14267      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14268	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14269	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14270	       : CODE_FOR_sse2_movsd);
14271      arg0 = TREE_VALUE (arglist);
14272      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14273      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14274      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14275      mode0 = insn_data[icode].operand[1].mode;
14276      mode1 = insn_data[icode].operand[2].mode;
14277
14278      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14279      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14280	op1 = copy_to_mode_reg (mode1, op1);
14281
14282      pat = GEN_FCN (icode) (op0, op0, op1);
14283      if (! pat)
14284	return 0;
14285      emit_insn (pat);
14286      return 0;
14287
14288    case IX86_BUILTIN_MOVNTPS:
14289      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14290    case IX86_BUILTIN_MOVNTQ:
14291      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14292
14293    case IX86_BUILTIN_LDMXCSR:
14294      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14295      target = assign_386_stack_local (SImode, 0);
14296      emit_move_insn (target, op0);
14297      emit_insn (gen_ldmxcsr (target));
14298      return 0;
14299
14300    case IX86_BUILTIN_STMXCSR:
14301      target = assign_386_stack_local (SImode, 0);
14302      emit_insn (gen_stmxcsr (target));
14303      return copy_to_mode_reg (SImode, target);
14304
14305    case IX86_BUILTIN_SHUFPS:
14306    case IX86_BUILTIN_SHUFPD:
14307      icode = (fcode == IX86_BUILTIN_SHUFPS
14308	       ? CODE_FOR_sse_shufps
14309	       : CODE_FOR_sse2_shufpd);
14310      arg0 = TREE_VALUE (arglist);
14311      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14312      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14313      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14314      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14315      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14316      tmode = insn_data[icode].operand[0].mode;
14317      mode0 = insn_data[icode].operand[1].mode;
14318      mode1 = insn_data[icode].operand[2].mode;
14319      mode2 = insn_data[icode].operand[3].mode;
14320
14321      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14322	op0 = copy_to_mode_reg (mode0, op0);
14323      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14324	op1 = copy_to_mode_reg (mode1, op1);
14325      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14326	{
14327	  /* @@@ better error message */
14328	  error ("mask must be an immediate");
14329	  return gen_reg_rtx (tmode);
14330	}
14331      if (target == 0
14332	  || GET_MODE (target) != tmode
14333	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14334	target = gen_reg_rtx (tmode);
14335      pat = GEN_FCN (icode) (target, op0, op1, op2);
14336      if (! pat)
14337	return 0;
14338      emit_insn (pat);
14339      return target;
14340
14341    case IX86_BUILTIN_PSHUFW:
14342    case IX86_BUILTIN_PSHUFD:
14343    case IX86_BUILTIN_PSHUFHW:
14344    case IX86_BUILTIN_PSHUFLW:
14345      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14346	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14347	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14348	       : CODE_FOR_mmx_pshufw);
14349      arg0 = TREE_VALUE (arglist);
14350      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14351      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14352      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14353      tmode = insn_data[icode].operand[0].mode;
14354      mode1 = insn_data[icode].operand[1].mode;
14355      mode2 = insn_data[icode].operand[2].mode;
14356
14357      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14358	op0 = copy_to_mode_reg (mode1, op0);
14359      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14360	{
14361	  /* @@@ better error message */
14362	  error ("mask must be an immediate");
14363	  return const0_rtx;
14364	}
14365      if (target == 0
14366	  || GET_MODE (target) != tmode
14367	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14368	target = gen_reg_rtx (tmode);
14369      pat = GEN_FCN (icode) (target, op0, op1);
14370      if (! pat)
14371	return 0;
14372      emit_insn (pat);
14373      return target;
14374
14375    case IX86_BUILTIN_PSLLDQI128:
14376    case IX86_BUILTIN_PSRLDQI128:
14377      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14378	       : CODE_FOR_sse2_lshrti3);
14379      arg0 = TREE_VALUE (arglist);
14380      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14381      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14382      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14383      tmode = insn_data[icode].operand[0].mode;
14384      mode1 = insn_data[icode].operand[1].mode;
14385      mode2 = insn_data[icode].operand[2].mode;
14386
14387      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14388	{
14389	  op0 = copy_to_reg (op0);
14390	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14391	}
14392      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14393	{
14394	  error ("shift must be an immediate");
14395	  return const0_rtx;
14396	}
14397      target = gen_reg_rtx (V2DImode);
14398      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14399      if (! pat)
14400	return 0;
14401      emit_insn (pat);
14402      return target;
14403
14404    case IX86_BUILTIN_FEMMS:
14405      emit_insn (gen_femms ());
14406      return NULL_RTX;
14407
14408    case IX86_BUILTIN_PAVGUSB:
14409      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14410
14411    case IX86_BUILTIN_PF2ID:
14412      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14413
14414    case IX86_BUILTIN_PFACC:
14415      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14416
14417    case IX86_BUILTIN_PFADD:
14418     return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14419
14420    case IX86_BUILTIN_PFCMPEQ:
14421      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14422
14423    case IX86_BUILTIN_PFCMPGE:
14424      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14425
14426    case IX86_BUILTIN_PFCMPGT:
14427      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14428
14429    case IX86_BUILTIN_PFMAX:
14430      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14431
14432    case IX86_BUILTIN_PFMIN:
14433      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14434
14435    case IX86_BUILTIN_PFMUL:
14436      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14437
14438    case IX86_BUILTIN_PFRCP:
14439      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14440
14441    case IX86_BUILTIN_PFRCPIT1:
14442      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14443
14444    case IX86_BUILTIN_PFRCPIT2:
14445      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14446
14447    case IX86_BUILTIN_PFRSQIT1:
14448      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14449
14450    case IX86_BUILTIN_PFRSQRT:
14451      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14452
14453    case IX86_BUILTIN_PFSUB:
14454      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14455
14456    case IX86_BUILTIN_PFSUBR:
14457      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14458
14459    case IX86_BUILTIN_PI2FD:
14460      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14461
14462    case IX86_BUILTIN_PMULHRW:
14463      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14464
14465    case IX86_BUILTIN_PF2IW:
14466      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14467
14468    case IX86_BUILTIN_PFNACC:
14469      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14470
14471    case IX86_BUILTIN_PFPNACC:
14472      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14473
14474    case IX86_BUILTIN_PI2FW:
14475      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14476
14477    case IX86_BUILTIN_PSWAPDSI:
14478      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14479
14480    case IX86_BUILTIN_PSWAPDSF:
14481      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14482
14483    case IX86_BUILTIN_SSE_ZERO:
14484      target = gen_reg_rtx (V4SFmode);
14485      emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14486      return target;
14487
14488    case IX86_BUILTIN_MMX_ZERO:
14489      target = gen_reg_rtx (DImode);
14490      emit_insn (gen_mmx_clrdi (target));
14491      return target;
14492
14493    case IX86_BUILTIN_CLRTI:
14494      target = gen_reg_rtx (V2DImode);
14495      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14496      return target;
14497
14498
14499    case IX86_BUILTIN_SQRTSD:
14500      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14501    case IX86_BUILTIN_LOADAPD:
14502      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14503    case IX86_BUILTIN_LOADUPD:
14504      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14505
14506    case IX86_BUILTIN_STOREAPD:
14507      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14508    case IX86_BUILTIN_STOREUPD:
14509      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14510
14511    case IX86_BUILTIN_LOADSD:
14512      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14513
14514    case IX86_BUILTIN_STORESD:
14515      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14516
14517    case IX86_BUILTIN_SETPD1:
14518      target = assign_386_stack_local (DFmode, 0);
14519      arg0 = TREE_VALUE (arglist);
14520      emit_move_insn (adjust_address (target, DFmode, 0),
14521		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14522      op0 = gen_reg_rtx (V2DFmode);
14523      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14524      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14525      return op0;
14526
14527    case IX86_BUILTIN_SETPD:
14528      target = assign_386_stack_local (V2DFmode, 0);
14529      arg0 = TREE_VALUE (arglist);
14530      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14531      emit_move_insn (adjust_address (target, DFmode, 0),
14532		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14533      emit_move_insn (adjust_address (target, DFmode, 8),
14534		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14535      op0 = gen_reg_rtx (V2DFmode);
14536      emit_insn (gen_sse2_movapd (op0, target));
14537      return op0;
14538
14539    case IX86_BUILTIN_LOADRPD:
14540      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14541					 gen_reg_rtx (V2DFmode), 1);
14542      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14543      return target;
14544
14545    case IX86_BUILTIN_LOADPD1:
14546      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14547					 gen_reg_rtx (V2DFmode), 1);
14548      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14549      return target;
14550
14551    case IX86_BUILTIN_STOREPD1:
14552      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14553    case IX86_BUILTIN_STORERPD:
14554      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14555
14556    case IX86_BUILTIN_CLRPD:
14557      target = gen_reg_rtx (V2DFmode);
14558      emit_insn (gen_sse_clrv2df (target));
14559      return target;
14560
14561    case IX86_BUILTIN_MFENCE:
14562	emit_insn (gen_sse2_mfence ());
14563	return 0;
14564    case IX86_BUILTIN_LFENCE:
14565	emit_insn (gen_sse2_lfence ());
14566	return 0;
14567
14568    case IX86_BUILTIN_CLFLUSH:
14569	arg0 = TREE_VALUE (arglist);
14570	op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14571	icode = CODE_FOR_sse2_clflush;
14572	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14573	    op0 = copy_to_mode_reg (Pmode, op0);
14574
14575	emit_insn (gen_sse2_clflush (op0));
14576	return 0;
14577
14578    case IX86_BUILTIN_MOVNTPD:
14579      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14580    case IX86_BUILTIN_MOVNTDQ:
14581      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14582    case IX86_BUILTIN_MOVNTI:
14583      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14584
14585    case IX86_BUILTIN_LOADDQA:
14586      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14587    case IX86_BUILTIN_LOADDQU:
14588      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14589    case IX86_BUILTIN_LOADD:
14590      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14591
14592    case IX86_BUILTIN_STOREDQA:
14593      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14594    case IX86_BUILTIN_STOREDQU:
14595      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14596    case IX86_BUILTIN_STORED:
14597      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14598
14599    case IX86_BUILTIN_MONITOR:
14600      arg0 = TREE_VALUE (arglist);
14601      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14602      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14603      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14604      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14605      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14606      if (!REG_P (op0))
14607	op0 = copy_to_mode_reg (SImode, op0);
14608      if (!REG_P (op1))
14609	op1 = copy_to_mode_reg (SImode, op1);
14610      if (!REG_P (op2))
14611	op2 = copy_to_mode_reg (SImode, op2);
14612      emit_insn (gen_monitor (op0, op1, op2));
14613      return 0;
14614
14615    case IX86_BUILTIN_MWAIT:
14616      arg0 = TREE_VALUE (arglist);
14617      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14618      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14619      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14620      if (!REG_P (op0))
14621	op0 = copy_to_mode_reg (SImode, op0);
14622      if (!REG_P (op1))
14623	op1 = copy_to_mode_reg (SImode, op1);
14624      emit_insn (gen_mwait (op0, op1));
14625      return 0;
14626
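/* Illustrative note (an annotation, not part of the original source):
   the two cases above expand the SSE3 MONITOR/MWAIT pair.  At the source
   level, with a hypothetical "flag" variable, a wait loop looks like:

       __builtin_ia32_monitor (&flag, 0, 0);
       __builtin_ia32_mwait (0, 0);

   All operands are forced into registers first, since the monitor and
   mwait patterns accept only register operands.  */
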
14627    case IX86_BUILTIN_LOADDDUP:
14628      return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14629
14630    case IX86_BUILTIN_LDDQU:
14631      return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14632				       1);
14633
14634    default:
14635      break;
14636    }
14637
14638  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14639    if (d->code == fcode)
14640      {
14641	/* Compares are treated specially.  */
14642	if (d->icode == CODE_FOR_maskcmpv4sf3
14643	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
14644	    || d->icode == CODE_FOR_maskncmpv4sf3
14645	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
14646	    || d->icode == CODE_FOR_maskcmpv2df3
14647	    || d->icode == CODE_FOR_vmmaskcmpv2df3
14648	    || d->icode == CODE_FOR_maskncmpv2df3
14649	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
14650	  return ix86_expand_sse_compare (d, arglist, target);
14651
14652	return ix86_expand_binop_builtin (d->icode, arglist, target);
14653      }
14654
14655  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14656    if (d->code == fcode)
14657      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14658
14659  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14660    if (d->code == fcode)
14661      return ix86_expand_sse_comi (d, arglist, target);
14662
14663  /* @@@ Should really do something sensible here.  */
14664  return 0;
14665}
14666
14667/* Store OPERAND into memory after reload has completed.  This means
14668   that we can't easily use assign_stack_local.  */
14669rtx
14670ix86_force_to_memory (enum machine_mode mode, rtx operand)
14671{
14672  rtx result;
14673  if (!reload_completed)
14674    abort ();
14675  if (TARGET_RED_ZONE)
14676    {
14677      result = gen_rtx_MEM (mode,
14678			    gen_rtx_PLUS (Pmode,
14679					  stack_pointer_rtx,
14680					  GEN_INT (-RED_ZONE_SIZE)));
14681      emit_move_insn (result, operand);
14682    }
14683  else if (TARGET_64BIT)
14684    {
14685      switch (mode)
14686	{
14687	case HImode:
14688	case SImode:
14689	  operand = gen_lowpart (DImode, operand);
14690	  /* FALLTHRU */
14691	case DImode:
14692	  emit_insn (
14693		      gen_rtx_SET (VOIDmode,
14694				   gen_rtx_MEM (DImode,
14695						gen_rtx_PRE_DEC (DImode,
14696							stack_pointer_rtx)),
14697				   operand));
14698	  break;
14699	default:
14700	  abort ();
14701	}
14702      result = gen_rtx_MEM (mode, stack_pointer_rtx);
14703    }
14704  else
14705    {
14706      switch (mode)
14707	{
14708	case DImode:
14709	  {
14710	    rtx operands[2];
14711	    split_di (&operand, 1, operands, operands + 1);
14712	    emit_insn (
14713			gen_rtx_SET (VOIDmode,
14714				     gen_rtx_MEM (SImode,
14715						  gen_rtx_PRE_DEC (Pmode,
14716							stack_pointer_rtx)),
14717				     operands[1]));
14718	    emit_insn (
14719			gen_rtx_SET (VOIDmode,
14720				     gen_rtx_MEM (SImode,
14721						  gen_rtx_PRE_DEC (Pmode,
14722							stack_pointer_rtx)),
14723				     operands[0]));
14724	  }
14725	  break;
14726	case HImode:
14727	  /* It is better to store HImodes as SImodes.  */
14728	  if (!TARGET_PARTIAL_REG_STALL)
14729	    operand = gen_lowpart (SImode, operand);
14730	  /* FALLTHRU */
14731	case SImode:
14732	  emit_insn (
14733		      gen_rtx_SET (VOIDmode,
14734				   gen_rtx_MEM (GET_MODE (operand),
14735						gen_rtx_PRE_DEC (SImode,
14736							stack_pointer_rtx)),
14737				   operand));
14738	  break;
14739	default:
14740	  abort ();
14741	}
14742      result = gen_rtx_MEM (mode, stack_pointer_rtx);
14743    }
14744  return result;
14745}
14746
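/* Example of what ix86_force_to_memory above produces (an illustrative
   annotation, not compiled): on a 64-bit red-zone target the operand is
   stored below the stack pointer without adjusting it,

       movq    %rax, -128(%rsp)

   while the 32-bit path pushes instead, e.g. "pushl %eax", and
   ix86_free_from_memory below releases the slot afterwards.  */
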
14747/* Free the operand from memory.  */
14748void
14749ix86_free_from_memory (enum machine_mode mode)
14750{
14751  if (!TARGET_RED_ZONE)
14752    {
14753      int size;
14754
14755      if (mode == DImode || TARGET_64BIT)
14756	size = 8;
14757      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14758	size = 2;
14759      else
14760	size = 4;
14761      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
14762         to a pop or add instruction if registers are available.  */
14763      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14764			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14765					    GEN_INT (size))));
14766    }
14767}
14768
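/* Sketch of the RTL emitted above (illustrative): freeing a 4-byte slot
   produces

       (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 4)))

   which assembles as "leal 4(%esp), %esp"; peephole2 may later turn it
   into "addl $4, %esp" or a pop into a free scratch register.  */
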
14769/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14770   QImode must go into class Q_REGS.
14771   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
14772   movdf to do mem-to-mem moves through integer regs.  */
14773enum reg_class
14774ix86_preferred_reload_class (rtx x, enum reg_class class)
14775{
14776  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14777    return NO_REGS;
14778  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14779    {
14780      /* SSE can't load any constant directly yet.  */
14781      if (SSE_CLASS_P (class))
14782	return NO_REGS;
14783      /* Floats can load 0 and 1.  */
14784      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14785	{
14786	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
14787	  if (MAYBE_SSE_CLASS_P (class))
14788	    return (reg_class_subset_p (class, GENERAL_REGS)
14789		    ? GENERAL_REGS : FLOAT_REGS);
14790	  else
14791	    return class;
14792	}
14793      /* General regs can load everything.  */
14794      if (reg_class_subset_p (class, GENERAL_REGS))
14795	return GENERAL_REGS;
14796      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
14797      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14798	return NO_REGS;
14799    }
14800  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14801    return NO_REGS;
14802  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14803    return Q_REGS;
14804  return class;
14805}
14806
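/* Worked example for ix86_preferred_reload_class (illustrative): the
   DFmode constant 1.0 satisfies standard_80387_constant_p, so a
   maybe-float class narrows to FLOAT_REGS (or GENERAL_REGS) and the 387
   materializes it with fld1; an arbitrary DFmode constant headed for an
   SSE class yields NO_REGS, forcing it into the constant pool.  */
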
14807/* If we are copying between general and FP registers, we need a memory
14808   location. The same is true for SSE and MMX registers.
14809
14810   The macro can't work reliably when one of the CLASSES is a class containing
14811   registers from multiple units (SSE, MMX, integer).  We avoid this by never
14812   combining those units in a single alternative in the machine description.
14813   Ensure that this constraint holds to avoid unexpected surprises.
14814
14815   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14816   enforce these sanity checks.  */
14817int
14818ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14819			      enum machine_mode mode, int strict)
14820{
14821  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14822      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14823      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14824      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14825      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14826      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14827    {
14828      if (strict)
14829	abort ();
14830      else
14831	return 1;
14832    }
14833  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14834	  || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14835	       || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14836	      && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14837		  || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14838}
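
/* Example (illustrative): a DFmode copy between FLOAT_REGS and SSE_REGS
   has no direct instruction, so secondary memory is required and the
   value travels through a stack slot.  An SImode copy between SSE_REGS
   and GENERAL_REGS can avoid memory via movd when
   TARGET_INTER_UNIT_MOVES is enabled (or when optimizing for size).  */
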
14839/* Return the cost of moving data from a register in class CLASS1 to
14840   one in class CLASS2.
14841
14842   It is not required that the cost always equal 2 when FROM is the same as TO;
14843   on some machines it is expensive to move between registers if they are not
14844   general registers.  */
14845int
14846ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14847			 enum reg_class class2)
14848{
14849  /* In case we require secondary memory, compute the cost of the store
14850     followed by a load.  In order to avoid bad register allocation choices,
14851     we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
14852
14853  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14854    {
14855      int cost = 1;
14856
14857      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14858		   MEMORY_MOVE_COST (mode, class1, 1));
14859      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14860		   MEMORY_MOVE_COST (mode, class2, 1));
14861
14862      /* In the case of copying from a general purpose register, we may emit
14863         multiple stores followed by a single load, causing a memory size
14864         mismatch stall.  Count this as an arbitrarily high cost of 20.  */
14865      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14866	cost += 20;
14867
14868      /* In the case of FP/MMX moves, the registers actually overlap, and we
14869	 have to switch modes in order to treat them differently.  */
14870      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14871          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14872	cost += 20;
14873
14874      return cost;
14875    }
14876
14877  /* Moves between SSE/MMX and integer unit are expensive.  */
14878  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14879      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14880    return ix86_cost->mmxsse_to_integer;
14881  if (MAYBE_FLOAT_CLASS_P (class1))
14882    return ix86_cost->fp_move;
14883  if (MAYBE_SSE_CLASS_P (class1))
14884    return ix86_cost->sse_move;
14885  if (MAYBE_MMX_CLASS_P (class1))
14886    return ix86_cost->mmx_move;
14887  return 2;
14888}
14889
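/* Worked example for ix86_register_move_cost (illustrative): a DImode
   move between GENERAL_REGS and MMX_REGS on a 32-bit target needs
   secondary memory, so the cost is 1 plus the worst-case memory move
   cost of each class, plus 20 because two 32-bit stores feeding one
   64-bit load risks a store-forwarding size mismatch.  A plain
   general-register move falls through to the final "return 2".  */
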
14890/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
14891int
14892ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14893{
14894  /* Flags registers, and only flags registers, can hold CCmode values.  */
14895  if (CC_REGNO_P (regno))
14896    return GET_MODE_CLASS (mode) == MODE_CC;
14897  if (GET_MODE_CLASS (mode) == MODE_CC
14898      || GET_MODE_CLASS (mode) == MODE_RANDOM
14899      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14900    return 0;
14901  if (FP_REGNO_P (regno))
14902    return VALID_FP_MODE_P (mode);
14903  if (SSE_REGNO_P (regno))
14904    return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14905  if (MMX_REGNO_P (regno))
14906    return (TARGET_MMX
14907	    ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14908  /* We handle both integers and floats in the general purpose registers.
14909     In the future we should be able to handle vector modes as well.  */
14910  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14911    return 0;
14912  /* Take care with QImode values: they can live in non-QI regs, but
14913     then they cause partial register stalls.  */
14914  if (regno < 4 || mode != QImode || TARGET_64BIT)
14915    return 1;
14916  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14917}
14918
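/* Example (illustrative): on a 32-bit target a QImode value always fits
   in %al..%bl (regno < 4); letting it live in, say, %esi is allowed only
   when the tuning does not suffer partial register stalls, or once
   reload has started and we can no longer afford to be choosy.  */
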
14919/* Return the cost of moving data of mode M between a
14920   register and memory.  A value of 2 is the default; this cost is
14921   relative to those in `REGISTER_MOVE_COST'.
14922
14923   If moving between registers and memory is more expensive than
14924   between two registers, you should define this macro to express the
14925   relative cost.
14926
14927   Also model the increased cost of moving QImode registers in
14928   non-Q_REGS classes.
14929 */
14930int
14931ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14932{
14933  if (FLOAT_CLASS_P (class))
14934    {
14935      int index;
14936      switch (mode)
14937	{
14938	  case SFmode:
14939	    index = 0;
14940	    break;
14941	  case DFmode:
14942	    index = 1;
14943	    break;
14944	  case XFmode:
14945	    index = 2;
14946	    break;
14947	  default:
14948	    return 100;
14949	}
14950      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14951    }
14952  if (SSE_CLASS_P (class))
14953    {
14954      int index;
14955      switch (GET_MODE_SIZE (mode))
14956	{
14957	  case 4:
14958	    index = 0;
14959	    break;
14960	  case 8:
14961	    index = 1;
14962	    break;
14963	  case 16:
14964	    index = 2;
14965	    break;
14966	  default:
14967	    return 100;
14968	}
14969      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14970    }
14971  if (MMX_CLASS_P (class))
14972    {
14973      int index;
14974      switch (GET_MODE_SIZE (mode))
14975	{
14976	  case 4:
14977	    index = 0;
14978	    break;
14979	  case 8:
14980	    index = 1;
14981	    break;
14982	  default:
14983	    return 100;
14984	}
14985      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14986    }
14987  switch (GET_MODE_SIZE (mode))
14988    {
14989      case 1:
14990	if (in)
14991	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14992		  : ix86_cost->movzbl_load);
14993	else
14994	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14995		  : ix86_cost->int_store[0] + 4);
14996	break;
14997      case 2:
14998	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14999      default:
15000	/* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
15001	if (mode == TFmode)
15002	  mode = XFmode;
15003	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15004		* (((int) GET_MODE_SIZE (mode)
15005		    + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
15006    }
15007}
15008
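/* Worked example for ix86_memory_move_cost (illustrative): storing a
   QImode value from a non-Q_REGS class costs int_store[0] + 4, the
   surcharge for first getting the byte into a byte-addressable
   register; a 16-byte SSE load simply reads sse_load[2] from the
   active cost table.  */
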
15009/* Compute a (partial) cost for rtx X.  Return true if the complete
15010   cost has been computed, and false if subexpressions should be
15011   scanned.  In either case, *TOTAL contains the cost result.  */
15012
15013static bool
15014ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15015{
15016  enum machine_mode mode = GET_MODE (x);
15017
15018  switch (code)
15019    {
15020    case CONST_INT:
15021    case CONST:
15022    case LABEL_REF:
15023    case SYMBOL_REF:
15024      if (TARGET_64BIT && !x86_64_sign_extended_value (x))
15025	*total = 3;
15026      else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
15027	*total = 2;
15028      else if (flag_pic && SYMBOLIC_CONST (x)
15029	       && (!TARGET_64BIT
15030		   || (GET_CODE (x) != LABEL_REF
15031		       && (GET_CODE (x) != SYMBOL_REF
15032		           || !SYMBOL_REF_LOCAL_P (x)))))
15033	*total = 1;
15034      else
15035	*total = 0;
15036      return true;
15037
15038    case CONST_DOUBLE:
15039      if (mode == VOIDmode)
15040	*total = 0;
15041      else
15042	switch (standard_80387_constant_p (x))
15043	  {
15044	  case 1: /* 0.0 */
15045	    *total = 1;
15046	    break;
15047	  default: /* Other constants */
15048	    *total = 2;
15049	    break;
15050	  case 0:
15051	  case -1:
15052	    /* Start with (MEM (SYMBOL_REF)), since that's where
15053	       it'll probably end up.  Add a penalty for size.  */
15054	    *total = (COSTS_N_INSNS (1)
15055		      + (flag_pic != 0 && !TARGET_64BIT)
15056		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15057	    break;
15058	  }
15059      return true;
15060
15061    case ZERO_EXTEND:
15062      /* Zero extension is often completely free on x86_64, so make
15063	 it as cheap as possible.  */
15064      if (TARGET_64BIT && mode == DImode
15065	  && GET_MODE (XEXP (x, 0)) == SImode)
15066	*total = 1;
15067      else if (TARGET_ZERO_EXTEND_WITH_AND)
15068	*total = COSTS_N_INSNS (ix86_cost->add);
15069      else
15070	*total = COSTS_N_INSNS (ix86_cost->movzx);
15071      return false;
15072
15073    case SIGN_EXTEND:
15074      *total = COSTS_N_INSNS (ix86_cost->movsx);
15075      return false;
15076
15077    case ASHIFT:
15078      if (GET_CODE (XEXP (x, 1)) == CONST_INT
15079	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15080	{
15081	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15082	  if (value == 1)
15083	    {
15084	      *total = COSTS_N_INSNS (ix86_cost->add);
15085	      return false;
15086	    }
15087	  if ((value == 2 || value == 3)
15088	      && !TARGET_DECOMPOSE_LEA
15089	      && ix86_cost->lea <= ix86_cost->shift_const)
15090	    {
15091	      *total = COSTS_N_INSNS (ix86_cost->lea);
15092	      return false;
15093	    }
15094	}
15095      /* FALLTHRU */
15096
15097    case ROTATE:
15098    case ASHIFTRT:
15099    case LSHIFTRT:
15100    case ROTATERT:
15101      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15102	{
15103	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15104	    {
15105	      if (INTVAL (XEXP (x, 1)) > 32)
15106		*total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
15107	      else
15108		*total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
15109	    }
15110	  else
15111	    {
15112	      if (GET_CODE (XEXP (x, 1)) == AND)
15113		*total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
15114	      else
15115		*total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
15116	    }
15117	}
15118      else
15119	{
15120	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15121	    *total = COSTS_N_INSNS (ix86_cost->shift_const);
15122	  else
15123	    *total = COSTS_N_INSNS (ix86_cost->shift_var);
15124	}
15125      return false;
15126
15127    case MULT:
15128      if (FLOAT_MODE_P (mode))
15129	*total = COSTS_N_INSNS (ix86_cost->fmul);
15130      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15131	{
15132	  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15133	  int nbits;
15134
15135	  for (nbits = 0; value != 0; value >>= 1)
15136	    nbits++;
15137
15138	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15139			          + nbits * ix86_cost->mult_bit);
15140	}
15141      else
15142	{
15143	  /* This is arbitrary.  */
15144	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15145			          + 7 * ix86_cost->mult_bit);
15146	}
15147      return false;
15148
15149    case DIV:
15150    case UDIV:
15151    case MOD:
15152    case UMOD:
15153      if (FLOAT_MODE_P (mode))
15154	*total = COSTS_N_INSNS (ix86_cost->fdiv);
15155      else
15156	*total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15157      return false;
15158
15159    case PLUS:
15160      if (FLOAT_MODE_P (mode))
15161	*total = COSTS_N_INSNS (ix86_cost->fadd);
15162      else if (!TARGET_DECOMPOSE_LEA
15163	       && GET_MODE_CLASS (mode) == MODE_INT
15164	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15165	{
15166	  if (GET_CODE (XEXP (x, 0)) == PLUS
15167	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15168	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15169	      && CONSTANT_P (XEXP (x, 1)))
15170	    {
15171	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15172	      if (val == 2 || val == 4 || val == 8)
15173		{
15174		  *total = COSTS_N_INSNS (ix86_cost->lea);
15175		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15176		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15177				      outer_code);
15178		  *total += rtx_cost (XEXP (x, 1), outer_code);
15179		  return true;
15180		}
15181	    }
15182	  else if (GET_CODE (XEXP (x, 0)) == MULT
15183		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15184	    {
15185	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15186	      if (val == 2 || val == 4 || val == 8)
15187		{
15188		  *total = COSTS_N_INSNS (ix86_cost->lea);
15189		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15190		  *total += rtx_cost (XEXP (x, 1), outer_code);
15191		  return true;
15192		}
15193	    }
15194	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
15195	    {
15196	      *total = COSTS_N_INSNS (ix86_cost->lea);
15197	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15198	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15199	      *total += rtx_cost (XEXP (x, 1), outer_code);
15200	      return true;
15201	    }
15202	}
15203      /* FALLTHRU */
15204
15205    case MINUS:
15206      if (FLOAT_MODE_P (mode))
15207	{
15208	  *total = COSTS_N_INSNS (ix86_cost->fadd);
15209	  return false;
15210	}
15211      /* FALLTHRU */
15212
15213    case AND:
15214    case IOR:
15215    case XOR:
15216      if (!TARGET_64BIT && mode == DImode)
15217	{
15218	  *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15219		    + (rtx_cost (XEXP (x, 0), outer_code)
15220		       << (GET_MODE (XEXP (x, 0)) != DImode))
15221		    + (rtx_cost (XEXP (x, 1), outer_code)
15222	               << (GET_MODE (XEXP (x, 1)) != DImode)));
15223	  return true;
15224	}
15225      /* FALLTHRU */
15226
15227    case NEG:
15228      if (FLOAT_MODE_P (mode))
15229	{
15230	  *total = COSTS_N_INSNS (ix86_cost->fchs);
15231	  return false;
15232	}
15233      /* FALLTHRU */
15234
15235    case NOT:
15236      if (!TARGET_64BIT && mode == DImode)
15237	*total = COSTS_N_INSNS (ix86_cost->add * 2);
15238      else
15239	*total = COSTS_N_INSNS (ix86_cost->add);
15240      return false;
15241
15242    case FLOAT_EXTEND:
15243      if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15244	*total = 0;
15245      return false;
15246
15247    case ABS:
15248      if (FLOAT_MODE_P (mode))
15249	*total = COSTS_N_INSNS (ix86_cost->fabs);
15250      return false;
15251
15252    case SQRT:
15253      if (FLOAT_MODE_P (mode))
15254	*total = COSTS_N_INSNS (ix86_cost->fsqrt);
15255      return false;
15256
15257    case UNSPEC:
15258      if (XINT (x, 1) == UNSPEC_TP)
15259	*total = 0;
15260      return false;
15261
15262    default:
15263      return false;
15264    }
15265}
15266
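/* Example of the ASHIFT costing above (illustrative): "x << 1" is
   priced as an add, since it can be emitted as "addl %eax, %eax", and
   "x << 2" or "x << 3" is priced as an lea when lea is no more
   expensive than a constant shift, because "leal 0(,%eax,4), %eax"
   implements the shift by 2.  */
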
15267#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15268static void
15269ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15270{
15271  init_section ();
15272  fputs ("\tpushl $", asm_out_file);
15273  assemble_name (asm_out_file, XSTR (symbol, 0));
15274  fputc ('\n', asm_out_file);
15275}
15276#endif
15277
15278#if TARGET_MACHO
15279
15280static int current_machopic_label_num;
15281
15282/* Given a symbol name and its associated stub, write out the
15283   definition of the stub.  */
15284
15285void
15286machopic_output_stub (FILE *file, const char *symb, const char *stub)
15287{
15288  unsigned int length;
15289  char *binder_name, *symbol_name, lazy_ptr_name[32];
15290  int label = ++current_machopic_label_num;
15291
15292  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
15293  symb = (*targetm.strip_name_encoding) (symb);
15294
15295  length = strlen (stub);
15296  binder_name = alloca (length + 32);
15297  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15298
15299  length = strlen (symb);
15300  symbol_name = alloca (length + 32);
15301  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15302
15303  sprintf (lazy_ptr_name, "L%d$lz", label);
15304
15305  if (MACHOPIC_PURE)
15306    machopic_picsymbol_stub_section ();
15307  else
15308    machopic_symbol_stub_section ();
15309
15310  fprintf (file, "%s:\n", stub);
15311  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15312
15313  if (MACHOPIC_PURE)
15314    {
15315      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15316      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15317      fprintf (file, "\tjmp %%edx\n");
15318    }
15319  else
15320    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15321
15322  fprintf (file, "%s:\n", binder_name);
15323
15324  if (MACHOPIC_PURE)
15325    {
15326      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15327      fprintf (file, "\tpushl %%eax\n");
15328    }
15329  else
15330    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15331
15332  fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15333
15334  machopic_lazy_symbol_ptr_section ();
15335  fprintf (file, "%s:\n", lazy_ptr_name);
15336  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15337  fprintf (file, "\t.long %s\n", binder_name);
15338}
15339#endif /* TARGET_MACHO */
15340
15341/* Order the registers for register allocator.  */
15342
15343void
15344x86_order_regs_for_local_alloc (void)
15345{
15346   int pos = 0;
15347   int i;
15348
15349   /* First allocate the local general purpose registers.  */
15350   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15351     if (GENERAL_REGNO_P (i) && call_used_regs[i])
15352	reg_alloc_order [pos++] = i;
15353
15354   /* Global general purpose registers.  */
15355   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15356     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15357	reg_alloc_order [pos++] = i;
15358
15359   /* x87 registers come first in case we are doing FP math
15360      using them.  */
15361   if (!TARGET_SSE_MATH)
15362     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15363       reg_alloc_order [pos++] = i;
15364
15365   /* SSE registers.  */
15366   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15367     reg_alloc_order [pos++] = i;
15368   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15369     reg_alloc_order [pos++] = i;
15370
15371   /* x87 registers.  */
15372   if (TARGET_SSE_MATH)
15373     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15374       reg_alloc_order [pos++] = i;
15375
15376   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15377     reg_alloc_order [pos++] = i;
15378
15379   /* Initialize the rest of the array, as we do not allocate some
15380      registers at all.  */
15381   while (pos < FIRST_PSEUDO_REGISTER)
15382     reg_alloc_order [pos++] = 0;
15383}
15384
15385#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15386#define TARGET_USE_MS_BITFIELD_LAYOUT 0
15387#endif
15388
15389/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15390   struct attribute_spec.handler.  */
15391static tree
15392ix86_handle_struct_attribute (tree *node, tree name,
15393			      tree args ATTRIBUTE_UNUSED,
15394			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15395{
15396  tree *type = NULL;
15397  if (DECL_P (*node))
15398    {
15399      if (TREE_CODE (*node) == TYPE_DECL)
15400	type = &TREE_TYPE (*node);
15401    }
15402  else
15403    type = node;
15404
15405  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15406		 || TREE_CODE (*type) == UNION_TYPE)))
15407    {
15408      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15409      *no_add_attrs = true;
15410    }
15411
15412  else if ((is_attribute_p ("ms_struct", name)
15413	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15414	   || ((is_attribute_p ("gcc_struct", name)
15415		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15416    {
15417      warning ("`%s' incompatible attribute ignored",
15418               IDENTIFIER_POINTER (name));
15419      *no_add_attrs = true;
15420    }
15421
15422  return NULL_TREE;
15423}
15424
15425static bool
15426ix86_ms_bitfield_layout_p (tree record_type)
15427{
15428  return ((TARGET_USE_MS_BITFIELD_LAYOUT
15429	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15430	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
15431}
15432
15433/* Return an expression indicating where the `this' parameter is
15434   located on entry to FUNCTION.  */
15435
15436static rtx
15437x86_this_parameter (tree function)
15438{
15439  tree type = TREE_TYPE (function);
15440
15441  if (TARGET_64BIT)
15442    {
15443      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15444      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15445    }
15446
15447  if (ix86_function_regparm (type, function) > 0)
15448    {
15449      tree parm;
15450
15451      parm = TYPE_ARG_TYPES (type);
15452      /* Figure out whether or not the function has a variable number of
15453	 arguments.  */
15454      for (; parm; parm = TREE_CHAIN (parm))
15455	if (TREE_VALUE (parm) == void_type_node)
15456	  break;
15457      /* If not, the this parameter is in the first argument.  */
15458      if (parm)
15459	{
15460	  int regno = 0;
15461	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15462	    regno = 2;
15463	  return gen_rtx_REG (SImode, regno);
15464	}
15465    }
15466
15467  if (aggregate_value_p (TREE_TYPE (type), type))
15468    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15469  else
15470    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15471}
15472
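/* Example (illustrative): for a 32-bit fastcall method the incoming
   `this' arrives in %ecx (regno 2 above); with regparm and a fixed
   argument list it arrives in %eax; otherwise it is the first stack
   argument at 4(%esp), or at 8(%esp) when a hidden aggregate-return
   pointer is passed first.  */
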
15473/* Determine whether x86_output_mi_thunk can succeed.  */
15474
15475static bool
15476x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15477			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15478			 HOST_WIDE_INT vcall_offset, tree function)
15479{
15480  /* 64-bit can handle anything.  */
15481  if (TARGET_64BIT)
15482    return true;
15483
15484  /* For 32-bit, everything's fine if we have one free register.  */
15485  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15486    return true;
15487
15488  /* Need a free register for vcall_offset.  */
15489  if (vcall_offset)
15490    return false;
15491
15492  /* Need a free register for GOT references.  */
15493  if (flag_pic && !(*targetm.binds_local_p) (function))
15494    return false;
15495
15496  /* Otherwise ok.  */
15497  return true;
15498}
15499
15500/* Output the assembler code for a thunk function.  THUNK_DECL is the
15501   declaration for the thunk function itself, FUNCTION is the decl for
15502   the target function.  DELTA is an immediate constant offset to be
15503   added to THIS.  If VCALL_OFFSET is nonzero, the word at
15504   *(*this + vcall_offset) should be added to THIS.  */
15505
15506static void
15507x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15508		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15509		     HOST_WIDE_INT vcall_offset, tree function)
15510{
15511  rtx xops[3];
15512  rtx this = x86_this_parameter (function);
15513  rtx this_reg, tmp;
15514
15515  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
15516     pull it in now and let DELTA benefit.  */
15517  if (REG_P (this))
15518    this_reg = this;
15519  else if (vcall_offset)
15520    {
15521      /* Put the this parameter into %eax.  */
15522      xops[0] = this;
15523      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15524      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15525    }
15526  else
15527    this_reg = NULL_RTX;
15528
15529  /* Adjust the this parameter by a fixed constant.  */
15530  if (delta)
15531    {
15532      xops[0] = GEN_INT (delta);
15533      xops[1] = this_reg ? this_reg : this;
15534      if (TARGET_64BIT)
15535	{
15536	  if (!x86_64_general_operand (xops[0], DImode))
15537	    {
15538	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15539	      xops[1] = tmp;
15540	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15541	      xops[0] = tmp;
15542	      xops[1] = this;
15543	    }
15544	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15545	}
15546      else
15547	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15548    }
15549
15550  /* Adjust the this parameter by a value stored in the vtable.  */
15551  if (vcall_offset)
15552    {
15553      if (TARGET_64BIT)
15554	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15555      else
15556	{
15557	  int tmp_regno = 2 /* ECX */;
15558	  if (lookup_attribute ("fastcall",
15559	      TYPE_ATTRIBUTES (TREE_TYPE (function))))
15560	    tmp_regno = 0 /* EAX */;
15561	  tmp = gen_rtx_REG (SImode, tmp_regno);
15562	}
15563
15564      xops[0] = gen_rtx_MEM (Pmode, this_reg);
15565      xops[1] = tmp;
15566      if (TARGET_64BIT)
15567	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15568      else
15569	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15570
15571      /* Adjust the this parameter.  */
15572      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15573      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15574	{
15575	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15576	  xops[0] = GEN_INT (vcall_offset);
15577	  xops[1] = tmp2;
15578	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15579	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15580	}
15581      xops[1] = this_reg;
15582      if (TARGET_64BIT)
15583	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15584      else
15585	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15586    }
15587
15588  /* If necessary, drop THIS back to its stack slot.  */
15589  if (this_reg && this_reg != this)
15590    {
15591      xops[0] = this_reg;
15592      xops[1] = this;
15593      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15594    }
15595
15596  xops[0] = XEXP (DECL_RTL (function), 0);
15597  if (TARGET_64BIT)
15598    {
15599      if (!flag_pic || (*targetm.binds_local_p) (function))
15600	output_asm_insn ("jmp\t%P0", xops);
15601      else
15602	{
15603	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15604	  tmp = gen_rtx_CONST (Pmode, tmp);
15605	  tmp = gen_rtx_MEM (QImode, tmp);
15606	  xops[0] = tmp;
15607	  output_asm_insn ("jmp\t%A0", xops);
15608	}
15609    }
15610  else
15611    {
15612      if (!flag_pic || (*targetm.binds_local_p) (function))
15613	output_asm_insn ("jmp\t%P0", xops);
15614      else
15615#if TARGET_MACHO
15616	if (TARGET_MACHO)
15617	  {
15618	    const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15619	    tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15620	    tmp = gen_rtx_MEM (QImode, tmp);
15621	    xops[0] = tmp;
15622	    output_asm_insn ("jmp\t%0", xops);
15623	  }
15624	else
15625#endif /* TARGET_MACHO */
15626	{
15627	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15628	  output_set_got (tmp);
15629
15630	  xops[1] = tmp;
15631	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15632	  output_asm_insn ("jmp\t{*}%1", xops);
15633	}
15634    }
15635}
15636
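/* Illustrative thunk output (a sketch with an invented target name): a
   32-bit non-PIC thunk with DELTA == -4 and no VCALL_OFFSET adjusts
   `this' in its stack slot and tail-jumps:

       addl    $-4, 4(%esp)
       jmp     real_method

   When VCALL_OFFSET is nonzero, `this' is first pulled into a register
   so the vtable word at *(*this + vcall_offset) can be added too.  */
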
15637static void
15638x86_file_start (void)
15639{
15640  default_file_start ();
15641  if (X86_FILE_START_VERSION_DIRECTIVE)
15642    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15643  if (X86_FILE_START_FLTUSED)
15644    fputs ("\t.global\t__fltused\n", asm_out_file);
15645  if (ix86_asm_dialect == ASM_INTEL)
15646    fputs ("\t.intel_syntax\n", asm_out_file);
15647}
15648
15649int
15650x86_field_alignment (tree field, int computed)
15651{
15652  enum machine_mode mode;
15653  tree type = TREE_TYPE (field);
15654
15655  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15656    return computed;
15657  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15658		    ? get_inner_array_type (type) : type);
15659  if (mode == DFmode || mode == DCmode
15660      || GET_MODE_CLASS (mode) == MODE_INT
15661      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15662    return MIN (32, computed);
15663  return computed;
15664}
15665
15666/* Output assembler code to FILE to increment profiler label # LABELNO
15667   for profiling a function entry.  */
15668void
15669x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15670{
15671  if (TARGET_64BIT)
15672    if (flag_pic)
15673      {
15674#ifndef NO_PROFILE_COUNTERS
15675	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15676#endif
15677	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15678      }
15679    else
15680      {
15681#ifndef NO_PROFILE_COUNTERS
15682	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15683#endif
15684	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15685      }
15686  else if (flag_pic)
15687    {
15688#ifndef NO_PROFILE_COUNTERS
15689      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15690	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15691#endif
15692      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15693    }
15694  else
15695    {
15696#ifndef NO_PROFILE_COUNTERS
15697      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15698	       PROFILE_COUNT_REGISTER);
15699#endif
15700      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15701    }
15702}
15703
15704/* We don't have exact information about the insn sizes, but we may assume
15705   quite safely that we are informed about all 1-byte insns and memory
15706   address sizes.  This is enough to eliminate unnecessary padding in
15707   99% of cases.  */
15708
15709static int
15710min_insn_size (rtx insn)
15711{
15712  int l = 0;
15713
15714  if (!INSN_P (insn) || !active_insn_p (insn))
15715    return 0;
15716
15717  /* Discard alignments we have emitted, and jump instructions.  */
15718  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15719      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15720    return 0;
15721  if (GET_CODE (insn) == JUMP_INSN
15722      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15723	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15724    return 0;
15725
15726  /* Important case: calls are always 5 bytes.
15727     It is common to have many calls in a row.  */
15728  if (GET_CODE (insn) == CALL_INSN
15729      && symbolic_reference_mentioned_p (PATTERN (insn))
15730      && !SIBLING_CALL_P (insn))
15731    return 5;
15732  if (get_attr_length (insn) <= 1)
15733    return 1;
15734
15735  /* For normal instructions we may rely on the sizes of addresses
15736     and the presence of a symbol to require 4 bytes of encoding.
15737     This is not the case for jumps, where references are PC-relative.  */
15738  if (GET_CODE (insn) != JUMP_INSN)
15739    {
15740      l = get_attr_length_address (insn);
15741      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15742	l = 4;
15743    }
15744  if (l)
15745    return 1+l;
15746  else
15747    return 2;
15748}
15749
15750/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
15751   16-byte window.  */
15752
15753static void
15754k8_avoid_jump_misspredicts (void)
15755{
15756  rtx insn, start = get_insns ();
15757  int nbytes = 0, njumps = 0;
15758  int isjump = 0;
15759
15760  /* Look for all minimal intervals of instructions containing 4 jumps.
15761     The intervals are bounded by START and INSN.  NBYTES is the total
15762     size of the instructions in the interval, including INSN but not
15763     including START.  When NBYTES is smaller than 16 bytes, it is
15764     possible that START and INSN end up in the same 16-byte page.
15765
15766     The smallest offset in the page at which INSN can start is the case
15767     where START ends at offset 0.  The offset of INSN is then
15768     NBYTES - sizeof (INSN).  We add a p2align to the 16-byte window with
15769     maxskip 17 - NBYTES + sizeof (INSN).  */
15770  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15771    {
15772
15773      nbytes += min_insn_size (insn);
15774      if (rtl_dump_file)
15775        fprintf (rtl_dump_file, "Insn %i estimated to %i bytes\n",
15776		INSN_UID (insn), min_insn_size (insn));
15777      if ((GET_CODE (insn) == JUMP_INSN
15778	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
15779	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15780	  || GET_CODE (insn) == CALL_INSN)
15781	njumps++;
15782      else
15783	continue;
15784
15785      while (njumps > 3)
15786	{
15787	  start = NEXT_INSN (start);
15788	  if ((GET_CODE (start) == JUMP_INSN
15789	       && GET_CODE (PATTERN (start)) != ADDR_VEC
15790	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15791	      || GET_CODE (start) == CALL_INSN)
15792	    njumps--, isjump = 1;
15793	  else
15794	    isjump = 0;
15795	  nbytes -= min_insn_size (start);
15796	}
15797      if (njumps < 0)
15798	abort ();
15799      if (rtl_dump_file)
15800        fprintf (rtl_dump_file, "Interval %i to %i has %i bytes\n",
15801		INSN_UID (start), INSN_UID (insn), nbytes);
15802
15803      if (njumps == 3 && isjump && nbytes < 16)
15804	{
15805	  int padsize = 15 - nbytes + min_insn_size (insn);
15806
15807	  if (rtl_dump_file)
15808	    fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15809          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15810	}
15811    }
15812}
15813
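/* Illustrative effect of the pass above (sketch): when a fourth branch
   would land in the same 16-byte window as three earlier ones, an align
   of 15 - NBYTES + sizeof (INSN) bytes is emitted before it, pushing
   the branch into the next window so no window holds four branches.  */
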
15814/* Implement machine specific optimizations.
15815   At the moment we implement a single transformation: the AMD Athlon works
15816   faster when RET is not the destination of a conditional jump or directly
15817   preceded by another jump instruction.  We avoid the penalty by inserting
15818   a NOP just before the RET instruction in such cases.  */
15819static void
15820ix86_reorg (void)
15821{
15822  edge e;
15823
15824  if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15825    return;
15826  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15827  {
15828    basic_block bb = e->src;
15829    rtx ret = BB_END (bb);
15830    rtx prev;
15831    bool replace = false;
15832
15833    if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15834	|| !maybe_hot_bb_p (bb))
15835      continue;
15836    for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15837      if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15838	break;
15839    if (prev && GET_CODE (prev) == CODE_LABEL)
15840      {
15841	edge e;
15842	for (e = bb->pred; e; e = e->pred_next)
15843	  if (EDGE_FREQUENCY (e) && e->src->index >= 0
15844	      && !(e->flags & EDGE_FALLTHRU))
15845	    replace = true;
15846      }
15847    if (!replace)
15848      {
15849	prev = prev_active_insn (ret);
15850	if (prev
15851	    && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15852		|| GET_CODE (prev) == CALL_INSN))
15853	  replace = true;
15854	/* Empty functions get a branch mispredict even when the jump destination
15855	   is not visible to us.  */
15856	if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15857	  replace = true;
15858      }
15859    if (replace)
15860      {
15861        emit_insn_before (gen_return_internal_long (), ret);
15862	delete_insn (ret);
15863      }
15864  }
15865  k8_avoid_jump_misspredicts ();
15866}
15867
15868/* Return nonzero when a QImode register that must be represented via a REX
15869   prefix is used.  */
15870bool
15871x86_extended_QIreg_mentioned_p (rtx insn)
15872{
15873  int i;
15874  extract_insn_cached (insn);
15875  for (i = 0; i < recog_data.n_operands; i++)
15876    if (REG_P (recog_data.operand[i])
15877	&& REGNO (recog_data.operand[i]) >= 4)
15878       return true;
15879  return false;
15880}
15881
15882/* Return nonzero when P points to a register encoded via a REX prefix.
15883   Called via for_each_rtx.  */
15884static int
15885extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15886{
15887   unsigned int regno;
15888   if (!REG_P (*p))
15889     return 0;
15890   regno = REGNO (*p);
15891   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15892}
15893
15894/* Return true when INSN mentions a register that must be encoded using a
15895   REX prefix.  */
15896bool
15897x86_extended_reg_mentioned_p (rtx insn)
15898{
15899  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15900}
15901
15902/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
15903   optabs would emit if we didn't have TFmode patterns.  */
15904
15905void
15906x86_emit_floatuns (rtx operands[2])
15907{
15908  rtx neglab, donelab, i0, i1, f0, in, out;
15909  enum machine_mode mode, inmode;
15910
15911  inmode = GET_MODE (operands[1]);
15912  if (inmode != SImode
15913      && inmode != DImode)
15914    abort ();
15915
15916  out = operands[0];
15917  in = force_reg (inmode, operands[1]);
15918  mode = GET_MODE (out);
15919  neglab = gen_label_rtx ();
15920  donelab = gen_label_rtx ();
15921  i1 = gen_reg_rtx (Pmode);
15922  f0 = gen_reg_rtx (mode);
15923
15924  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15925
15926  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15927  emit_jump_insn (gen_jump (donelab));
15928  emit_barrier ();
15929
15930  emit_label (neglab);
15931
15932  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15933  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15934  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15935  expand_float (f0, i0, 0);
15936  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15937
15938  emit_label (donelab);
15939}
15940
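/* A minimal C sketch of the conversion above (illustrative; the
   function name is invented and a 64-bit unsigned long long is
   assumed): inputs with the sign bit set are halved with the low bit
   kept sticky, so the final doubling rounds correctly:

       double u64_to_double (unsigned long long x)
       {
         if ((long long) x >= 0)
           return (double) (long long) x;
         unsigned long long h = (x >> 1) | (x & 1);
         double d = (double) (long long) h;
         return d + d;
       }
*/
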
15941/* Return true if we do not know how to pass TYPE solely in registers.  */
15942bool
15943ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15944{
15945   if (default_must_pass_in_stack (mode, type))
15946     return true;
15947   return (!TARGET_64BIT && type && mode == TImode);
15948}
15949
15950/* Initialize vector TARGET via VALS.  */
15951void
15952ix86_expand_vector_init (rtx target, rtx vals)
15953{
15954  enum machine_mode mode = GET_MODE (target);
15955  int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15956  int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15957  int i;
15958
15959  for (i = n_elts - 1; i >= 0; i--)
15960    if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15961	&& GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15962      break;
15963
15964  /* A few special cases first ...
15965     ... constants are best loaded from the constant pool.  */
15966  if (i < 0)
15967    {
15968      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15969      return;
15970    }
15971
15972  /* ... values where only the first field is non-constant are best loaded
15973     from the pool and overwritten via a move later.  */
15974  if (!i)
15975    {
15976      rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15977				    GET_MODE_INNER (mode), 0);
15978
15979      op = force_reg (mode, op);
15980      XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15981      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15982      switch (GET_MODE (target))
15983	{
15984	  case V2DFmode:
15985	    emit_insn (gen_sse2_movsd (target, target, op));
15986	    break;
15987	  case V4SFmode:
15988	    emit_insn (gen_sse_movss (target, target, op));
15989	    break;
15990	  default:
15991	    break;
15992	}
15993      return;
15994    }
15995
15996  /* And the general case, built with a sequence of unpack operations.  */
15997  switch (GET_MODE (target))
15998    {
15999      case V2DFmode:
16000	{
16001	  rtx vecop0 =
16002	    simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
16003	  rtx vecop1 =
16004	    simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
16005
16006	  vecop0 = force_reg (V2DFmode, vecop0);
16007	  vecop1 = force_reg (V2DFmode, vecop1);
16008	  emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
16009	}
16010	break;
16011      case V4SFmode:
16012	{
16013	  rtx vecop0 =
16014	    simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
16015	  rtx vecop1 =
16016	    simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
16017	  rtx vecop2 =
16018	    simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
16019	  rtx vecop3 =
16020	    simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
16021	  rtx tmp1 = gen_reg_rtx (V4SFmode);
16022	  rtx tmp2 = gen_reg_rtx (V4SFmode);
16023
16024	  vecop0 = force_reg (V4SFmode, vecop0);
16025	  vecop1 = force_reg (V4SFmode, vecop1);
16026	  vecop2 = force_reg (V4SFmode, vecop2);
16027	  vecop3 = force_reg (V4SFmode, vecop3);
16028	  emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
16029	  emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
16030	  emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
16031	}
16032	break;
16033      default:
16034	abort ();
16035    }
16036}
16037
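/* Illustrative expansion of the V4SFmode arm above for {a, b, c, d},
   written as three-operand pseudo-assembly:

       unpcklps tmp1, vec(b), vec(d)      tmp1   = { b, d, .., .. }
       unpcklps tmp2, vec(a), vec(c)      tmp2   = { a, c, .., .. }
       unpcklps target, tmp2, tmp1        target = { a, b, c, d }

   unpcklps interleaves the two low elements of its operands, so two
   rounds of interleaving assemble the whole vector.  */
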
16038#include "gt-i386.h"
16039