/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 130711 2004-06-19 07:29:04Z obrien $ */


#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  3,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  2,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  2,					/* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  23,					/* cost of FADD and FSUB insns.  */
  27,					/* cost of FMUL instruction.  */
  88,					/* cost of FDIV instruction.  */
  22,					/* cost of FABS instruction.  */
  24,					/* cost of FCHS instruction.  */
  122,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  8,					/* cost of FADD and FSUB insns.  */
  16,					/* cost of FMUL instruction.  */
  73,					/* cost of FDIV instruction.  */
  3,					/* cost of FABS instruction.  */
  3,					/* cost of FCHS instruction.  */
  83,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  3,					/* cost of FADD and FSUB insns.  */
  3,					/* cost of FMUL instruction.  */
  39,					/* cost of FDIV instruction.  */
  1,					/* cost of FABS instruction.  */
  1,					/* cost of FCHS instruction.  */
  70,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  4,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* cost of FADD and FSUB insns.  */
  5,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  5,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  42,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  24,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  8,					/* variable shift costs */
  8,					/* constant shift costs */
  30,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  112,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* cost of FADD and FSUB insns.  */
  7,					/* cost of FMUL instruction.  */
  43,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  43,					/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
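/* Note: this initial value is provisional; override_options below
   re-points ix86_cost at the table for the selected -mcpu=, or at
   size_cost when optimizing for size.  */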

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
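/* Note: each mask above is tested against the bit for the active
   processor, as in (x86_3dnow_a & (1 << ix86_arch)) in
   override_options below; CPUMASK, used there for tuning checks, is
   presumably the analogous (1 << ix86_cpu) test from i386.h.  */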

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
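/* For example, GCC register number 1 (%edx) is emitted as register 2
   in the debug info, matching the SVR4 numbering of %edx described in
   the large comment below.  */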

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

/* The encoding characters for the four TLS models present in ELF.  */

static char const tls_model_chars[] = " GLil";
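/* Note: index 0 is unused; assuming the usual tls_model enumeration
   order, 'G', 'L', 'i' and 'l' encode the global-dynamic,
   local-dynamic, initial-exec and local-exec models respectively.  */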

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
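/* With the 64-bit values REGPARM_MAX = 6, UNITS_PER_WORD = 8 and
   SSE_REGPARM_MAX = 8, this works out to 6 * 8 + 8 * 16 = 176 bytes.  */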

/* Define the structure for the machine field in struct function.  */
struct machine_function GTY(())
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  const char *some_ld_name;
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;

static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
				       int, int, FILE *));
static const char *get_some_local_dynamic_name PARAMS ((void));
static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
static rtx maybe_get_pool_constant PARAMS ((rtx));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx get_thread_pointer PARAMS ((void));
static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static struct machine_function * ix86_init_machine_status PARAMS ((void));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));
static void ix86_init_mmx_sse_builtins PARAMS ((void));
static rtx x86_this_parameter PARAMS ((tree));
static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
					 HOST_WIDE_INT, tree));
static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
					     HOST_WIDE_INT, tree));

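/* Decomposed form of an x86 effective address,
   base + index * scale + disp; e.g. the operand 16(%ebx,%ecx,4) has
   base %ebx, index %ecx, scale 4 and disp 16.  Filled in by
   ix86_decompose_address below.  */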
struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
static bool ix86_cannot_force_const_mem PARAMS ((rtx));

static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
static const char *ix86_strip_name_encoding PARAMS ((const char *))
     ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					  rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
static int ix86_save_reg PARAMS ((unsigned int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
static int ix86_fntype_regparm PARAMS ((tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int ix86_value_regno PARAMS ((enum machine_mode));
static bool contains_128bit_aligned_vector_p PARAMS ((tree));

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
 */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

struct gcc_target targetm = TARGET_INITIALIZER;

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				    | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

  /* By default our XFmode is the 80-bit extended format.  If we use
     TFmode instead, it's also the 80-bit format, but with padding.  */
  real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
  real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_cpu = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	break;
      }
  /* Check the bounds first: when no alias matched, i equals pta_size
     and processor_alias_table[i] must not be read.  */
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
  else if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
    x86_prefetch_sse = true;

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
   if (TARGET_64BIT)
     ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
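  /* For instance, -mpreferred-stack-boundary=4 requests 2**4 = 16 byte
     (128-bit) stack alignment.  */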

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Turn on SSE2 builtins for -mpni.  */
  if (TARGET_PNI)
    target_flags |= MASK_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    target_flags |= MASK_SSE;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}

void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line option
     specifying them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { NULL,        0, 0, false, false, false, NULL }
};
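/* For example, user code selects these with declarations such as
     int __attribute__ ((stdcall)) f (int);
     int __attribute__ ((regparm (3))) g (int, int, int);  */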

/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
1421
1422/* Handle a "regparm" attribute;
1423   arguments as in struct attribute_spec.handler.  */
1424static tree
1425ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1426     tree *node;
1427     tree name;
1428     tree args;
1429     int flags ATTRIBUTE_UNUSED;
1430     bool *no_add_attrs;
1431{
1432  if (TREE_CODE (*node) != FUNCTION_TYPE
1433      && TREE_CODE (*node) != METHOD_TYPE
1434      && TREE_CODE (*node) != FIELD_DECL
1435      && TREE_CODE (*node) != TYPE_DECL)
1436    {
1437      warning ("`%s' attribute only applies to functions",
1438	       IDENTIFIER_POINTER (name));
1439      *no_add_attrs = true;
1440    }
1441  else
1442    {
1443      tree cst;
1444
1445      cst = TREE_VALUE (args);
1446      if (TREE_CODE (cst) != INTEGER_CST)
1447	{
1448	  warning ("`%s' attribute requires an integer constant argument",
1449		   IDENTIFIER_POINTER (name));
1450	  *no_add_attrs = true;
1451	}
1452      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1453	{
1454	  warning ("argument to `%s' attribute larger than %d",
1455		   IDENTIFIER_POINTER (name), REGPARM_MAX);
1456	  *no_add_attrs = true;
1457	}
1458    }
1459
1460  return NULL_TREE;
1461}
1462
1463/* Return 0 if the attributes for two types are incompatible, 1 if they
1464   are compatible, and 2 if they are nearly compatible (which causes a
1465   warning to be generated).  */
1466
1467static int
1468ix86_comp_type_attributes (type1, type2)
1469     tree type1;
1470     tree type2;
1471{
1472  /* Check for mismatch of non-default calling convention.  */
1473  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1474
1475  if (TREE_CODE (type1) != FUNCTION_TYPE)
1476    return 1;
1477
1478  /* Check for mismatched return types (cdecl vs stdcall).  */
1479  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1480      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1481    return 0;
1482  return 1;
1483}
1484
1485/* Return the regparm value for a function with the indicated TYPE.  */
1486
1487static int
1488ix86_fntype_regparm (type)
1489     tree type;
1490{
1491  tree attr;
1492
1493  attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1494  if (attr)
1495    return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1496  else
1497    return ix86_regparm;
1498}
1499
1500/* Value is the number of bytes of arguments automatically
1501   popped when returning from a subroutine call.
1502   FUNDECL is the declaration node of the function (as a tree),
1503   FUNTYPE is the data type of the function (as a tree),
1504   or for a library call it is an identifier node for the subroutine name.
1505   SIZE is the number of bytes of arguments passed on the stack.
1506
1507   On the 80386, the RTD insn may be used to pop them if the number
1508     of args is fixed, but if the number is variable then the caller
1509     must pop them all.  RTD can't be used for library calls now
1510     because the library is compiled with the Unix compiler.
1511   Use of RTD is a selectable option, since it is incompatible with
1512   standard Unix calling sequences.  If the option is not selected,
1513   the caller must always pop the args.
1514
1515   The attribute stdcall is equivalent to RTD on a per module basis.  */
1516
1517int
1518ix86_return_pops_args (fundecl, funtype, size)
1519     tree fundecl;
1520     tree funtype;
1521     int size;
1522{
1523  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1524
1525    /* Cdecl functions override -mrtd, and never pop the stack.  */
1526  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1527
1528    /* Stdcall functions will pop the stack if not variable args.  */
1529    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1530      rtd = 1;
1531
1532    if (rtd
1533        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1534	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1535		== void_type_node)))
1536      return size;
1537  }
1538
1539  /* Lose any fake structure return argument if it is passed on the stack.  */
1540  if (aggregate_value_p (TREE_TYPE (funtype))
1541      && !TARGET_64BIT)
1542    {
1543      int nregs = ix86_fntype_regparm (funtype);
1544
1545      if (!nregs)
1546	return GET_MODE_SIZE (Pmode);
1547    }
1548
1549  return 0;
1550}
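
/* A hedged sketch of the effect above (the prototype is hypothetical):
   with void __attribute__((stdcall)) f (int a, int b); the lookup of the
   "stdcall" attribute makes ix86_return_pops_args return SIZE == 8, so f
   returns with `ret $8'; a cdecl or varargs function gets 0 here and the
   caller adjusts the stack pointer itself.  */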
1551
1552/* Argument support functions.  */
1553
1554/* Return true when the register may be used to pass function parameters.  */
1555bool
1556ix86_function_arg_regno_p (regno)
1557     int regno;
1558{
1559  int i;
1560  if (!TARGET_64BIT)
1561    return (regno < REGPARM_MAX
1562	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1563  if (SSE_REGNO_P (regno) && TARGET_SSE)
1564    return true;
1565  /* RAX is used as hidden argument to va_arg functions.  */
1566  if (!regno)
1567    return true;
1568  for (i = 0; i < REGPARM_MAX; i++)
1569    if (regno == x86_64_int_parameter_registers[i])
1570      return true;
1571  return false;
1572}
1573
1574/* Initialize a variable CUM of type CUMULATIVE_ARGS
1575   for a call to a function whose data type is FNTYPE.
1576   For a library call, FNTYPE is 0.  */
1577
1578void
1579init_cumulative_args (cum, fntype, libname)
1580     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
1581     tree fntype;		/* tree ptr for function decl */
1582     rtx libname;		/* SYMBOL_REF of library name or 0 */
1583{
1584  static CUMULATIVE_ARGS zero_cum;
1585  tree param, next_param;
1586
1587  if (TARGET_DEBUG_ARG)
1588    {
1589      fprintf (stderr, "\ninit_cumulative_args (");
1590      if (fntype)
1591	fprintf (stderr, "fntype code = %s, ret code = %s",
1592		 tree_code_name[(int) TREE_CODE (fntype)],
1593		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1594      else
1595	fprintf (stderr, "no fntype");
1596
1597      if (libname)
1598	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1599    }
1600
1601  *cum = zero_cum;
1602
1603  /* Set up the number of registers to use for passing arguments.  */
1604  cum->nregs = ix86_regparm;
1605  cum->sse_nregs = SSE_REGPARM_MAX;
1606  if (fntype && !TARGET_64BIT)
1607    {
1608      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1609
1610      if (attr)
1611	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1612    }
1613  cum->maybe_vaarg = false;
1614
1615  /* Determine if this function has variable arguments.  This is
1616     indicated by the last argument being 'void_type_node' if there
1617     are no variable arguments.  If there are variable arguments, then
1618     we won't pass anything in registers.  */
1619
1620  if (cum->nregs)
1621    {
1622      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1623	   param != 0; param = next_param)
1624	{
1625	  next_param = TREE_CHAIN (param);
1626	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1627	    {
1628	      if (!TARGET_64BIT)
1629		cum->nregs = 0;
1630	      cum->maybe_vaarg = true;
1631	    }
1632	}
1633    }
1634  if ((!fntype && !libname)
1635      || (fntype && !TYPE_ARG_TYPES (fntype)))
1636    cum->maybe_vaarg = 1;
1637
1638  if (TARGET_DEBUG_ARG)
1639    fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1640
1641  return;
1642}
1643
1644/* x86-64 register passing implementation.  See the x86-64 ABI for details.
1645   The goal of this code is to classify each eightbyte of an incoming argument
1646   by register class and assign registers accordingly.  */
1647
1648/* Return the union class of CLASS1 and CLASS2.
1649   See the x86-64 PS ABI for details.  */
1650
1651static enum x86_64_reg_class
1652merge_classes (class1, class2)
1653     enum x86_64_reg_class class1, class2;
1654{
1655  /* Rule #1: If both classes are equal, this is the resulting class.  */
1656  if (class1 == class2)
1657    return class1;
1658
1659  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1660     the other class.  */
1661  if (class1 == X86_64_NO_CLASS)
1662    return class2;
1663  if (class2 == X86_64_NO_CLASS)
1664    return class1;
1665
1666  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
1667  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1668    return X86_64_MEMORY_CLASS;
1669
1670  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
1671  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1672      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1673    return X86_64_INTEGERSI_CLASS;
1674  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1675      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1676    return X86_64_INTEGER_CLASS;
1677
1678  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
1679  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1680      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1681    return X86_64_MEMORY_CLASS;
1682
1683  /* Rule #6: Otherwise class SSE is used.  */
1684  return X86_64_SSE_CLASS;
1685}
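
/* Worked example (illustrative only): for struct { int i; float f; },
   both fields land in the same eightbyte, so their classes are merged as
   merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS); rule #4
   above yields X86_64_INTEGERSI_CLASS, and the whole struct travels in a
   single general-purpose register.  */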
1686
1687/* Classify the argument of type TYPE and mode MODE.
1688   CLASSES will be filled by the register class used to pass each word
1689   of the operand.  The number of words is returned.  In case the parameter
1690   should be passed in memory, 0 is returned. As a special case for zero
1691   sized containers, classes[0] will be NO_CLASS and 1 is returned.
1692
1693   BIT_OFFSET is used internally for handling records and specifies the
1694   offset in bits modulo 256 to avoid overflow cases.
1695
1696   See the x86-64 PS ABI for details.
1697*/
1698
1699static int
1700classify_argument (mode, type, classes, bit_offset)
1701     enum machine_mode mode;
1702     tree type;
1703     enum x86_64_reg_class classes[MAX_CLASSES];
1704     int bit_offset;
1705{
1706  int bytes =
1707    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1708  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1709
1710  /* Variable sized entities are always passed/returned in memory.  */
1711  if (bytes < 0)
1712    return 0;
1713
1714  if (type && AGGREGATE_TYPE_P (type))
1715    {
1716      int i;
1717      tree field;
1718      enum x86_64_reg_class subclasses[MAX_CLASSES];
1719
1720      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
1721      if (bytes > 16)
1722	return 0;
1723
1724      for (i = 0; i < words; i++)
1725	classes[i] = X86_64_NO_CLASS;
1726
1727      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
1728	 signal the memory class, so handle this as a special case.  */
1729      if (!words)
1730	{
1731	  classes[0] = X86_64_NO_CLASS;
1732	  return 1;
1733	}
1734
1735      /* Classify each field of record and merge classes.  */
1736      if (TREE_CODE (type) == RECORD_TYPE)
1737	{
1738	  /* For classes, first merge in the fields of the base classes.  */
1739	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1740	    {
1741	      tree bases = TYPE_BINFO_BASETYPES (type);
1742	      int n_bases = TREE_VEC_LENGTH (bases);
1743	      int i;
1744
1745	      for (i = 0; i < n_bases; ++i)
1746		{
1747		   tree binfo = TREE_VEC_ELT (bases, i);
1748		   int num;
1749		   int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1750		   tree type = BINFO_TYPE (binfo);
1751
1752		   num = classify_argument (TYPE_MODE (type),
1753					    type, subclasses,
1754					    (offset + bit_offset) % 256);
1755		   if (!num)
1756		     return 0;
1757		   for (i = 0; i < num; i++)
1758		     {
1759		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
1760		       classes[i + pos] =
1761			 merge_classes (subclasses[i], classes[i + pos]);
1762		     }
1763		}
1764	    }
1765	  /* And now merge the fields of the structure.  */
1766	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1767	    {
1768	      if (TREE_CODE (field) == FIELD_DECL)
1769		{
1770		  int num;
1771
1772		  /* Bitfields are always classified as integer.  Handle them
1773		     early, since later code would consider them to be
1774		     misaligned integers.  */
1775		  if (DECL_BIT_FIELD (field))
1776		    {
1777		      for (i = int_bit_position (field) / 8 / 8;
1778			   i < (int_bit_position (field)
1779			        + tree_low_cst (DECL_SIZE (field), 0)
1780			       	+ 63) / 8 / 8; i++)
1781			classes[i] =
1782			  merge_classes (X86_64_INTEGER_CLASS,
1783					 classes[i]);
1784		    }
1785		  else
1786		    {
1787		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1788					       TREE_TYPE (field), subclasses,
1789					       (int_bit_position (field)
1790						+ bit_offset) % 256);
1791		      if (!num)
1792			return 0;
1793		      for (i = 0; i < num; i++)
1794			{
1795			  int pos =
1796			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1797			  classes[i + pos] =
1798			    merge_classes (subclasses[i], classes[i + pos]);
1799			}
1800		    }
1801		}
1802	    }
1803	}
1804      /* Arrays are handled as small records.  */
1805      else if (TREE_CODE (type) == ARRAY_TYPE)
1806	{
1807	  int num;
1808	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1809				   TREE_TYPE (type), subclasses, bit_offset);
1810	  if (!num)
1811	    return 0;
1812
1813	  /* The partial classes are now full classes.  */
1814	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1815	    subclasses[0] = X86_64_SSE_CLASS;
1816	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1817	    subclasses[0] = X86_64_INTEGER_CLASS;
1818
1819	  for (i = 0; i < words; i++)
1820	    classes[i] = subclasses[i % num];
1821	}
1822      /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
1823      else if (TREE_CODE (type) == UNION_TYPE
1824	       || TREE_CODE (type) == QUAL_UNION_TYPE)
1825	{
1826	  /* For classes, first merge in the fields of the base classes.  */
1827	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1828	    {
1829	      tree bases = TYPE_BINFO_BASETYPES (type);
1830	      int n_bases = TREE_VEC_LENGTH (bases);
1831	      int i;
1832
1833	      for (i = 0; i < n_bases; ++i)
1834		{
1835		   tree binfo = TREE_VEC_ELT (bases, i);
1836		   int num;
1837		   int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1838		   tree type = BINFO_TYPE (binfo);
1839
1840		   num = classify_argument (TYPE_MODE (type),
1841					    type, subclasses,
1842					    (offset + (bit_offset % 64)) % 256);
1843		   if (!num)
1844		     return 0;
1845		   for (i = 0; i < num; i++)
1846		     {
1847		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
1848		       classes[i + pos] =
1849			 merge_classes (subclasses[i], classes[i + pos]);
1850		     }
1851		}
1852	    }
1853	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1854	    {
1855	      if (TREE_CODE (field) == FIELD_DECL)
1856		{
1857		  int num;
1858		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1859					   TREE_TYPE (field), subclasses,
1860					   bit_offset);
1861		  if (!num)
1862		    return 0;
1863		  for (i = 0; i < num; i++)
1864		    classes[i] = merge_classes (subclasses[i], classes[i]);
1865		}
1866	    }
1867	}
1868      else
1869	abort ();
1870
1871      /* Final merger cleanup.  */
1872      for (i = 0; i < words; i++)
1873	{
1874	  /* If one class is MEMORY, everything should be passed in
1875	     memory.  */
1876	  if (classes[i] == X86_64_MEMORY_CLASS)
1877	    return 0;
1878
1879	  /* X86_64_SSEUP_CLASS should always be preceded by
1880	     X86_64_SSE_CLASS.  */
1881	  if (classes[i] == X86_64_SSEUP_CLASS
1882	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1883	    classes[i] = X86_64_SSE_CLASS;
1884
1885	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
1886	  if (classes[i] == X86_64_X87UP_CLASS
1887	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1888	    classes[i] = X86_64_SSE_CLASS;
1889	}
1890      return words;
1891    }
1892
1893  /* Compute the alignment needed.  We align all types to their natural
1894     boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
1895  if (mode != VOIDmode && mode != BLKmode)
1896    {
1897      int mode_alignment = GET_MODE_BITSIZE (mode);
1898
1899      if (mode == XFmode)
1900	mode_alignment = 128;
1901      else if (mode == XCmode)
1902	mode_alignment = 256;
1903      /* Misaligned fields are always returned in memory.  */
1904      if (bit_offset % mode_alignment)
1905	return 0;
1906    }
1907
1908  /* Classification of atomic types.  */
1909  switch (mode)
1910    {
1911    case DImode:
1912    case SImode:
1913    case HImode:
1914    case QImode:
1915    case CSImode:
1916    case CHImode:
1917    case CQImode:
1918      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1919	classes[0] = X86_64_INTEGERSI_CLASS;
1920      else
1921	classes[0] = X86_64_INTEGER_CLASS;
1922      return 1;
1923    case CDImode:
1924    case TImode:
1925      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1926      return 2;
1927    case CTImode:
1928      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1929      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1930      return 4;
1931    case SFmode:
1932      if (!(bit_offset % 64))
1933	classes[0] = X86_64_SSESF_CLASS;
1934      else
1935	classes[0] = X86_64_SSE_CLASS;
1936      return 1;
1937    case DFmode:
1938      classes[0] = X86_64_SSEDF_CLASS;
1939      return 1;
1940    case TFmode:
1941      classes[0] = X86_64_X87_CLASS;
1942      classes[1] = X86_64_X87UP_CLASS;
1943      return 2;
1944    case TCmode:
1945      classes[0] = X86_64_X87_CLASS;
1946      classes[1] = X86_64_X87UP_CLASS;
1947      classes[2] = X86_64_X87_CLASS;
1948      classes[3] = X86_64_X87UP_CLASS;
1949      return 4;
1950    case DCmode:
1951      classes[0] = X86_64_SSEDF_CLASS;
1952      classes[1] = X86_64_SSEDF_CLASS;
1953      return 2;
1954    case SCmode:
1955      classes[0] = X86_64_SSE_CLASS;
1956      return 1;
1957    case V4SFmode:
1958    case V4SImode:
1959    case V16QImode:
1960    case V8HImode:
1961    case V2DFmode:
1962    case V2DImode:
1963      classes[0] = X86_64_SSE_CLASS;
1964      classes[1] = X86_64_SSEUP_CLASS;
1965      return 2;
1966    case V2SFmode:
1967    case V2SImode:
1968    case V4HImode:
1969    case V8QImode:
1970      return 0;
1971    case BLKmode:
1972    case VOIDmode:
1973      return 0;
1974    default:
1975      abort ();
1976    }
1977}
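
/* An illustrative run of classify_argument (example only, not from the
   original source): struct { long l; double d; } occupies two eightbytes;
   the first classifies as X86_64_INTEGER_CLASS, the second as
   X86_64_SSEDF_CLASS, and 2 is returned.  Any aggregate larger than 16
   bytes returns 0 and is therefore passed in memory.  */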
1978
1979/* Examine the argument and return the number of registers required in each
1980   class.  Return 0 iff the parameter should be passed in memory.  */
1981static int
1982examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1983     enum machine_mode mode;
1984     tree type;
1985     int *int_nregs, *sse_nregs;
1986     int in_return;
1987{
1988  enum x86_64_reg_class class[MAX_CLASSES];
1989  int n = classify_argument (mode, type, class, 0);
1990
1991  *int_nregs = 0;
1992  *sse_nregs = 0;
1993  if (!n)
1994    return 0;
1995  for (n--; n >= 0; n--)
1996    switch (class[n])
1997      {
1998      case X86_64_INTEGER_CLASS:
1999      case X86_64_INTEGERSI_CLASS:
2000	(*int_nregs)++;
2001	break;
2002      case X86_64_SSE_CLASS:
2003      case X86_64_SSESF_CLASS:
2004      case X86_64_SSEDF_CLASS:
2005	(*sse_nregs)++;
2006	break;
2007      case X86_64_NO_CLASS:
2008      case X86_64_SSEUP_CLASS:
2009	break;
2010      case X86_64_X87_CLASS:
2011      case X86_64_X87UP_CLASS:
2012	if (!in_return)
2013	  return 0;
2014	break;
2015      case X86_64_MEMORY_CLASS:
2016	abort ();
2017      }
2018  return 1;
2019}
2020/* Construct a container for the argument used by the GCC interface.  See
2021   FUNCTION_ARG for the detailed description.  */
2022static rtx
2023construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2024     enum machine_mode mode;
2025     tree type;
2026     int in_return;
2027     int nintregs, nsseregs;
2028     const int * intreg;
2029     int sse_regno;
2030{
2031  enum machine_mode tmpmode;
2032  int bytes =
2033    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2034  enum x86_64_reg_class class[MAX_CLASSES];
2035  int n;
2036  int i;
2037  int nexps = 0;
2038  int needed_sseregs, needed_intregs;
2039  rtx exp[MAX_CLASSES];
2040  rtx ret;
2041
2042  n = classify_argument (mode, type, class, 0);
2043  if (TARGET_DEBUG_ARG)
2044    {
2045      if (!n)
2046	fprintf (stderr, "Memory class\n");
2047      else
2048	{
2049	  fprintf (stderr, "Classes:");
2050	  for (i = 0; i < n; i++)
2051	    {
2052	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2053	    }
2054	   fprintf (stderr, "\n");
2055	}
2056    }
2057  if (!n)
2058    return NULL;
2059  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2060    return NULL;
2061  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2062    return NULL;
2063
2064  /* First construct the simple cases.  Avoid SCmode, since we want to use
2065     a single register to pass this type.  */
2066  if (n == 1 && mode != SCmode)
2067    switch (class[0])
2068      {
2069      case X86_64_INTEGER_CLASS:
2070      case X86_64_INTEGERSI_CLASS:
2071	return gen_rtx_REG (mode, intreg[0]);
2072      case X86_64_SSE_CLASS:
2073      case X86_64_SSESF_CLASS:
2074      case X86_64_SSEDF_CLASS:
2075	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2076      case X86_64_X87_CLASS:
2077	return gen_rtx_REG (mode, FIRST_STACK_REG);
2078      case X86_64_NO_CLASS:
2079	/* Zero sized array, struct or class.  */
2080	return NULL;
2081      default:
2082	abort ();
2083      }
2084  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2085    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2086  if (n == 2
2087      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2088    return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2089  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2090      && class[1] == X86_64_INTEGER_CLASS
2091      && (mode == CDImode || mode == TImode)
2092      && intreg[0] + 1 == intreg[1])
2093    return gen_rtx_REG (mode, intreg[0]);
2094  if (n == 4
2095      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2096      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2097    return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2098
2099  /* Otherwise figure out the entries of the PARALLEL.  */
2100  for (i = 0; i < n; i++)
2101    {
2102      switch (class[i])
2103        {
2104	  case X86_64_NO_CLASS:
2105	    break;
2106	  case X86_64_INTEGER_CLASS:
2107	  case X86_64_INTEGERSI_CLASS:
2108	    /* Merge TImodes on aligned occasions here too.  */
2109	    if (i * 8 + 8 > bytes)
2110	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2111	    else if (class[i] == X86_64_INTEGERSI_CLASS)
2112	      tmpmode = SImode;
2113	    else
2114	      tmpmode = DImode;
2115	    /* We've requested 24 bytes for which we don't have a mode.  Use DImode.  */
2116	    if (tmpmode == BLKmode)
2117	      tmpmode = DImode;
2118	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2119					       gen_rtx_REG (tmpmode, *intreg),
2120					       GEN_INT (i*8));
2121	    intreg++;
2122	    break;
2123	  case X86_64_SSESF_CLASS:
2124	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2125					       gen_rtx_REG (SFmode,
2126							    SSE_REGNO (sse_regno)),
2127					       GEN_INT (i*8));
2128	    sse_regno++;
2129	    break;
2130	  case X86_64_SSEDF_CLASS:
2131	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2132					       gen_rtx_REG (DFmode,
2133							    SSE_REGNO (sse_regno)),
2134					       GEN_INT (i*8));
2135	    sse_regno++;
2136	    break;
2137	  case X86_64_SSE_CLASS:
2138	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2139	      tmpmode = TImode;
2140	    else
2141	      tmpmode = DImode;
2142	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2143					       gen_rtx_REG (tmpmode,
2144							    SSE_REGNO (sse_regno)),
2145					       GEN_INT (i*8));
2146	    if (tmpmode == TImode)
2147	      i++;
2148	    sse_regno++;
2149	    break;
2150	  default:
2151	    abort ();
2152	}
2153    }
2154  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2155  for (i = 0; i < nexps; i++)
2156    XVECEXP (ret, 0, i) = exp [i];
2157  return ret;
2158}
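
/* Sketch of a resulting container (illustrative; the exact RTL printing
   is approximate): for the two-eightbyte struct { long l; double d; }
   with fresh registers, the PARALLEL built above is roughly

       (parallel [(expr_list (reg:DI di) (const_int 0))
                  (expr_list (reg:DF xmm0) (const_int 8))])

   placing the integer half in the next integer register and the floating
   half in the next SSE register.  */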
2159
2160/* Update the data in CUM to advance over an argument
2161   of mode MODE and data type TYPE.
2162   (TYPE is null for libcalls where that information may not be available.)  */
2163
2164void
2165function_arg_advance (cum, mode, type, named)
2166     CUMULATIVE_ARGS *cum;	/* current arg information */
2167     enum machine_mode mode;	/* current arg mode */
2168     tree type;			/* type of the argument or 0 if lib support */
2169     int named;			/* whether or not the argument was named */
2170{
2171  int bytes =
2172    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2173  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2174
2175  if (TARGET_DEBUG_ARG)
2176    fprintf (stderr,
2177	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2178	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2179  if (TARGET_64BIT)
2180    {
2181      int int_nregs, sse_nregs;
2182      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2183	cum->words += words;
2184      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2185	{
2186	  cum->nregs -= int_nregs;
2187	  cum->sse_nregs -= sse_nregs;
2188	  cum->regno += int_nregs;
2189	  cum->sse_regno += sse_nregs;
2190	}
2191      else
2192	cum->words += words;
2193    }
2194  else
2195    {
2196      if (TARGET_SSE && mode == TImode)
2197	{
2198	  cum->sse_words += words;
2199	  cum->sse_nregs -= 1;
2200	  cum->sse_regno += 1;
2201	  if (cum->sse_nregs <= 0)
2202	    {
2203	      cum->sse_nregs = 0;
2204	      cum->sse_regno = 0;
2205	    }
2206	}
2207      else
2208	{
2209	  cum->words += words;
2210	  cum->nregs -= words;
2211	  cum->regno += words;
2212
2213	  if (cum->nregs <= 0)
2214	    {
2215	      cum->nregs = 0;
2216	      cum->regno = 0;
2217	    }
2218	}
2219    }
2220  return;
2221}
2222
2223/* Define where to put the arguments to a function.
2224   Value is zero to push the argument on the stack,
2225   or a hard register in which to store the argument.
2226
2227   MODE is the argument's machine mode.
2228   TYPE is the data type of the argument (as a tree).
2229    This is null for libcalls where that information may
2230    not be available.
2231   CUM is a variable of type CUMULATIVE_ARGS which gives info about
2232    the preceding args and about the function being called.
2233   NAMED is nonzero if this argument is a named parameter
2234    (otherwise it is an extra parameter matching an ellipsis).  */
2235
2236rtx
2237function_arg (cum, mode, type, named)
2238     CUMULATIVE_ARGS *cum;	/* current arg information */
2239     enum machine_mode mode;	/* current arg mode */
2240     tree type;			/* type of the argument or 0 if lib support */
2241     int named;			/* != 0 for normal args, == 0 for ... args */
2242{
2243  rtx ret   = NULL_RTX;
2244  int bytes =
2245    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2246  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2247
2248  /* Handle a hidden AL argument containing the number of registers for varargs
2249     x86-64 functions.  For the i386 ABI just return constm1_rtx to avoid
2250     any AL settings.  */
2251  if (mode == VOIDmode)
2252    {
2253      if (TARGET_64BIT)
2254	return GEN_INT (cum->maybe_vaarg
2255			? (cum->sse_nregs < 0
2256			   ? SSE_REGPARM_MAX
2257			   : cum->sse_regno)
2258			: -1);
2259      else
2260	return constm1_rtx;
2261    }
2262  if (TARGET_64BIT)
2263    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2264			       &x86_64_int_parameter_registers [cum->regno],
2265			       cum->sse_regno);
2266  else
2267    switch (mode)
2268      {
2269	/* For now, pass fp/complex values on the stack.  */
2270      default:
2271	break;
2272
2273      case BLKmode:
2274	if (bytes < 0)
2275	  break;
2276	/* FALLTHRU */
2277      case DImode:
2278      case SImode:
2279      case HImode:
2280      case QImode:
2281	if (words <= cum->nregs)
2282	  ret = gen_rtx_REG (mode, cum->regno);
2283	break;
2284      case TImode:
2285	if (cum->sse_nregs)
2286	  ret = gen_rtx_REG (mode, cum->sse_regno);
2287	break;
2288      }
2289
2290  if (TARGET_DEBUG_ARG)
2291    {
2292      fprintf (stderr,
2293	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2294	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2295
2296      if (ret)
2297	print_simple_rtl (stderr, ret);
2298      else
2299	fprintf (stderr, ", stack");
2300
2301      fprintf (stderr, " )\n");
2302    }
2303
2304  return ret;
2305}
2306
2307/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2308   passing ABI.  */
2309static bool
2310contains_128bit_aligned_vector_p (type)
2311     tree type;
2312{
2313  enum machine_mode mode = TYPE_MODE (type);
2314  if (SSE_REG_MODE_P (mode)
2315      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2316    return true;
2317  if (TYPE_ALIGN (type) < 128)
2318    return false;
2319
2320  if (AGGREGATE_TYPE_P (type))
2321    {
2322      /* Walk the aggregates recursively.  */
2323      if (TREE_CODE (type) == RECORD_TYPE
2324	  || TREE_CODE (type) == UNION_TYPE
2325	  || TREE_CODE (type) == QUAL_UNION_TYPE)
2326	{
2327	  tree field;
2328
2329	  if (TYPE_BINFO (type) != NULL
2330	      && TYPE_BINFO_BASETYPES (type) != NULL)
2331	    {
2332	      tree bases = TYPE_BINFO_BASETYPES (type);
2333	      int n_bases = TREE_VEC_LENGTH (bases);
2334	      int i;
2335
2336	      for (i = 0; i < n_bases; ++i)
2337		{
2338		  tree binfo = TREE_VEC_ELT (bases, i);
2339		  tree type = BINFO_TYPE (binfo);
2340
2341		  if (contains_128bit_aligned_vector_p (type))
2342		    return true;
2343		}
2344	    }
2345	  /* And now check the fields of the structure.  */
2346	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2347	    {
2348	      if (TREE_CODE (field) == FIELD_DECL
2349		  && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2350		return true;
2351	    }
2352	}
2353      /* Just in case some language passes arrays by value.  */
2354      else if (TREE_CODE (type) == ARRAY_TYPE)
2355	{
2356	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2357	    return true;
2358	}
2359      else
2360	abort ();
2361    }
2362  return false;
2363}
2364
2365/* A C expression that indicates when an argument must be passed by
2366   reference.  If nonzero for an argument, a copy of that argument is
2367   made in memory and a pointer to the argument is passed instead of
2368   the argument itself.  The pointer is passed in whatever way is
2369   appropriate for passing a pointer to that type.  */
2370
2371int
2372function_arg_pass_by_reference (cum, mode, type, named)
2373     CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2374     enum machine_mode mode ATTRIBUTE_UNUSED;
2375     tree type;
2376     int named ATTRIBUTE_UNUSED;
2377{
2378  if (!TARGET_64BIT)
2379    return 0;
2380
2381  if (type && int_size_in_bytes (type) == -1)
2382    {
2383      if (TARGET_DEBUG_ARG)
2384	fprintf (stderr, "function_arg_pass_by_reference\n");
2385      return 1;
2386    }
2387
2388  return 0;
2389}
2390
2391/* Return the alignment boundary, in bits, of an argument with the specified
2392   mode and type.  */
2393
2394int
2395ix86_function_arg_boundary (mode, type)
2396     enum machine_mode mode;
2397     tree type;
2398{
2399  int align;
2400  if (type)
2401    align = TYPE_ALIGN (type);
2402  else
2403    align = GET_MODE_ALIGNMENT (mode);
2404  if (align < PARM_BOUNDARY)
2405    align = PARM_BOUNDARY;
2406  if (!TARGET_64BIT)
2407    {
2408      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
2409	 make an exception for SSE modes since these require 128bit
2410	 alignment.
2411
2412	 The handling here differs from field_alignment.  ICC aligns MMX
2413	 arguments to 4 byte boundaries, while structure fields are aligned
2414	 to 8 byte boundaries.  */
2415      if (!type)
2416	{
2417	  if (!SSE_REG_MODE_P (mode))
2418	    align = PARM_BOUNDARY;
2419	}
2420      else
2421	{
2422	  if (!contains_128bit_aligned_vector_p (type))
2423	    align = PARM_BOUNDARY;
2424	}
2425      if (align != PARM_BOUNDARY && !TARGET_SSE)
2426	abort ();
2427    }
2428  if (align > 128)
2429    align = 128;
2430  return align;
2431}
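
/* Illustrative boundaries returned above (a sketch): under the 32-bit ABI
   a plain double is aligned to PARM_BOUNDARY (32 bits, i.e. 4-byte stack
   slots), while an SSE type such as __m128 is aligned to 128 bits; the
   result is capped at 128 bits in all cases.  */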
2432
2433/* Return true if N is a possible register number of function value.  */
2434bool
2435ix86_function_value_regno_p (regno)
2436     int regno;
2437{
2438  if (!TARGET_64BIT)
2439    {
2440      return ((regno) == 0
2441	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2442	      || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2443    }
2444  return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2445	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2446	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2447}
2448
2449/* Define how to find the value returned by a function.
2450   VALTYPE is the data type of the value (as a tree).
2451   If the precise function being called is known, FUNC is its FUNCTION_DECL;
2452   otherwise, FUNC is 0.  */
2453rtx
2454ix86_function_value (valtype)
2455     tree valtype;
2456{
2457  if (TARGET_64BIT)
2458    {
2459      rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2460				     REGPARM_MAX, SSE_REGPARM_MAX,
2461				     x86_64_int_return_registers, 0);
2462      /* For zero sized structures, construct_container returns NULL, but we need
2463         to keep the rest of the compiler happy by returning a meaningful value.  */
2464      if (!ret)
2465	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2466      return ret;
2467    }
2468  else
2469    return gen_rtx_REG (TYPE_MODE (valtype),
2470			ix86_value_regno (TYPE_MODE (valtype)));
2471}
2472
2473/* Return nonzero iff TYPE is returned in memory.  */
2474int
2475ix86_return_in_memory (type)
2476     tree type;
2477{
2478  int needed_intregs, needed_sseregs, size;
2479  enum machine_mode mode = TYPE_MODE (type);
2480
2481  if (TARGET_64BIT)
2482    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2483
2484  if (mode == BLKmode)
2485    return 1;
2486
2487  size = int_size_in_bytes (type);
2488
2489  if (VECTOR_MODE_P (mode) || mode == TImode)
2490    {
2491      /* User-created vectors small enough to fit in EAX.  */
2492      if (size < 8)
2493	return 0;
2494
2495      /* MMX/3DNow! values are returned on the stack, since we've
2496	 got to EMMS/FEMMS before returning.  */
2497      if (size == 8)
2498	return 1;
2499
2500      /* SSE values are returned in XMM0.  */
2501      /* ??? Except when it doesn't exist?  We have a choice of
2502	 either (1) being ABI incompatible with a -march switch,
2503	 or (2) generating an error here.  Given no good solution,
2504	 I think the safest thing is one warning.  The user won't
2505	 be able to use -Werror, but...  */
2506      if (size == 16)
2507	{
2508	  static bool warned;
2509
2510	  if (TARGET_SSE)
2511	    return 0;
2512
2513	  if (!warned)
2514	    {
2515	      warned = true;
2516	      warning ("SSE vector return without SSE enabled changes the ABI");
2517	    }
2518	  return 1;
2519	}
2520    }
2521
2522  if (mode == TFmode)
2523    return 0;
2524  if (size > 12)
2525    return 1;
2526  return 0;
2527}
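
/* Examples of the 32-bit rules above (illustrative): a 4-byte struct that
   gets SImode is returned in EAX; an 8-byte MMX vector is returned on the
   stack; any BLKmode aggregate, or a non-BLKmode value larger than 12
   bytes other than TFmode, is returned in memory through a hidden pointer
   supplied by the caller.  */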
2528
2529/* Define how to find the value returned by a library function
2530   assuming the value has mode MODE.  */
2531rtx
2532ix86_libcall_value (mode)
2533   enum machine_mode mode;
2534{
2535  if (TARGET_64BIT)
2536    {
2537      switch (mode)
2538	{
2539	  case SFmode:
2540	  case SCmode:
2541	  case DFmode:
2542	  case DCmode:
2543	    return gen_rtx_REG (mode, FIRST_SSE_REG);
2544	  case TFmode:
2545	  case TCmode:
2546	    return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2547	  default:
2548	    return gen_rtx_REG (mode, 0);
2549	}
2550    }
2551  else
2552   return gen_rtx_REG (mode, ix86_value_regno (mode));
2553}
2554
2555/* Given a mode, return the register to use for a return value.  */
2556
2557static int
2558ix86_value_regno (mode)
2559     enum machine_mode mode;
2560{
2561  /* Floating point return values in %st(0).  */
2562  if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2563    return FIRST_FLOAT_REG;
2564  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
2565     we prevent this case when SSE is not available.  */
2566  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2567    return FIRST_SSE_REG;
2568  /* Everything else in %eax.  */
2569  return 0;
2570}
2571
2572/* Create the va_list data type.  */
2573
2574tree
2575ix86_build_va_list ()
2576{
2577  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2578
2579  /* For i386 we use a plain pointer to the argument area.  */
2580  if (!TARGET_64BIT)
2581    return build_pointer_type (char_type_node);
2582
2583  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2584  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2585
2586  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2587		      unsigned_type_node);
2588  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2589		      unsigned_type_node);
2590  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2591		      ptr_type_node);
2592  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2593		      ptr_type_node);
2594
2595  DECL_FIELD_CONTEXT (f_gpr) = record;
2596  DECL_FIELD_CONTEXT (f_fpr) = record;
2597  DECL_FIELD_CONTEXT (f_ovf) = record;
2598  DECL_FIELD_CONTEXT (f_sav) = record;
2599
2600  TREE_CHAIN (record) = type_decl;
2601  TYPE_NAME (record) = type_decl;
2602  TYPE_FIELDS (record) = f_gpr;
2603  TREE_CHAIN (f_gpr) = f_fpr;
2604  TREE_CHAIN (f_fpr) = f_ovf;
2605  TREE_CHAIN (f_ovf) = f_sav;
2606
2607  layout_type (record);
2608
2609  /* The correct type is an array type of one element.  */
2610  return build_array_type (record, build_index_type (size_zero_node));
2611}
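
/* For illustration, the record built above matches the C-level type the
   x86-64 ABI mandates for va_list:

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag, va_list[1];

   i.e. an array of one structure, as returned by build_array_type.  */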
2612
2613/* Perform any actions needed for a function that is receiving a
2614   variable number of arguments.
2615
2616   CUM is as above.
2617
2618   MODE and TYPE are the mode and type of the current parameter.
2619
2620   PRETEND_SIZE is a variable that should be set to the amount of stack
2621   that must be pushed by the prologue to pretend that our caller pushed
2622   it.
2623
2624   Normally, this macro will push all remaining incoming registers on the
2625   stack and set PRETEND_SIZE to the length of the registers pushed.  */
2626
2627void
2628ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2629     CUMULATIVE_ARGS *cum;
2630     enum machine_mode mode;
2631     tree type;
2632     int *pretend_size ATTRIBUTE_UNUSED;
2633     int no_rtl;
2634
2635{
2636  CUMULATIVE_ARGS next_cum;
2637  rtx save_area = NULL_RTX, mem;
2638  rtx label;
2639  rtx label_ref;
2640  rtx tmp_reg;
2641  rtx nsse_reg;
2642  int set;
2643  tree fntype;
2644  int stdarg_p;
2645  int i;
2646
2647  if (!TARGET_64BIT)
2648    return;
2649
2650  /* Indicate that space must be allocated on the stack for the varargs save area.  */
2651  ix86_save_varrargs_registers = 1;
2652
2653  cfun->stack_alignment_needed = 128;
2654
2655  fntype = TREE_TYPE (current_function_decl);
2656  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2657	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2658		  != void_type_node));
2659
2660  /* For varargs, we do not want to skip the dummy va_dcl argument.
2661     For stdargs, we do want to skip the last named argument.  */
2662  next_cum = *cum;
2663  if (stdarg_p)
2664    function_arg_advance (&next_cum, mode, type, 1);
2665
2666  if (!no_rtl)
2667    save_area = frame_pointer_rtx;
2668
2669  set = get_varargs_alias_set ();
2670
2671  for (i = next_cum.regno; i < ix86_regparm; i++)
2672    {
2673      mem = gen_rtx_MEM (Pmode,
2674			 plus_constant (save_area, i * UNITS_PER_WORD));
2675      set_mem_alias_set (mem, set);
2676      emit_move_insn (mem, gen_rtx_REG (Pmode,
2677					x86_64_int_parameter_registers[i]));
2678    }
2679
2680  if (next_cum.sse_nregs)
2681    {
2682      /* Now emit code to save SSE registers.  The AX parameter contains the
2683	 number of SSE parameter registers used to call this function.  We use
2684	 the sse_prologue_save insn template, which produces a computed jump across
2685	 the SSE saves.  We need some preparation work to get this working.  */
2686
2687      label = gen_label_rtx ();
2688      label_ref = gen_rtx_LABEL_REF (Pmode, label);
2689
2690      /* Compute the address to jump to:
2691         label - 4*eax + nnamed_sse_arguments*4  */
2692      tmp_reg = gen_reg_rtx (Pmode);
2693      nsse_reg = gen_reg_rtx (Pmode);
2694      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2695      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2696			      gen_rtx_MULT (Pmode, nsse_reg,
2697					    GEN_INT (4))));
2698      if (next_cum.sse_regno)
2699	emit_move_insn
2700	  (nsse_reg,
2701	   gen_rtx_CONST (DImode,
2702			  gen_rtx_PLUS (DImode,
2703					label_ref,
2704					GEN_INT (next_cum.sse_regno * 4))));
2705      else
2706	emit_move_insn (nsse_reg, label_ref);
2707      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2708
2709      /* Compute the address of the memory block we save into.  We always use a
2710	 pointer pointing 127 bytes after the first byte to store; this is needed
2711	 to keep the instruction size limited to 4 bytes.  */
2712      tmp_reg = gen_reg_rtx (Pmode);
2713      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2714			      plus_constant (save_area,
2715					     8 * REGPARM_MAX + 127)));
2716      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2717      set_mem_alias_set (mem, set);
2718      set_mem_align (mem, BITS_PER_WORD);
2719
2720      /* And finally do the dirty job!  */
2721      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2722					GEN_INT (next_cum.sse_regno), label));
2723    }
2724
2725}
2726
2727/* Implement va_start.  */
2728
2729void
2730ix86_va_start (valist, nextarg)
2731     tree valist;
2732     rtx nextarg;
2733{
2734  HOST_WIDE_INT words, n_gpr, n_fpr;
2735  tree f_gpr, f_fpr, f_ovf, f_sav;
2736  tree gpr, fpr, ovf, sav, t;
2737
2738  /* Only 64bit target needs something special.  */
2739  if (!TARGET_64BIT)
2740    {
2741      std_expand_builtin_va_start (valist, nextarg);
2742      return;
2743    }
2744
2745  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2746  f_fpr = TREE_CHAIN (f_gpr);
2747  f_ovf = TREE_CHAIN (f_fpr);
2748  f_sav = TREE_CHAIN (f_ovf);
2749
2750  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2751  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2752  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2753  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2754  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2755
2756  /* Count number of gp and fp argument registers used.  */
2757  words = current_function_args_info.words;
2758  n_gpr = current_function_args_info.regno;
2759  n_fpr = current_function_args_info.sse_regno;
2760
2761  if (TARGET_DEBUG_ARG)
2762    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2763	     (int) words, (int) n_gpr, (int) n_fpr);
2764
2765  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2766	     build_int_2 (n_gpr * 8, 0));
2767  TREE_SIDE_EFFECTS (t) = 1;
2768  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2769
2770  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2771	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2772  TREE_SIDE_EFFECTS (t) = 1;
2773  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2774
2775  /* Find the overflow area.  */
2776  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2777  if (words != 0)
2778    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2779	       build_int_2 (words * UNITS_PER_WORD, 0));
2780  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2781  TREE_SIDE_EFFECTS (t) = 1;
2782  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2783
2784  /* Find the register save area.
2785     The function prologue saves it right above the stack frame.  */
2786  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2787  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2788  TREE_SIDE_EFFECTS (t) = 1;
2789  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2790}
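
/* Illustrative initial state (a sketch; the prototype is hypothetical):
   after va_start in int f (int a, ...) with no FP arguments passed,
   gp_offset == 8 (one GP register consumed by `a'), fp_offset ==
   8 * REGPARM_MAX == 48, overflow_arg_area points at the first stack
   argument and reg_save_area at the block saved by the prologue.  */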
2791
2792/* Implement va_arg.  */
2793rtx
2794ix86_va_arg (valist, type)
2795     tree valist, type;
2796{
2797  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2798  tree f_gpr, f_fpr, f_ovf, f_sav;
2799  tree gpr, fpr, ovf, sav, t;
2800  int size, rsize;
2801  rtx lab_false, lab_over = NULL_RTX;
2802  rtx addr_rtx, r;
2803  rtx container;
2804  int indirect_p = 0;
2805
2806  /* Only 64bit target needs something special.  */
2807  if (!TARGET_64BIT)
2808    {
2809      return std_expand_builtin_va_arg (valist, type);
2810    }
2811
2812  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2813  f_fpr = TREE_CHAIN (f_gpr);
2814  f_ovf = TREE_CHAIN (f_fpr);
2815  f_sav = TREE_CHAIN (f_ovf);
2816
2817  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2818  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2819  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2820  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2821  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2822
2823  size = int_size_in_bytes (type);
2824  if (size == -1)
2825    {
2826      /* Passed by reference.  */
2827      indirect_p = 1;
2828      type = build_pointer_type (type);
2829      size = int_size_in_bytes (type);
2830    }
2831  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2832
2833  container = construct_container (TYPE_MODE (type), type, 0,
2834				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2835  /* Pull the value out of the saved registers...  */
2838
2839  addr_rtx = gen_reg_rtx (Pmode);
2840
2841  if (container)
2842    {
2843      rtx int_addr_rtx, sse_addr_rtx;
2844      int needed_intregs, needed_sseregs;
2845      int need_temp;
2846
2847      lab_over = gen_label_rtx ();
2848      lab_false = gen_label_rtx ();
2849
2850      examine_argument (TYPE_MODE (type), type, 0,
2851		        &needed_intregs, &needed_sseregs);
2852
2853
2854      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2855		   || TYPE_ALIGN (type) > 128);
2856
2857      /* In case we are passing a structure, verify that it is a consecutive
2858         block in the register save area.  If not, we need to do moves.  */
2859      if (!need_temp && !REG_P (container))
2860	{
2861	  /* Verify that all registers are strictly consecutive.  */
2862	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2863	    {
2864	      int i;
2865
2866	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2867		{
2868		  rtx slot = XVECEXP (container, 0, i);
2869		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2870		      || INTVAL (XEXP (slot, 1)) != i * 16)
2871		    need_temp = 1;
2872		}
2873	    }
2874	  else
2875	    {
2876	      int i;
2877
2878	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2879		{
2880		  rtx slot = XVECEXP (container, 0, i);
2881		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2882		      || INTVAL (XEXP (slot, 1)) != i * 8)
2883		    need_temp = 1;
2884		}
2885	    }
2886	}
2887      if (!need_temp)
2888	{
2889	  int_addr_rtx = addr_rtx;
2890	  sse_addr_rtx = addr_rtx;
2891	}
2892      else
2893	{
2894	  int_addr_rtx = gen_reg_rtx (Pmode);
2895	  sse_addr_rtx = gen_reg_rtx (Pmode);
2896	}
2897      /* First ensure that we fit completely in registers.  */
2898      if (needed_intregs)
2899	{
2900	  emit_cmp_and_jump_insns (expand_expr
2901				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2902				   GEN_INT ((REGPARM_MAX - needed_intregs +
2903					     1) * 8), GE, const1_rtx, SImode,
2904				   1, lab_false);
2905	}
2906      if (needed_sseregs)
2907	{
2908	  emit_cmp_and_jump_insns (expand_expr
2909				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2910				   GEN_INT ((SSE_REGPARM_MAX -
2911					     needed_sseregs + 1) * 16 +
2912					    REGPARM_MAX * 8), GE, const1_rtx,
2913				   SImode, 1, lab_false);
2914	}
2915
2916      /* Compute index to start of area used for integer regs.  */
2917      if (needed_intregs)
2918	{
2919	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2920	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2921	  if (r != int_addr_rtx)
2922	    emit_move_insn (int_addr_rtx, r);
2923	}
2924      if (needed_sseregs)
2925	{
2926	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2927	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2928	  if (r != sse_addr_rtx)
2929	    emit_move_insn (sse_addr_rtx, r);
2930	}
2931      if (need_temp)
2932	{
2933	  int i;
2934	  rtx mem;
2935	  rtx x;
2936
2937	  /* Never use the memory itself, as it has the alias set.  */
2938	  x = XEXP (assign_temp (type, 0, 1, 0), 0);
2939	  mem = gen_rtx_MEM (BLKmode, x);
2940	  force_operand (x, addr_rtx);
2941	  set_mem_alias_set (mem, get_varargs_alias_set ());
2942	  set_mem_align (mem, BITS_PER_UNIT);
2943
2944	  for (i = 0; i < XVECLEN (container, 0); i++)
2945	    {
2946	      rtx slot = XVECEXP (container, 0, i);
2947	      rtx reg = XEXP (slot, 0);
2948	      enum machine_mode mode = GET_MODE (reg);
2949	      rtx src_addr;
2950	      rtx src_mem;
2951	      int src_offset;
2952	      rtx dest_mem;
2953
2954	      if (SSE_REGNO_P (REGNO (reg)))
2955		{
2956		  src_addr = sse_addr_rtx;
2957		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2958		}
2959	      else
2960		{
2961		  src_addr = int_addr_rtx;
2962		  src_offset = REGNO (reg) * 8;
2963		}
2964	      src_mem = gen_rtx_MEM (mode, src_addr);
2965	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
2966	      src_mem = adjust_address (src_mem, mode, src_offset);
2967	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2968	      emit_move_insn (dest_mem, src_mem);
2969	    }
2970	}
2971
2972      if (needed_intregs)
2973	{
2974	  t =
2975	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2976		   build_int_2 (needed_intregs * 8, 0));
2977	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2978	  TREE_SIDE_EFFECTS (t) = 1;
2979	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2980	}
2981      if (needed_sseregs)
2982	{
2983	  t =
2984	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2985		   build_int_2 (needed_sseregs * 16, 0));
2986	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2987	  TREE_SIDE_EFFECTS (t) = 1;
2988	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2989	}
2990
2991      emit_jump_insn (gen_jump (lab_over));
2992      emit_barrier ();
2993      emit_label (lab_false);
2994    }
2995
2996  /* ... otherwise out of the overflow area.  */
2997
2998  /* Care for on-stack alignment if needed.  */
2999  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3000    t = ovf;
3001  else
3002    {
3003      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3004      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3005      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3006    }
3007  t = save_expr (t);
3008
3009  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3010  if (r != addr_rtx)
3011    emit_move_insn (addr_rtx, r);
3012
3013  t =
3014    build (PLUS_EXPR, TREE_TYPE (t), t,
3015	   build_int_2 (rsize * UNITS_PER_WORD, 0));
3016  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3017  TREE_SIDE_EFFECTS (t) = 1;
3018  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3019
3020  if (container)
3021    emit_label (lab_over);
3022
3023  if (indirect_p)
3024    {
3025      r = gen_rtx_MEM (Pmode, addr_rtx);
3026      set_mem_alias_set (r, get_varargs_alias_set ());
3027      emit_move_insn (addr_rtx, r);
3028    }
3029
3030  return addr_rtx;
3031}
3032
3033/* Return nonzero if OP is either an i387 or SSE fp register.  */
3034int
3035any_fp_register_operand (op, mode)
3036     rtx op;
3037     enum machine_mode mode ATTRIBUTE_UNUSED;
3038{
3039  return ANY_FP_REG_P (op);
3040}
3041
3042/* Return nonzero if OP is an i387 fp register.  */
3043int
3044fp_register_operand (op, mode)
3045     rtx op;
3046     enum machine_mode mode ATTRIBUTE_UNUSED;
3047{
3048  return FP_REG_P (op);
3049}
3050
3051/* Return nonzero if OP is a non-fp register_operand.  */
3052int
3053register_and_not_any_fp_reg_operand (op, mode)
3054     rtx op;
3055     enum machine_mode mode;
3056{
3057  return register_operand (op, mode) && !ANY_FP_REG_P (op);
3058}
3059
3060/* Return nonzero if OP is a register operand other than an
3061   i387 fp register.  */
3062int
3063register_and_not_fp_reg_operand (op, mode)
3064     rtx op;
3065     enum machine_mode mode;
3066{
3067  return register_operand (op, mode) && !FP_REG_P (op);
3068}
3069
3070/* Return nonzero if OP is a general operand representable on x86_64.  */
3071
3072int
3073x86_64_general_operand (op, mode)
3074     rtx op;
3075     enum machine_mode mode;
3076{
3077  if (!TARGET_64BIT)
3078    return general_operand (op, mode);
3079  if (nonimmediate_operand (op, mode))
3080    return 1;
3081  return x86_64_sign_extended_value (op);
3082}
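
/* Hedged examples of the x86-64 immediate restriction applied above: the
   constant 0x7fffffff fits in a sign-extended 32-bit immediate and is a
   valid general operand, while 0x123456789 is not and must first be
   loaded into a register (e.g. with a movabs pattern).  */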
3083
3084/* Return nonzero if OP is a general operand representable on x86_64
3085   as either a sign extended or zero extended constant.  */
3086
3087int
3088x86_64_szext_general_operand (op, mode)
3089     rtx op;
3090     enum machine_mode mode;
3091{
3092  if (!TARGET_64BIT)
3093    return general_operand (op, mode);
3094  if (nonimmediate_operand (op, mode))
3095    return 1;
3096  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3097}
3098
3099/* Return nonzero if OP is a nonmemory operand representable on x86_64.  */
3100
3101int
3102x86_64_nonmemory_operand (op, mode)
3103     rtx op;
3104     enum machine_mode mode;
3105{
3106  if (!TARGET_64BIT)
3107    return nonmemory_operand (op, mode);
3108  if (register_operand (op, mode))
3109    return 1;
3110  return x86_64_sign_extended_value (op);
3111}
3112
3113/* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns.  */
3114
3115int
3116x86_64_movabs_operand (op, mode)
3117     rtx op;
3118     enum machine_mode mode;
3119{
3120  if (!TARGET_64BIT || !flag_pic)
3121    return nonmemory_operand (op, mode);
3122  if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3123    return 1;
3124  if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3125    return 1;
3126  return 0;
3127}
3128
3129/* Return nonzero if OPNUM's MEM should be matched
3130   in movabs* patterns.  */
3131
3132int
3133ix86_check_movabs (insn, opnum)
3134     rtx insn;
3135     int opnum;
3136{
3137  rtx set, mem;
3138
3139  set = PATTERN (insn);
3140  if (GET_CODE (set) == PARALLEL)
3141    set = XVECEXP (set, 0, 0);
3142  if (GET_CODE (set) != SET)
3143    abort ();
3144  mem = XEXP (set, opnum);
3145  while (GET_CODE (mem) == SUBREG)
3146    mem = SUBREG_REG (mem);
3147  if (GET_CODE (mem) != MEM)
3148    abort ();
3149  return (volatile_ok || !MEM_VOLATILE_P (mem));
3150}
3151
3152/* Return nonzero if OP is a nonmemory operand representable on x86_64 as either a sign extended or zero extended constant.  */
3153
3154int
3155x86_64_szext_nonmemory_operand (op, mode)
3156     rtx op;
3157     enum machine_mode mode;
3158{
3159  if (!TARGET_64BIT)
3160    return nonmemory_operand (op, mode);
3161  if (register_operand (op, mode))
3162    return 1;
3163  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3164}
3165
3166/* Return nonzero if OP is an immediate operand representable on x86_64.  */
3167
3168int
3169x86_64_immediate_operand (op, mode)
3170     rtx op;
3171     enum machine_mode mode;
3172{
3173  if (!TARGET_64BIT)
3174    return immediate_operand (op, mode);
3175  return x86_64_sign_extended_value (op);
3176}
3177
3178/* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended constant.  */
3179
3180int
3181x86_64_zext_immediate_operand (op, mode)
3182     rtx op;
3183     enum machine_mode mode ATTRIBUTE_UNUSED;
3184{
3185  return x86_64_zero_extended_value (op);
3186}
3187
3188/* Return nonzero if OP is (const_int 1), else return zero.  */
3189
3190int
3191const_int_1_operand (op, mode)
3192     rtx op;
3193     enum machine_mode mode ATTRIBUTE_UNUSED;
3194{
3195  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3196}
3197
3198/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3199   for shift & compare patterns, as shifting by 0 does not change flags),
3200   else return zero.  */
3201
3202int
3203const_int_1_31_operand (op, mode)
3204     rtx op;
3205     enum machine_mode mode ATTRIBUTE_UNUSED;
3206{
3207  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3208}
3209
3210/* Returns 1 if OP is either a symbol reference or a sum of a symbol
3211   reference and a constant.  */
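/* For illustration, the accepted forms include a bare (symbol_ref "foo"),
   an offsetted constant such as
   (const (plus (symbol_ref "foo") (const_int 4))), and the
   @GOT/@GOTOFF/@GOTPCREL unspec wrappers handled below.  */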
3212
3213int
3214symbolic_operand (op, mode)
3215     register rtx op;
3216     enum machine_mode mode ATTRIBUTE_UNUSED;
3217{
3218  switch (GET_CODE (op))
3219    {
3220    case SYMBOL_REF:
3221    case LABEL_REF:
3222      return 1;
3223
3224    case CONST:
3225      op = XEXP (op, 0);
3226      if (GET_CODE (op) == SYMBOL_REF
3227	  || GET_CODE (op) == LABEL_REF
3228	  || (GET_CODE (op) == UNSPEC
3229	      && (XINT (op, 1) == UNSPEC_GOT
3230		  || XINT (op, 1) == UNSPEC_GOTOFF
3231		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
3232	return 1;
3233      if (GET_CODE (op) != PLUS
3234	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
3235	return 0;
3236
3237      op = XEXP (op, 0);
3238      if (GET_CODE (op) == SYMBOL_REF
3239	  || GET_CODE (op) == LABEL_REF)
3240	return 1;
3241      /* Only @GOTOFF gets offsets.  */
3242      if (GET_CODE (op) != UNSPEC
3243	  || XINT (op, 1) != UNSPEC_GOTOFF)
3244	return 0;
3245
3246      op = XVECEXP (op, 0, 0);
3247      if (GET_CODE (op) == SYMBOL_REF
3248	  || GET_CODE (op) == LABEL_REF)
3249	return 1;
3250      return 0;
3251
3252    default:
3253      return 0;
3254    }
3255}
3256
3257/* Return true if the operand contains a @GOT or @GOTOFF reference.  */
3258
3259int
3260pic_symbolic_operand (op, mode)
3261     register rtx op;
3262     enum machine_mode mode ATTRIBUTE_UNUSED;
3263{
3264  if (GET_CODE (op) != CONST)
3265    return 0;
3266  op = XEXP (op, 0);
3267  if (TARGET_64BIT)
3268    {
3269      if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3270	return 1;
3271    }
3272  else
3273    {
3274      if (GET_CODE (op) == UNSPEC)
3275	return 1;
3276      if (GET_CODE (op) != PLUS
3277	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
3278	return 0;
3279      op = XEXP (op, 0);
3280      if (GET_CODE (op) == UNSPEC)
3281	return 1;
3282    }
3283  return 0;
3284}
3285
3286/* Return true if OP is a symbolic operand that resolves locally.  */
3287
3288static int
3289local_symbolic_operand (op, mode)
3290     rtx op;
3291     enum machine_mode mode ATTRIBUTE_UNUSED;
3292{
3293  if (GET_CODE (op) == CONST
3294      && GET_CODE (XEXP (op, 0)) == PLUS
3295      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3296    op = XEXP (XEXP (op, 0), 0);
3297
3298  if (GET_CODE (op) == LABEL_REF)
3299    return 1;
3300
3301  if (GET_CODE (op) != SYMBOL_REF)
3302    return 0;
3303
3304  /* These we've been told are local by varasm and encode_section_info
3305     respectively.  */
3306  if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3307    return 1;
3308
3309  /* There is, however, a not insubstantial body of code in the rest of
3310     the compiler that assumes it can just stick the results of
3311     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
3312  /* ??? This is a hack.  Should update the body of the compiler to
3313     always create a DECL and invoke targetm.encode_section_info.  */
3314  if (strncmp (XSTR (op, 0), internal_label_prefix,
3315	       internal_label_prefix_len) == 0)
3316    return 1;
3317
3318  return 0;
3319}
3320
3321/* Test for various thread-local symbols.  See ix86_encode_section_info. */
3322
3323int
3324tls_symbolic_operand (op, mode)
3325     register rtx op;
3326     enum machine_mode mode ATTRIBUTE_UNUSED;
3327{
3328  const char *symbol_str;
3329
3330  if (GET_CODE (op) != SYMBOL_REF)
3331    return 0;
3332  symbol_str = XSTR (op, 0);
3333
3334  if (symbol_str[0] != '%')
3335    return 0;
3336  return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3337}
3338
3339static int
3340tls_symbolic_operand_1 (op, kind)
3341     rtx op;
3342     enum tls_model kind;
3343{
3344  const char *symbol_str;
3345
3346  if (GET_CODE (op) != SYMBOL_REF)
3347    return 0;
3348  symbol_str = XSTR (op, 0);
3349
3350  return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3351}
3352
3353int
3354global_dynamic_symbolic_operand (op, mode)
3355     register rtx op;
3356     enum machine_mode mode ATTRIBUTE_UNUSED;
3357{
3358  return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3359}
3360
3361int
3362local_dynamic_symbolic_operand (op, mode)
3363     register rtx op;
3364     enum machine_mode mode ATTRIBUTE_UNUSED;
3365{
3366  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3367}
3368
3369int
3370initial_exec_symbolic_operand (op, mode)
3371     register rtx op;
3372     enum machine_mode mode ATTRIBUTE_UNUSED;
3373{
3374  return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3375}
3376
3377int
3378local_exec_symbolic_operand (op, mode)
3379     register rtx op;
3380     enum machine_mode mode ATTRIBUTE_UNUSED;
3381{
3382  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3383}
3384
3385/* Test for a valid operand for a call instruction.  Don't allow the
3386   arg pointer register or virtual regs since they may decay into
3387   reg + const, which the patterns can't handle.  */
3388
3389int
3390call_insn_operand (op, mode)
3391     rtx op;
3392     enum machine_mode mode ATTRIBUTE_UNUSED;
3393{
3394  /* Disallow indirect through a virtual register.  This leads to
3395     compiler aborts when trying to eliminate them.  */
3396  if (GET_CODE (op) == REG
3397      && (op == arg_pointer_rtx
3398	  || op == frame_pointer_rtx
3399	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3400	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3401    return 0;
3402
3403  /* Disallow `call 1234'.  Due to varying assembler lameness this
3404     gets either rejected or translated to `call .+1234'.  */
3405  if (GET_CODE (op) == CONST_INT)
3406    return 0;
3407
3408  /* Explicitly allow SYMBOL_REF even if pic.  */
3409  if (GET_CODE (op) == SYMBOL_REF)
3410    return 1;
3411
3412  /* Otherwise we can allow any general_operand in the address.  */
3413  return general_operand (op, Pmode);
3414}
3415
3416int
3417constant_call_address_operand (op, mode)
3418     rtx op;
3419     enum machine_mode mode ATTRIBUTE_UNUSED;
3420{
3421  if (GET_CODE (op) == CONST
3422      && GET_CODE (XEXP (op, 0)) == PLUS
3423      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3424    op = XEXP (XEXP (op, 0), 0);
3425  return GET_CODE (op) == SYMBOL_REF;
3426}
3427
3428/* Match exactly zero and one.  */
3429
3430int
3431const0_operand (op, mode)
3432     register rtx op;
3433     enum machine_mode mode;
3434{
3435  return op == CONST0_RTX (mode);
3436}
3437
3438int
3439const1_operand (op, mode)
3440     register rtx op;
3441     enum machine_mode mode ATTRIBUTE_UNUSED;
3442{
3443  return op == const1_rtx;
3444}
3445
3446/* Match 2, 4, or 8.  Used for leal multiplicands.  */
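/* For example, in `leal (%ebx,%eax,4), %ecx' the index register %eax is
   scaled by one of these constants; a scale of 1 needs no multiplicand.  */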
3447
3448int
3449const248_operand (op, mode)
3450     register rtx op;
3451     enum machine_mode mode ATTRIBUTE_UNUSED;
3452{
3453  return (GET_CODE (op) == CONST_INT
3454	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3455}
3456
3457/* True if this is a constant appropriate for an increment or decrement.  */
3458
3459int
3460incdec_operand (op, mode)
3461     register rtx op;
3462     enum machine_mode mode ATTRIBUTE_UNUSED;
3463{
3464  /* On the Pentium 4, the inc and dec operations cause an extra dependency on
3465     the flags register, since the carry flag is not set.  */
3466  if (TARGET_PENTIUM4 && !optimize_size)
3467    return 0;
3468  return op == const1_rtx || op == constm1_rtx;
3469}
3470
3471/* Return nonzero if OP is acceptable as operand of DImode shift
3472   expander.  */
3473
3474int
3475shiftdi_operand (op, mode)
3476     rtx op;
3477     enum machine_mode mode ATTRIBUTE_UNUSED;
3478{
3479  if (TARGET_64BIT)
3480    return nonimmediate_operand (op, mode);
3481  else
3482    return register_operand (op, mode);
3483}
3484
3485/* Return false if this is the stack pointer, or any other fake
3486   register eliminable to the stack pointer.  Otherwise, this is
3487   a register operand.
3488
3489   This is used to prevent esp from being used as an index reg,
3490   which would only happen in pathological cases.  */
3491
3492int
3493reg_no_sp_operand (op, mode)
3494     register rtx op;
3495     enum machine_mode mode;
3496{
3497  rtx t = op;
3498  if (GET_CODE (t) == SUBREG)
3499    t = SUBREG_REG (t);
3500  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3501    return 0;
3502
3503  return register_operand (op, mode);
3504}
3505
3506int
3507mmx_reg_operand (op, mode)
3508     register rtx op;
3509     enum machine_mode mode ATTRIBUTE_UNUSED;
3510{
3511  return MMX_REG_P (op);
3512}
3513
3514/* Return false if this is any eliminable register.  Otherwise
3515   general_operand.  */
3516
3517int
3518general_no_elim_operand (op, mode)
3519     register rtx op;
3520     enum machine_mode mode;
3521{
3522  rtx t = op;
3523  if (GET_CODE (t) == SUBREG)
3524    t = SUBREG_REG (t);
3525  if (t == arg_pointer_rtx || t == frame_pointer_rtx
3526      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3527      || t == virtual_stack_dynamic_rtx)
3528    return 0;
3529  if (REG_P (t)
3530      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3531      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3532    return 0;
3533
3534  return general_operand (op, mode);
3535}
3536
3537/* Return false if this is any eliminable register.  Otherwise
3538   register_operand or const_int.  */
3539
3540int
3541nonmemory_no_elim_operand (op, mode)
3542     register rtx op;
3543     enum machine_mode mode;
3544{
3545  rtx t = op;
3546  if (GET_CODE (t) == SUBREG)
3547    t = SUBREG_REG (t);
3548  if (t == arg_pointer_rtx || t == frame_pointer_rtx
3549      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3550      || t == virtual_stack_dynamic_rtx)
3551    return 0;
3552
3553  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3554}
3555
3556/* Return false if this is any eliminable register or stack register,
3557   otherwise work like register_operand.  */
3558
3559int
3560index_register_operand (op, mode)
3561     register rtx op;
3562     enum machine_mode mode;
3563{
3564  rtx t = op;
3565  if (GET_CODE (t) == SUBREG)
3566    t = SUBREG_REG (t);
3567  if (!REG_P (t))
3568    return 0;
3569  if (t == arg_pointer_rtx
3570      || t == frame_pointer_rtx
3571      || t == virtual_incoming_args_rtx
3572      || t == virtual_stack_vars_rtx
3573      || t == virtual_stack_dynamic_rtx
3574      || REGNO (t) == STACK_POINTER_REGNUM)
3575    return 0;
3576
3577  return general_operand (op, mode);
3578}
3579
3580/* Return true if op is a Q_REGS class register.  */
3581
3582int
3583q_regs_operand (op, mode)
3584     register rtx op;
3585     enum machine_mode mode;
3586{
3587  if (mode != VOIDmode && GET_MODE (op) != mode)
3588    return 0;
3589  if (GET_CODE (op) == SUBREG)
3590    op = SUBREG_REG (op);
3591  return ANY_QI_REG_P (op);
3592}
3593
3594/* Return true if op is a NON_Q_REGS class register.  */
3595
3596int
3597non_q_regs_operand (op, mode)
3598     register rtx op;
3599     enum machine_mode mode;
3600{
3601  if (mode != VOIDmode && GET_MODE (op) != mode)
3602    return 0;
3603  if (GET_CODE (op) == SUBREG)
3604    op = SUBREG_REG (op);
3605  return NON_QI_REG_P (op);
3606}
3607
3608/* Return 1 when OP is an operand acceptable for a standard SSE move.  */
3609int
3610vector_move_operand (op, mode)
3611     rtx op;
3612     enum machine_mode mode;
3613{
3614  if (nonimmediate_operand (op, mode))
3615    return 1;
3616  if (GET_MODE (op) != mode && mode != VOIDmode)
3617    return 0;
3618  return (op == CONST0_RTX (GET_MODE (op)));
3619}
3620
3621/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3622   insns.  */
3623int
3624sse_comparison_operator (op, mode)
3625     rtx op;
3626     enum machine_mode mode ATTRIBUTE_UNUSED;
3627{
3628  enum rtx_code code = GET_CODE (op);
3629  switch (code)
3630    {
3631    /* Operations supported directly.  */
3632    case EQ:
3633    case LT:
3634    case LE:
3635    case UNORDERED:
3636    case NE:
3637    case UNGE:
3638    case UNGT:
3639    case ORDERED:
3640      return 1;
3641    /* These are equivalent to ones above in non-IEEE comparisons.  */
3642    case UNEQ:
3643    case UNLT:
3644    case UNLE:
3645    case LTGT:
3646    case GE:
3647    case GT:
3648      return !TARGET_IEEE_FP;
3649    default:
3650      return 0;
3651    }
3652}
3653/* Return 1 if OP is a valid comparison operator in valid mode.  */
3654int
3655ix86_comparison_operator (op, mode)
3656     register rtx op;
3657     enum machine_mode mode;
3658{
3659  enum machine_mode inmode;
3660  enum rtx_code code = GET_CODE (op);
3661  if (mode != VOIDmode && GET_MODE (op) != mode)
3662    return 0;
3663  if (GET_RTX_CLASS (code) != '<')
3664    return 0;
3665  inmode = GET_MODE (XEXP (op, 0));
3666
3667  if (inmode == CCFPmode || inmode == CCFPUmode)
3668    {
3669      enum rtx_code second_code, bypass_code;
3670      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3671      return (bypass_code == NIL && second_code == NIL);
3672    }
3673  switch (code)
3674    {
3675    case EQ: case NE:
3676      return 1;
3677    case LT: case GE:
3678      if (inmode == CCmode || inmode == CCGCmode
3679	  || inmode == CCGOCmode || inmode == CCNOmode)
3680	return 1;
3681      return 0;
3682    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3683      if (inmode == CCmode)
3684	return 1;
3685      return 0;
3686    case GT: case LE:
3687      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3688	return 1;
3689      return 0;
3690    default:
3691      return 0;
3692    }
3693}
3694
3695/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */
3696
3697int
3698fcmov_comparison_operator (op, mode)
3699    register rtx op;
3700    enum machine_mode mode;
3701{
3702  enum machine_mode inmode;
3703  enum rtx_code code = GET_CODE (op);
3704  if (mode != VOIDmode && GET_MODE (op) != mode)
3705    return 0;
3706  if (GET_RTX_CLASS (code) != '<')
3707    return 0;
3708  inmode = GET_MODE (XEXP (op, 0));
3709  if (inmode == CCFPmode || inmode == CCFPUmode)
3710    {
3711      enum rtx_code second_code, bypass_code;
3712      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3713      if (bypass_code != NIL || second_code != NIL)
3714	return 0;
3715      code = ix86_fp_compare_code_to_integer (code);
3716    }
3717  /* The i387 supports only a limited set of condition codes.  */
3718  switch (code)
3719    {
3720    case LTU: case GTU: case LEU: case GEU:
3721      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3722	return 1;
3723      return 0;
3724    case ORDERED: case UNORDERED:
3725    case EQ: case NE:
3726      return 1;
3727    default:
3728      return 0;
3729    }
3730}
3731
3732/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */
3733
3734int
3735promotable_binary_operator (op, mode)
3736     register rtx op;
3737     enum machine_mode mode ATTRIBUTE_UNUSED;
3738{
3739  switch (GET_CODE (op))
3740    {
3741    case MULT:
3742      /* Modern CPUs have the same latency for HImode and SImode multiplies,
3743         but the 386 and 486 do HImode multiplies faster.  */
3744      return ix86_cpu > PROCESSOR_I486;
3745    case PLUS:
3746    case AND:
3747    case IOR:
3748    case XOR:
3749    case ASHIFT:
3750      return 1;
3751    default:
3752      return 0;
3753    }
3754}
3755
3756/* Nearly general operand, but accept any const_double, since we wish
3757   to be able to drop them into memory rather than have them get pulled
3758   into registers.  */
3759
3760int
3761cmp_fp_expander_operand (op, mode)
3762     register rtx op;
3763     enum machine_mode mode;
3764{
3765  if (mode != VOIDmode && mode != GET_MODE (op))
3766    return 0;
3767  if (GET_CODE (op) == CONST_DOUBLE)
3768    return 1;
3769  return general_operand (op, mode);
3770}
3771
3772/* Match an SI or HImode register for a zero_extract.  */
3773
3774int
3775ext_register_operand (op, mode)
3776     register rtx op;
3777     enum machine_mode mode ATTRIBUTE_UNUSED;
3778{
3779  int regno;
3780  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3781      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3782    return 0;
3783
3784  if (!register_operand (op, VOIDmode))
3785    return 0;
3786
3787  /* Be careful to accept only registers having upper parts.  */
3788  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3789  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3790}
3791
3792/* Return 1 if this is a valid binary floating-point operation.
3793   OP is the expression matched, and MODE is its mode.  */
3794
3795int
3796binary_fp_operator (op, mode)
3797    register rtx op;
3798    enum machine_mode mode;
3799{
3800  if (mode != VOIDmode && mode != GET_MODE (op))
3801    return 0;
3802
3803  switch (GET_CODE (op))
3804    {
3805    case PLUS:
3806    case MINUS:
3807    case MULT:
3808    case DIV:
3809      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3810
3811    default:
3812      return 0;
3813    }
3814}
3815
3816int
3817mult_operator (op, mode)
3818    register rtx op;
3819    enum machine_mode mode ATTRIBUTE_UNUSED;
3820{
3821  return GET_CODE (op) == MULT;
3822}
3823
3824int
3825div_operator (op, mode)
3826    register rtx op;
3827    enum machine_mode mode ATTRIBUTE_UNUSED;
3828{
3829  return GET_CODE (op) == DIV;
3830}
3831
3832int
3833arith_or_logical_operator (op, mode)
3834      rtx op;
3835      enum machine_mode mode;
3836{
3837  return ((mode == VOIDmode || GET_MODE (op) == mode)
3838          && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3839              || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3840}
3841
3842/* Returns 1 if OP is memory operand with a displacement.  */
3843
3844int
3845memory_displacement_operand (op, mode)
3846     register rtx op;
3847     enum machine_mode mode;
3848{
3849  struct ix86_address parts;
3850
3851  if (! memory_operand (op, mode))
3852    return 0;
3853
3854  if (! ix86_decompose_address (XEXP (op, 0), &parts))
3855    abort ();
3856
3857  return parts.disp != NULL_RTX;
3858}
3859
3860/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3861   re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3862
3863   ??? It seems likely that this will only work because cmpsi is an
3864   expander, and no actual insns use this.  */
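/* The special form accepted below is, sketched for illustration,
     (and:SI (zero_extract:SI (reg) (const_int 8) (const_int 8))
	     (const_int N))
   i.e. a masked test of the high byte of a QImode-accessible register.  */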
3865
3866int
3867cmpsi_operand (op, mode)
3868      rtx op;
3869      enum machine_mode mode;
3870{
3871  if (nonimmediate_operand (op, mode))
3872    return 1;
3873
3874  if (GET_CODE (op) == AND
3875      && GET_MODE (op) == SImode
3876      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3877      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3878      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3879      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3880      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3881      && GET_CODE (XEXP (op, 1)) == CONST_INT)
3882    return 1;
3883
3884  return 0;
3885}
3886
3887/* Returns 1 if OP is a memory operand that cannot be represented by the
3888   modRM array.  */
3889
3890int
3891long_memory_operand (op, mode)
3892     register rtx op;
3893     enum machine_mode mode;
3894{
3895  if (! memory_operand (op, mode))
3896    return 0;
3897
3898  return memory_address_length (op) != 0;
3899}
3900
3901/* Return nonzero if the rtx is known aligned.  */
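/* For example, an address built from a base register whose pointer
   alignment is known to be at least 32 bits plus a displacement that is
   a multiple of 4 passes; anything less well known is rejected below.  */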
3902
3903int
3904aligned_operand (op, mode)
3905     rtx op;
3906     enum machine_mode mode;
3907{
3908  struct ix86_address parts;
3909
3910  if (!general_operand (op, mode))
3911    return 0;
3912
3913  /* Registers and immediate operands are always "aligned".  */
3914  if (GET_CODE (op) != MEM)
3915    return 1;
3916
3917  /* Don't even try to do any aligned optimizations with volatiles.  */
3918  if (MEM_VOLATILE_P (op))
3919    return 0;
3920
3921  op = XEXP (op, 0);
3922
3923  /* Pushes and pops are only valid on the stack pointer.  */
3924  if (GET_CODE (op) == PRE_DEC
3925      || GET_CODE (op) == POST_INC)
3926    return 1;
3927
3928  /* Decode the address.  */
3929  if (! ix86_decompose_address (op, &parts))
3930    abort ();
3931
3932  if (parts.base && GET_CODE (parts.base) == SUBREG)
3933    parts.base = SUBREG_REG (parts.base);
3934  if (parts.index && GET_CODE (parts.index) == SUBREG)
3935    parts.index = SUBREG_REG (parts.index);
3936
3937  /* Look for some component that isn't known to be aligned.  */
3938  if (parts.index)
3939    {
3940      if (parts.scale < 4
3941	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3942	return 0;
3943    }
3944  if (parts.base)
3945    {
3946      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3947	return 0;
3948    }
3949  if (parts.disp)
3950    {
3951      if (GET_CODE (parts.disp) != CONST_INT
3952	  || (INTVAL (parts.disp) & 3) != 0)
3953	return 0;
3954    }
3955
3956  /* Didn't find one -- this must be an aligned address.  */
3957  return 1;
3958}
3959
3960/* Return true if the constant is something that can be loaded with
3961   a special instruction.  Only handle 0.0 and 1.0; others are less
3962   worthwhile.  */
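/* The nonzero return values distinguish the constants: 1 for 0.0 and 2
   for 1.0, which callers elsewhere in the port can map to the fldz and
   fld1 instructions.  */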
3963
3964int
3965standard_80387_constant_p (x)
3966     rtx x;
3967{
3968  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3969    return -1;
3970  /* Note that the 80387 can load other constants too, such as pi, which we
3971     should support as well.  On some machines these are much slower to load
3972     as a standard constant than to load from doubles in memory.  */
3973  if (x == CONST0_RTX (GET_MODE (x)))
3974    return 1;
3975  if (x == CONST1_RTX (GET_MODE (x)))
3976    return 2;
3977  return 0;
3978}
3979
3980/* Return 1 if X is an FP constant we can load into an SSE register
3981   without using memory.  */
3982int
3983standard_sse_constant_p (x)
3984     rtx x;
3985{
3986  if (x == const0_rtx)
3987    return 1;
3988  return (x == CONST0_RTX (GET_MODE (x)));
3989}
3990
3991/* Returns 1 if OP contains a symbol reference.  */
3992
3993int
3994symbolic_reference_mentioned_p (op)
3995     rtx op;
3996{
3997  register const char *fmt;
3998  register int i;
3999
4000  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4001    return 1;
4002
4003  fmt = GET_RTX_FORMAT (GET_CODE (op));
4004  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4005    {
4006      if (fmt[i] == 'E')
4007	{
4008	  register int j;
4009
4010	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4011	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4012	      return 1;
4013	}
4014
4015      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4016	return 1;
4017    }
4018
4019  return 0;
4020}
4021
4022/* Return 1 if it is appropriate to emit `ret' instructions in the
4023   body of a function.  Do this only if the epilogue is simple, needing a
4024   couple of insns.  Prior to reloading, we can't tell how many registers
4025   must be saved, so return 0 then.  Return 0 if there is no frame
4026   marker to de-allocate.
4027
4028   If NON_SAVING_SETJMP is defined and true, then it is not possible
4029   for the epilogue to be simple, so return 0.  This is a special case
4030   since NON_SAVING_SETJMP will not cause regs_ever_live to change
4031   until final, but jump_optimize may need to know sooner if a
4032   `return' is OK.  */
4033
4034int
4035ix86_can_use_return_insn_p ()
4036{
4037  struct ix86_frame frame;
4038
4039#ifdef NON_SAVING_SETJMP
4040  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4041    return 0;
4042#endif
4043
4044  if (! reload_completed || frame_pointer_needed)
4045    return 0;
4046
4047  /* Don't allow more than 32k bytes of arguments to be popped, since
4048     that's all we can do with one instruction.  */
4049  if (current_function_pops_args
4050      && current_function_args_size >= 32768)
4051    return 0;
4052
4053  ix86_compute_frame_layout (&frame);
4054  return frame.to_allocate == 0 && frame.nregs == 0;
4055}
4056
4057/* Return 1 if VALUE can be stored in the sign extended immediate field.  */
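/* For illustration: a sign-extended 32-bit immediate covers the values
   -0x80000000 through 0x7fffffff, so (const_int -1) and
   (const_int 0x7fffffff) are accepted while 0x80000000 is not.  */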
4058int
4059x86_64_sign_extended_value (value)
4060     rtx value;
4061{
4062  switch (GET_CODE (value))
4063    {
4064      /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4065         to be at least 32 and thus all acceptable constants are
4066	 represented as CONST_INT.  */
4067      case CONST_INT:
4068	if (HOST_BITS_PER_WIDE_INT == 32)
4069	  return 1;
4070	else
4071	  {
4072	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4073	    return trunc_int_for_mode (val, SImode) == val;
4074	  }
4075	break;
4076
4077      /* For certain code models, the symbolic references are known to fit.
4078	 In the CM_SMALL_PIC model we know it fits if it is local to the shared
4079	 library.  Don't count TLS SYMBOL_REFs here, since they should fit
4080	 only if inside of UNSPEC handled below.  */
4081      case SYMBOL_REF:
4082	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4083
4084      /* For certain code models, the code is near as well.  */
4085      case LABEL_REF:
4086	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4087		|| ix86_cmodel == CM_KERNEL);
4088
4089      /* We also may accept the offsetted memory references in certain special
4090         cases.  */
4091      case CONST:
4092	if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4093	  switch (XINT (XEXP (value, 0), 1))
4094	    {
4095	    case UNSPEC_GOTPCREL:
4096	    case UNSPEC_DTPOFF:
4097	    case UNSPEC_GOTNTPOFF:
4098	    case UNSPEC_NTPOFF:
4099	      return 1;
4100	    default:
4101	      break;
4102	    }
4103	if (GET_CODE (XEXP (value, 0)) == PLUS)
4104	  {
4105	    rtx op1 = XEXP (XEXP (value, 0), 0);
4106	    rtx op2 = XEXP (XEXP (value, 0), 1);
4107	    HOST_WIDE_INT offset;
4108
4109	    if (ix86_cmodel == CM_LARGE)
4110	      return 0;
4111	    if (GET_CODE (op2) != CONST_INT)
4112	      return 0;
4113	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
4114	    switch (GET_CODE (op1))
4115	      {
4116		case SYMBOL_REF:
4117		  /* For CM_SMALL assume that the last object is 16MB before
4118		     the end of the 31-bit address space.  We may also accept
4119		     fairly large negative constants, knowing that all objects
4120		     are in the positive half of the address space.  */
4121		  if (ix86_cmodel == CM_SMALL
4122		      && offset < 16*1024*1024
4123		      && trunc_int_for_mode (offset, SImode) == offset)
4124		    return 1;
4125		  /* For CM_KERNEL we know that all objects reside in the
4126		     negative half of the 32-bit address space.  We must not
4127		     accept negative offsets, since they may fall just outside
4128		     an object, but we may accept fairly large positive ones.  */
4129		  if (ix86_cmodel == CM_KERNEL
4130		      && offset > 0
4131		      && trunc_int_for_mode (offset, SImode) == offset)
4132		    return 1;
4133		  break;
4134		case LABEL_REF:
4135		  /* These conditions are similar to SYMBOL_REF ones, just the
4136		     constraints for code models differ.  */
4137		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4138		      && offset < 16*1024*1024
4139		      && trunc_int_for_mode (offset, SImode) == offset)
4140		    return 1;
4141		  if (ix86_cmodel == CM_KERNEL
4142		      && offset > 0
4143		      && trunc_int_for_mode (offset, SImode) == offset)
4144		    return 1;
4145		  break;
4146		case UNSPEC:
4147		  switch (XINT (op1, 1))
4148		    {
4149		    case UNSPEC_DTPOFF:
4150		    case UNSPEC_NTPOFF:
4151		      if (offset > 0
4152			  && trunc_int_for_mode (offset, SImode) == offset)
4153			return 1;
4154		    }
4155		  break;
4156		default:
4157		  return 0;
4158	      }
4159	  }
4160	return 0;
4161      default:
4162	return 0;
4163    }
4164}
4165
4166/* Return 1 if VALUE can be stored in the zero extended immediate field.  */
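/* For illustration: a zero-extended 32-bit immediate covers the values
   0 through 0xffffffff.  Depending on HOST_BITS_PER_WIDE_INT such a
   value arrives either as a CONST_INT or as a VOIDmode CONST_DOUBLE
   with a zero high word; both cases are handled below.  */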
4167int
4168x86_64_zero_extended_value (value)
4169     rtx value;
4170{
4171  switch (GET_CODE (value))
4172    {
4173      case CONST_DOUBLE:
4174	if (HOST_BITS_PER_WIDE_INT == 32)
4175	  return  (GET_MODE (value) == VOIDmode
4176		   && !CONST_DOUBLE_HIGH (value));
4177	else
4178	  return 0;
4179      case CONST_INT:
4180	if (HOST_BITS_PER_WIDE_INT == 32)
4181	  return INTVAL (value) >= 0;
4182	else
4183	  return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4184	break;
4185
4186      /* For certain code models, the symbolic references are known to fit.  */
4187      case SYMBOL_REF:
4188	return ix86_cmodel == CM_SMALL;
4189
4190      /* For certain code models, the code is near as well.  */
4191      case LABEL_REF:
4192	return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4193
4194      /* We also may accept the offsetted memory references in certain special
4195         cases.  */
4196      case CONST:
4197	if (GET_CODE (XEXP (value, 0)) == PLUS)
4198	  {
4199	    rtx op1 = XEXP (XEXP (value, 0), 0);
4200	    rtx op2 = XEXP (XEXP (value, 0), 1);
4201
4202	    if (ix86_cmodel == CM_LARGE)
4203	      return 0;
4204	    switch (GET_CODE (op1))
4205	      {
4206		case SYMBOL_REF:
4207		    return 0;
4208		  /* For the small code model we may accept fairly large positive
4209		     offsets, since one bit is available for free.  Negative
4210		     offsets are limited by the size of the NULL pointer area
4211		     specified by the ABI.  */
4212		  if (ix86_cmodel == CM_SMALL
4213		      && GET_CODE (op2) == CONST_INT
4214		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4215		      && (trunc_int_for_mode (INTVAL (op2), SImode)
4216			  == INTVAL (op2)))
4217		    return 1;
4218	          /* ??? For the kernel, we may accept adjustment of
4219		     -0x10000000, since we know that it will just convert
4220		     negative address space to positive, but perhaps this
4221		     is not worthwhile.  */
4222		  break;
4223		case LABEL_REF:
4224		  /* These conditions are similar to SYMBOL_REF ones, just the
4225		     constraints for code models differ.  */
4226		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4227		      && GET_CODE (op2) == CONST_INT
4228		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4229		      && (trunc_int_for_mode (INTVAL (op2), SImode)
4230			  == INTVAL (op2)))
4231		    return 1;
4232		  break;
4233		default:
4234		  return 0;
4235	      }
4236	  }
4237	return 0;
4238      default:
4239	return 0;
4240    }
4241}
4242
4243/* Value should be nonzero if functions must have frame pointers.
4244   Zero means the frame pointer need not be set up (and parms may
4245   be accessed via the stack pointer) in functions that seem suitable.  */
4246
4247int
4248ix86_frame_pointer_required ()
4249{
4250  /* If we accessed previous frames, then the generated code expects
4251     to be able to access the saved ebp value in our frame.  */
4252  if (cfun->machine->accesses_prev_frame)
4253    return 1;
4254
4255  /* Several x86 OSes need a frame pointer for other reasons,
4256     usually pertaining to setjmp.  */
4257  if (SUBTARGET_FRAME_POINTER_REQUIRED)
4258    return 1;
4259
4260  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4261     the frame pointer by default.  Turn it back on now if we've not
4262     got a leaf function.  */
4263  if (TARGET_OMIT_LEAF_FRAME_POINTER
4264      && (!current_function_is_leaf))
4265    return 1;
4266
4267  if (current_function_profile)
4268    return 1;
4269
4270  return 0;
4271}
4272
4273/* Record that the current function accesses previous call frames.  */
4274
4275void
4276ix86_setup_frame_addresses ()
4277{
4278  cfun->machine->accesses_prev_frame = 1;
4279}
4280
4281#if defined(HAVE_GAS_HIDDEN) && (defined(SUPPORTS_ONE_ONLY) && SUPPORTS_ONE_ONLY)
4282# define USE_HIDDEN_LINKONCE 1
4283#else
4284# define USE_HIDDEN_LINKONCE 0
4285#endif
4286
4287static int pic_labels_used;
4288
4289/* Fills in the label name that should be used for a pc thunk for
4290   the given register.  */
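/* For example, with hidden linkonce support the thunk for %ebx is named
   "__i686.get_pc_thunk.bx"; otherwise an internal label built from the
   "LPR" prefix and the register number is used instead.  */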
4291
4292static void
4293get_pc_thunk_name (name, regno)
4294     char name[32];
4295     unsigned int regno;
4296{
4297  if (USE_HIDDEN_LINKONCE)
4298    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4299  else
4300    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4301}
4302
4303
4304/* This function emits the pc thunks used for -fpic: each loads its
4305   register with the return address of the caller and then returns.  */
4306
4307void
4308ix86_asm_file_end (file)
4309     FILE *file;
4310{
4311  rtx xops[2];
4312  int regno;
4313
4314  for (regno = 0; regno < 8; ++regno)
4315    {
4316      char name[32];
4317
4318      if (! ((pic_labels_used >> regno) & 1))
4319	continue;
4320
4321      get_pc_thunk_name (name, regno);
4322
4323      if (USE_HIDDEN_LINKONCE)
4324	{
4325	  tree decl;
4326
4327	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
4328			     error_mark_node);
4329	  TREE_PUBLIC (decl) = 1;
4330	  TREE_STATIC (decl) = 1;
4331	  DECL_ONE_ONLY (decl) = 1;
4332
4333	  (*targetm.asm_out.unique_section) (decl, 0);
4334	  named_section (decl, NULL, 0);
4335
4336	  (*targetm.asm_out.globalize_label) (file, name);
4337	  fputs ("\t.hidden\t", file);
4338	  assemble_name (file, name);
4339	  fputc ('\n', file);
4340	  ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4341	}
4342      else
4343	{
4344	  text_section ();
4345	  ASM_OUTPUT_LABEL (file, name);
4346	}
4347
4348      xops[0] = gen_rtx_REG (SImode, regno);
4349      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4350      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4351      output_asm_insn ("ret", xops);
4352    }
4353}
4354
4355/* Emit code for the SET_GOT patterns.  */
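/* When compiling PIC without deep branch prediction, the classic
   sequence below is emitted; the label name is illustrative:

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With deep branch prediction, a call to a pc thunk is emitted instead,
   which keeps the processor's call/return prediction paired.  */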
4356
4357const char *
4358output_set_got (dest)
4359     rtx dest;
4360{
4361  rtx xops[3];
4362
4363  xops[0] = dest;
4364  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4365
4366  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4367    {
4368      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4369
4370      if (!flag_pic)
4371	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4372      else
4373	output_asm_insn ("call\t%a2", xops);
4374
4375#if TARGET_MACHO
4376      /* Output the "canonical" label name ("Lxx$pb") here too.  This
4377         is what will be referred to by the Mach-O PIC subsystem.  */
4378      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4379#endif
4380      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4381				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4382
4383      if (flag_pic)
4384	output_asm_insn ("pop{l}\t%0", xops);
4385    }
4386  else
4387    {
4388      char name[32];
4389      get_pc_thunk_name (name, REGNO (dest));
4390      pic_labels_used |= 1 << REGNO (dest);
4391
4392      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4393      xops[2] = gen_rtx_MEM (QImode, xops[2]);
4394      output_asm_insn ("call\t%X2", xops);
4395    }
4396
4397  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4398    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4399  else if (!TARGET_MACHO)
4400    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4401
4402  return "";
4403}
4404
4405/* Generate a "push" pattern for input ARG.  */
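/* The RTL produced is, for illustration,
     (set (mem:SI (pre_dec:SI (reg:SI esp))) arg)
   with Pmode selecting SImode or DImode as appropriate.  */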
4406
4407static rtx
4408gen_push (arg)
4409     rtx arg;
4410{
4411  return gen_rtx_SET (VOIDmode,
4412		      gen_rtx_MEM (Pmode,
4413				   gen_rtx_PRE_DEC (Pmode,
4414						    stack_pointer_rtx)),
4415		      arg);
4416}
4417
4418/* Return the number of an unused call-clobbered register available for
4419   the entire function, or INVALID_REGNUM if there is none.  */
4420
4421static unsigned int
4422ix86_select_alt_pic_regnum ()
4423{
4424  if (current_function_is_leaf && !current_function_profile)
4425    {
4426      int i;
4427      for (i = 2; i >= 0; --i)
4428        if (!regs_ever_live[i])
4429	  return i;
4430    }
4431
4432  return INVALID_REGNUM;
4433}
4434
4435/* Return 1 if we need to save REGNO.  */
4436static int
4437ix86_save_reg (regno, maybe_eh_return)
4438     unsigned int regno;
4439     int maybe_eh_return;
4440{
4441  if (pic_offset_table_rtx
4442      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4443      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4444	  || current_function_profile
4445	  || current_function_calls_eh_return
4446	  || current_function_uses_const_pool))
4447    {
4448      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4449	return 0;
4450      return 1;
4451    }
4452
4453  if (current_function_calls_eh_return && maybe_eh_return)
4454    {
4455      unsigned i;
4456      for (i = 0; ; i++)
4457	{
4458	  unsigned test = EH_RETURN_DATA_REGNO (i);
4459	  if (test == INVALID_REGNUM)
4460	    break;
4461	  if (test == regno)
4462	    return 1;
4463	}
4464    }
4465
4466  return (regs_ever_live[regno]
4467	  && !call_used_regs[regno]
4468	  && !fixed_regs[regno]
4469	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4470}
4471
4472/* Return number of registers to be saved on the stack.  */
4473
4474static int
4475ix86_nsaved_regs ()
4476{
4477  int nregs = 0;
4478  int regno;
4479
4480  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4481    if (ix86_save_reg (regno, true))
4482      nregs++;
4483  return nregs;
4484}
4485
4486/* Return the offset between two registers, one to be eliminated, and the other
4487   its replacement, at the start of a routine.  */
4488
4489HOST_WIDE_INT
4490ix86_initial_elimination_offset (from, to)
4491     int from;
4492     int to;
4493{
4494  struct ix86_frame frame;
4495  ix86_compute_frame_layout (&frame);
4496
4497  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4498    return frame.hard_frame_pointer_offset;
4499  else if (from == FRAME_POINTER_REGNUM
4500	   && to == HARD_FRAME_POINTER_REGNUM)
4501    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4502  else
4503    {
4504      if (to != STACK_POINTER_REGNUM)
4505	abort ();
4506      else if (from == ARG_POINTER_REGNUM)
4507	return frame.stack_pointer_offset;
4508      else if (from != FRAME_POINTER_REGNUM)
4509	abort ();
4510      else
4511	return frame.stack_pointer_offset - frame.frame_pointer_offset;
4512    }
4513}
4514
4515/* Fill in the ix86_frame structure describing the frame of the currently compiled function.  */
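/* A sketch of the layout computed below, from higher to lower addresses
   (padding fields may be zero):

	return address
	saved frame pointer	(if frame_pointer_needed)
	register save area	(nregs words)
	va-arg save area	(if ix86_save_varrargs_registers)
	padding1		(aligns the local frame)
	local variables
	outgoing arguments area
	padding2		(aligns the stack boundary)  */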
4516
4517static void
4518ix86_compute_frame_layout (frame)
4519     struct ix86_frame *frame;
4520{
4521  HOST_WIDE_INT total_size;
4522  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4523  int offset;
4524  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4525  HOST_WIDE_INT size = get_frame_size ();
4526
4527  frame->nregs = ix86_nsaved_regs ();
4528  total_size = size;
4529
4530  /* Skip return address and saved base pointer.  */
4531  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4532
4533  frame->hard_frame_pointer_offset = offset;
4534
4535  /* Do some sanity checking of stack_alignment_needed and
4536     preferred_alignment, since the i386 port is the only one using these
4537     features, and they may break easily.  */
4538
4539  if (size && !stack_alignment_needed)
4540    abort ();
4541  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4542    abort ();
4543  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4544    abort ();
4545  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4546    abort ();
4547
4548  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4549    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4550
4551  /* Register save area */
4552  offset += frame->nregs * UNITS_PER_WORD;
4553
4554  /* Va-arg area */
4555  if (ix86_save_varrargs_registers)
4556    {
4557      offset += X86_64_VARARGS_SIZE;
4558      frame->va_arg_size = X86_64_VARARGS_SIZE;
4559    }
4560  else
4561    frame->va_arg_size = 0;
4562
4563  /* Align start of frame for local function.  */
4564  frame->padding1 = ((offset + stack_alignment_needed - 1)
4565		     & -stack_alignment_needed) - offset;
4566
4567  offset += frame->padding1;
4568
4569  /* Frame pointer points here.  */
4570  frame->frame_pointer_offset = offset;
4571
4572  offset += size;
4573
4574  /* Add outgoing arguments area.  Can be skipped if we eliminated
4575     all the function calls as dead code.  */
4576  if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4577    {
4578      offset += current_function_outgoing_args_size;
4579      frame->outgoing_arguments_size = current_function_outgoing_args_size;
4580    }
4581  else
4582    frame->outgoing_arguments_size = 0;
4583
4584  /* Align stack boundary.  Only needed if we're calling another function
4585     or using alloca.  */
4586  if (!current_function_is_leaf || current_function_calls_alloca)
4587    frame->padding2 = ((offset + preferred_alignment - 1)
4588		       & -preferred_alignment) - offset;
4589  else
4590    frame->padding2 = 0;
4591
4592  offset += frame->padding2;
4593
4594  /* We've reached end of stack frame.  */
4595  frame->stack_pointer_offset = offset;
4596
4597  /* Size the prologue needs to allocate.  */
4598  frame->to_allocate =
4599    (size + frame->padding1 + frame->padding2
4600     + frame->outgoing_arguments_size + frame->va_arg_size);
4601
4602  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4603      && current_function_is_leaf)
4604    {
4605      frame->red_zone_size = frame->to_allocate;
4606      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4607	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4608    }
4609  else
4610    frame->red_zone_size = 0;
4611  frame->to_allocate -= frame->red_zone_size;
4612  frame->stack_pointer_offset -= frame->red_zone_size;
4613#if 0
4614  fprintf (stderr, "nregs: %i\n", frame->nregs);
4615  fprintf (stderr, "size: %i\n", size);
4616  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4617  fprintf (stderr, "padding1: %i\n", frame->padding1);
4618  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4619  fprintf (stderr, "padding2: %i\n", frame->padding2);
4620  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4621  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4622  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4623  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4624	   frame->hard_frame_pointer_offset);
4625  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4626#endif
4627}
4628
4629/* Emit code to save registers in the prologue.  */
4630
4631static void
4632ix86_emit_save_regs ()
4633{
4634  register int regno;
4635  rtx insn;
4636
4637  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4638    if (ix86_save_reg (regno, true))
4639      {
4640	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4641	RTX_FRAME_RELATED_P (insn) = 1;
4642      }
4643}
4644
4645/* Emit code to save registers using MOV insns.  First register
4646   is restored from POINTER + OFFSET.  */
4647static void
4648ix86_emit_save_regs_using_mov (pointer, offset)
4649     rtx pointer;
4650     HOST_WIDE_INT offset;
4651{
4652  int regno;
4653  rtx insn;
4654
4655  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4656    if (ix86_save_reg (regno, true))
4657      {
4658	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4659					       Pmode, offset),
4660			       gen_rtx_REG (Pmode, regno));
4661	RTX_FRAME_RELATED_P (insn) = 1;
4662	offset += UNITS_PER_WORD;
4663      }
4664}
4665
4666/* Expand the prologue into a bunch of separate insns.  */
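/* For a function needing a frame pointer, the classic shape is, in
   assembly terms (SIZE illustrative):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	<saved registers>	(or movs after the sub below)
	subl	$SIZE, %esp

   The push-vs-mov choice and a probed _alloca path for large frames
   are decided below.  */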
4667
4668void
4669ix86_expand_prologue ()
4670{
4671  rtx insn;
4672  bool pic_reg_used;
4673  struct ix86_frame frame;
4674  int use_mov = 0;
4675  HOST_WIDE_INT allocate;
4676
4677  if (!optimize_size)
4678    {
4679      use_fast_prologue_epilogue
4680	 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4681      if (TARGET_PROLOGUE_USING_MOVE)
4682        use_mov = use_fast_prologue_epilogue;
4683    }
4684  ix86_compute_frame_layout (&frame);
4685
4686  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
4687     slower on all targets.  Also sdb doesn't like it.  */
4688
4689  if (frame_pointer_needed)
4690    {
4691      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4692      RTX_FRAME_RELATED_P (insn) = 1;
4693
4694      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4695      RTX_FRAME_RELATED_P (insn) = 1;
4696    }
4697
4698  allocate = frame.to_allocate;
4699  /* When we are dealing with only a single register and an empty frame,
4700     a push is equivalent to the mov+add sequence.  */
4701  if (allocate == 0 && frame.nregs <= 1)
4702    use_mov = 0;
4703
4704  if (!use_mov)
4705    ix86_emit_save_regs ();
4706  else
4707    allocate += frame.nregs * UNITS_PER_WORD;
4708
4709  if (allocate == 0)
4710    ;
4711  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4712    {
4713      insn = emit_insn (gen_pro_epilogue_adjust_stack
4714			(stack_pointer_rtx, stack_pointer_rtx,
4715			 GEN_INT (-allocate)));
4716      RTX_FRAME_RELATED_P (insn) = 1;
4717    }
4718  else
4719    {
4720      /* ??? Is this only valid for Win32?  */
4721
4722      rtx arg0, sym;
4723
4724      if (TARGET_64BIT)
4725	abort ();
4726
4727      arg0 = gen_rtx_REG (SImode, 0);
4728      emit_move_insn (arg0, GEN_INT (allocate));
4729
4730      sym = gen_rtx_MEM (FUNCTION_MODE,
4731			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4732      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4733
4734      CALL_INSN_FUNCTION_USAGE (insn)
4735	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4736			     CALL_INSN_FUNCTION_USAGE (insn));
4737
4738      /* Don't allow the scheduling pass to move insns across the __alloca
4739         call.  */
4740      emit_insn (gen_blockage (const0_rtx));
4741    }
4742  if (use_mov)
4743    {
4744      if (!frame_pointer_needed || !frame.to_allocate)
4745        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4746      else
4747        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4748				       -frame.nregs * UNITS_PER_WORD);
4749    }
4750
4751#ifdef SUBTARGET_PROLOGUE
4752  SUBTARGET_PROLOGUE;
4753#endif
4754
4755  pic_reg_used = false;
4756  if (pic_offset_table_rtx
4757      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4758	  || current_function_profile))
4759    {
4760      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4761
4762      if (alt_pic_reg_used != INVALID_REGNUM)
4763	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4764
4765      pic_reg_used = true;
4766    }
4767
4768  if (pic_reg_used)
4769    {
4770      insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4771
4772      /* Even with accurate pre-reload life analysis, we can wind up
4773	 deleting all references to the pic register after reload.
4774	 Consider the case where cross-jumping unifies two sides of a branch
4775	 controlled by a comparison against the only read from a global.
4776	 In that case, allow the set_got to be deleted, though we're
4777	 too late to do anything about the ebx save in the prologue.  */
4778      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4779    }
4780
4781  /* Prevent function calls from being scheduled before the call to mcount.
4782     In the pic_reg_used case, make sure that the got load isn't deleted.  */
4783  if (current_function_profile)
4784    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4785}
4786
4787/* Emit code to restore saved registers using MOV insns.  First register
4788   is restored from POINTER + OFFSET.  */
4789static void
4790ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4791     rtx pointer;
4792     int offset;
4793     int maybe_eh_return;
4794{
4795  int regno;
4796
4797  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4798    if (ix86_save_reg (regno, maybe_eh_return))
4799      {
4800	emit_move_insn (gen_rtx_REG (Pmode, regno),
4801			adjust_address (gen_rtx_MEM (Pmode, pointer),
4802					Pmode, offset));
4803	offset += UNITS_PER_WORD;
4804      }
4805}
4806
4807/* Restore function stack, frame, and registers.  */
4808
4809void
4810ix86_expand_epilogue (style)
4811     int style;
4812{
4813  int regno;
4814  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4815  struct ix86_frame frame;
4816  HOST_WIDE_INT offset;
4817
4818  ix86_compute_frame_layout (&frame);
4819
4820  /* Calculate start of saved registers relative to ebp.  Special care
4821     must be taken for the normal return case of a function using
4822     eh_return: the eax and edx registers are marked as saved, but not
4823     restored along this path.  */
4824  offset = frame.nregs;
4825  if (current_function_calls_eh_return && style != 2)
4826    offset -= 2;
4827  offset *= -UNITS_PER_WORD;
4828
4829  /* If we're only restoring one register and sp is not valid, then
4830     use a move instruction to restore the register, since it's
4831     less work than reloading sp and popping the register.
4832
4833     The default code results in a stack adjustment using an add/lea
4834     instruction, while this code results in a LEAVE instruction (or
4835     discrete equivalent), so it is profitable in some other cases as
4836     well, especially when there are no registers to restore.  We also
4837     use this code when TARGET_USE_LEAVE is set and there is exactly one
4838     register to pop.  This heuristic may need some tuning in the future.  */
4839  if ((!sp_valid && frame.nregs <= 1)
4840      || (TARGET_EPILOGUE_USING_MOVE
4841	  && use_fast_prologue_epilogue
4842	  && (frame.nregs > 1 || frame.to_allocate))
4843      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4844      || (frame_pointer_needed && TARGET_USE_LEAVE
4845	  && use_fast_prologue_epilogue && frame.nregs == 1)
4846      || current_function_calls_eh_return)
4847    {
4848      /* Restore registers.  We can use ebp or esp to address the memory
4849	 locations.  If both are available, default to ebp, since offsets
4850	 are known to be small.  The only exception is esp pointing directly
4851	 to the end of the block of saved registers, where we may simplify
4852	 the addressing mode.  */
4853
4854      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4855	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4856					  frame.to_allocate, style == 2);
4857      else
4858	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4859					  offset, style == 2);
4860
4861      /* eh_return epilogues need %ecx added to the stack pointer.  */
4862      if (style == 2)
4863	{
4864	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4865
4866	  if (frame_pointer_needed)
4867	    {
4868	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4869	      tmp = plus_constant (tmp, UNITS_PER_WORD);
4870	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4871
4872	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4873	      emit_move_insn (hard_frame_pointer_rtx, tmp);
4874
4875	      emit_insn (gen_pro_epilogue_adjust_stack
4876			 (stack_pointer_rtx, sa, const0_rtx));
4877	    }
4878	  else
4879	    {
4880	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4881	      tmp = plus_constant (tmp, (frame.to_allocate
4882                                         + frame.nregs * UNITS_PER_WORD));
4883	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4884	    }
4885	}
4886      else if (!frame_pointer_needed)
4887	emit_insn (gen_pro_epilogue_adjust_stack
4888		   (stack_pointer_rtx, stack_pointer_rtx,
4889		    GEN_INT (frame.to_allocate
4890			     + frame.nregs * UNITS_PER_WORD)));
4891      /* If not an i386, mov & pop is faster than "leave".  */
4892      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4893	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4894      else
4895	{
4896	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4897						    hard_frame_pointer_rtx,
4898						    const0_rtx));
4899	  if (TARGET_64BIT)
4900	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4901	  else
4902	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4903	}
4904    }
4905  else
4906    {
4907      /* First step is to deallocate the stack frame so that we can
4908	 pop the registers.  */
4909      if (!sp_valid)
4910	{
4911	  if (!frame_pointer_needed)
4912	    abort ();
4913          emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4914						    hard_frame_pointer_rtx,
4915						    GEN_INT (offset)));
4916	}
4917      else if (frame.to_allocate)
4918	emit_insn (gen_pro_epilogue_adjust_stack
4919		   (stack_pointer_rtx, stack_pointer_rtx,
4920		    GEN_INT (frame.to_allocate)));
4921
4922      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4923	if (ix86_save_reg (regno, false))
4924	  {
4925	    if (TARGET_64BIT)
4926	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4927	    else
4928	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4929	  }
4930      if (frame_pointer_needed)
4931	{
4932	  /* Leave results in shorter dependency chains on CPUs that are
4933	     able to grok it fast.  */
4934	  if (TARGET_USE_LEAVE)
4935	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4936	  else if (TARGET_64BIT)
4937	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4938	  else
4939	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4940	}
4941    }
4942
4943  /* Sibcall epilogues don't want a return instruction.  */
4944  if (style == 0)
4945    return;
4946
4947  if (current_function_pops_args && current_function_args_size)
4948    {
4949      rtx popc = GEN_INT (current_function_pops_args);
4950
4951      /* The i386 can only pop 64K bytes.  If asked to pop more, pop the
4952	 return address, do an explicit add, and jump indirectly to the
4953	 caller.  */
4954
4955      if (current_function_pops_args >= 65536)
4956	{
4957	  rtx ecx = gen_rtx_REG (SImode, 2);
4958
4959	  /* There is no "pascal" calling convention in the 64-bit ABI.  */
4960	  if (TARGET_64BIT)
4961	    abort ();
4962
4963	  emit_insn (gen_popsi1 (ecx));
4964	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4965	  emit_jump_insn (gen_return_indirect_internal (ecx));
4966	}
4967      else
4968	emit_jump_insn (gen_return_pop_internal (popc));
4969    }
4970  else
4971    emit_jump_insn (gen_return_internal ());
4972}
4973
4974/* Reset state (such as the pic register number) from the function's potential modifications.  */
4975
4976static void
4977ix86_output_function_epilogue (file, size)
4978     FILE *file ATTRIBUTE_UNUSED;
4979     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4980{
4981  if (pic_offset_table_rtx)
4982    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4983}
4984
4985/* Extract the parts of an RTL expression that is a valid memory address
4986   for an instruction.  Return 0 if the structure of the address is
4987   grossly off.  Return -1 if the address contains ASHIFT, so it is not
4988   strictly valid, but is still used for computing the length of an lea
4989   instruction.  */
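/* As an example, the address in `12(%ebx,%eax,4)' arrives as
     (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx))
	   (const_int 12))
   and decomposes into base %ebx, index %eax, scale 4, disp 12.  */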
4990
4991static int
4992ix86_decompose_address (addr, out)
4993     register rtx addr;
4994     struct ix86_address *out;
4995{
4996  rtx base = NULL_RTX;
4997  rtx index = NULL_RTX;
4998  rtx disp = NULL_RTX;
4999  HOST_WIDE_INT scale = 1;
5000  rtx scale_rtx = NULL_RTX;
5001  int retval = 1;
5002
5003  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5004    base = addr;
5005  else if (GET_CODE (addr) == PLUS)
5006    {
5007      rtx op0 = XEXP (addr, 0);
5008      rtx op1 = XEXP (addr, 1);
5009      enum rtx_code code0 = GET_CODE (op0);
5010      enum rtx_code code1 = GET_CODE (op1);
5011
5012      if (code0 == REG || code0 == SUBREG)
5013	{
5014	  if (code1 == REG || code1 == SUBREG)
5015	    index = op0, base = op1;	/* index + base */
5016	  else
5017	    base = op0, disp = op1;	/* base + displacement */
5018	}
5019      else if (code0 == MULT)
5020	{
5021	  index = XEXP (op0, 0);
5022	  scale_rtx = XEXP (op0, 1);
5023	  if (code1 == REG || code1 == SUBREG)
5024	    base = op1;			/* index*scale + base */
5025	  else
5026	    disp = op1;			/* index*scale + disp */
5027	}
5028      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5029	{
5030	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
5031	  scale_rtx = XEXP (XEXP (op0, 0), 1);
5032	  base = XEXP (op0, 1);
5033	  disp = op1;
5034	}
5035      else if (code0 == PLUS)
5036	{
5037	  index = XEXP (op0, 0);	/* index + base + disp */
5038	  base = XEXP (op0, 1);
5039	  disp = op1;
5040	}
5041      else
5042	return 0;
5043    }
5044  else if (GET_CODE (addr) == MULT)
5045    {
5046      index = XEXP (addr, 0);		/* index*scale */
5047      scale_rtx = XEXP (addr, 1);
5048    }
5049  else if (GET_CODE (addr) == ASHIFT)
5050    {
5051      rtx tmp;
5052
5053      /* We're called for lea too, which implements ashift on occasion.  */
5054      index = XEXP (addr, 0);
5055      tmp = XEXP (addr, 1);
5056      if (GET_CODE (tmp) != CONST_INT)
5057	return 0;
5058      scale = INTVAL (tmp);
5059      if ((unsigned HOST_WIDE_INT) scale > 3)
5060	return 0;
5061      scale = 1 << scale;
5062      retval = -1;
5063    }
5064  else
5065    disp = addr;			/* displacement */
5066
5067  /* Extract the integral value of scale.  */
5068  if (scale_rtx)
5069    {
5070      if (GET_CODE (scale_rtx) != CONST_INT)
5071	return 0;
5072      scale = INTVAL (scale_rtx);
5073    }
5074
5075  /* Allow the arg pointer and stack pointer as an index if there is no scaling.  */
5076  if (base && index && scale == 1
5077      && (index == arg_pointer_rtx || index == frame_pointer_rtx
5078          || index == stack_pointer_rtx))
5079    {
5080      rtx tmp = base;
5081      base = index;
5082      index = tmp;
5083    }
5084
5085  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
5086  if ((base == hard_frame_pointer_rtx
5087       || base == frame_pointer_rtx
5088       || base == arg_pointer_rtx) && !disp)
5089    disp = const0_rtx;
5090
5091  /* Special case: on K6, [%esi] causes the instruction to be vector
5092     decoded.  Avoid this by transforming it to [%esi+0].  */
5093  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5094      && base && !index && !disp
5095      && REG_P (base)
5096      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5097    disp = const0_rtx;
5098
5099  /* Special case: encode reg+reg instead of reg*2.  */
5100  if (!base && index && scale && scale == 2)
5101    base = index, scale = 1;
5102
5103  /* Special case: scaling cannot be encoded without base or displacement.  */
5104  if (!base && !disp && index && scale != 1)
5105    disp = const0_rtx;
5106
5107  out->base = base;
5108  out->index = index;
5109  out->disp = disp;
5110  out->scale = scale;
5111
5112  return retval;
5113}
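
/* Worked example (illustrative comment only): when decomposing the address
   (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8)),
   i.e. "8(%B,%A,4)", the code above fills OUT with

     out->base = (reg B), out->index = (reg A),
     out->scale = 4, out->disp = (const_int 8)

   and returns 1.  For an lea-style (ashift (reg A) (const_int 2)) it
   instead sets scale = 4 and returns -1, since ASHIFT is not a strictly
   valid address form.  */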
5114
5115/* Return the cost of the memory address X.
5116   For the i386, it is better to use a complex address than to let gcc
5117   copy the address into a reg and make a new pseudo.  But not if the
5118   address requires two regs - that would mean more pseudos with longer
5119   lifetimes.  */
5120int
5121ix86_address_cost (x)
5122     rtx x;
5123{
5124  struct ix86_address parts;
5125  int cost = 1;
5126
5127  if (!ix86_decompose_address (x, &parts))
5128    abort ();
5129
5130  if (parts.base && GET_CODE (parts.base) == SUBREG)
5131    parts.base = SUBREG_REG (parts.base);
5132  if (parts.index && GET_CODE (parts.index) == SUBREG)
5133    parts.index = SUBREG_REG (parts.index);
5134
5135  /* More complex memory references are better.  */
5136  if (parts.disp && parts.disp != const0_rtx)
5137    cost--;
5138
5139  /* Attempt to minimize number of registers in the address.  */
5140  if ((parts.base
5141       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5142      || (parts.index
5143	  && (!REG_P (parts.index)
5144	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5145    cost++;
5146
5147  if (parts.base
5148      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5149      && parts.index
5150      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5151      && parts.base != parts.index)
5152    cost++;
5153
5154  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5155     since its predecode logic can't detect the length of such instructions
5156     and they degenerate to vector decoding.  Increase the cost of such
5157     addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
5158     to split such addresses or even refuse them entirely.
5159
5160     The following addressing modes are affected:
5161      [base+scale*index]
5162      [scale*index+disp]
5163      [base+index]
5164
5165     The first and last cases may be avoidable by explicitly coding the zero
5166     displacement into the memory address, but I don't have an AMD-K6 machine
5167     handy to check this theory.  */
5168
5169  if (TARGET_K6
5170      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5171	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5172	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5173    cost += 10;
5174
5175  return cost;
5176}
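
/* Example cost computation (illustrative only): for "disp(,%index,4)"
   on the K6 with a hard register index, the code above yields
   1 (start) - 1 (nonzero disp) + 10 (K6 ModR/M 00_xxx_100b penalty) = 10,
   while a plain "disp(%base)" with a hard register base costs
   1 - 1 = 0, making the simpler address strongly preferred there.  */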
5177
5178/* If X is a machine specific address (i.e. a symbol or label being
5179   referenced as a displacement from the GOT implemented using an
5180   UNSPEC), then return the base term.  Otherwise return X.  */
5181
5182rtx
5183ix86_find_base_term (x)
5184     rtx x;
5185{
5186  rtx term;
5187
5188  if (TARGET_64BIT)
5189    {
5190      if (GET_CODE (x) != CONST)
5191	return x;
5192      term = XEXP (x, 0);
5193      if (GET_CODE (term) == PLUS
5194	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
5195	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5196	term = XEXP (term, 0);
5197      if (GET_CODE (term) != UNSPEC
5198	  || XINT (term, 1) != UNSPEC_GOTPCREL)
5199	return x;
5200
5201      term = XVECEXP (term, 0, 0);
5202
5203      if (GET_CODE (term) != SYMBOL_REF
5204	  && GET_CODE (term) != LABEL_REF)
5205	return x;
5206
5207      return term;
5208    }
5209
5210  if (GET_CODE (x) != PLUS
5211      || XEXP (x, 0) != pic_offset_table_rtx
5212      || GET_CODE (XEXP (x, 1)) != CONST)
5213    return x;
5214
5215  term = XEXP (XEXP (x, 1), 0);
5216
5217  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5218    term = XEXP (term, 0);
5219
5220  if (GET_CODE (term) != UNSPEC
5221      || XINT (term, 1) != UNSPEC_GOTOFF)
5222    return x;
5223
5224  term = XVECEXP (term, 0, 0);
5225
5226  if (GET_CODE (term) != SYMBOL_REF
5227      && GET_CODE (term) != LABEL_REF)
5228    return x;
5229
5230  return term;
5231}
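
/* Illustrative example: on x86-64, given
   (const (plus (unspec [(symbol_ref "x")] UNSPEC_GOTPCREL) (const_int 4))),
   the code above strips the CONST, the constant offset, and the UNSPEC
   wrapper and returns (symbol_ref "x") as the base term.  Anything that
   does not match these GOT forms is returned unchanged.  */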
5232
5233/* Determine if a given RTX is a valid constant.  We already know this
5234   satisfies CONSTANT_P.  */
5235
5236bool
5237legitimate_constant_p (x)
5238     rtx x;
5239{
5240  rtx inner;
5241
5242  switch (GET_CODE (x))
5243    {
5244    case SYMBOL_REF:
5245      /* TLS symbols are not constant.  */
5246      if (tls_symbolic_operand (x, Pmode))
5247	return false;
5248      break;
5249
5250    case CONST:
5251      inner = XEXP (x, 0);
5252
5253      /* Offsets of TLS symbols are never valid.
5254	 Discourage CSE from creating them.  */
5255      if (GET_CODE (inner) == PLUS
5256	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5257	return false;
5258
5259      /* Only some unspecs are valid as "constants".  */
5260      if (GET_CODE (inner) == UNSPEC)
5261	switch (XINT (inner, 1))
5262	  {
5263	  case UNSPEC_TPOFF:
5264	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5265	  default:
5266	    return false;
5267	  }
5268      break;
5269
5270    default:
5271      break;
5272    }
5273
5274  /* Otherwise we handle everything else in the move patterns.  */
5275  return true;
5276}
5277
5278/* Determine if it's legal to put X into the constant pool.  This
5279   is not possible for the address of thread-local symbols, which
5280   is checked above.  */
5281
5282static bool
5283ix86_cannot_force_const_mem (x)
5284     rtx x;
5285{
5286  return !legitimate_constant_p (x);
5287}
5288
5289/* Determine if a given RTX is a valid constant address.  */
5290
5291bool
5292constant_address_p (x)
5293     rtx x;
5294{
5295  switch (GET_CODE (x))
5296    {
5297    case LABEL_REF:
5298    case CONST_INT:
5299      return true;
5300
5301    case CONST_DOUBLE:
5302      return TARGET_64BIT;
5303
5304    case CONST:
5305      /* For Mach-O, really believe the CONST.  */
5306      if (TARGET_MACHO)
5307	return true;
5308      /* Otherwise fall through.  */
5309    case SYMBOL_REF:
5310      return !flag_pic && legitimate_constant_p (x);
5311
5312    default:
5313      return false;
5314    }
5315}
5316
5317/* Nonzero if the constant value X is a legitimate general operand
5318   when generating PIC code.  It is given that flag_pic is on and
5319   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
5320
5321bool
5322legitimate_pic_operand_p (x)
5323     rtx x;
5324{
5325  rtx inner;
5326
5327  switch (GET_CODE (x))
5328    {
5329    case CONST:
5330      inner = XEXP (x, 0);
5331
5332      /* Only some unspecs are valid as "constants".  */
5333      if (GET_CODE (inner) == UNSPEC)
5334	switch (XINT (inner, 1))
5335	  {
5336	  case UNSPEC_TPOFF:
5337	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5338	  default:
5339	    return false;
5340	  }
5341      /* FALLTHRU */
5342
5343    case SYMBOL_REF:
5344    case LABEL_REF:
5345      return legitimate_pic_address_disp_p (x);
5346
5347    default:
5348      return true;
5349    }
5350}
5351
5352/* Determine if a given CONST RTX is a valid memory displacement
5353   in PIC mode.  */
5354
5355int
5356legitimate_pic_address_disp_p (disp)
5357     register rtx disp;
5358{
5359  bool saw_plus;
5360
5361  /* In 64bit mode we can allow direct addresses of symbols and labels
5362     when they are not dynamic symbols.  */
5363  if (TARGET_64BIT)
5364    {
5365      /* TLS references should always be enclosed in UNSPEC.  */
5366      if (tls_symbolic_operand (disp, GET_MODE (disp)))
5367	return 0;
5368      if (GET_CODE (disp) == SYMBOL_REF
5369	  && ix86_cmodel == CM_SMALL_PIC
5370	  && (CONSTANT_POOL_ADDRESS_P (disp)
5371	      || SYMBOL_REF_FLAG (disp)))
5372	return 1;
5373      if (GET_CODE (disp) == LABEL_REF)
5374	return 1;
5375      if (GET_CODE (disp) == CONST
5376	  && GET_CODE (XEXP (disp, 0)) == PLUS
5377	  && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5378	       && ix86_cmodel == CM_SMALL_PIC
5379	       && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5380		   || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5381	      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5382	  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5383	  && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5384	  && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5385	return 1;
5386    }
5387  if (GET_CODE (disp) != CONST)
5388    return 0;
5389  disp = XEXP (disp, 0);
5390
5391  if (TARGET_64BIT)
5392    {
5393      /* It is unsafe to allow PLUS expressions; that would limit the
5394         allowed distance of GOT entries.  We should not need these anyway.  */
5395      if (GET_CODE (disp) != UNSPEC
5396	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
5397	return 0;
5398
5399      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5400	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5401	return 0;
5402      return 1;
5403    }
5404
5405  saw_plus = false;
5406  if (GET_CODE (disp) == PLUS)
5407    {
5408      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5409	return 0;
5410      disp = XEXP (disp, 0);
5411      saw_plus = true;
5412    }
5413
5414  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
5415  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5416    {
5417      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5418          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5419        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5420          {
5421            const char *sym_name = XSTR (XEXP (disp, 1), 0);
5422            if (strstr (sym_name, "$pb") != 0)
5423              return 1;
5424          }
5425    }
5426
5427  if (GET_CODE (disp) != UNSPEC)
5428    return 0;
5429
5430  switch (XINT (disp, 1))
5431    {
5432    case UNSPEC_GOT:
5433      if (saw_plus)
5434	return false;
5435      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5436    case UNSPEC_GOTOFF:
5437      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5438    case UNSPEC_GOTTPOFF:
5439    case UNSPEC_GOTNTPOFF:
5440    case UNSPEC_INDNTPOFF:
5441      if (saw_plus)
5442	return false;
5443      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5444    case UNSPEC_NTPOFF:
5445      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5446    case UNSPEC_DTPOFF:
5447      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5448    }
5449
5450  return 0;
5451}
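
/* Examples (illustrative only): in 32-bit PIC code,
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is a valid
   displacement when "foo" is a local symbol, while
   (const (plus (unspec [(symbol_ref "foo")] UNSPEC_GOT) (const_int 4)))
   is rejected, because an offset from a @GOT slot address makes no
   sense (the saw_plus check above).  */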
5452
5453/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5454   memory address for an instruction.  The MODE argument is the machine mode
5455   for the MEM expression that wants to use this address.
5456
5457   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
5458   convert common non-canonical forms to canonical form so that they will
5459   be recognized.  */
5460
5461int
5462legitimate_address_p (mode, addr, strict)
5463     enum machine_mode mode;
5464     register rtx addr;
5465     int strict;
5466{
5467  struct ix86_address parts;
5468  rtx base, index, disp;
5469  HOST_WIDE_INT scale;
5470  const char *reason = NULL;
5471  rtx reason_rtx = NULL_RTX;
5472
5473  if (TARGET_DEBUG_ADDR)
5474    {
5475      fprintf (stderr,
5476	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5477	       GET_MODE_NAME (mode), strict);
5478      debug_rtx (addr);
5479    }
5480
5481  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5482    {
5483      if (TARGET_DEBUG_ADDR)
5484	fprintf (stderr, "Success.\n");
5485      return TRUE;
5486    }
5487
5488  if (ix86_decompose_address (addr, &parts) <= 0)
5489    {
5490      reason = "decomposition failed";
5491      goto report_error;
5492    }
5493
5494  base = parts.base;
5495  index = parts.index;
5496  disp = parts.disp;
5497  scale = parts.scale;
5498
5499  /* Validate base register.
5500
5501     Don't allow SUBREGs here; they can lead to spill failures when the base
5502     is one word out of a two-word structure, which is represented internally
5503     as a DImode int.  */
5504
5505  if (base)
5506    {
5507      rtx reg;
5508      reason_rtx = base;
5509
5510      if (GET_CODE (base) == SUBREG)
5511	reg = SUBREG_REG (base);
5512      else
5513	reg = base;
5514
5515      if (GET_CODE (reg) != REG)
5516	{
5517	  reason = "base is not a register";
5518	  goto report_error;
5519	}
5520
5521      if (GET_MODE (base) != Pmode)
5522	{
5523	  reason = "base is not in Pmode";
5524	  goto report_error;
5525	}
5526
5527      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5528	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5529	{
5530	  reason = "base is not valid";
5531	  goto report_error;
5532	}
5533    }
5534
5535  /* Validate index register.
5536
5537     Don't allow SUBREGs here; they can lead to spill failures when the index
5538     is one word out of a two-word structure, which is represented internally
5539     as a DImode int.  */
5540
5541  if (index)
5542    {
5543      rtx reg;
5544      reason_rtx = index;
5545
5546      if (GET_CODE (index) == SUBREG)
5547	reg = SUBREG_REG (index);
5548      else
5549	reg = index;
5550
5551      if (GET_CODE (reg) != REG)
5552	{
5553	  reason = "index is not a register";
5554	  goto report_error;
5555	}
5556
5557      if (GET_MODE (index) != Pmode)
5558	{
5559	  reason = "index is not in Pmode";
5560	  goto report_error;
5561	}
5562
5563      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5564	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5565	{
5566	  reason = "index is not valid";
5567	  goto report_error;
5568	}
5569    }
5570
5571  /* Validate scale factor.  */
5572  if (scale != 1)
5573    {
5574      reason_rtx = GEN_INT (scale);
5575      if (!index)
5576	{
5577	  reason = "scale without index";
5578	  goto report_error;
5579	}
5580
5581      if (scale != 2 && scale != 4 && scale != 8)
5582	{
5583	  reason = "scale is not a valid multiplier";
5584	  goto report_error;
5585	}
5586    }
5587
5588  /* Validate displacement.  */
5589  if (disp)
5590    {
5591      reason_rtx = disp;
5592
5593      if (GET_CODE (disp) == CONST
5594	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5595	switch (XINT (XEXP (disp, 0), 1))
5596	  {
5597	  case UNSPEC_GOT:
5598	  case UNSPEC_GOTOFF:
5599	  case UNSPEC_GOTPCREL:
5600	    if (!flag_pic)
5601	      abort ();
5602	    goto is_legitimate_pic;
5603
5604	  case UNSPEC_GOTTPOFF:
5605	  case UNSPEC_GOTNTPOFF:
5606	  case UNSPEC_INDNTPOFF:
5607	  case UNSPEC_NTPOFF:
5608	  case UNSPEC_DTPOFF:
5609	    break;
5610
5611	  default:
5612	    reason = "invalid address unspec";
5613	    goto report_error;
5614	  }
5615
5616      else if (flag_pic && (SYMBOLIC_CONST (disp)
5617#if TARGET_MACHO
5618			    && !machopic_operand_p (disp)
5619#endif
5620			    ))
5621	{
5622	is_legitimate_pic:
5623	  if (TARGET_64BIT && (index || base))
5624	    {
5625	      /* foo@dtpoff(%rX) is ok.  */
5626	      if (GET_CODE (disp) != CONST
5627		  || GET_CODE (XEXP (disp, 0)) != PLUS
5628		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5629		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5630		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5631		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5632		{
5633		  reason = "non-constant pic memory reference";
5634		  goto report_error;
5635		}
5636	    }
5637	  else if (! legitimate_pic_address_disp_p (disp))
5638	    {
5639	      reason = "displacement is an invalid pic construct";
5640	      goto report_error;
5641	    }
5642
5643          /* This code used to verify that a symbolic pic displacement
5644	     includes the pic_offset_table_rtx register.
5645
5646	     While this is a good idea, unfortunately these constructs may
5647	     be created by the "adds using lea" optimization for incorrect
5648	     code like:
5649
5650	     int a;
5651	     int foo(int i)
5652	       {
5653	         return *(&a+i);
5654	       }
5655
5656	     This code is nonsensical, but results in addressing the
5657	     GOT table with a pic_offset_table_rtx base.  We can't
5658	     just refuse it easily, since it gets matched by the
5659	     "addsi3" pattern, which later gets split to lea when the
5660	     output register differs from the input.  While this
5661	     could be handled by a separate addsi pattern for this case
5662	     that never results in lea, disabling this test seems to be
5663	     the easier and correct fix for the crash.  */
5664	}
5665      else if (!CONSTANT_ADDRESS_P (disp))
5666	{
5667	  reason = "displacement is not constant";
5668	  goto report_error;
5669	}
5670      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5671	{
5672	  reason = "displacement is out of range";
5673	  goto report_error;
5674	}
5675      else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5676	{
5677	  reason = "displacement is a const_double";
5678	  goto report_error;
5679	}
5680    }
5681
5682  /* Everything looks valid.  */
5683  if (TARGET_DEBUG_ADDR)
5684    fprintf (stderr, "Success.\n");
5685  return TRUE;
5686
5687 report_error:
5688  if (TARGET_DEBUG_ADDR)
5689    {
5690      fprintf (stderr, "Error: %s\n", reason);
5691      debug_rtx (reason_rtx);
5692    }
5693  return FALSE;
5694}
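
/* Illustrative examples: "4(%ebx,%ecx,2)" satisfies every check above,
   while an address decomposed with scale == 3 is refused with
   "scale is not a valid multiplier" (only 1, 2, 4 and 8 can be encoded
   in the SIB byte).  */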
5695
5696/* Return a unique alias set for the GOT.  */
5697
5698static HOST_WIDE_INT
5699ix86_GOT_alias_set ()
5700{
5701  static HOST_WIDE_INT set = -1;
5702  if (set == -1)
5703    set = new_alias_set ();
5704  return set;
5705}
5706
5707/* Return a legitimate reference for ORIG (an address) using the
5708   register REG.  If REG is 0, a new pseudo is generated.
5709
5710   There are two types of references that must be handled:
5711
5712   1. Global data references must load the address from the GOT, via
5713      the PIC reg.  An insn is emitted to do this load, and the reg is
5714      returned.
5715
5716   2. Static data references, constant pool addresses, and code labels
5717      compute the address as an offset from the GOT, whose base is in
5718      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
5719      differentiate them from global data objects.  The returned
5720      address is the PIC reg + an unspec constant.
5721
5722   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5723   reg also appears in the address.  */
5724
5725rtx
5726legitimize_pic_address (orig, reg)
5727     rtx orig;
5728     rtx reg;
5729{
5730  rtx addr = orig;
5731  rtx new = orig;
5732  rtx base;
5733
5734#if TARGET_MACHO
5735  if (reg == 0)
5736    reg = gen_reg_rtx (Pmode);
5737  /* Use the generic Mach-O PIC machinery.  */
5738  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5739#endif
5740
5741  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5742    new = addr;
5743  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5744    {
5745      /* This symbol may be referenced via a displacement from the PIC
5746	 base address (@GOTOFF).  */
5747
5748      if (reload_in_progress)
5749	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5750      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5751      new = gen_rtx_CONST (Pmode, new);
5752      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5753
5754      if (reg != 0)
5755	{
5756	  emit_move_insn (reg, new);
5757	  new = reg;
5758	}
5759    }
5760  else if (GET_CODE (addr) == SYMBOL_REF)
5761    {
5762      if (TARGET_64BIT)
5763	{
5764	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5765	  new = gen_rtx_CONST (Pmode, new);
5766	  new = gen_rtx_MEM (Pmode, new);
5767	  RTX_UNCHANGING_P (new) = 1;
5768	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5769
5770	  if (reg == 0)
5771	    reg = gen_reg_rtx (Pmode);
5772	  /* Use gen_movsi directly; otherwise the address is loaded into a
5773	     register for CSE.  We don't want to CSE these addresses; instead
5774	     we CSE addresses from the GOT table, so skip this.  */
5775	  emit_insn (gen_movsi (reg, new));
5776	  new = reg;
5777	}
5778      else
5779	{
5780	  /* This symbol must be referenced via a load from the
5781	     Global Offset Table (@GOT).  */
5782
5783	  if (reload_in_progress)
5784	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5785	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5786	  new = gen_rtx_CONST (Pmode, new);
5787	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5788	  new = gen_rtx_MEM (Pmode, new);
5789	  RTX_UNCHANGING_P (new) = 1;
5790	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5791
5792	  if (reg == 0)
5793	    reg = gen_reg_rtx (Pmode);
5794	  emit_move_insn (reg, new);
5795	  new = reg;
5796	}
5797    }
5798  else
5799    {
5800      if (GET_CODE (addr) == CONST)
5801	{
5802	  addr = XEXP (addr, 0);
5803
5804	  /* We must match what we generated before.  Assume the only
5805	     unspecs that can get here are ours.  Not that we could do
5806	     anything with them anyway...  */
5807	  if (GET_CODE (addr) == UNSPEC
5808	      || (GET_CODE (addr) == PLUS
5809		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5810	    return orig;
5811	  if (GET_CODE (addr) != PLUS)
5812	    abort ();
5813	}
5814      if (GET_CODE (addr) == PLUS)
5815	{
5816	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5817
5818	  /* Check first to see if this is a constant offset from a @GOTOFF
5819	     symbol reference.  */
5820	  if (local_symbolic_operand (op0, Pmode)
5821	      && GET_CODE (op1) == CONST_INT)
5822	    {
5823	      if (!TARGET_64BIT)
5824		{
5825		  if (reload_in_progress)
5826		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5827		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5828					UNSPEC_GOTOFF);
5829		  new = gen_rtx_PLUS (Pmode, new, op1);
5830		  new = gen_rtx_CONST (Pmode, new);
5831		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5832
5833		  if (reg != 0)
5834		    {
5835		      emit_move_insn (reg, new);
5836		      new = reg;
5837		    }
5838		}
5839	      else
5840		{
5841		  if (INTVAL (op1) < -16*1024*1024
5842		      || INTVAL (op1) >= 16*1024*1024)
5843		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5844		}
5845	    }
5846	  else
5847	    {
5848	      base = legitimize_pic_address (XEXP (addr, 0), reg);
5849	      new  = legitimize_pic_address (XEXP (addr, 1),
5850					     base == reg ? NULL_RTX : reg);
5851
5852	      if (GET_CODE (new) == CONST_INT)
5853		new = plus_constant (base, INTVAL (new));
5854	      else
5855		{
5856		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5857		    {
5858		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5859		      new = XEXP (new, 1);
5860		    }
5861		  new = gen_rtx_PLUS (Pmode, base, new);
5862		}
5863	    }
5864	}
5865    }
5866  return new;
5867}
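
/* Sketch of the RTL produced above (illustrative only).  For a local
   symbol the result is an address computed from the PIC base:
     (plus pic_offset_table_rtx
	   (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)))
   while a global symbol becomes a load of the address from the GOT:
     (mem (plus pic_offset_table_rtx
		(const (unspec [(symbol_ref "sym")] UNSPEC_GOT))))
   On x86-64 the GOT slot is instead addressed RIP-relatively via
   UNSPEC_GOTPCREL.  */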
5868
5869static void
5870ix86_encode_section_info (decl, first)
5871     tree decl;
5872     int first ATTRIBUTE_UNUSED;
5873{
5874  bool local_p = (*targetm.binds_local_p) (decl);
5875  rtx rtl, symbol;
5876
5877  rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5878  if (GET_CODE (rtl) != MEM)
5879    return;
5880  symbol = XEXP (rtl, 0);
5881  if (GET_CODE (symbol) != SYMBOL_REF)
5882    return;
5883
5884  /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5885     symbol so that we may access it directly in the GOT.  */
5886
5887  if (flag_pic)
5888    SYMBOL_REF_FLAG (symbol) = local_p;
5889
5890  /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5891     "local dynamic", "initial exec" or "local exec" TLS models
5892     respectively.  */
5893
5894  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5895    {
5896      const char *symbol_str;
5897      char *newstr;
5898      size_t len;
5899      enum tls_model kind = decl_tls_model (decl);
5900
5901      if (TARGET_64BIT && ! flag_pic)
5902	{
5903	  /* x86-64 doesn't allow non-pic code for shared libraries,
5904	     so don't generate GD/LD TLS models for non-pic code.  */
5905	  switch (kind)
5906	    {
5907	    case TLS_MODEL_GLOBAL_DYNAMIC:
5908	      kind = TLS_MODEL_INITIAL_EXEC; break;
5909	    case TLS_MODEL_LOCAL_DYNAMIC:
5910	      kind = TLS_MODEL_LOCAL_EXEC; break;
5911	    default:
5912	      break;
5913	    }
5914	}
5915
5916      symbol_str = XSTR (symbol, 0);
5917
5918      if (symbol_str[0] == '%')
5919	{
5920	  if (symbol_str[1] == tls_model_chars[kind])
5921	    return;
5922	  symbol_str += 2;
5923	}
5924      len = strlen (symbol_str) + 1;
5925      newstr = alloca (len + 2);
5926
5927      newstr[0] = '%';
5928      newstr[1] = tls_model_chars[kind];
5929      memcpy (newstr + 2, symbol_str, len);
5930
5931      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5932    }
5933}
5934
5935/* Undo the above when printing symbol names.  */
5936
5937static const char *
5938ix86_strip_name_encoding (str)
5939     const char *str;
5940{
5941  if (str[0] == '%')
5942    str += 2;
5943  if (str [0] == '*')
5944    str += 1;
5945  return str;
5946}
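
/* For example (illustrative): a TLS symbol encoded as "%Gfoo" by
   ix86_encode_section_info comes back as "foo", and a user-labeled
   "*bar" comes back as "bar".  */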
5947
5948/* Load the thread pointer into a register.  */
5949
5950static rtx
5951get_thread_pointer ()
5952{
5953  rtx tp;
5954
5955  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5956  tp = gen_rtx_MEM (Pmode, tp);
5957  RTX_UNCHANGING_P (tp) = 1;
5958  set_mem_alias_set (tp, ix86_GOT_alias_set ());
5959  tp = force_reg (Pmode, tp);
5960
5961  return tp;
5962}
5963
5964/* Try machine-dependent ways of modifying an illegitimate address
5965   to be legitimate.  If we find one, return the new, valid address.
5966   This macro is used in only one place: `memory_address' in explow.c.
5967
5968   OLDX is the address as it was before break_out_memory_refs was called.
5969   In some cases it is useful to look at this to decide what needs to be done.
5970
5971   MODE and WIN are passed so that this macro can use
5972   GO_IF_LEGITIMATE_ADDRESS.
5973
5974   It is always safe for this macro to do nothing.  It exists to recognize
5975   opportunities to optimize the output.
5976
5977   For the 80386, we handle X+REG by loading X into a register R and
5978   using R+REG.  R will go in a general reg and indexing will be used.
5979   However, if REG is a broken-out memory address or multiplication,
5980   nothing needs to be done because REG can certainly go in a general reg.
5981
5982   When -fpic is used, special handling is needed for symbolic references.
5983   See comments by legitimize_pic_address in i386.c for details.  */
5984
5985rtx
5986legitimize_address (x, oldx, mode)
5987     register rtx x;
5988     register rtx oldx ATTRIBUTE_UNUSED;
5989     enum machine_mode mode;
5990{
5991  int changed = 0;
5992  unsigned log;
5993
5994  if (TARGET_DEBUG_ADDR)
5995    {
5996      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5997	       GET_MODE_NAME (mode));
5998      debug_rtx (x);
5999    }
6000
6001  log = tls_symbolic_operand (x, mode);
6002  if (log)
6003    {
6004      rtx dest, base, off, pic;
6005      int type;
6006
6007      switch (log)
6008        {
6009        case TLS_MODEL_GLOBAL_DYNAMIC:
6010	  dest = gen_reg_rtx (Pmode);
6011	  if (TARGET_64BIT)
6012	    {
6013	      rtx rax = gen_rtx_REG (Pmode, 0), insns;
6014
6015	      start_sequence ();
6016	      emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6017	      insns = get_insns ();
6018	      end_sequence ();
6019
6020	      emit_libcall_block (insns, dest, rax, x);
6021	    }
6022	  else
6023	    emit_insn (gen_tls_global_dynamic_32 (dest, x));
6024	  break;
6025
6026        case TLS_MODEL_LOCAL_DYNAMIC:
6027	  base = gen_reg_rtx (Pmode);
6028	  if (TARGET_64BIT)
6029	    {
6030	      rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6031
6032	      start_sequence ();
6033	      emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6034	      insns = get_insns ();
6035	      end_sequence ();
6036
6037	      note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6038	      note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6039	      emit_libcall_block (insns, base, rax, note);
6040	    }
6041	  else
6042	    emit_insn (gen_tls_local_dynamic_base_32 (base));
6043
6044	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6045	  off = gen_rtx_CONST (Pmode, off);
6046
6047	  return gen_rtx_PLUS (Pmode, base, off);
6048
6049        case TLS_MODEL_INITIAL_EXEC:
6050	  if (TARGET_64BIT)
6051	    {
6052	      pic = NULL;
6053	      type = UNSPEC_GOTNTPOFF;
6054	    }
6055	  else if (flag_pic)
6056	    {
6057	      if (reload_in_progress)
6058		regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6059	      pic = pic_offset_table_rtx;
6060	      type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6061	    }
6062	  else if (!TARGET_GNU_TLS)
6063	    {
6064	      pic = gen_reg_rtx (Pmode);
6065	      emit_insn (gen_set_got (pic));
6066	      type = UNSPEC_GOTTPOFF;
6067	    }
6068	  else
6069	    {
6070	      pic = NULL;
6071	      type = UNSPEC_INDNTPOFF;
6072	    }
6073
6074	  base = get_thread_pointer ();
6075
6076	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6077	  off = gen_rtx_CONST (Pmode, off);
6078	  if (pic)
6079	    off = gen_rtx_PLUS (Pmode, pic, off);
6080	  off = gen_rtx_MEM (Pmode, off);
6081	  RTX_UNCHANGING_P (off) = 1;
6082	  set_mem_alias_set (off, ix86_GOT_alias_set ());
6083	  dest = gen_reg_rtx (Pmode);
6084
6085	  if (TARGET_64BIT || TARGET_GNU_TLS)
6086	    {
6087	      emit_move_insn (dest, off);
6088	      return gen_rtx_PLUS (Pmode, base, dest);
6089	    }
6090	  else
6091	    emit_insn (gen_subsi3 (dest, base, off));
6092	  break;
6093
6094        case TLS_MODEL_LOCAL_EXEC:
6095	  base = get_thread_pointer ();
6096
6097	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6098				(TARGET_64BIT || TARGET_GNU_TLS)
6099				? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6100	  off = gen_rtx_CONST (Pmode, off);
6101
6102	  if (TARGET_64BIT || TARGET_GNU_TLS)
6103	    return gen_rtx_PLUS (Pmode, base, off);
6104	  else
6105	    {
6106	      dest = gen_reg_rtx (Pmode);
6107	      emit_insn (gen_subsi3 (dest, base, off));
6108	    }
6109	  break;
6110
6111	default:
6112	  abort ();
6113        }
6114
6115      return dest;
6116    }
6117
6118  if (flag_pic && SYMBOLIC_CONST (x))
6119    return legitimize_pic_address (x, 0);
6120
6121  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6122  if (GET_CODE (x) == ASHIFT
6123      && GET_CODE (XEXP (x, 1)) == CONST_INT
6124      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6125    {
6126      changed = 1;
6127      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6128			GEN_INT (1 << log));
6129    }
6130
6131  if (GET_CODE (x) == PLUS)
6132    {
6133      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6134
6135      if (GET_CODE (XEXP (x, 0)) == ASHIFT
6136	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6137	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6138	{
6139	  changed = 1;
6140	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
6141				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6142				      GEN_INT (1 << log));
6143	}
6144
6145      if (GET_CODE (XEXP (x, 1)) == ASHIFT
6146	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6147	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6148	{
6149	  changed = 1;
6150	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
6151				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6152				      GEN_INT (1 << log));
6153	}
6154
6155      /* Put multiply first if it isn't already.  */
6156      if (GET_CODE (XEXP (x, 1)) == MULT)
6157	{
6158	  rtx tmp = XEXP (x, 0);
6159	  XEXP (x, 0) = XEXP (x, 1);
6160	  XEXP (x, 1) = tmp;
6161	  changed = 1;
6162	}
6163
6164      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6165	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
6166	 created by virtual register instantiation, register elimination, and
6167	 similar optimizations.  */
6168      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6169	{
6170	  changed = 1;
6171	  x = gen_rtx_PLUS (Pmode,
6172			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
6173					  XEXP (XEXP (x, 1), 0)),
6174			    XEXP (XEXP (x, 1), 1));
6175	}
6176
6177      /* Canonicalize
6178	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6179	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
6180      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6181	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6182	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6183	       && CONSTANT_P (XEXP (x, 1)))
6184	{
6185	  rtx constant;
6186	  rtx other = NULL_RTX;
6187
6188	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6189	    {
6190	      constant = XEXP (x, 1);
6191	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6192	    }
6193	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6194	    {
6195	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6196	      other = XEXP (x, 1);
6197	    }
6198	  else
6199	    constant = 0;
6200
6201	  if (constant)
6202	    {
6203	      changed = 1;
6204	      x = gen_rtx_PLUS (Pmode,
6205				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6206					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
6207				plus_constant (other, INTVAL (constant)));
6208	    }
6209	}
6210
6211      if (changed && legitimate_address_p (mode, x, FALSE))
6212	return x;
6213
6214      if (GET_CODE (XEXP (x, 0)) == MULT)
6215	{
6216	  changed = 1;
6217	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6218	}
6219
6220      if (GET_CODE (XEXP (x, 1)) == MULT)
6221	{
6222	  changed = 1;
6223	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6224	}
6225
6226      if (changed
6227	  && GET_CODE (XEXP (x, 1)) == REG
6228	  && GET_CODE (XEXP (x, 0)) == REG)
6229	return x;
6230
6231      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6232	{
6233	  changed = 1;
6234	  x = legitimize_pic_address (x, 0);
6235	}
6236
6237      if (changed && legitimate_address_p (mode, x, FALSE))
6238	return x;
6239
6240      if (GET_CODE (XEXP (x, 0)) == REG)
6241	{
6242	  register rtx temp = gen_reg_rtx (Pmode);
6243	  register rtx val  = force_operand (XEXP (x, 1), temp);
6244	  if (val != temp)
6245	    emit_move_insn (temp, val);
6246
6247	  XEXP (x, 1) = temp;
6248	  return x;
6249	}
6250
6251      else if (GET_CODE (XEXP (x, 1)) == REG)
6252	{
6253	  register rtx temp = gen_reg_rtx (Pmode);
6254	  register rtx val  = force_operand (XEXP (x, 0), temp);
6255	  if (val != temp)
6256	    emit_move_insn (temp, val);
6257
6258	  XEXP (x, 0) = temp;
6259	  return x;
6260	}
6261    }
6262
6263  return x;
6264}
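
/* Example transformation (illustrative only): given
   x = (plus (reg B) (ashift (reg A) (const_int 2))),
   the canonicalizations above rewrite the shift as a multiply and put
   the multiply first, yielding
   (plus (mult (reg A) (const_int 4)) (reg B)),
   which GO_IF_LEGITIMATE_ADDRESS accepts as "(%B,%A,4)".  */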
6265
6266/* Print an integer constant expression in assembler syntax.  Addition
6267   and subtraction are the only arithmetic that may appear in these
6268   expressions.  FILE is the stdio stream to write to, X is the rtx, and
6269   CODE is the operand print code from the output string.  */
6270
6271static void
6272output_pic_addr_const (file, x, code)
6273     FILE *file;
6274     rtx x;
6275     int code;
6276{
6277  char buf[256];
6278
6279  switch (GET_CODE (x))
6280    {
6281    case PC:
6282      if (flag_pic)
6283	putc ('.', file);
6284      else
6285	abort ();
6286      break;
6287
6288    case SYMBOL_REF:
6289      assemble_name (file, XSTR (x, 0));
6290      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6291	fputs ("@PLT", file);
6292      break;
6293
6294    case LABEL_REF:
6295      x = XEXP (x, 0);
6296      /* FALLTHRU */
6297    case CODE_LABEL:
6298      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6299      assemble_name (asm_out_file, buf);
6300      break;
6301
6302    case CONST_INT:
6303      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6304      break;
6305
6306    case CONST:
6307      /* This used to output parentheses around the expression,
6308	 but that does not work on the 386 (either ATT or BSD assembler).  */
6309      output_pic_addr_const (file, XEXP (x, 0), code);
6310      break;
6311
6312    case CONST_DOUBLE:
6313      if (GET_MODE (x) == VOIDmode)
6314	{
6315	  /* We can use %d if the number is <32 bits and positive.  */
6316	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6317	    fprintf (file, "0x%lx%08lx",
6318		     (unsigned long) CONST_DOUBLE_HIGH (x),
6319		     (unsigned long) CONST_DOUBLE_LOW (x));
6320	  else
6321	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6322	}
6323      else
6324	/* We can't handle floating point constants;
6325	   PRINT_OPERAND must handle them.  */
6326	output_operand_lossage ("floating constant misused");
6327      break;
6328
6329    case PLUS:
6330      /* Some assemblers need integer constants to appear first.  */
6331      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6332	{
6333	  output_pic_addr_const (file, XEXP (x, 0), code);
6334	  putc ('+', file);
6335	  output_pic_addr_const (file, XEXP (x, 1), code);
6336	}
6337      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6338	{
6339	  output_pic_addr_const (file, XEXP (x, 1), code);
6340	  putc ('+', file);
6341	  output_pic_addr_const (file, XEXP (x, 0), code);
6342	}
6343      else
6344	abort ();
6345      break;
6346
6347    case MINUS:
6348      if (!TARGET_MACHO)
6349	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6350      output_pic_addr_const (file, XEXP (x, 0), code);
6351      putc ('-', file);
6352      output_pic_addr_const (file, XEXP (x, 1), code);
6353      if (!TARGET_MACHO)
6354	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6355      break;
6356
6357     case UNSPEC:
6358       if (XVECLEN (x, 0) != 1)
6359	 abort ();
6360       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6361       switch (XINT (x, 1))
6362	{
6363	case UNSPEC_GOT:
6364	  fputs ("@GOT", file);
6365	  break;
6366	case UNSPEC_GOTOFF:
6367	  fputs ("@GOTOFF", file);
6368	  break;
6369	case UNSPEC_GOTPCREL:
6370	  fputs ("@GOTPCREL(%rip)", file);
6371	  break;
6372	case UNSPEC_GOTTPOFF:
6373	  /* FIXME: This might be @TPOFF in Sun ld too.  */
6374	  fputs ("@GOTTPOFF", file);
6375	  break;
6376	case UNSPEC_TPOFF:
6377	  fputs ("@TPOFF", file);
6378	  break;
6379	case UNSPEC_NTPOFF:
6380	  if (TARGET_64BIT)
6381	    fputs ("@TPOFF", file);
6382	  else
6383	    fputs ("@NTPOFF", file);
6384	  break;
6385	case UNSPEC_DTPOFF:
6386	  fputs ("@DTPOFF", file);
6387	  break;
6388	case UNSPEC_GOTNTPOFF:
6389	  if (TARGET_64BIT)
6390	    fputs ("@GOTTPOFF(%rip)", file);
6391	  else
6392	    fputs ("@GOTNTPOFF", file);
6393	  break;
6394	case UNSPEC_INDNTPOFF:
6395	  fputs ("@INDNTPOFF", file);
6396	  break;
6397	default:
6398	  output_operand_lossage ("invalid UNSPEC as operand");
6399	  break;
6400	}
6401       break;
6402
6403    default:
6404      output_operand_lossage ("invalid expression as operand");
6405    }
6406}
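
/* Example output (illustrative): the expression
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is printed as
   "foo@GOTOFF", and on x86-64 an UNSPEC_GOTPCREL reference is printed
   as "foo@GOTPCREL(%rip)".  */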
6407
6408/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6409   We need to handle our special PIC relocations.  */
6410
6411void
6412i386_dwarf_output_addr_const (file, x)
6413     FILE *file;
6414     rtx x;
6415{
6416#ifdef ASM_QUAD
6417  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6418#else
6419  if (TARGET_64BIT)
6420    abort ();
6421  fprintf (file, "%s", ASM_LONG);
6422#endif
6423  if (flag_pic)
6424    output_pic_addr_const (file, x, '\0');
6425  else
6426    output_addr_const (file, x);
6427  fputc ('\n', file);
6428}
6429
6430/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6431   We need to emit DTP-relative relocations.  */
6432
6433void
6434i386_output_dwarf_dtprel (file, size, x)
6435     FILE *file;
6436     int size;
6437     rtx x;
6438{
6439  fputs (ASM_LONG, file);
6440  output_addr_const (file, x);
6441  fputs ("@DTPOFF", file);
6442  switch (size)
6443    {
6444    case 4:
6445      break;
6446    case 8:
6447      fputs (", 0", file);
6448      break;
6449    default:
6450      abort ();
6451   }
6452}
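
/* For instance (illustrative): with size == 4 this emits
	.long	sym@DTPOFF
   and with size == 8 it pads the upper half with zero:
	.long	sym@DTPOFF, 0
   (ASM_LONG supplies the ".long" directive string).  */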
6453
6454/* In the name of slightly smaller debug output, and to cater to
6455   general assembler lossage, recognize PIC+GOTOFF and turn it back
6456   into a direct symbol reference.  */
6457
6458rtx
6459i386_simplify_dwarf_addr (orig_x)
6460     rtx orig_x;
6461{
6462  rtx x = orig_x, y;
6463
6464  if (GET_CODE (x) == MEM)
6465    x = XEXP (x, 0);
6466
6467  if (TARGET_64BIT)
6468    {
6469      if (GET_CODE (x) != CONST
6470	  || GET_CODE (XEXP (x, 0)) != UNSPEC
6471	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6472	  || GET_CODE (orig_x) != MEM)
6473	return orig_x;
6474      return XVECEXP (XEXP (x, 0), 0, 0);
6475    }
6476
6477  if (GET_CODE (x) != PLUS
6478      || GET_CODE (XEXP (x, 1)) != CONST)
6479    return orig_x;
6480
6481  if (GET_CODE (XEXP (x, 0)) == REG
6482      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6483    /* %ebx + GOT/GOTOFF */
6484    y = NULL;
6485  else if (GET_CODE (XEXP (x, 0)) == PLUS)
6486    {
6487      /* %ebx + %reg * scale + GOT/GOTOFF */
6488      y = XEXP (x, 0);
6489      if (GET_CODE (XEXP (y, 0)) == REG
6490	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6491	y = XEXP (y, 1);
6492      else if (GET_CODE (XEXP (y, 1)) == REG
6493	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6494	y = XEXP (y, 0);
6495      else
6496	return orig_x;
6497      if (GET_CODE (y) != REG
6498	  && GET_CODE (y) != MULT
6499	  && GET_CODE (y) != ASHIFT)
6500	return orig_x;
6501    }
6502  else
6503    return orig_x;
6504
6505  x = XEXP (XEXP (x, 1), 0);
6506  if (GET_CODE (x) == UNSPEC
6507      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6508	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6509    {
6510      if (y)
6511	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6512      return XVECEXP (x, 0, 0);
6513    }
6514
6515  if (GET_CODE (x) == PLUS
6516      && GET_CODE (XEXP (x, 0)) == UNSPEC
6517      && GET_CODE (XEXP (x, 1)) == CONST_INT
6518      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6519	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6520	      && GET_CODE (orig_x) != MEM)))
6521    {
6522      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6523      if (y)
6524	return gen_rtx_PLUS (Pmode, y, x);
6525      return x;
6526    }
6527
6528  return orig_x;
6529}
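
/* Example (illustrative): the PIC address
   (plus (reg %ebx) (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)))
   simplifies back to (symbol_ref "sym") for debug output, and a GOT
   load (mem (plus %ebx (const (unspec [...] UNSPEC_GOT)))) likewise
   yields the bare symbol.  */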
6530
6531static void
6532put_condition_code (code, mode, reverse, fp, file)
6533     enum rtx_code code;
6534     enum machine_mode mode;
6535     int reverse, fp;
6536     FILE *file;
6537{
6538  const char *suffix;
6539
6540  if (mode == CCFPmode || mode == CCFPUmode)
6541    {
6542      enum rtx_code second_code, bypass_code;
6543      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6544      if (bypass_code != NIL || second_code != NIL)
6545	abort ();
6546      code = ix86_fp_compare_code_to_integer (code);
6547      mode = CCmode;
6548    }
6549  if (reverse)
6550    code = reverse_condition (code);
6551
6552  switch (code)
6553    {
6554    case EQ:
6555      suffix = "e";
6556      break;
6557    case NE:
6558      suffix = "ne";
6559      break;
6560    case GT:
6561      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6562	abort ();
6563      suffix = "g";
6564      break;
6565    case GTU:
6566      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6567	 Those same assemblers have the same but opposite lossage on cmov.  */
6568      if (mode != CCmode)
6569	abort ();
6570      suffix = fp ? "nbe" : "a";
6571      break;
6572    case LT:
6573      if (mode == CCNOmode || mode == CCGOCmode)
6574	suffix = "s";
6575      else if (mode == CCmode || mode == CCGCmode)
6576	suffix = "l";
6577      else
6578	abort ();
6579      break;
6580    case LTU:
6581      if (mode != CCmode)
6582	abort ();
6583      suffix = "b";
6584      break;
6585    case GE:
6586      if (mode == CCNOmode || mode == CCGOCmode)
6587	suffix = "ns";
6588      else if (mode == CCmode || mode == CCGCmode)
6589	suffix = "ge";
6590      else
6591	abort ();
6592      break;
6593    case GEU:
6594      /* ??? As above.  */
6595      if (mode != CCmode)
6596	abort ();
6597      suffix = fp ? "nb" : "ae";
6598      break;
6599    case LE:
6600      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6601	abort ();
6602      suffix = "le";
6603      break;
6604    case LEU:
6605      if (mode != CCmode)
6606	abort ();
6607      suffix = "be";
6608      break;
6609    case UNORDERED:
6610      suffix = fp ? "u" : "p";
6611      break;
6612    case ORDERED:
6613      suffix = fp ? "nu" : "np";
6614      break;
6615    default:
6616      abort ();
6617    }
6618  fputs (suffix, file);
6619}
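
/* Examples (illustrative): GT in CCmode prints "g"; with REVERSE set it
   is first turned into LE and prints "le"; GTU on a floating-point
   comparison prints "nbe" to sidestep the fcmov assembler lossage noted
   above.  */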
6620
6621void
6622print_reg (x, code, file)
6623     rtx x;
6624     int code;
6625     FILE *file;
6626{
6627  if (REGNO (x) == ARG_POINTER_REGNUM
6628      || REGNO (x) == FRAME_POINTER_REGNUM
6629      || REGNO (x) == FLAGS_REG
6630      || REGNO (x) == FPSR_REG)
6631    abort ();
6632
6633  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6634    putc ('%', file);
6635
6636  if (code == 'w' || MMX_REG_P (x))
6637    code = 2;
6638  else if (code == 'b')
6639    code = 1;
6640  else if (code == 'k')
6641    code = 4;
6642  else if (code == 'q')
6643    code = 8;
6644  else if (code == 'y')
6645    code = 3;
6646  else if (code == 'h')
6647    code = 0;
6648  else
6649    code = GET_MODE_SIZE (GET_MODE (x));
6650
6651  /* Irritatingly, AMD extended registers use a different naming convention
6652     from the normal registers.  */
6653  if (REX_INT_REG_P (x))
6654    {
6655      if (!TARGET_64BIT)
6656	abort ();
6657      switch (code)
6658	{
6659	  case 0:
6660	    error ("extended registers have no high halves");
6661	    break;
6662	  case 1:
6663	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6664	    break;
6665	  case 2:
6666	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6667	    break;
6668	  case 4:
6669	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6670	    break;
6671	  case 8:
6672	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6673	    break;
6674	  default:
6675	    error ("unsupported operand size for extended register");
6676	    break;
6677	}
6678      return;
6679    }
6680  switch (code)
6681    {
6682    case 3:
6683      if (STACK_TOP_P (x))
6684	{
6685	  fputs ("st(0)", file);
6686	  break;
6687	}
6688      /* FALLTHRU */
6689    case 8:
6690    case 4:
6691    case 12:
6692      if (! ANY_FP_REG_P (x))
6693	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6694      /* FALLTHRU */
6695    case 16:
6696    case 2:
6697      fputs (hi_reg_name[REGNO (x)], file);
6698      break;
6699    case 1:
6700      fputs (qi_reg_name[REGNO (x)], file);
6701      break;
6702    case 0:
6703      fputs (qi_high_reg_name[REGNO (x)], file);
6704      break;
6705    default:
6706      abort ();
6707    }
6708}
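
/* Examples (illustrative, assuming ATT syntax): for hard register 0,
   code 'b' prints "%al", 'w' prints "%ax" and 'k' prints "%eax"; for an
   AMD extended register such as r8, code 4 prints "r8d" (handled in the
   REX branch above).  */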
6709
6710/* Locate some local-dynamic symbol still in use by this function
6711   so that we can print its name in some tls_local_dynamic_base
6712   pattern.  */
6713
6714static const char *
6715get_some_local_dynamic_name ()
6716{
6717  rtx insn;
6718
6719  if (cfun->machine->some_ld_name)
6720    return cfun->machine->some_ld_name;
6721
6722  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6723    if (INSN_P (insn)
6724	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6725      return cfun->machine->some_ld_name;
6726
6727  abort ();
6728}
6729
6730static int
6731get_some_local_dynamic_name_1 (px, data)
6732     rtx *px;
6733     void *data ATTRIBUTE_UNUSED;
6734{
6735  rtx x = *px;
6736
6737  if (GET_CODE (x) == SYMBOL_REF
6738      && local_dynamic_symbolic_operand (x, Pmode))
6739    {
6740      cfun->machine->some_ld_name = XSTR (x, 0);
6741      return 1;
6742    }
6743
6744  return 0;
6745}
6746
6747/* Meaning of CODE:
6748   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6749   C -- print opcode suffix for set/cmov insn.
6750   c -- like C, but print reversed condition
6751   F,f -- likewise, but for floating-point.
6752   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
6753        otherwise nothing
6754   R -- print the prefix for register names.
6755   z -- print the opcode suffix for the size of the current operand.
6756   * -- print a star (in certain assembler syntax)
6757   A -- print an absolute memory reference.
6758   w -- print the operand as if it's a "word" (HImode) even if it isn't.
6759   s -- print a shift double count, followed by the assembler's argument
6760	delimiter.
6761   b -- print the QImode name of the register for the indicated operand.
6762	%b0 would print %al if operands[0] is reg 0.
6763   w --  likewise, print the HImode name of the register.
6764   k --  likewise, print the SImode name of the register.
6765   q --  likewise, print the DImode name of the register.
6766   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6767   y -- print "st(0)" instead of "st" as a register.
6768   D -- print condition for SSE cmp instruction.
6769   P -- if PIC, print an @PLT suffix.
6770   X -- don't print any sort of PIC '@' suffix for a symbol.
6771   & -- print some in-use local-dynamic symbol name.
6772 */
6773
6774void
6775print_operand (file, x, code)
6776     FILE *file;
6777     rtx x;
6778     int code;
6779{
6780  if (code)
6781    {
6782      switch (code)
6783	{
6784	case '*':
6785	  if (ASSEMBLER_DIALECT == ASM_ATT)
6786	    putc ('*', file);
6787	  return;
6788
6789	case '&':
6790	  assemble_name (file, get_some_local_dynamic_name ());
6791	  return;
6792
6793	case 'A':
6794	  if (ASSEMBLER_DIALECT == ASM_ATT)
6795	    putc ('*', file);
6796	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
6797	    {
6798	      /* Intel syntax.  For absolute addresses, registers should not
6799		 be surrounded by brackets.  */
6800	      if (GET_CODE (x) != REG)
6801		{
6802		  putc ('[', file);
6803		  PRINT_OPERAND (file, x, 0);
6804		  putc (']', file);
6805		  return;
6806		}
6807	    }
6808	  else
6809	    abort ();
6810
6811	  PRINT_OPERAND (file, x, 0);
6812	  return;
6813
6814
6815	case 'L':
6816	  if (ASSEMBLER_DIALECT == ASM_ATT)
6817	    putc ('l', file);
6818	  return;
6819
6820	case 'W':
6821	  if (ASSEMBLER_DIALECT == ASM_ATT)
6822	    putc ('w', file);
6823	  return;
6824
6825	case 'B':
6826	  if (ASSEMBLER_DIALECT == ASM_ATT)
6827	    putc ('b', file);
6828	  return;
6829
6830	case 'Q':
6831	  if (ASSEMBLER_DIALECT == ASM_ATT)
6832	    putc ('l', file);
6833	  return;
6834
6835	case 'S':
6836	  if (ASSEMBLER_DIALECT == ASM_ATT)
6837	    putc ('s', file);
6838	  return;
6839
6840	case 'T':
6841	  if (ASSEMBLER_DIALECT == ASM_ATT)
6842	    putc ('t', file);
6843	  return;
6844
6845	case 'z':
6846	  /* 387 opcodes don't get size suffixes if the operands are
6847	     registers.  */
6848	  if (STACK_REG_P (x))
6849	    return;
6850
6851	  /* Likewise if using Intel opcodes.  */
6852	  if (ASSEMBLER_DIALECT == ASM_INTEL)
6853	    return;
6854
6855	  /* This is the size of op from size of operand.  */
6856	  switch (GET_MODE_SIZE (GET_MODE (x)))
6857	    {
6858	    case 2:
6859#ifdef HAVE_GAS_FILDS_FISTS
6860	      putc ('s', file);
6861#endif
6862	      return;
6863
6864	    case 4:
6865	      if (GET_MODE (x) == SFmode)
6866		{
6867		  putc ('s', file);
6868		  return;
6869		}
6870	      else
6871		putc ('l', file);
6872	      return;
6873
6874	    case 12:
6875	    case 16:
6876	      putc ('t', file);
6877	      return;
6878
6879	    case 8:
6880	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6881		{
6882#ifdef GAS_MNEMONICS
6883		  putc ('q', file);
6884#else
6885		  putc ('l', file);
6886		  putc ('l', file);
6887#endif
6888		}
6889	      else
6890	        putc ('l', file);
6891	      return;
6892
6893	    default:
6894	      abort ();
6895	    }
6896
6897	case 'b':
6898	case 'w':
6899	case 'k':
6900	case 'q':
6901	case 'h':
6902	case 'y':
6903	case 'X':
6904	case 'P':
6905	  break;
6906
6907	case 's':
6908	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6909	    {
6910	      PRINT_OPERAND (file, x, 0);
6911	      putc (',', file);
6912	    }
6913	  return;
6914
6915	case 'D':
6916	  /* A little bit of braindamage here.  The SSE compare instructions
6917	     use completely different names for the comparisons than the
6918	     fp conditional moves do.  */
6919	  switch (GET_CODE (x))
6920	    {
6921	    case EQ:
6922	    case UNEQ:
6923	      fputs ("eq", file);
6924	      break;
6925	    case LT:
6926	    case UNLT:
6927	      fputs ("lt", file);
6928	      break;
6929	    case LE:
6930	    case UNLE:
6931	      fputs ("le", file);
6932	      break;
6933	    case UNORDERED:
6934	      fputs ("unord", file);
6935	      break;
6936	    case NE:
6937	    case LTGT:
6938	      fputs ("neq", file);
6939	      break;
6940	    case UNGE:
6941	    case GE:
6942	      fputs ("nlt", file);
6943	      break;
6944	    case UNGT:
6945	    case GT:
6946	      fputs ("nle", file);
6947	      break;
6948	    case ORDERED:
6949	      fputs ("ord", file);
6950	      break;
6951	    default:
6952	      abort ();
6953	      break;
6954	    }
6955	  return;
6956	case 'O':
6957#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6958	  if (ASSEMBLER_DIALECT == ASM_ATT)
6959	    {
6960	      switch (GET_MODE (x))
6961		{
6962		case HImode: putc ('w', file); break;
6963		case SImode:
6964		case SFmode: putc ('l', file); break;
6965		case DImode:
6966		case DFmode: putc ('q', file); break;
6967		default: abort ();
6968		}
6969	      putc ('.', file);
6970	    }
6971#endif
6972	  return;
6973	case 'C':
6974	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6975	  return;
6976	case 'F':
6977#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6978	  if (ASSEMBLER_DIALECT == ASM_ATT)
6979	    putc ('.', file);
6980#endif
6981	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6982	  return;
6983
6984	  /* Like above, but reverse condition */
6985	case 'c':
6986	  /* Check to see if argument to %c is really a constant
6987	     and not a condition code which needs to be reversed.  */
6988	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6989	    {
6990	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6991	      return;
6992	    }
6993	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6994	  return;
6995	case 'f':
6996#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
6997	  if (ASSEMBLER_DIALECT == ASM_ATT)
6998	    putc ('.', file);
6999#endif
7000	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7001	  return;
7002	case '+':
7003	  {
7004	    rtx x;
7005
7006	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7007	      return;
7008
7009	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7010	    if (x)
7011	      {
7012		int pred_val = INTVAL (XEXP (x, 0));
7013
7014		if (pred_val < REG_BR_PROB_BASE * 45 / 100
7015		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
7016		  {
7017		    int taken = pred_val > REG_BR_PROB_BASE / 2;
7018		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
7019
7020		    /* Emit hints only in the case the default branch prediction
7021		       heuristics would fail.  */
7022		    if (taken != cputaken)
7023		      {
7024			/* We use 3e (DS) prefix for taken branches and
7025			   2e (CS) prefix for not taken branches.  */
7026			if (taken)
7027			  fputs ("ds ; ", file);
7028			else
7029			  fputs ("cs ; ", file);
7030		      }
7031		  }
7032	      }
7033	    return;
7034	  }
7035	default:
7036	    output_operand_lossage ("invalid operand code `%c'", code);
7037	}
7038    }
7039
7040  if (GET_CODE (x) == REG)
7041    {
7042      PRINT_REG (x, code, file);
7043    }
7044
7045  else if (GET_CODE (x) == MEM)
7046    {
7047      /* No `byte ptr' prefix for call instructions.  */
7048      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7049	{
7050	  const char * size;
7051	  switch (GET_MODE_SIZE (GET_MODE (x)))
7052	    {
7053	    case 1: size = "BYTE"; break;
7054	    case 2: size = "WORD"; break;
7055	    case 4: size = "DWORD"; break;
7056	    case 8: size = "QWORD"; break;
7057	    case 12: size = "XWORD"; break;
7058	    case 16: size = "XMMWORD"; break;
7059	    default:
7060	      abort ();
7061	    }
7062
7063	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
7064	  if (code == 'b')
7065	    size = "BYTE";
7066	  else if (code == 'w')
7067	    size = "WORD";
7068	  else if (code == 'k')
7069	    size = "DWORD";
7070
7071	  fputs (size, file);
7072	  fputs (" PTR ", file);
7073	}
7074
7075      x = XEXP (x, 0);
7076      if (flag_pic && CONSTANT_ADDRESS_P (x))
7077	output_pic_addr_const (file, x, code);
7078      /* Avoid (%rip) for call operands.  */
7079      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7080	       && GET_CODE (x) != CONST_INT)
7081	output_addr_const (file, x);
7082      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7083	output_operand_lossage ("invalid constraints for operand");
7084      else
7085	output_address (x);
7086    }
7087
7088  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7089    {
7090      REAL_VALUE_TYPE r;
7091      long l;
7092
7093      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7094      REAL_VALUE_TO_TARGET_SINGLE (r, l);
7095
7096      if (ASSEMBLER_DIALECT == ASM_ATT)
7097	putc ('$', file);
7098      fprintf (file, "0x%lx", l);
7099    }
7100
7101	  /* These float cases don't actually occur as immediate operands.  */
7102	  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7103    {
7104      char dstr[30];
7105
7106      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7107      fprintf (file, "%s", dstr);
7108    }
7109
7110  else if (GET_CODE (x) == CONST_DOUBLE
7111	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7112    {
7113      char dstr[30];
7114
7115      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7116      fprintf (file, "%s", dstr);
7117    }
7118
7119  else
7120    {
7121      if (code != 'P')
7122	{
7123	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7124	    {
7125	      if (ASSEMBLER_DIALECT == ASM_ATT)
7126		putc ('$', file);
7127	    }
7128	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7129		   || GET_CODE (x) == LABEL_REF)
7130	    {
7131	      if (ASSEMBLER_DIALECT == ASM_ATT)
7132		putc ('$', file);
7133	      else
7134		fputs ("OFFSET FLAT:", file);
7135	    }
7136	}
7137      if (GET_CODE (x) == CONST_INT)
7138	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7139      else if (flag_pic)
7140	output_pic_addr_const (file, x, code);
7141      else
7142	output_addr_const (file, x);
7143    }
7144}
7145
7146/* Print a memory operand whose address is ADDR.  */
7147
7148void
7149print_operand_address (file, addr)
7150     FILE *file;
7151     register rtx addr;
7152{
7153  struct ix86_address parts;
7154  rtx base, index, disp;
7155  int scale;
7156
7157  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7158    {
7159      if (ASSEMBLER_DIALECT == ASM_INTEL)
7160	fputs ("DWORD PTR ", file);
7161      if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7162	putc ('%', file);
7163      if (TARGET_64BIT)
7164	fputs ("fs:0", file);
7165      else
7166	fputs ("gs:0", file);
7167      return;
7168    }
7169
7170  if (! ix86_decompose_address (addr, &parts))
7171    abort ();
7172
7173  base = parts.base;
7174  index = parts.index;
7175  disp = parts.disp;
7176  scale = parts.scale;
7177
7178  if (!base && !index)
7179    {
7180      /* Displacement only requires special attention.  */
7181
7182      if (GET_CODE (disp) == CONST_INT)
7183	{
7184	  if (ASSEMBLER_DIALECT == ASM_INTEL)
7185	    {
7186	      if (USER_LABEL_PREFIX[0] == 0)
7187		putc ('%', file);
7188	      fputs ("ds:", file);
7189	    }
7190	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7191	}
7192      else if (flag_pic)
7193	output_pic_addr_const (file, addr, 0);
7194      else
7195	output_addr_const (file, addr);
7196
7197	      /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
7198      if (TARGET_64BIT
7199	  && ((GET_CODE (addr) == SYMBOL_REF
7200	       && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7201	      || GET_CODE (addr) == LABEL_REF
7202	      || (GET_CODE (addr) == CONST
7203		  && GET_CODE (XEXP (addr, 0)) == PLUS
7204		  && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7205		      || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7206		  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7207	fputs ("(%rip)", file);
7208    }
7209  else
7210    {
7211      if (ASSEMBLER_DIALECT == ASM_ATT)
7212	{
7213	  if (disp)
7214	    {
7215	      if (flag_pic)
7216		output_pic_addr_const (file, disp, 0);
7217	      else if (GET_CODE (disp) == LABEL_REF)
7218		output_asm_label (disp);
7219	      else
7220		output_addr_const (file, disp);
7221	    }
7222
7223	  putc ('(', file);
7224	  if (base)
7225	    PRINT_REG (base, 0, file);
7226	  if (index)
7227	    {
7228	      putc (',', file);
7229	      PRINT_REG (index, 0, file);
7230	      if (scale != 1)
7231		fprintf (file, ",%d", scale);
7232	    }
7233	  putc (')', file);
7234	}
7235      else
7236	{
7237	  rtx offset = NULL_RTX;
7238
7239	  if (disp)
7240	    {
7241	      /* Pull out the offset of a symbol; print any symbol itself.  */
7242	      if (GET_CODE (disp) == CONST
7243		  && GET_CODE (XEXP (disp, 0)) == PLUS
7244		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7245		{
7246		  offset = XEXP (XEXP (disp, 0), 1);
7247		  disp = gen_rtx_CONST (VOIDmode,
7248					XEXP (XEXP (disp, 0), 0));
7249		}
7250
7251	      if (flag_pic)
7252		output_pic_addr_const (file, disp, 0);
7253	      else if (GET_CODE (disp) == LABEL_REF)
7254		output_asm_label (disp);
7255	      else if (GET_CODE (disp) == CONST_INT)
7256		offset = disp;
7257	      else
7258		output_addr_const (file, disp);
7259	    }
7260
7261	  putc ('[', file);
7262	  if (base)
7263	    {
7264	      PRINT_REG (base, 0, file);
7265	      if (offset)
7266		{
7267		  if (INTVAL (offset) >= 0)
7268		    putc ('+', file);
7269		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7270		}
7271	    }
7272	  else if (offset)
7273	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7274	  else
7275	    putc ('0', file);
7276
7277	  if (index)
7278	    {
7279	      putc ('+', file);
7280	      PRINT_REG (index, 0, file);
7281	      if (scale != 1)
7282		fprintf (file, "*%d", scale);
7283	    }
7284	  putc (']', file);
7285	}
7286    }
7287}
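
/* A minimal sketch (not part of GCC) contrasting the two dialects handled
   above: the same base/index/scale/displacement quadruple prints as
   disp(base,index,scale) in AT&T syntax and [base+index*scale+disp] in
   Intel syntax.  Wrapped in #if 0 so it is never compiled.  */
#if 0
#include <stdio.h>

/* Illustrative only: mirrors the AT&T vs Intel branches of
   print_operand_address for an address with a base register.  */
static void
demo_print_address (FILE *f, const char *base, const char *index,
		    int scale, long disp, int att_syntax)
{
  if (att_syntax)
    {
      /* AT&T: disp(base,index,scale), e.g. 8(%ebx,%esi,4).  */
      if (disp != 0)
	fprintf (f, "%ld", disp);
      fprintf (f, "(%%%s", base);
      if (index)
	{
	  fprintf (f, ",%%%s", index);
	  if (scale != 1)
	    fprintf (f, ",%d", scale);
	}
      fputc (')', f);
    }
  else
    {
      /* Intel: [base+index*scale+disp], e.g. [ebx+esi*4+8].  */
      fprintf (f, "[%s", base);
      if (index)
	{
	  fprintf (f, "+%s", index);
	  if (scale != 1)
	    fprintf (f, "*%d", scale);
	}
      if (disp != 0)
	fprintf (f, "%+ld", disp);
      fputc (']', f);
    }
}

int
main (void)
{
  demo_print_address (stdout, "ebx", "esi", 4, 8, 1);	/* 8(%ebx,%esi,4) */
  fputc ('\n', stdout);
  demo_print_address (stdout, "ebx", "esi", 4, 8, 0);	/* [ebx+esi*4+8] */
  fputc ('\n', stdout);
  return 0;
}
#endif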
7288
7289bool
7290output_addr_const_extra (file, x)
7291     FILE *file;
7292     rtx x;
7293{
7294  rtx op;
7295
7296  if (GET_CODE (x) != UNSPEC)
7297    return false;
7298
7299  op = XVECEXP (x, 0, 0);
7300  switch (XINT (x, 1))
7301    {
7302    case UNSPEC_GOTTPOFF:
7303      output_addr_const (file, op);
7304      /* FIXME: This might be @TPOFF in Sun ld.  */
7305      fputs ("@GOTTPOFF", file);
7306      break;
7307    case UNSPEC_TPOFF:
7308      output_addr_const (file, op);
7309      fputs ("@TPOFF", file);
7310      break;
7311    case UNSPEC_NTPOFF:
7312      output_addr_const (file, op);
7313      if (TARGET_64BIT)
7314	fputs ("@TPOFF", file);
7315      else
7316	fputs ("@NTPOFF", file);
7317      break;
7318    case UNSPEC_DTPOFF:
7319      output_addr_const (file, op);
7320      fputs ("@DTPOFF", file);
7321      break;
7322    case UNSPEC_GOTNTPOFF:
7323      output_addr_const (file, op);
7324      if (TARGET_64BIT)
7325	fputs ("@GOTTPOFF(%rip)", file);
7326      else
7327	fputs ("@GOTNTPOFF", file);
7328      break;
7329    case UNSPEC_INDNTPOFF:
7330      output_addr_const (file, op);
7331      fputs ("@INDNTPOFF", file);
7332      break;
7333
7334    default:
7335      return false;
7336    }
7337
7338  return true;
7339}
7340
7341/* Split one or more DImode RTL references into pairs of SImode
7342   references.  The RTL can be REG, offsettable MEM, integer constant, or
7343   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
7344   split and "num" is its length.  lo_half and hi_half are output arrays
7345   that parallel "operands".  */
7346
7347void
7348split_di (operands, num, lo_half, hi_half)
7349     rtx operands[];
7350     int num;
7351     rtx lo_half[], hi_half[];
7352{
7353  while (num--)
7354    {
7355      rtx op = operands[num];
7356
7357	      /* simplify_subreg refuses to split volatile memory references,
7358	         but we still have to handle them.  */
7359      if (GET_CODE (op) == MEM)
7360	{
7361	  lo_half[num] = adjust_address (op, SImode, 0);
7362	  hi_half[num] = adjust_address (op, SImode, 4);
7363	}
7364      else
7365	{
7366	  lo_half[num] = simplify_gen_subreg (SImode, op,
7367					      GET_MODE (op) == VOIDmode
7368					      ? DImode : GET_MODE (op), 0);
7369	  hi_half[num] = simplify_gen_subreg (SImode, op,
7370					      GET_MODE (op) == VOIDmode
7371					      ? DImode : GET_MODE (op), 4);
7372	}
7373    }
7374}
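
/* An illustrative sketch (not GCC code) of what the non-MEM case above
   computes on little-endian IA-32: the low half is the 32-bit word at
   byte offset 0, the high half the word at byte offset 4.  */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  uint64_t op = 0x123456789abcdef0ULL;
  uint32_t lo = (uint32_t) (op & 0xffffffffULL);	/* subreg at offset 0 */
  uint32_t hi = (uint32_t) (op >> 32);			/* subreg at offset 4 */

  /* Prints: lo = 0x9abcdef0, hi = 0x12345678 */
  printf ("lo = 0x%08x, hi = 0x%08x\n", (unsigned) lo, (unsigned) hi);
  return 0;
}
#endif
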
7375/* Split one or more TImode RTL references into pairs of DImode
7376   references.  The RTL can be REG, offsettable MEM, integer constant, or
7377   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
7378   split and "num" is its length.  lo_half and hi_half are output arrays
7379   that parallel "operands".  */
7380
7381void
7382split_ti (operands, num, lo_half, hi_half)
7383     rtx operands[];
7384     int num;
7385     rtx lo_half[], hi_half[];
7386{
7387  while (num--)
7388    {
7389      rtx op = operands[num];
7390
7391	      /* simplify_subreg refuses to split volatile memory references, but we
7392	         still have to handle them.  */
7393      if (GET_CODE (op) == MEM)
7394	{
7395	  lo_half[num] = adjust_address (op, DImode, 0);
7396	  hi_half[num] = adjust_address (op, DImode, 8);
7397	}
7398      else
7399	{
7400	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7401	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7402	}
7403    }
7404}
7405
7406/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7407   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
7408   is the expression of the binary operation.  The output may either be
7409   emitted here, or returned to the caller, like all output_* functions.
7410
7411   There is no guarantee that the operands are the same mode, as they
7412   might be within FLOAT or FLOAT_EXTEND expressions.  */
7413
7414#ifndef SYSV386_COMPAT
7415/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
7416   wants to fix the assemblers because that causes incompatibility
7417   with gcc.  No-one wants to fix gcc because that causes
7418   incompatibility with assemblers...  You can use the option of
7419   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
7420#define SYSV386_COMPAT 1
7421#endif
7422
7423const char *
7424output_387_binary_op (insn, operands)
7425     rtx insn;
7426     rtx *operands;
7427{
7428  static char buf[30];
7429  const char *p;
7430  const char *ssep;
7431  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7432
7433#ifdef ENABLE_CHECKING
7434	  /* Even if we do not want to check the inputs, this documents the input
7435	     constraints, which helps in understanding the following code.  */
7436  if (STACK_REG_P (operands[0])
7437      && ((REG_P (operands[1])
7438	   && REGNO (operands[0]) == REGNO (operands[1])
7439	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7440	  || (REG_P (operands[2])
7441	      && REGNO (operands[0]) == REGNO (operands[2])
7442	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7443      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7444    ; /* ok */
7445  else if (!is_sse)
7446    abort ();
7447#endif
7448
7449  switch (GET_CODE (operands[3]))
7450    {
7451    case PLUS:
7452      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7453	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7454	p = "fiadd";
7455      else
7456	p = "fadd";
7457      ssep = "add";
7458      break;
7459
7460    case MINUS:
7461      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7462	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7463	p = "fisub";
7464      else
7465	p = "fsub";
7466      ssep = "sub";
7467      break;
7468
7469    case MULT:
7470      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7471	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7472	p = "fimul";
7473      else
7474	p = "fmul";
7475      ssep = "mul";
7476      break;
7477
7478    case DIV:
7479      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7480	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7481	p = "fidiv";
7482      else
7483	p = "fdiv";
7484      ssep = "div";
7485      break;
7486
7487    default:
7488      abort ();
7489    }
7490
7491  if (is_sse)
7492   {
7493      strcpy (buf, ssep);
7494      if (GET_MODE (operands[0]) == SFmode)
7495	strcat (buf, "ss\t{%2, %0|%0, %2}");
7496      else
7497	strcat (buf, "sd\t{%2, %0|%0, %2}");
7498      return buf;
7499   }
7500  strcpy (buf, p);
7501
7502  switch (GET_CODE (operands[3]))
7503    {
7504    case MULT:
7505    case PLUS:
7506      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7507	{
7508	  rtx temp = operands[2];
7509	  operands[2] = operands[1];
7510	  operands[1] = temp;
7511	}
7512
7513	      /* We now know operands[0] == operands[1].  */
7514
7515      if (GET_CODE (operands[2]) == MEM)
7516	{
7517	  p = "%z2\t%2";
7518	  break;
7519	}
7520
7521      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7522	{
7523	  if (STACK_TOP_P (operands[0]))
7524	    /* How is it that we are storing to a dead operand[2]?
7525	       Well, presumably operands[1] is dead too.  We can't
7526	       store the result to st(0) as st(0) gets popped on this
7527	       instruction.  Instead store to operands[2] (which I
7528	       think has to be st(1)).  st(1) will be popped later.
7529	       gcc <= 2.8.1 didn't have this check and generated
7530	       assembly code that the Unixware assembler rejected.  */
7531	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7532	  else
7533	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7534	  break;
7535	}
7536
7537      if (STACK_TOP_P (operands[0]))
7538	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7539      else
7540	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
7541      break;
7542
7543    case MINUS:
7544    case DIV:
7545      if (GET_CODE (operands[1]) == MEM)
7546	{
7547	  p = "r%z1\t%1";
7548	  break;
7549	}
7550
7551      if (GET_CODE (operands[2]) == MEM)
7552	{
7553	  p = "%z2\t%2";
7554	  break;
7555	}
7556
7557      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7558	{
7559#if SYSV386_COMPAT
7560	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7561	     derived assemblers, confusingly reverse the direction of
7562	     the operation for fsub{r} and fdiv{r} when the
7563	     destination register is not st(0).  The Intel assembler
7564	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
7565	     figure out what the hardware really does.  */
7566	  if (STACK_TOP_P (operands[0]))
7567	    p = "{p\t%0, %2|rp\t%2, %0}";
7568	  else
7569	    p = "{rp\t%2, %0|p\t%0, %2}";
7570#else
7571	  if (STACK_TOP_P (operands[0]))
7572	    /* As above for fmul/fadd, we can't store to st(0).  */
7573	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7574	  else
7575	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7576#endif
7577	  break;
7578	}
7579
7580      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7581	{
7582#if SYSV386_COMPAT
7583	  if (STACK_TOP_P (operands[0]))
7584	    p = "{rp\t%0, %1|p\t%1, %0}";
7585	  else
7586	    p = "{p\t%1, %0|rp\t%0, %1}";
7587#else
7588	  if (STACK_TOP_P (operands[0]))
7589	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
7590	  else
7591	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
7592#endif
7593	  break;
7594	}
7595
7596      if (STACK_TOP_P (operands[0]))
7597	{
7598	  if (STACK_TOP_P (operands[1]))
7599	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7600	  else
7601	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
7602	  break;
7603	}
7604      else if (STACK_TOP_P (operands[1]))
7605	{
7606#if SYSV386_COMPAT
7607	  p = "{\t%1, %0|r\t%0, %1}";
7608#else
7609	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
7610#endif
7611	}
7612      else
7613	{
7614#if SYSV386_COMPAT
7615	  p = "{r\t%2, %0|\t%0, %2}";
7616#else
7617	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
7618#endif
7619	}
7620      break;
7621
7622    default:
7623      abort ();
7624    }
7625
7626  strcat (buf, p);
7627  return buf;
7628}
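
/* Illustrative only (not GCC code): the routine above builds its result by
   concatenating an opcode stem with an operand template; the {att|intel}
   braces in the template are resolved later, in final.  E.g. an SFmode SSE
   add comes out as follows.  */
#if 0
#include <stdio.h>
#include <string.h>

int
main (void)
{
  static char buf[30];

  strcpy (buf, "add");			/* ssep chosen for PLUS */
  strcat (buf, "ss\t{%2, %0|%0, %2}");	/* SFmode suffix + operand template */
  puts (buf);				/* addss	{%2, %0|%0, %2} */
  return 0;
}
#endif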
7629
7630/* Output code to initialize the control word copies used by the
7631   trunc?f?i patterns.  NORMAL is set to the current control word, while
7632   ROUND_DOWN is set to a control word that rounds toward zero.  */
7633void
7634emit_i387_cw_initialization (normal, round_down)
7635     rtx normal, round_down;
7636{
7637  rtx reg = gen_reg_rtx (HImode);
7638
7639  emit_insn (gen_x86_fnstcw_1 (normal));
7640  emit_move_insn (reg, normal);
7641  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7642      && !TARGET_64BIT)
7643    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7644  else
7645    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7646  emit_move_insn (round_down, reg);
7647}
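
/* For reference: bits 10-11 of the 387 control word form the rounding
   control (RC) field, and or-ing in 0xc00 as above sets RC=11, round
   toward zero, which is the behavior the trunc?f?i patterns need.  A
   standalone sketch (not GCC code):  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned short normal = 0x037f;		/* default control word after fninit */
  unsigned short round_down = normal | 0x0c00;	/* RC = 11: truncate */

  printf ("normal = 0x%04x, truncating = 0x%04x\n", normal, round_down);
  return 0;
}
#endif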
7648
7649/* Output code for INSN to convert a float to a signed int.  OPERANDS
7650   are the insn operands.  The output may be [HSD]Imode and the input
7651   operand may be [SDX]Fmode.  */
7652
7653const char *
7654output_fix_trunc (insn, operands)
7655     rtx insn;
7656     rtx *operands;
7657{
7658  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7659  int dimode_p = GET_MODE (operands[0]) == DImode;
7660
7661  /* Jump through a hoop or two for DImode, since the hardware has no
7662     non-popping instruction.  We used to do this a different way, but
7663     that was somewhat fragile and broke with post-reload splitters.  */
7664  if (dimode_p && !stack_top_dies)
7665    output_asm_insn ("fld\t%y1", operands);
7666
7667  if (!STACK_TOP_P (operands[1]))
7668    abort ();
7669
7670  if (GET_CODE (operands[0]) != MEM)
7671    abort ();
7672
7673  output_asm_insn ("fldcw\t%3", operands);
7674  if (stack_top_dies || dimode_p)
7675    output_asm_insn ("fistp%z0\t%0", operands);
7676  else
7677    output_asm_insn ("fist%z0\t%0", operands);
7678  output_asm_insn ("fldcw\t%2", operands);
7679
7680  return "";
7681}
7682
7683/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
7684   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
7685   when fucom should be used.  */
7686
7687const char *
7688output_fp_compare (insn, operands, eflags_p, unordered_p)
7689     rtx insn;
7690     rtx *operands;
7691     int eflags_p, unordered_p;
7692{
7693  int stack_top_dies;
7694  rtx cmp_op0 = operands[0];
7695  rtx cmp_op1 = operands[1];
7696  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7697
7698  if (eflags_p == 2)
7699    {
7700      cmp_op0 = cmp_op1;
7701      cmp_op1 = operands[2];
7702    }
7703  if (is_sse)
7704    {
7705      if (GET_MODE (operands[0]) == SFmode)
7706	if (unordered_p)
7707	  return "ucomiss\t{%1, %0|%0, %1}";
7708	else
7709	  return "comiss\t{%1, %0|%0, %1}";
7710      else
7711	if (unordered_p)
7712	  return "ucomisd\t{%1, %0|%0, %1}";
7713	else
7714	  return "comisd\t{%1, %0|%0, %1}";
7715    }
7716
7717  if (! STACK_TOP_P (cmp_op0))
7718    abort ();
7719
7720  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7721
7722  if (STACK_REG_P (cmp_op1)
7723      && stack_top_dies
7724      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7725      && REGNO (cmp_op1) != FIRST_STACK_REG)
7726    {
7727      /* If the top of the 387 stack dies, and the other operand
7728	 is also a stack register that dies, then this must be an
7729	 `fcompp' float compare.  */
7730
7731      if (eflags_p == 1)
7732	{
7733	  /* There is no double popping fcomi variant.  Fortunately,
7734	     eflags is immune from the fstp's cc clobbering.  */
7735	  if (unordered_p)
7736	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7737	  else
7738	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7739	  return "fstp\t%y0";
7740	}
7741      else
7742	{
7743	  if (eflags_p == 2)
7744	    {
7745	      if (unordered_p)
7746		return "fucompp\n\tfnstsw\t%0";
7747	      else
7748		return "fcompp\n\tfnstsw\t%0";
7749	    }
7750	  else
7751	    {
7752	      if (unordered_p)
7753		return "fucompp";
7754	      else
7755		return "fcompp";
7756	    }
7757	}
7758    }
7759  else
7760    {
7761      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
7762
7763      static const char * const alt[24] =
7764      {
7765	"fcom%z1\t%y1",
7766	"fcomp%z1\t%y1",
7767	"fucom%z1\t%y1",
7768	"fucomp%z1\t%y1",
7769
7770	"ficom%z1\t%y1",
7771	"ficomp%z1\t%y1",
7772	NULL,
7773	NULL,
7774
7775	"fcomi\t{%y1, %0|%0, %y1}",
7776	"fcomip\t{%y1, %0|%0, %y1}",
7777	"fucomi\t{%y1, %0|%0, %y1}",
7778	"fucomip\t{%y1, %0|%0, %y1}",
7779
7780	NULL,
7781	NULL,
7782	NULL,
7783	NULL,
7784
7785	"fcom%z2\t%y2\n\tfnstsw\t%0",
7786	"fcomp%z2\t%y2\n\tfnstsw\t%0",
7787	"fucom%z2\t%y2\n\tfnstsw\t%0",
7788	"fucomp%z2\t%y2\n\tfnstsw\t%0",
7789
7790	"ficom%z2\t%y2\n\tfnstsw\t%0",
7791	"ficomp%z2\t%y2\n\tfnstsw\t%0",
7792	NULL,
7793	NULL
7794      };
7795
7796      int mask;
7797      const char *ret;
7798
7799      mask  = eflags_p << 3;
7800      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7801      mask |= unordered_p << 1;
7802      mask |= stack_top_dies;
7803
7804      if (mask >= 24)
7805	abort ();
7806      ret = alt[mask];
7807      if (ret == NULL)
7808	abort ();
7809
7810      return ret;
7811    }
7812}
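
/* Illustrative only (not GCC code): the alt[] table above is indexed by
   packing the four selector bits exactly as done after it; e.g. an fcomi
   compare (eflags_p == 1) of two FP operands, ordered, with the stack top
   dying gives mask 9 and selects "fcomip\t{%y1, %0|%0, %y1}".  */
#if 0
#include <stdio.h>

int
main (void)
{
  int eflags_p = 1;		/* fcomi-style compare */
  int intmode = 0;		/* neither operand is MODE_INT */
  int unordered_p = 0;		/* ordered (fcom, not fucom) */
  int stack_top_dies = 1;	/* st(0) dies: use the popping form */

  int mask = (eflags_p << 3) | (intmode << 2) | (unordered_p << 1)
	     | stack_top_dies;
  printf ("mask = %d\n", mask);	/* 9 -> "fcomip\t{%y1, %0|%0, %y1}" */
  return 0;
}
#endif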
7813
7814void
7815ix86_output_addr_vec_elt (file, value)
7816     FILE *file;
7817     int value;
7818{
7819  const char *directive = ASM_LONG;
7820
7821  if (TARGET_64BIT)
7822    {
7823#ifdef ASM_QUAD
7824      directive = ASM_QUAD;
7825#else
7826      abort ();
7827#endif
7828    }
7829
7830  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7831}
7832
7833void
7834ix86_output_addr_diff_elt (file, value, rel)
7835     FILE *file;
7836     int value, rel;
7837{
7838  if (TARGET_64BIT)
7839    fprintf (file, "%s%s%d-%s%d\n",
7840	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
7841  else if (HAVE_AS_GOTOFF_IN_DATA)
7842    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7843#if TARGET_MACHO
7844  else if (TARGET_MACHO)
7845    fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7846	     machopic_function_base_name () + 1);
7847#endif
7848  else
7849    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7850		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7851}
7852
7853/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7854   for the target.  */
7855
7856void
7857ix86_expand_clear (dest)
7858     rtx dest;
7859{
7860  rtx tmp;
7861
7862  /* We play register width games, which are only valid after reload.  */
7863  if (!reload_completed)
7864    abort ();
7865
7866  /* Avoid HImode and its attendant prefix byte.  */
7867  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7868    dest = gen_rtx_REG (SImode, REGNO (dest));
7869
7870  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7871
7872  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
7873  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7874    {
7875      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7876      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7877    }
7878
7879  emit_insn (tmp);
7880}
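
/* For reference: `xorl %eax, %eax' encodes in 2 bytes (31 c0), while
   `movl $0, %eax' takes 5 (b8 00 00 00 00), which is why the xor form is
   preferred unless TARGET_USE_MOV0 says otherwise; since xor modifies the
   flags, the PARALLEL above attaches the matching flags clobber.  */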
7881
7882/* X is an unchanging MEM.  If it is a constant pool reference, return
7883   the constant pool rtx, else NULL.  */
7884
7885static rtx
7886maybe_get_pool_constant (x)
7887     rtx x;
7888{
7889  x = XEXP (x, 0);
7890
7891  if (flag_pic && ! TARGET_64BIT)
7892    {
7893      if (GET_CODE (x) != PLUS)
7894	return NULL_RTX;
7895      if (XEXP (x, 0) != pic_offset_table_rtx)
7896	return NULL_RTX;
7897      x = XEXP (x, 1);
7898      if (GET_CODE (x) != CONST)
7899	return NULL_RTX;
7900      x = XEXP (x, 0);
7901      if (GET_CODE (x) != UNSPEC)
7902	return NULL_RTX;
7903      if (XINT (x, 1) != UNSPEC_GOTOFF)
7904	return NULL_RTX;
7905      x = XVECEXP (x, 0, 0);
7906    }
7907
7908  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7909    return get_pool_constant (x);
7910
7911  return NULL_RTX;
7912}
7913
7914void
7915ix86_expand_move (mode, operands)
7916     enum machine_mode mode;
7917     rtx operands[];
7918{
7919  int strict = (reload_in_progress || reload_completed);
7920  rtx insn, op0, op1, tmp;
7921
7922  op0 = operands[0];
7923  op1 = operands[1];
7924
7925  if (tls_symbolic_operand (op1, Pmode))
7926    {
7927      op1 = legitimize_address (op1, op1, VOIDmode);
7928      if (GET_CODE (op0) == MEM)
7929	{
7930	  tmp = gen_reg_rtx (mode);
7931	  emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7932	  op1 = tmp;
7933	}
7934    }
7935  else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7936    {
7937#if TARGET_MACHO
7938      if (MACHOPIC_PURE)
7939	{
7940	  rtx temp = ((reload_in_progress
7941		       || ((op0 && GET_CODE (op0) == REG)
7942			   && mode == Pmode))
7943		      ? op0 : gen_reg_rtx (Pmode));
7944	  op1 = machopic_indirect_data_reference (op1, temp);
7945	  op1 = machopic_legitimize_pic_address (op1, mode,
7946						 temp == op1 ? 0 : temp);
7947	}
7948      else
7949	{
7950	  if (MACHOPIC_INDIRECT)
7951	    op1 = machopic_indirect_data_reference (op1, 0);
7952	}
7953      if (op0 != op1)
7954	{
7955	  insn = gen_rtx_SET (VOIDmode, op0, op1);
7956	  emit_insn (insn);
7957	}
7958      return;
7959#endif /* TARGET_MACHO */
7960      if (GET_CODE (op0) == MEM)
7961	op1 = force_reg (Pmode, op1);
7962      else
7963	{
7964	  rtx temp = op0;
7965	  if (GET_CODE (temp) != REG)
7966	    temp = gen_reg_rtx (Pmode);
7967	  temp = legitimize_pic_address (op1, temp);
7968	  if (temp == op0)
7969	    return;
7970	  op1 = temp;
7971	}
7972    }
7973  else
7974    {
7975      if (GET_CODE (op0) == MEM
7976	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7977	      || !push_operand (op0, mode))
7978	  && GET_CODE (op1) == MEM)
7979	op1 = force_reg (mode, op1);
7980
7981      if (push_operand (op0, mode)
7982	  && ! general_no_elim_operand (op1, mode))
7983	op1 = copy_to_mode_reg (mode, op1);
7984
7985      /* Force large constants in 64-bit compilation into a register
7986	 to get them CSEd.  */
7987      if (TARGET_64BIT && mode == DImode
7988	  && immediate_operand (op1, mode)
7989	  && !x86_64_zero_extended_value (op1)
7990	  && !register_operand (op0, mode)
7991	  && optimize && !reload_completed && !reload_in_progress)
7992	op1 = copy_to_mode_reg (mode, op1);
7993
7994      if (FLOAT_MODE_P (mode))
7995	{
7996	  /* If we are loading a floating point constant to a register,
7997	     force the value to memory now, since we'll get better code
7998	     out the back end.  */
7999
8000	  if (strict)
8001	    ;
8002	  else if (GET_CODE (op1) == CONST_DOUBLE)
8003	    {
8004	      op1 = validize_mem (force_const_mem (mode, op1));
8005	      if (!register_operand (op0, mode))
8006		{
8007		  rtx temp = gen_reg_rtx (mode);
8008		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8009		  emit_move_insn (op0, temp);
8010		  return;
8011		}
8012	    }
8013	}
8014    }
8015
8016  insn = gen_rtx_SET (VOIDmode, op0, op1);
8017
8018  emit_insn (insn);
8019}
8020
8021void
8022ix86_expand_vector_move (mode, operands)
8023     enum machine_mode mode;
8024     rtx operands[];
8025{
8026  /* Force constants other than zero into memory.  We do not know how
8027     the instructions used to build constants modify the upper 64 bits
8028     of the register; once we have that information we may be able
8029     to handle some of them more efficiently.  */
8030  if ((reload_in_progress | reload_completed) == 0
8031      && register_operand (operands[0], mode)
8032      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8033    {
8034      operands[1] = force_const_mem (mode, operands[1]);
8035      emit_move_insn (operands[0], operands[1]);
8036      return;
8037    }
8038
8039  /* Make operand1 a register if neither operand is one already.  */
8040  if (!no_new_pseudos
8041      && !register_operand (operands[0], mode)
8042      && !register_operand (operands[1], mode))
8043    {
8044      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8045      emit_move_insn (operands[0], temp);
8046      return;
8047    }
8048
8049  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8050}
8051
8052/* Attempt to expand a binary operator.  Make the expansion closer to the
8053   actual machine than just general_operand, which would allow 3 separate
8054   memory references (one output, two input) in a single insn.  */
8055
8056void
8057ix86_expand_binary_operator (code, mode, operands)
8058     enum rtx_code code;
8059     enum machine_mode mode;
8060     rtx operands[];
8061{
8062  int matching_memory;
8063  rtx src1, src2, dst, op, clob;
8064
8065  dst = operands[0];
8066  src1 = operands[1];
8067  src2 = operands[2];
8068
8069  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8070  if (GET_RTX_CLASS (code) == 'c'
8071      && (rtx_equal_p (dst, src2)
8072	  || immediate_operand (src1, mode)))
8073    {
8074      rtx temp = src1;
8075      src1 = src2;
8076      src2 = temp;
8077    }
8078
8079  /* If the destination is memory, and we do not have matching source
8080     operands, do things in registers.  */
8081  matching_memory = 0;
8082  if (GET_CODE (dst) == MEM)
8083    {
8084      if (rtx_equal_p (dst, src1))
8085	matching_memory = 1;
8086      else if (GET_RTX_CLASS (code) == 'c'
8087	       && rtx_equal_p (dst, src2))
8088	matching_memory = 2;
8089      else
8090	dst = gen_reg_rtx (mode);
8091    }
8092
8093  /* Both source operands cannot be in memory.  */
8094  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8095    {
8096      if (matching_memory != 2)
8097	src2 = force_reg (mode, src2);
8098      else
8099	src1 = force_reg (mode, src1);
8100    }
8101
8102  /* If the operation is not commutative, source 1 cannot be a constant
8103     or non-matching memory.  */
8104  if ((CONSTANT_P (src1)
8105       || (!matching_memory && GET_CODE (src1) == MEM))
8106      && GET_RTX_CLASS (code) != 'c')
8107    src1 = force_reg (mode, src1);
8108
8109  /* If optimizing, copy to regs to improve CSE.  */
8110  if (optimize && ! no_new_pseudos)
8111    {
8112      if (GET_CODE (dst) == MEM)
8113	dst = gen_reg_rtx (mode);
8114      if (GET_CODE (src1) == MEM)
8115	src1 = force_reg (mode, src1);
8116      if (GET_CODE (src2) == MEM)
8117	src2 = force_reg (mode, src2);
8118    }
8119
8120  /* Emit the instruction.  */
8121
8122  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8123  if (reload_in_progress)
8124    {
8125      /* Reload doesn't know about the flags register, and doesn't know that
8126         it doesn't want to clobber it.  We can only do this with PLUS.  */
8127      if (code != PLUS)
8128	abort ();
8129      emit_insn (op);
8130    }
8131  else
8132    {
8133      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8134      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8135    }
8136
8137  /* Fix up the destination if needed.  */
8138  if (dst != operands[0])
8139    emit_move_insn (operands[0], dst);
8140}
8141
8142/* Return TRUE or FALSE depending on whether the binary operator meets the
8143   appropriate constraints.  */
8144
8145int
8146ix86_binary_operator_ok (code, mode, operands)
8147     enum rtx_code code;
8148     enum machine_mode mode ATTRIBUTE_UNUSED;
8149     rtx operands[3];
8150{
8151  /* Both source operands cannot be in memory.  */
8152  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8153    return 0;
8154  /* If the operation is not commutative, source 1 cannot be a constant.  */
8155  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8156    return 0;
8157  /* If the destination is memory, we must have a matching source operand.  */
8158  if (GET_CODE (operands[0]) == MEM
8159      && ! (rtx_equal_p (operands[0], operands[1])
8160	    || (GET_RTX_CLASS (code) == 'c'
8161		&& rtx_equal_p (operands[0], operands[2]))))
8162    return 0;
8163  /* If the operation is not commutative and source 1 is memory, we must
8164     have a matching destination.  */
8165  if (GET_CODE (operands[1]) == MEM
8166      && GET_RTX_CLASS (code) != 'c'
8167      && ! rtx_equal_p (operands[0], operands[1]))
8168    return 0;
8169  return 1;
8170}
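
/* A standalone sketch (not GCC code) of the same constraint checks on a
   toy operand model; `commutative', `is_mem', `is_const' and `id' are
   invented here purely for illustration.  */
#if 0
#include <stdio.h>

struct op { int is_mem, is_const, id; };

/* Mirrors ix86_binary_operator_ok: the two sources cannot both be memory,
   a memory destination must match a source, and a non-commutative
   operation cannot take a constant or non-matching memory as source 1.  */
static int
binary_ok (int commutative, struct op dst, struct op src1, struct op src2)
{
  if (src1.is_mem && src2.is_mem)
    return 0;
  if (src1.is_const && !commutative)
    return 0;
  if (dst.is_mem
      && !(dst.id == src1.id || (commutative && dst.id == src2.id)))
    return 0;
  if (src1.is_mem && !commutative && dst.id != src1.id)
    return 0;
  return 1;
}

int
main (void)
{
  struct op mem_a = { 1, 0, 1 }, mem_b = { 1, 0, 2 }, reg = { 0, 0, 3 };

  printf ("%d\n", binary_ok (1, mem_a, mem_a, reg));	/* 1: add reg into mem */
  printf ("%d\n", binary_ok (1, mem_a, mem_b, reg));	/* 0: dst must match */
  printf ("%d\n", binary_ok (0, reg, mem_a, mem_b));	/* 0: two mem sources */
  return 0;
}
#endif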
8171
8172/* Attempt to expand a unary operator.  Make the expansion closer to the
8173   actual machine than just general_operand, which would allow 2 separate
8174   memory references (one output, one input) in a single insn.  */
8175
8176void
8177ix86_expand_unary_operator (code, mode, operands)
8178     enum rtx_code code;
8179     enum machine_mode mode;
8180     rtx operands[];
8181{
8182  int matching_memory;
8183  rtx src, dst, op, clob;
8184
8185  dst = operands[0];
8186  src = operands[1];
8187
8188  /* If the destination is memory, and we do not have matching source
8189     operands, do things in registers.  */
8190  matching_memory = 0;
8191  if (GET_CODE (dst) == MEM)
8192    {
8193      if (rtx_equal_p (dst, src))
8194	matching_memory = 1;
8195      else
8196	dst = gen_reg_rtx (mode);
8197    }
8198
8199  /* When source operand is memory, destination must match.  */
8200  if (!matching_memory && GET_CODE (src) == MEM)
8201    src = force_reg (mode, src);
8202
8203  /* If optimizing, copy to regs to improve CSE.  */
8204  if (optimize && ! no_new_pseudos)
8205    {
8206      if (GET_CODE (dst) == MEM)
8207	dst = gen_reg_rtx (mode);
8208      if (GET_CODE (src) == MEM)
8209	src = force_reg (mode, src);
8210    }
8211
8212  /* Emit the instruction.  */
8213
8214  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8215  if (reload_in_progress || code == NOT)
8216    {
8217      /* Reload doesn't know about the flags register, and doesn't know that
8218         it doesn't want to clobber it.  */
8219      if (code != NOT)
8220        abort ();
8221      emit_insn (op);
8222    }
8223  else
8224    {
8225      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8226      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8227    }
8228
8229  /* Fix up the destination if needed.  */
8230  if (dst != operands[0])
8231    emit_move_insn (operands[0], dst);
8232}
8233
8234/* Return TRUE or FALSE depending on whether the unary operator meets the
8235   appropriate constraints.  */
8236
8237int
8238ix86_unary_operator_ok (code, mode, operands)
8239     enum rtx_code code ATTRIBUTE_UNUSED;
8240     enum machine_mode mode ATTRIBUTE_UNUSED;
8241     rtx operands[2] ATTRIBUTE_UNUSED;
8242{
8243  /* If one of operands is memory, source and destination must match.  */
8244  if ((GET_CODE (operands[0]) == MEM
8245       || GET_CODE (operands[1]) == MEM)
8246      && ! rtx_equal_p (operands[0], operands[1]))
8247    return FALSE;
8248  return TRUE;
8249}
8250
8251/* Return TRUE or FALSE depending on whether the first SET in INSN
8252   has source and destination with matching CC modes, and that the
8253   CC mode is at least as constrained as REQ_MODE.  */
8254
8255int
8256ix86_match_ccmode (insn, req_mode)
8257     rtx insn;
8258     enum machine_mode req_mode;
8259{
8260  rtx set;
8261  enum machine_mode set_mode;
8262
8263  set = PATTERN (insn);
8264  if (GET_CODE (set) == PARALLEL)
8265    set = XVECEXP (set, 0, 0);
8266  if (GET_CODE (set) != SET)
8267    abort ();
8268  if (GET_CODE (SET_SRC (set)) != COMPARE)
8269    abort ();
8270
8271  set_mode = GET_MODE (SET_DEST (set));
8272  switch (set_mode)
8273    {
8274    case CCNOmode:
8275      if (req_mode != CCNOmode
8276	  && (req_mode != CCmode
8277	      || XEXP (SET_SRC (set), 1) != const0_rtx))
8278	return 0;
8279      break;
8280    case CCmode:
8281      if (req_mode == CCGCmode)
8282	return 0;
8283      /* FALLTHRU */
8284    case CCGCmode:
8285      if (req_mode == CCGOCmode || req_mode == CCNOmode)
8286	return 0;
8287      /* FALLTHRU */
8288    case CCGOCmode:
8289      if (req_mode == CCZmode)
8290	return 0;
8291      /* FALLTHRU */
8292    case CCZmode:
8293      break;
8294
8295    default:
8296      abort ();
8297    }
8298
8299  return (GET_MODE (SET_SRC (set)) == set_mode);
8300}
8301
8302/* Generate insn patterns to do an integer compare of OPERANDS.  */
8303
8304static rtx
8305ix86_expand_int_compare (code, op0, op1)
8306     enum rtx_code code;
8307     rtx op0, op1;
8308{
8309  enum machine_mode cmpmode;
8310  rtx tmp, flags;
8311
8312  cmpmode = SELECT_CC_MODE (code, op0, op1);
8313  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8314
8315  /* This is very simple, but making the interface the same as in the
8316     FP case makes the rest of the code easier.  */
8317  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8318  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8319
8320  /* Return the test that should be put into the flags user, i.e.
8321     the bcc, scc, or cmov instruction.  */
8322  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8323}
8324
8325/* Figure out whether to use ordered or unordered fp comparisons.
8326   Return the appropriate mode to use.  */
8327
8328enum machine_mode
8329ix86_fp_compare_mode (code)
8330     enum rtx_code code ATTRIBUTE_UNUSED;
8331{
8332  /* ??? In order to make all comparisons reversible, we do all comparisons
8333     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
8334     all forms of trapping and nontrapping comparisons, we can make inequality
8335     comparisons trapping again, since that results in better code when using
8336     FCOM based compares.  */
8337  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8338}
8339
8340enum machine_mode
8341ix86_cc_mode (code, op0, op1)
8342     enum rtx_code code;
8343     rtx op0, op1;
8344{
8345  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8346    return ix86_fp_compare_mode (code);
8347  switch (code)
8348    {
8349      /* Only zero flag is needed.  */
8350    case EQ:			/* ZF=0 */
8351    case NE:			/* ZF!=0 */
8352      return CCZmode;
8353      /* Codes needing carry flag.  */
8354    case GEU:			/* CF=0 */
8355    case GTU:			/* CF=0 & ZF=0 */
8356    case LTU:			/* CF=1 */
8357    case LEU:			/* CF=1 | ZF=1 */
8358      return CCmode;
8359      /* Codes possibly doable only with sign flag when
8360         comparing against zero.  */
8361    case GE:			/* SF=OF   or   SF=0 */
8362    case LT:			/* SF<>OF  or   SF=1 */
8363      if (op1 == const0_rtx)
8364	return CCGOCmode;
8365      else
8366	/* For other cases Carry flag is not required.  */
8367	return CCGCmode;
8368      /* Codes doable only with the sign flag when comparing
8369         against zero, but for which we lack a jump instruction,
8370         so we need relational tests against overflow,
8371         which thus needs to be zero.  */
8372    case GT:			/* ZF=0 & SF=OF */
8373    case LE:			/* ZF=1 | SF<>OF */
8374      if (op1 == const0_rtx)
8375	return CCNOmode;
8376      else
8377	return CCGCmode;
8378      /* The strcmp pattern does (use flags), and combine may ask us for a
8379	 proper mode.  */
8380    case USE:
8381      return CCmode;
8382    default:
8383      abort ();
8384    }
8385}
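
/* For example, (gt x (const_int 0)) needs only ZF and the SF=OF test, so
   CCNOmode suffices, while (gtu x y) needs the carry flag and gets the
   fully constrained CCmode.  */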
8386
8387/* Return true if we should use an FCOMI instruction for this fp comparison.  */
8388
8389int
8390ix86_use_fcomi_compare (code)
8391     enum rtx_code code ATTRIBUTE_UNUSED;
8392{
8393  enum rtx_code swapped_code = swap_condition (code);
8394  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8395	  || (ix86_fp_comparison_cost (swapped_code)
8396	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
8397}
8398
8399/* Swap, force into registers, or otherwise massage the two operands
8400   of an fp comparison.  The operands are updated in place; the new
8401   comparison code is returned.  */
8402
8403static enum rtx_code
8404ix86_prepare_fp_compare_args (code, pop0, pop1)
8405     enum rtx_code code;
8406     rtx *pop0, *pop1;
8407{
8408  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8409  rtx op0 = *pop0, op1 = *pop1;
8410  enum machine_mode op_mode = GET_MODE (op0);
8411  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8412
8413  /* All of the unordered compare instructions only work on registers.
8414     The same is true of the XFmode compare instructions.  The same is
8415     true of the fcomi compare instructions.  */
8416
8417  if (!is_sse
8418      && (fpcmp_mode == CCFPUmode
8419	  || op_mode == XFmode
8420	  || op_mode == TFmode
8421	  || ix86_use_fcomi_compare (code)))
8422    {
8423      op0 = force_reg (op_mode, op0);
8424      op1 = force_reg (op_mode, op1);
8425    }
8426  else
8427    {
8428      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
8429	 things around if they appear profitable, otherwise force op0
8430	 into a register.  */
8431
8432      if (standard_80387_constant_p (op0) == 0
8433	  || (GET_CODE (op0) == MEM
8434	      && ! (standard_80387_constant_p (op1) == 0
8435		    || GET_CODE (op1) == MEM)))
8436	{
8437	  rtx tmp;
8438	  tmp = op0, op0 = op1, op1 = tmp;
8439	  code = swap_condition (code);
8440	}
8441
8442      if (GET_CODE (op0) != REG)
8443	op0 = force_reg (op_mode, op0);
8444
8445      if (CONSTANT_P (op1))
8446	{
8447	  if (standard_80387_constant_p (op1))
8448	    op1 = force_reg (op_mode, op1);
8449	  else
8450	    op1 = validize_mem (force_const_mem (op_mode, op1));
8451	}
8452    }
8453
8454  /* Try to rearrange the comparison to make it cheaper.  */
8455  if (ix86_fp_comparison_cost (code)
8456      > ix86_fp_comparison_cost (swap_condition (code))
8457      && (GET_CODE (op1) == REG || !no_new_pseudos))
8458    {
8459      rtx tmp;
8460      tmp = op0, op0 = op1, op1 = tmp;
8461      code = swap_condition (code);
8462      if (GET_CODE (op0) != REG)
8463	op0 = force_reg (op_mode, op0);
8464    }
8465
8466  *pop0 = op0;
8467  *pop1 = op1;
8468  return code;
8469}
8470
8471/* Convert comparison codes we use to represent FP comparison to integer
8472   code that will result in proper branch.  Return UNKNOWN if no such code
8473   is available.  */
8474static enum rtx_code
8475ix86_fp_compare_code_to_integer (code)
8476     enum rtx_code code;
8477{
8478  switch (code)
8479    {
8480    case GT:
8481      return GTU;
8482    case GE:
8483      return GEU;
8484    case ORDERED:
8485    case UNORDERED:
8486      return code;
8488    case UNEQ:
8489      return EQ;
8491    case UNLT:
8492      return LTU;
8494    case UNLE:
8495      return LEU;
8497    case LTGT:
8498      return NE;
8500    default:
8501      return UNKNOWN;
8502    }
8503}
8504
8505/* Split comparison code CODE into comparisons we can do using branch
8506   instructions.  BYPASS_CODE is the comparison code for a branch that
8507   branches around FIRST_CODE and SECOND_CODE.  If one of the branches
8508   is not required, its value is set to NIL.
8509   We never require more than two branches.  */
8510static void
8511ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8512     enum rtx_code code, *bypass_code, *first_code, *second_code;
8513{
8514  *first_code = code;
8515  *bypass_code = NIL;
8516  *second_code = NIL;
8517
8518  /* The fcomi comparison sets flags as follows:
8519
8520     cmp    ZF PF CF
8521     >      0  0  0
8522     <      0  0  1
8523     =      1  0  0
8524     un     1  1  1 */
8525
8526  switch (code)
8527    {
8528    case GT:			/* GTU - CF=0 & ZF=0 */
8529    case GE:			/* GEU - CF=0 */
8530    case ORDERED:		/* PF=0 */
8531    case UNORDERED:		/* PF=1 */
8532    case UNEQ:			/* EQ - ZF=1 */
8533    case UNLT:			/* LTU - CF=1 */
8534    case UNLE:			/* LEU - CF=1 | ZF=1 */
8535    case LTGT:			/* EQ - ZF=0 */
8536      break;
8537    case LT:			/* LTU - CF=1 - fails on unordered */
8538      *first_code = UNLT;
8539      *bypass_code = UNORDERED;
8540      break;
8541    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
8542      *first_code = UNLE;
8543      *bypass_code = UNORDERED;
8544      break;
8545    case EQ:			/* EQ - ZF=1 - fails on unordered */
8546      *first_code = UNEQ;
8547      *bypass_code = UNORDERED;
8548      break;
8549    case NE:			/* NE - ZF=0 - fails on unordered */
8550      *first_code = LTGT;
8551      *second_code = UNORDERED;
8552      break;
8553    case UNGE:			/* GEU - CF=0 - fails on unordered */
8554      *first_code = GE;
8555      *second_code = UNORDERED;
8556      break;
8557    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
8558      *first_code = GT;
8559      *second_code = UNORDERED;
8560      break;
8561    default:
8562      abort ();
8563    }
8564  if (!TARGET_IEEE_FP)
8565    {
8566      *second_code = NIL;
8567      *bypass_code = NIL;
8568    }
8569}
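
/* A standalone sketch (not GCC code) replaying the fcomi flag table above:
   for each hardware outcome it evaluates a few of the condition tests,
   which makes entries such as UNLE = (CF=1 | ZF=1) easy to verify.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* ZF, PF, CF as set by fcomi for each comparison outcome.  */
  static const struct { const char *name; int zf, pf, cf; } out[] = {
    { ">",  0, 0, 0 },
    { "<",  0, 0, 1 },
    { "=",  1, 0, 0 },
    { "un", 1, 1, 1 }
  };
  int i;

  for (i = 0; i < 4; i++)
    printf ("%-2s  UNLT(CF)=%d  UNLE(CF|ZF)=%d  UNEQ(ZF)=%d  ORDERED(!PF)=%d\n",
	    out[i].name, out[i].cf, out[i].cf | out[i].zf,
	    out[i].zf, !out[i].pf);
  return 0;
}
#endif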
8570
8571/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8572   All following functions use the number of instructions as a cost metric.
8573   In the future this should be tweaked to compute bytes for optimize_size and
8574   take into account the performance of various instructions on various CPUs.  */
8575static int
8576ix86_fp_comparison_arithmetics_cost (code)
8577     enum rtx_code code;
8578{
8579  if (!TARGET_IEEE_FP)
8580    return 4;
8581  /* The cost of code output by ix86_expand_fp_compare.  */
8582  switch (code)
8583    {
8584    case UNLE:
8585    case UNLT:
8586    case LTGT:
8587    case GT:
8588    case GE:
8589    case UNORDERED:
8590    case ORDERED:
8591    case UNEQ:
8592      return 4;
8594    case LT:
8595    case NE:
8596    case EQ:
8597    case UNGE:
8598      return 5;
8600    case LE:
8601    case UNGT:
8602      return 6;
8604    default:
8605      abort ();
8606    }
8607}
8608
8609/* Return cost of comparison done using fcomi operation.
8610   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8611static int
8612ix86_fp_comparison_fcomi_cost (code)
8613     enum rtx_code code;
8614{
8615  enum rtx_code bypass_code, first_code, second_code;
8616  /* Return an arbitrarily high cost when the instruction is not supported -
8617     this prevents gcc from using it.  */
8618  if (!TARGET_CMOVE)
8619    return 1024;
8620  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8621  return (bypass_code != NIL || second_code != NIL) + 2;
8622}
8623
8624/* Return cost of comparison done using sahf operation.
8625   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8626static int
8627ix86_fp_comparison_sahf_cost (code)
8628     enum rtx_code code;
8629{
8630  enum rtx_code bypass_code, first_code, second_code;
8631  /* Return an arbitrarily high cost when the instruction is not preferred -
8632     this discourages gcc from using it.  */
8633  if (!TARGET_USE_SAHF && !optimize_size)
8634    return 1024;
8635  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8636  return (bypass_code != NIL || second_code != NIL) + 3;
8637}
8638
8639/* Compute cost of the comparison done using any method.
8640   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8641static int
8642ix86_fp_comparison_cost (code)
8643     enum rtx_code code;
8644{
8645  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8646  int min;
8647
8648  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8649  sahf_cost = ix86_fp_comparison_sahf_cost (code);
8650
8651  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8652  if (min > sahf_cost)
8653    min = sahf_cost;
8654  if (min > fcomi_cost)
8655    min = fcomi_cost;
8656  return min;
8657}
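
/* Worked example: under TARGET_IEEE_FP, NE splits into LTGT plus a second
   UNORDERED branch (see ix86_fp_comparison_codes), so the fcomi cost is
   1 + 2 = 3 and the sahf cost 1 + 3 = 4, while the arithmetic sequence
   costs 5; with TARGET_CMOVE available, fcomi therefore wins.  */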
8658
8659/* Generate insn patterns to do a floating point compare of OPERANDS.  */
8660
8661static rtx
8662ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8663     enum rtx_code code;
8664     rtx op0, op1, scratch;
8665     rtx *second_test;
8666     rtx *bypass_test;
8667{
8668  enum machine_mode fpcmp_mode, intcmp_mode;
8669  rtx tmp, tmp2;
8670  int cost = ix86_fp_comparison_cost (code);
8671  enum rtx_code bypass_code, first_code, second_code;
8672
8673  fpcmp_mode = ix86_fp_compare_mode (code);
8674  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8675
8676  if (second_test)
8677    *second_test = NULL_RTX;
8678  if (bypass_test)
8679    *bypass_test = NULL_RTX;
8680
8681  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8682
8683  /* Do fcomi/sahf based test when profitable.  */
8684  if ((bypass_code == NIL || bypass_test)
8685      && (second_code == NIL || second_test)
8686      && ix86_fp_comparison_arithmetics_cost (code) > cost)
8687    {
8688      if (TARGET_CMOVE)
8689	{
8690	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8691	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8692			     tmp);
8693	  emit_insn (tmp);
8694	}
8695      else
8696	{
8697	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8698	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8699	  if (!scratch)
8700	    scratch = gen_reg_rtx (HImode);
8701	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8702	  emit_insn (gen_x86_sahf_1 (scratch));
8703	}
8704
8705      /* The FP codes work out to act like unsigned.  */
8706      intcmp_mode = fpcmp_mode;
8707      code = first_code;
8708      if (bypass_code != NIL)
8709	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8710				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
8711				       const0_rtx);
8712      if (second_code != NIL)
8713	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8714				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
8715				       const0_rtx);
8716    }
8717  else
8718    {
8719      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
8720      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8721      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8722      if (!scratch)
8723	scratch = gen_reg_rtx (HImode);
8724      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8725
8726      /* In the unordered case, we have to check C2 for NaN's, which
8727	 doesn't happen to work out to anything nice combination-wise.
8728	 So do some bit twiddling on the value we've got in AH to come
8729	 up with an appropriate set of condition codes.  */
8730
8731      intcmp_mode = CCNOmode;
8732      switch (code)
8733	{
8734	case GT:
8735	case UNGT:
8736	  if (code == GT || !TARGET_IEEE_FP)
8737	    {
8738	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8739	      code = EQ;
8740	    }
8741	  else
8742	    {
8743	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8744	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8745	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8746	      intcmp_mode = CCmode;
8747	      code = GEU;
8748	    }
8749	  break;
8750	case LT:
8751	case UNLT:
8752	  if (code == LT && TARGET_IEEE_FP)
8753	    {
8754	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8755	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8756	      intcmp_mode = CCmode;
8757	      code = EQ;
8758	    }
8759	  else
8760	    {
8761	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8762	      code = NE;
8763	    }
8764	  break;
8765	case GE:
8766	case UNGE:
8767	  if (code == GE || !TARGET_IEEE_FP)
8768	    {
8769	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8770	      code = EQ;
8771	    }
8772	  else
8773	    {
8774	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8775	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8776					     GEN_INT (0x01)));
8777	      code = NE;
8778	    }
8779	  break;
8780	case LE:
8781	case UNLE:
8782	  if (code == LE && TARGET_IEEE_FP)
8783	    {
8784	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8785	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8786	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8787	      intcmp_mode = CCmode;
8788	      code = LTU;
8789	    }
8790	  else
8791	    {
8792	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8793	      code = NE;
8794	    }
8795	  break;
8796	case EQ:
8797	case UNEQ:
8798	  if (code == EQ && TARGET_IEEE_FP)
8799	    {
8800	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8801	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8802	      intcmp_mode = CCmode;
8803	      code = EQ;
8804	    }
8805	  else
8806	    {
8807	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8808	      code = NE;
8809	      break;
8810	    }
8811	  break;
8812	case NE:
8813	case LTGT:
8814	  if (code == NE && TARGET_IEEE_FP)
8815	    {
8816	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8817	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8818					     GEN_INT (0x40)));
8819	      code = NE;
8820	    }
8821	  else
8822	    {
8823	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8824	      code = EQ;
8825	    }
8826	  break;
8827
8828	case UNORDERED:
8829	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8830	  code = NE;
8831	  break;
8832	case ORDERED:
8833	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8834	  code = EQ;
8835	  break;
8836
8837	default:
8838	  abort ();
8839	}
8840    }
8841
8842  /* Return the test that should be put into the flags user, i.e.
8843     the bcc, scc, or cmov instruction.  */
8844  return gen_rtx_fmt_ee (code, VOIDmode,
8845			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8846			 const0_rtx);
8847}
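
/* A standalone sketch (not GCC code) of the AH bit twiddling above: after
   fnstsw, AH carries C0=0x01, C2=0x04 and C3=0x40, so e.g. the non-IEEE
   GT test `test $0x45, %ah; je' succeeds exactly when all three bits are
   clear, i.e. only for a `greater' outcome.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* AH after fnstsw for each fcom outcome: C0=0x01, C2=0x04, C3=0x40.  */
  static const struct { const char *name; unsigned ah; } out[] = {
    { ">",  0x00 },
    { "<",  0x01 },
    { "=",  0x40 },
    { "un", 0x45 }
  };
  int i;

  for (i = 0; i < 4; i++)
    printf ("%-2s  GT:(ah & 0x45)==0 -> %d   UNORDERED:(ah & 0x04)!=0 -> %d\n",
	    out[i].name, (out[i].ah & 0x45) == 0, (out[i].ah & 0x04) != 0);
  return 0;
}
#endif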
8848
8849rtx
8850ix86_expand_compare (code, second_test, bypass_test)
8851     enum rtx_code code;
8852     rtx *second_test, *bypass_test;
8853{
8854  rtx op0, op1, ret;
8855  op0 = ix86_compare_op0;
8856  op1 = ix86_compare_op1;
8857
8858  if (second_test)
8859    *second_test = NULL_RTX;
8860  if (bypass_test)
8861    *bypass_test = NULL_RTX;
8862
8863  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8864    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8865				  second_test, bypass_test);
8866  else
8867    ret = ix86_expand_int_compare (code, op0, op1);
8868
8869  return ret;
8870}
8871
8872/* Return true if the CODE will result in nontrivial jump sequence.  */
8873bool
8874ix86_fp_jump_nontrivial_p (code)
8875    enum rtx_code code;
8876{
8877  enum rtx_code bypass_code, first_code, second_code;
8878  if (!TARGET_CMOVE)
8879    return true;
8880  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8881  return bypass_code != NIL || second_code != NIL;
8882}
8883
8884void
8885ix86_expand_branch (code, label)
8886     enum rtx_code code;
8887     rtx label;
8888{
8889  rtx tmp;
8890
8891  switch (GET_MODE (ix86_compare_op0))
8892    {
8893    case QImode:
8894    case HImode:
8895    case SImode:
8896      simple:
8897      tmp = ix86_expand_compare (code, NULL, NULL);
8898      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8899				  gen_rtx_LABEL_REF (VOIDmode, label),
8900				  pc_rtx);
8901      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8902      return;
8903
8904    case SFmode:
8905    case DFmode:
8906    case XFmode:
8907    case TFmode:
8908      {
8909	rtvec vec;
8910	int use_fcomi;
8911	enum rtx_code bypass_code, first_code, second_code;
8912
8913	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8914					     &ix86_compare_op1);
8915
8916	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8917
8918	/* Check whether we will use the natural sequence with one jump.  If
8919	   so, we can expand the jump early.  Otherwise delay expansion by
8920	   creating a compound insn so as not to confuse the optimizers.  */
8921	if (bypass_code == NIL && second_code == NIL
8922	    && TARGET_CMOVE)
8923	  {
8924	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8925				  gen_rtx_LABEL_REF (VOIDmode, label),
8926				  pc_rtx, NULL_RTX);
8927	  }
8928	else
8929	  {
8930	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
8931				  ix86_compare_op0, ix86_compare_op1);
8932	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8933					gen_rtx_LABEL_REF (VOIDmode, label),
8934					pc_rtx);
8935	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8936
8937	    use_fcomi = ix86_use_fcomi_compare (code);
8938	    vec = rtvec_alloc (3 + !use_fcomi);
8939	    RTVEC_ELT (vec, 0) = tmp;
8940	    RTVEC_ELT (vec, 1)
8941	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8942	    RTVEC_ELT (vec, 2)
8943	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8944	    if (! use_fcomi)
8945	      RTVEC_ELT (vec, 3)
8946		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8947
8948	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8949	  }
8950	return;
8951      }
8952
8953    case DImode:
8954      if (TARGET_64BIT)
8955	goto simple;
8956      /* Expand DImode branch into multiple compare+branch.  */
8957      {
8958	rtx lo[2], hi[2], label2;
8959	enum rtx_code code1, code2, code3;
8960
8961	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8962	  {
8963	    tmp = ix86_compare_op0;
8964	    ix86_compare_op0 = ix86_compare_op1;
8965	    ix86_compare_op1 = tmp;
8966	    code = swap_condition (code);
8967	  }
8968	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8969	split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8970
8971	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8972	   avoid two branches.  This costs one extra insn, so disable when
8973	   optimizing for size.  */
8974
8975	if ((code == EQ || code == NE)
8976	    && (!optimize_size
8977	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
8978	  {
8979	    rtx xor0, xor1;
8980
8981	    xor1 = hi[0];
8982	    if (hi[1] != const0_rtx)
8983	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8984				   NULL_RTX, 0, OPTAB_WIDEN);
8985
8986	    xor0 = lo[0];
8987	    if (lo[1] != const0_rtx)
8988	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8989				   NULL_RTX, 0, OPTAB_WIDEN);
8990
8991	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8992				NULL_RTX, 0, OPTAB_WIDEN);
8993
8994	    ix86_compare_op0 = tmp;
8995	    ix86_compare_op1 = const0_rtx;
8996	    ix86_expand_branch (code, label);
8997	    return;
8998	  }
8999
9000	/* Otherwise, if we are doing a less-than or greater-than-or-equal
9001	   comparison, op1 is a constant, and the low word is zero, then we
9002	   can just examine the high word.  */
9003
9004	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9005	  switch (code)
9006	    {
9007	    case LT: case LTU: case GE: case GEU:
9008	      ix86_compare_op0 = hi[0];
9009	      ix86_compare_op1 = hi[1];
9010	      ix86_expand_branch (code, label);
9011	      return;
9012	    default:
9013	      break;
9014	    }
9015
9016	/* Otherwise, we need two or three jumps.  */
9017
9018	label2 = gen_label_rtx ();
9019
9020	code1 = code;
9021	code2 = swap_condition (code);
9022	code3 = unsigned_condition (code);
9023
9024	switch (code)
9025	  {
9026	  case LT: case GT: case LTU: case GTU:
9027	    break;
9028
9029	  case LE:   code1 = LT;  code2 = GT;  break;
9030	  case GE:   code1 = GT;  code2 = LT;  break;
9031	  case LEU:  code1 = LTU; code2 = GTU; break;
9032	  case GEU:  code1 = GTU; code2 = LTU; break;
9033
9034	  case EQ:   code1 = NIL; code2 = NE;  break;
9035	  case NE:   code2 = NIL; break;
9036
9037	  default:
9038	    abort ();
9039	  }
9040
9041	/*
9042	 * a < b =>
9043	 *    if (hi(a) < hi(b)) goto true;
9044	 *    if (hi(a) > hi(b)) goto false;
9045	 *    if (lo(a) < lo(b)) goto true;
9046	 *  false:
9047	 */
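	/* As a concrete example (hypothetical register allocation), a
	   signed 64-bit "a < b" with a in %edx:%eax and b in %ecx:%ebx
	   comes out roughly as

		cmpl	%ecx, %edx	# compare high words
		jl	.Ltrue
		jg	.Lfalse
		cmpl	%ebx, %eax	# high words equal: compare low
		jb	.Ltrue		# ... words unsigned (code3 == LTU)
	   .Lfalse:

	   which is the three-jump shape sketched above.  */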
9048
9049	ix86_compare_op0 = hi[0];
9050	ix86_compare_op1 = hi[1];
9051
9052	if (code1 != NIL)
9053	  ix86_expand_branch (code1, label);
9054	if (code2 != NIL)
9055	  ix86_expand_branch (code2, label2);
9056
9057	ix86_compare_op0 = lo[0];
9058	ix86_compare_op1 = lo[1];
9059	ix86_expand_branch (code3, label);
9060
9061	if (code2 != NIL)
9062	  emit_label (label2);
9063	return;
9064      }
9065
9066    default:
9067      abort ();
9068    }
9069}
9070
9071/* Split branch based on floating point condition.  */
9072void
9073ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9074     enum rtx_code code;
9075     rtx op1, op2, target1, target2, tmp;
9076{
9077  rtx second, bypass;
9078  rtx label = NULL_RTX;
9079  rtx condition;
9080  int bypass_probability = -1, second_probability = -1, probability = -1;
9081  rtx i;
9082
9083  if (target2 != pc_rtx)
9084    {
9085      rtx tmp = target2;
9086      code = reverse_condition_maybe_unordered (code);
9087      target2 = target1;
9088      target1 = tmp;
9089    }
9090
9091  condition = ix86_expand_fp_compare (code, op1, op2,
9092				      tmp, &second, &bypass);
9093
9094  if (split_branch_probability >= 0)
9095    {
9096      /* Distribute the probabilities across the jumps.
9097	 Assume that BYPASS and SECOND always test
9098	 for UNORDERED.  */
9099      probability = split_branch_probability;
9100
9101      /* A value of 1 is low enough that the probability does not need
9102	 updating.  Later we may run some experiments and see
9103	 if unordered values are more frequent in practice.  */
9104      if (bypass)
9105	bypass_probability = 1;
9106      if (second)
9107	second_probability = 1;
9108    }
9109  if (bypass != NULL_RTX)
9110    {
9111      label = gen_label_rtx ();
9112      i = emit_jump_insn (gen_rtx_SET
9113			  (VOIDmode, pc_rtx,
9114			   gen_rtx_IF_THEN_ELSE (VOIDmode,
9115						 bypass,
9116						 gen_rtx_LABEL_REF (VOIDmode,
9117								    label),
9118						 pc_rtx)));
9119      if (bypass_probability >= 0)
9120	REG_NOTES (i)
9121	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
9122			       GEN_INT (bypass_probability),
9123			       REG_NOTES (i));
9124    }
9125  i = emit_jump_insn (gen_rtx_SET
9126		      (VOIDmode, pc_rtx,
9127		       gen_rtx_IF_THEN_ELSE (VOIDmode,
9128					     condition, target1, target2)));
9129  if (probability >= 0)
9130    REG_NOTES (i)
9131      = gen_rtx_EXPR_LIST (REG_BR_PROB,
9132			   GEN_INT (probability),
9133			   REG_NOTES (i));
9134  if (second != NULL_RTX)
9135    {
9136      i = emit_jump_insn (gen_rtx_SET
9137			  (VOIDmode, pc_rtx,
9138			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9139						 target2)));
9140      if (second_probability >= 0)
9141	REG_NOTES (i)
9142	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
9143			       GEN_INT (second_probability),
9144			       REG_NOTES (i));
9145    }
9146  if (label != NULL_RTX)
9147    emit_label (label);
9148}
9149
9150int
9151ix86_expand_setcc (code, dest)
9152     enum rtx_code code;
9153     rtx dest;
9154{
9155  rtx ret, tmp, tmpreg;
9156  rtx second_test, bypass_test;
9157
9158  if (GET_MODE (ix86_compare_op0) == DImode
9159      && !TARGET_64BIT)
9160    return 0; /* FAIL */
9161
9162  if (GET_MODE (dest) != QImode)
9163    abort ();
9164
9165  ret = ix86_expand_compare (code, &second_test, &bypass_test);
9166  PUT_MODE (ret, QImode);
9167
9168  tmp = dest;
9169  tmpreg = dest;
9170
9171  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9172  if (bypass_test || second_test)
9173    {
9174      rtx test = second_test;
9175      int bypass = 0;
9176      rtx tmp2 = gen_reg_rtx (QImode);
9177      if (bypass_test)
9178	{
9179	  if (second_test)
9180	    abort ();
9181	  test = bypass_test;
9182	  bypass = 1;
9183	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9184	}
9185      PUT_MODE (test, QImode);
9186      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9187
9188      if (bypass)
9189	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9190      else
9191	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9192    }
9193
9194  return 1; /* DONE */
9195}
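/* Illustration: with TARGET_IEEE_FP an SSE equality test "a == b" goes
   through the second/bypass machinery above and comes out roughly as

	ucomisd	%xmm1, %xmm0
	sete	%al		# ZF: equal or unordered
	setnp	%dl		# reversed bypass: no parity = ordered
	andb	%dl, %al	# equal and ordered

   (hypothetical register choices; the actual registers are up to the
   allocator).  */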
9196
9197int
9198ix86_expand_int_movcc (operands)
9199     rtx operands[];
9200{
9201  enum rtx_code code = GET_CODE (operands[1]), compare_code;
9202  rtx compare_seq, compare_op;
9203  rtx second_test, bypass_test;
9204  enum machine_mode mode = GET_MODE (operands[0]);
9205
9206  /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
9207     If the comparison is done with an immediate, we can convert it to LTU
9208     or GEU by adjusting the constant.  */
9209
9210  if ((code == LEU || code == GTU)
9211      && GET_CODE (ix86_compare_op1) == CONST_INT
9212      && mode != HImode
9213      && INTVAL (ix86_compare_op1) != -1
9214      /* For x86-64, the immediate field in the instruction is 32-bit
9215	 signed, so we can't increment a DImode value above 0x7fffffff.  */
9216      && (!TARGET_64BIT
9217	  || GET_MODE (ix86_compare_op0) != DImode
9218	  || INTVAL (ix86_compare_op1) != 0x7fffffff)
9219      && GET_CODE (operands[2]) == CONST_INT
9220      && GET_CODE (operands[3]) == CONST_INT)
9221    {
9222      if (code == LEU)
9223	code = LTU;
9224      else
9225	code = GEU;
9226      ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
9227				       GET_MODE (ix86_compare_op0));
9228    }
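  /* For instance (illustrative), the unsigned test "x <= 41" (LEU against
     the immediate 41) is rewritten above as "x < 42" (LTU against 42),
     which the sbb-based sequences below can handle directly.  */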
9229
9230  start_sequence ();
9231  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9232  compare_seq = get_insns ();
9233  end_sequence ();
9234
9235  compare_code = GET_CODE (compare_op);
9236
9237  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9238     HImode insns, we'd be swallowed in word prefix ops.  */
9239
9240  if (mode != HImode
9241      && (mode != DImode || TARGET_64BIT)
9242      && GET_CODE (operands[2]) == CONST_INT
9243      && GET_CODE (operands[3]) == CONST_INT)
9244    {
9245      rtx out = operands[0];
9246      HOST_WIDE_INT ct = INTVAL (operands[2]);
9247      HOST_WIDE_INT cf = INTVAL (operands[3]);
9248      HOST_WIDE_INT diff;
9249
9250      if ((compare_code == LTU || compare_code == GEU)
9251	  && !second_test && !bypass_test)
9252	{
9253	  /* Detect overlap between destination and compare sources.  */
9254	  rtx tmp = out;
9255
9256	  /* To simplify rest of code, restrict to the GEU case.  */
9257	  if (compare_code == LTU)
9258	    {
9259	      HOST_WIDE_INT tmp = ct;
9260	      ct = cf;
9261	      cf = tmp;
9262	      compare_code = reverse_condition (compare_code);
9263	      code = reverse_condition (code);
9264	    }
9265	  diff = ct - cf;
9266
9267	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9268	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
9269	    tmp = gen_reg_rtx (mode);
9270
9271	  emit_insn (compare_seq);
9272	  if (mode == DImode)
9273	    emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9274	  else
9275	    emit_insn (gen_x86_movsicc_0_m1 (tmp));
9276
9277	  if (diff == 1)
9278	    {
9279	      /*
9280	       * cmpl op0,op1
9281	       * sbbl dest,dest
9282	       * [addl dest, ct]
9283	       *
9284	       * Size 5 - 8.
9285	       */
9286	      if (ct)
9287	       	tmp = expand_simple_binop (mode, PLUS,
9288					   tmp, GEN_INT (ct),
9289					   tmp, 1, OPTAB_DIRECT);
9290	    }
9291	  else if (cf == -1)
9292	    {
9293	      /*
9294	       * cmpl op0,op1
9295	       * sbbl dest,dest
9296	       * orl $ct, dest
9297	       *
9298	       * Size 8.
9299	       */
9300	      tmp = expand_simple_binop (mode, IOR,
9301					 tmp, GEN_INT (ct),
9302					 tmp, 1, OPTAB_DIRECT);
9303	    }
9304	  else if (diff == -1 && ct)
9305	    {
9306	      /*
9307	       * cmpl op0,op1
9308	       * sbbl dest,dest
9309	       * notl dest
9310	       * [addl dest, cf]
9311	       *
9312	       * Size 8 - 11.
9313	       */
9314	      tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9315	      if (cf)
9316	       	tmp = expand_simple_binop (mode, PLUS,
9317					   tmp, GEN_INT (cf),
9318					   tmp, 1, OPTAB_DIRECT);
9319	    }
9320	  else
9321	    {
9322	      /*
9323	       * cmpl op0,op1
9324	       * sbbl dest,dest
9325	       * [notl dest]
9326	       * andl cf - ct, dest
9327	       * [addl dest, ct]
9328	       *
9329	       * Size 8 - 11.
9330	       */
9331
9332	      if (cf == 0)
9333		{
9334		  cf = ct;
9335		  ct = 0;
9336		  tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9337		}
9338
9339	      tmp = expand_simple_binop (mode, AND,
9340					 tmp,
9341					 gen_int_mode (cf - ct, mode),
9342					 tmp, 1, OPTAB_DIRECT);
9343	      if (ct)
9344	       	tmp = expand_simple_binop (mode, PLUS,
9345					   tmp, GEN_INT (ct),
9346					   tmp, 1, OPTAB_DIRECT);
9347	    }
9348
9349	  if (tmp != out)
9350	    emit_move_insn (out, tmp);
9351
9352	  return 1; /* DONE */
9353	}
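      /* The building block used above: after a compare that leaves the
	 carry flag equal to the LTU condition,

		cmpl	%ebx, %eax	# CF = (a < b) unsigned
		sbbl	%eax, %eax	# a - a - CF = (a < b) ? -1 : 0

	 so e.g. "r = (a < b) ? -1 : 0" for unsigned a, b needs only those
	 two instructions (illustrative register choices).  */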
9354
9355      diff = ct - cf;
9356      if (diff < 0)
9357	{
9358	  HOST_WIDE_INT tmp;
9359	  tmp = ct, ct = cf, cf = tmp;
9360	  diff = -diff;
9361	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9362	    {
9363	      /* We may be reversing an unordered compare to a normal compare,
9364		 which is not valid in general (we may convert a non-trapping
9365		 condition to a trapping one), but on i386 we currently
9366		 emit all comparisons unordered.  */
9367	      compare_code = reverse_condition_maybe_unordered (compare_code);
9368	      code = reverse_condition_maybe_unordered (code);
9369	    }
9370	  else
9371	    {
9372	      compare_code = reverse_condition (compare_code);
9373	      code = reverse_condition (code);
9374	    }
9375	}
9376
9377      compare_code = NIL;
9378      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9379	  && GET_CODE (ix86_compare_op1) == CONST_INT)
9380	{
9381	  if (ix86_compare_op1 == const0_rtx
9382	      && (code == LT || code == GE))
9383	    compare_code = code;
9384	  else if (ix86_compare_op1 == constm1_rtx)
9385	    {
9386	      if (code == LE)
9387		compare_code = LT;
9388	      else if (code == GT)
9389		compare_code = GE;
9390	    }
9391	}
9392
9393      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
9394      if (compare_code != NIL
9395	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9396	  && (cf == -1 || ct == -1))
9397	{
9398	  /* If the lea code below could be used, only optimize
9399	     if it results in a 2-insn sequence.  */
9400
9401	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9402		 || diff == 3 || diff == 5 || diff == 9)
9403	      || (compare_code == LT && ct == -1)
9404	      || (compare_code == GE && cf == -1))
9405	    {
9406	      /*
9407	       * notl op1	(if necessary)
9408	       * sarl $31, op1
9409	       * orl cf, op1
9410	       */
9411	      if (ct != -1)
9412		{
9413		  cf = ct;
9414	  	  ct = -1;
9415		  code = reverse_condition (code);
9416		}
9417
9418	      out = emit_store_flag (out, code, ix86_compare_op0,
9419				     ix86_compare_op1, VOIDmode, 0, -1);
9420
9421	      out = expand_simple_binop (mode, IOR,
9422					 out, GEN_INT (cf),
9423					 out, 1, OPTAB_DIRECT);
9424	      if (out != operands[0])
9425		emit_move_insn (operands[0], out);
9426
9427	      return 1; /* DONE */
9428	    }
9429	}
9430
9431      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9432	   || diff == 3 || diff == 5 || diff == 9)
9433	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9434	{
9435	  /*
9436	   * xorl dest,dest
9437	   * cmpl op1,op2
9438	   * setcc dest
9439	   * lea cf(dest*(ct-cf)),dest
9440	   *
9441	   * Size 14.
9442	   *
9443	   * This also catches the degenerate setcc-only case.
9444	   */
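	  /* Worked example (illustrative): for ct = 5, cf = 2 the sequence
	     above becomes

		xorl	%eax, %eax
		cmpl	%edx, %ecx
		setcc	%al
		leal	2(%eax,%eax,2), %eax	# 3*%eax + 2: 1 -> 5, 0 -> 2

	     i.e. the setcc result is scaled by ct - cf and offset by cf.  */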
9445
9446	  rtx tmp;
9447	  int nops;
9448
9449	  out = emit_store_flag (out, code, ix86_compare_op0,
9450				 ix86_compare_op1, VOIDmode, 0, 1);
9451
9452	  nops = 0;
9453	  /* On x86_64 the lea instruction operates on Pmode, so we need
9454	     to do the arithmetic in the proper mode to match.  */
9455	  if (diff == 1)
9456	    tmp = copy_rtx (out);
9457	  else
9458	    {
9459	      rtx out1;
9460	      out1 = copy_rtx (out);
9461	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9462	      nops++;
9463	      if (diff & 1)
9464		{
9465		  tmp = gen_rtx_PLUS (mode, tmp, out1);
9466		  nops++;
9467		}
9468	    }
9469	  if (cf != 0)
9470	    {
9471	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9472	      nops++;
9473	    }
9474	  if (tmp != out
9475	      && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9476	    {
9477	      if (nops == 1)
9478		out = force_operand (tmp, copy_rtx (out));
9479	      else
9480		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9481	    }
9482	  if (out != operands[0])
9483	    emit_move_insn (operands[0], copy_rtx (out));
9484
9485	  return 1; /* DONE */
9486	}
9487
9488      /*
9489       * General case:			Jumpful:
9490       *   xorl dest,dest		cmpl op1, op2
9491       *   cmpl op1, op2		movl ct, dest
9492       *   setcc dest			jcc 1f
9493       *   decl dest			movl cf, dest
9494       *   andl (cf-ct),dest		1:
9495       *   addl ct,dest
9496       *
9497       * Size 20.			Size 14.
9498       *
9499       * This is reasonably steep, but branch mispredict costs are
9500       * high on modern CPUs, so consider failing only if optimizing
9501       * for space.
9502       *
9503       * %%% Parameterize branch_cost on the tuning architecture, then
9504       * use that.  The 80386 couldn't care less about mispredicts.
9505       */
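      /* Worked example (illustrative): ct = 20, cf = 10 gives diff = 10,
	 which is not lea-representable, so the left-hand sequence above
	 becomes

		xorl	%eax, %eax
		cmpl	%edx, %ecx
		setcc	%al		# 1 / 0
		decl	%eax		# 0 / -1
		andl	$-10, %eax	# 0 / cf - ct
		addl	$20, %eax	# ct / cf

	 mapping the condition to 20 and its negation to 10, branch-free.  */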
9506
9507      if (!optimize_size && !TARGET_CMOVE)
9508	{
9509	  if (cf == 0)
9510	    {
9511	      cf = ct;
9512	      ct = 0;
9513	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9514		/* We may be reversing an unordered compare to a normal compare,
9515		   which is not valid in general (we may convert a non-trapping
9516		   condition to a trapping one), but on i386 we currently
9517		   emit all comparisons unordered.  */
9518		code = reverse_condition_maybe_unordered (code);
9519	      else
9520		{
9521		  code = reverse_condition (code);
9522		  if (compare_code != NIL)
9523		    compare_code = reverse_condition (compare_code);
9524		}
9525	    }
9526
9527	  if (compare_code != NIL)
9528	    {
9529	      /* notl op1	(if needed)
9530		 sarl $31, op1
9531		 andl (cf-ct), op1
9532	 	 addl ct, op1
9533
9534		 For x < 0 (resp. x <= -1) there will be no notl,
9535		 so if possible swap the constants to get rid of the
9536		 complement.
9537		 True/false will be -1/0 while code below (store flag
9538		 followed by decrement) is 0/-1, so the constants need
9539		 to be exchanged once more.  */
9540
9541	      if (compare_code == GE || !cf)
9542		{
9543	  	  code = reverse_condition (code);
9544		  compare_code = LT;
9545		}
9546	      else
9547		{
9548		  HOST_WIDE_INT tmp = cf;
9549	  	  cf = ct;
9550		  ct = tmp;
9551		}
9552
9553	      out = emit_store_flag (out, code, ix86_compare_op0,
9554				     ix86_compare_op1, VOIDmode, 0, -1);
9555	    }
9556	  else
9557	    {
9558	      out = emit_store_flag (out, code, ix86_compare_op0,
9559				     ix86_compare_op1, VOIDmode, 0, 1);
9560
9561	      out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9562					 out, 1, OPTAB_DIRECT);
9563	    }
9564
9565	  out = expand_simple_binop (mode, AND, out,
9566				     gen_int_mode (cf - ct, mode),
9567				     out, 1, OPTAB_DIRECT);
9568	  if (ct)
9569	    out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9570				       out, 1, OPTAB_DIRECT);
9571	  if (out != operands[0])
9572	    emit_move_insn (operands[0], out);
9573
9574	  return 1; /* DONE */
9575	}
9576    }
9577
9578  if (!TARGET_CMOVE)
9579    {
9580      /* Try a few things more with specific constants and a variable.  */
9581
9582      optab op;
9583      rtx var, orig_out, out, tmp;
9584
9585      if (optimize_size)
9586	return 0; /* FAIL */
9587
9588      /* If one of the two operands is an interesting constant, load the
9589	 constant by recursing and mask the variable in with a logical operation.  */
9590
9591      if (GET_CODE (operands[2]) == CONST_INT)
9592	{
9593	  var = operands[3];
9594	  if (INTVAL (operands[2]) == 0)
9595	    operands[3] = constm1_rtx, op = and_optab;
9596	  else if (INTVAL (operands[2]) == -1)
9597	    operands[3] = const0_rtx, op = ior_optab;
9598	  else
9599	    return 0; /* FAIL */
9600	}
9601      else if (GET_CODE (operands[3]) == CONST_INT)
9602	{
9603	  var = operands[2];
9604	  if (INTVAL (operands[3]) == 0)
9605	    operands[2] = constm1_rtx, op = and_optab;
9606	  else if (INTVAL (operands[3]) == -1)
9607	    operands[2] = const0_rtx, op = ior_optab;
9608	  else
9609	    return 0; /* FAIL */
9610	}
9611      else
9612        return 0; /* FAIL */
9613
9614      orig_out = operands[0];
9615      tmp = gen_reg_rtx (mode);
9616      operands[0] = tmp;
9617
9618      /* Recurse to get the constant loaded.  */
9619      if (ix86_expand_int_movcc (operands) == 0)
9620        return 0; /* FAIL */
9621
9622      /* Mask in the interesting variable.  */
9623      out = expand_binop (mode, op, var, tmp, orig_out, 0,
9624			  OPTAB_WIDEN);
9625      if (out != orig_out)
9626	emit_move_insn (orig_out, out);
9627
9628      return 1; /* DONE */
9629    }
9630
9631  /*
9632   * For comparison with above,
9633   *
9634   * movl cf,dest
9635   * movl ct,tmp
9636   * cmpl op1,op2
9637   * cmovcc tmp,dest
9638   *
9639   * Size 15.
9640   */
9641
9642  if (! nonimmediate_operand (operands[2], mode))
9643    operands[2] = force_reg (mode, operands[2]);
9644  if (! nonimmediate_operand (operands[3], mode))
9645    operands[3] = force_reg (mode, operands[3]);
9646
9647  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9648    {
9649      rtx tmp = gen_reg_rtx (mode);
9650      emit_move_insn (tmp, operands[3]);
9651      operands[3] = tmp;
9652    }
9653  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9654    {
9655      rtx tmp = gen_reg_rtx (mode);
9656      emit_move_insn (tmp, operands[2]);
9657      operands[2] = tmp;
9658    }
9659  if (! register_operand (operands[2], VOIDmode)
9660      && ! register_operand (operands[3], VOIDmode))
9661    operands[2] = force_reg (mode, operands[2]);
9662
9663  emit_insn (compare_seq);
9664  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9665			  gen_rtx_IF_THEN_ELSE (mode,
9666						compare_op, operands[2],
9667						operands[3])));
9668  if (bypass_test)
9669    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9670			    gen_rtx_IF_THEN_ELSE (mode,
9671				  bypass_test,
9672				  operands[3],
9673				  operands[0])));
9674  if (second_test)
9675    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9676			    gen_rtx_IF_THEN_ELSE (mode,
9677				  second_test,
9678				  operands[2],
9679				  operands[0])));
9680
9681  return 1; /* DONE */
9682}
9683
9684int
9685ix86_expand_fp_movcc (operands)
9686     rtx operands[];
9687{
9688  enum rtx_code code;
9689  rtx tmp;
9690  rtx compare_op, second_test, bypass_test;
9691
9692  /* For SF/DFmode conditional moves based on comparisons
9693     in the same mode, we may want to use SSE min/max instructions.  */
9694  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9695       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9696      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9697      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
9698      && (!TARGET_IEEE_FP
9699	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9700      /* We may be called from the post-reload splitter.  */
9701      && (!REG_P (operands[0])
9702	  || SSE_REG_P (operands[0])
9703	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9704    {
9705      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9706      code = GET_CODE (operands[1]);
9707
9708      /* See if we have a (cross) match between the comparison operands
9709         and the conditional move operands.  */
9710      if (rtx_equal_p (operands[2], op1))
9711	{
9712	  rtx tmp = op0;
9713	  op0 = op1;
9714	  op1 = tmp;
9715	  code = reverse_condition_maybe_unordered (code);
9716	}
9717      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9718	{
9719	  /* Check for min operation.  */
9720	  if (code == LT)
9721	    {
9722	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9723	       if (memory_operand (op0, VOIDmode))
9724		 op0 = force_reg (GET_MODE (operands[0]), op0);
9725	       if (GET_MODE (operands[0]) == SFmode)
9726		 emit_insn (gen_minsf3 (operands[0], op0, op1));
9727	       else
9728		 emit_insn (gen_mindf3 (operands[0], op0, op1));
9729	       return 1;
9730	    }
9731	  /* Check for max operation.  */
9732	  if (code == GT)
9733	    {
9734	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9735	       if (memory_operand (op0, VOIDmode))
9736		 op0 = force_reg (GET_MODE (operands[0]), op0);
9737	       if (GET_MODE (operands[0]) == SFmode)
9738		 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9739	       else
9740		 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9741	       return 1;
9742	    }
9743	}
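      /* E.g. (illustrative) "d = (a < b) ? a : b" in DFmode with SSE math
	 is caught by the LT case above and emitted as a single

		minsd	%xmm1, %xmm0

	 which is why the IEEE-problematic LTGT/UNEQ shapes were ruled out
	 on entry: minsd is not a full IEEE min when NaNs are involved.  */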
9744      /* Arrange for the condition to be an sse_comparison_operator.  In
9745	 non-IEEE mode, try to canonicalize the destination operand
9746	 to be first in the comparison - this helps reload to avoid extra
9747	 moves.  */
9748      if (!sse_comparison_operator (operands[1], VOIDmode)
9749	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9750	{
9751	  rtx tmp = ix86_compare_op0;
9752	  ix86_compare_op0 = ix86_compare_op1;
9753	  ix86_compare_op1 = tmp;
9754	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9755					VOIDmode, ix86_compare_op0,
9756					ix86_compare_op1);
9757	}
9758      /* Similarly, try to arrange for the result to be the first operand
9759	 of the conditional move.  We also don't support the NE comparison on
9760	 SSE, so try to avoid it.  */
9761      if ((rtx_equal_p (operands[0], operands[3])
9762	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9763	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9764	{
9765	  rtx tmp = operands[2];
9766	  operands[2] = operands[3];
9767	  operands[3] = tmp;
9768	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9769					  (GET_CODE (operands[1])),
9770					VOIDmode, ix86_compare_op0,
9771					ix86_compare_op1);
9772	}
9773      if (GET_MODE (operands[0]) == SFmode)
9774	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9775				    operands[2], operands[3],
9776				    ix86_compare_op0, ix86_compare_op1));
9777      else
9778	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9779				    operands[2], operands[3],
9780				    ix86_compare_op0, ix86_compare_op1));
9781      return 1;
9782    }
9783
9784  /* The floating point conditional move instructions don't directly
9785     support conditions resulting from a signed integer comparison.  */
9786
9787  code = GET_CODE (operands[1]);
9788  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9789
9793  if (!fcmov_comparison_operator (compare_op, VOIDmode))
9794    {
9795      if (second_test != NULL || bypass_test != NULL)
9796	abort ();
9797      tmp = gen_reg_rtx (QImode);
9798      ix86_expand_setcc (code, tmp);
9799      code = NE;
9800      ix86_compare_op0 = tmp;
9801      ix86_compare_op1 = const0_rtx;
9802      compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
9803    }
9804  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9805    {
9806      tmp = gen_reg_rtx (GET_MODE (operands[0]));
9807      emit_move_insn (tmp, operands[3]);
9808      operands[3] = tmp;
9809    }
9810  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9811    {
9812      tmp = gen_reg_rtx (GET_MODE (operands[0]));
9813      emit_move_insn (tmp, operands[2]);
9814      operands[2] = tmp;
9815    }
9816
9817  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9818			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9819				compare_op,
9820				operands[2],
9821				operands[3])));
9822  if (bypass_test)
9823    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9824			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9825				  bypass_test,
9826				  operands[3],
9827				  operands[0])));
9828  if (second_test)
9829    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9830			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9831				  second_test,
9832				  operands[2],
9833				  operands[0])));
9834
9835  return 1;
9836}
9837
9838/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
9839   works for floating point parameters and non-offsettable memories.
9840   For pushes, it returns just stack offsets; the values will be saved
9841   in the right order.  Maximally three parts are generated.  */
9842
9843static int
9844ix86_split_to_parts (operand, parts, mode)
9845     rtx operand;
9846     rtx *parts;
9847     enum machine_mode mode;
9848{
9849  int size;
9850
9851  if (!TARGET_64BIT)
9852    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9853  else
9854    size = (GET_MODE_SIZE (mode) + 4) / 8;
9855
9856  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9857    abort ();
9858  if (size < 2 || size > 3)
9859    abort ();
9860
9861  /* Optimize constant pool references into immediates.  This is used by fp
9862     moves, which force all constants to memory to allow combining.  */
9863  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9864    {
9865      rtx tmp = maybe_get_pool_constant (operand);
9866      if (tmp)
9867	operand = tmp;
9868    }
9869
9870  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9871    {
9872      /* The only non-offsettable memories we handle are pushes.  */
9873      if (! push_operand (operand, VOIDmode))
9874	abort ();
9875
9876      operand = copy_rtx (operand);
9877      PUT_MODE (operand, Pmode);
9878      parts[0] = parts[1] = parts[2] = operand;
9879    }
9880  else if (!TARGET_64BIT)
9881    {
9882      if (mode == DImode)
9883	split_di (&operand, 1, &parts[0], &parts[1]);
9884      else
9885	{
9886	  if (REG_P (operand))
9887	    {
9888	      if (!reload_completed)
9889		abort ();
9890	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9891	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9892	      if (size == 3)
9893		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9894	    }
9895	  else if (offsettable_memref_p (operand))
9896	    {
9897	      operand = adjust_address (operand, SImode, 0);
9898	      parts[0] = operand;
9899	      parts[1] = adjust_address (operand, SImode, 4);
9900	      if (size == 3)
9901		parts[2] = adjust_address (operand, SImode, 8);
9902	    }
9903	  else if (GET_CODE (operand) == CONST_DOUBLE)
9904	    {
9905	      REAL_VALUE_TYPE r;
9906	      long l[4];
9907
9908	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9909	      switch (mode)
9910		{
9911		case XFmode:
9912		case TFmode:
9913		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9914		  parts[2] = gen_int_mode (l[2], SImode);
9915		  break;
9916		case DFmode:
9917		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9918		  break;
9919		default:
9920		  abort ();
9921		}
9922	      parts[1] = gen_int_mode (l[1], SImode);
9923	      parts[0] = gen_int_mode (l[0], SImode);
9924	    }
9925	  else
9926	    abort ();
9927	}
9928    }
9929  else
9930    {
9931      if (mode == TImode)
9932	split_ti (&operand, 1, &parts[0], &parts[1]);
9933      if (mode == XFmode || mode == TFmode)
9934	{
9935	  if (REG_P (operand))
9936	    {
9937	      if (!reload_completed)
9938		abort ();
9939	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9940	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9941	    }
9942	  else if (offsettable_memref_p (operand))
9943	    {
9944	      operand = adjust_address (operand, DImode, 0);
9945	      parts[0] = operand;
9946	      parts[1] = adjust_address (operand, SImode, 8);
9947	    }
9948	  else if (GET_CODE (operand) == CONST_DOUBLE)
9949	    {
9950	      REAL_VALUE_TYPE r;
9951	      long l[3];
9952
9953	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9954	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9955	      /* Do not use a shift by 32, to avoid warnings on 32-bit hosts.  */
9956	      if (HOST_BITS_PER_WIDE_INT >= 64)
9957	        parts[0]
9958		  = gen_int_mode
9959		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9960		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9961		       DImode);
9962	      else
9963	        parts[0] = immed_double_const (l[0], l[1], DImode);
9964	      parts[1] = gen_int_mode (l[2], SImode);
9965	    }
9966	  else
9967	    abort ();
9968	}
9969    }
9970
9971  return size;
9972}
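/* For example (illustrative), a DFmode CONST_DOUBLE of 1.0 on ia32 is
   returned as two SImode immediates: parts[0] = 0x00000000 (low word)
   and parts[1] = 0x3ff00000 (high word of the IEEE double 1.0).  */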
9973
9974/* Emit insns to perform a move or push of DI, DF, and XF values.
9975   All required insns are emitted here (the function returns nothing).
9976   Operands 2-4 receive the destination parts in the correct order;
9977   operands 5-7 receive the corresponding source parts.  */
9978
9979void
9980ix86_split_long_move (operands)
9981     rtx operands[];
9982{
9983  rtx part[2][3];
9984  int nparts;
9985  int push = 0;
9986  int collisions = 0;
9987  enum machine_mode mode = GET_MODE (operands[0]);
9988
9989  /* The DFmode expanders may ask us to move a double.
9990     For a 64-bit target this is a single move.  By hiding that fact
9991     here we simplify the i386.md splitters.  */
9992  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9993    {
9994      /* Optimize constant pool references into immediates.  This is used by
9995	 fp moves, which force all constants to memory to allow combining.  */
9996
9997      if (GET_CODE (operands[1]) == MEM
9998	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9999	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10000	operands[1] = get_pool_constant (XEXP (operands[1], 0));
10001      if (push_operand (operands[0], VOIDmode))
10002	{
10003	  operands[0] = copy_rtx (operands[0]);
10004	  PUT_MODE (operands[0], Pmode);
10005	}
10006      else
10007        operands[0] = gen_lowpart (DImode, operands[0]);
10008      operands[1] = gen_lowpart (DImode, operands[1]);
10009      emit_move_insn (operands[0], operands[1]);
10010      return;
10011    }
10012
10013  /* The only non-offsettable memory we handle is a push.  */
10014  if (push_operand (operands[0], VOIDmode))
10015    push = 1;
10016  else if (GET_CODE (operands[0]) == MEM
10017	   && ! offsettable_memref_p (operands[0]))
10018    abort ();
10019
10020  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10021  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10022
10023  /* When emitting a push, take care with source operands on the stack.  */
10024  if (push && GET_CODE (operands[1]) == MEM
10025      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10026    {
10027      if (nparts == 3)
10028	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10029				     XEXP (part[1][2], 0));
10030      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10031				   XEXP (part[1][1], 0));
10032    }
10033
10034  /* We need to do the copy in the right order in case an address register
10035     of the source overlaps the destination.  */
10036  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10037    {
10038      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10039	collisions++;
10040      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10041	collisions++;
10042      if (nparts == 3
10043	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10044	collisions++;
10045
10046      /* Collision in the middle part can be handled by reordering.  */
10047      if (collisions == 1 && nparts == 3
10048	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10049	{
10050	  rtx tmp;
10051	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10052	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10053	}
10054
10055      /* If there are more collisions, we can't handle them by reordering.
10056	 Do an lea to the last part and use only one colliding move.  */
10057      else if (collisions > 1)
10058	{
10059	  rtx base;
10060
10061	  collisions = 1;
10062
10063	  base = part[0][nparts - 1];
10064
10065	  /* Handle the case when the last part isn't valid for lea.
10066	     This happens in 64-bit mode when storing the 12-byte XFmode.  */
10067	  if (GET_MODE (base) != Pmode)
10068	    base = gen_rtx_REG (Pmode, REGNO (base));
10069
10070	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10071	  part[1][0] = replace_equiv_address (part[1][0], base);
10072	  part[1][1] = replace_equiv_address (part[1][1],
10073				      plus_constant (base, UNITS_PER_WORD));
10074	  if (nparts == 3)
10075	    part[1][2] = replace_equiv_address (part[1][2],
10076				      plus_constant (base, 8));
10077	}
10078    }
10079
10080  if (push)
10081    {
10082      if (!TARGET_64BIT)
10083	{
10084	  if (nparts == 3)
10085	    {
10086	      /* We use only the first 12 bytes of the TFmode value, but for
10087		 pushing we are required to adjust the stack as if we were
10088		 pushing a real 16-byte value.  */
10089	      if (mode == TFmode && !TARGET_64BIT)
10090		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10091				       GEN_INT (-4)));
10092	      emit_move_insn (part[0][2], part[1][2]);
10093	    }
10094	}
10095      else
10096	{
10097	  /* In 64-bit mode we don't have a 32-bit push available.  If this is a
10098	     register, that is OK - we will just use the larger counterpart.  We
10099	     also retype memory - this comes from an attempt to avoid a REX
10100	     prefix when moving the second half of a TFmode value.  */
10101	  if (GET_MODE (part[1][1]) == SImode)
10102	    {
10103	      if (GET_CODE (part[1][1]) == MEM)
10104		part[1][1] = adjust_address (part[1][1], DImode, 0);
10105	      else if (REG_P (part[1][1]))
10106		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10107	      else
10108		abort ();
10109	      if (GET_MODE (part[1][0]) == SImode)
10110		part[1][0] = part[1][1];
10111	    }
10112	}
10113      emit_move_insn (part[0][1], part[1][1]);
10114      emit_move_insn (part[0][0], part[1][0]);
10115      return;
10116    }
10117
10118  /* Choose the correct order so as not to overwrite the source before it is copied.  */
10119  if ((REG_P (part[0][0])
10120       && REG_P (part[1][1])
10121       && (REGNO (part[0][0]) == REGNO (part[1][1])
10122	   || (nparts == 3
10123	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
10124      || (collisions > 0
10125	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10126    {
10127      if (nparts == 3)
10128	{
10129	  operands[2] = part[0][2];
10130	  operands[3] = part[0][1];
10131	  operands[4] = part[0][0];
10132	  operands[5] = part[1][2];
10133	  operands[6] = part[1][1];
10134	  operands[7] = part[1][0];
10135	}
10136      else
10137	{
10138	  operands[2] = part[0][1];
10139	  operands[3] = part[0][0];
10140	  operands[5] = part[1][1];
10141	  operands[6] = part[1][0];
10142	}
10143    }
10144  else
10145    {
10146      if (nparts == 3)
10147	{
10148	  operands[2] = part[0][0];
10149	  operands[3] = part[0][1];
10150	  operands[4] = part[0][2];
10151	  operands[5] = part[1][0];
10152	  operands[6] = part[1][1];
10153	  operands[7] = part[1][2];
10154	}
10155      else
10156	{
10157	  operands[2] = part[0][0];
10158	  operands[3] = part[0][1];
10159	  operands[5] = part[1][0];
10160	  operands[6] = part[1][1];
10161	}
10162    }
10163  emit_move_insn (operands[2], operands[5]);
10164  emit_move_insn (operands[3], operands[6]);
10165  if (nparts == 3)
10166    emit_move_insn (operands[4], operands[7]);
10167
10168  return;
10169}
10170
10171void
10172ix86_split_ashldi (operands, scratch)
10173     rtx *operands, scratch;
10174{
10175  rtx low[2], high[2];
10176  int count;
10177
10178  if (GET_CODE (operands[2]) == CONST_INT)
10179    {
10180      split_di (operands, 2, low, high);
10181      count = INTVAL (operands[2]) & 63;
10182
10183      if (count >= 32)
10184	{
10185	  emit_move_insn (high[0], low[1]);
10186	  emit_move_insn (low[0], const0_rtx);
10187
10188	  if (count > 32)
10189	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10190	}
10191      else
10192	{
10193	  if (!rtx_equal_p (operands[0], operands[1]))
10194	    emit_move_insn (operands[0], operands[1]);
10195	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10196	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10197	}
10198    }
10199  else
10200    {
10201      if (!rtx_equal_p (operands[0], operands[1]))
10202	emit_move_insn (operands[0], operands[1]);
10203
10204      split_di (operands, 1, low, high);
10205
10206      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10207      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10208
10209      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10210	{
10211	  if (! no_new_pseudos)
10212	    scratch = force_reg (SImode, const0_rtx);
10213	  else
10214	    emit_move_insn (scratch, const0_rtx);
10215
10216	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10217					  scratch));
10218	}
10219      else
10220	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10221    }
10222}
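/* Illustration of the constant path above: with the DImode value held
   in %edx:%eax, "x << 40" (count >= 32) comes out roughly as

	movl	%eax, %edx	# high = low
	xorl	%eax, %eax	# low = 0
	sall	$8, %edx	# shift the remaining 40 - 32 bits

   while a variable count uses shld/sall plus the adjustment pattern
   (hypothetical register choices).  */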
10223
10224void
10225ix86_split_ashrdi (operands, scratch)
10226     rtx *operands, scratch;
10227{
10228  rtx low[2], high[2];
10229  int count;
10230
10231  if (GET_CODE (operands[2]) == CONST_INT)
10232    {
10233      split_di (operands, 2, low, high);
10234      count = INTVAL (operands[2]) & 63;
10235
10236      if (count >= 32)
10237	{
10238	  emit_move_insn (low[0], high[1]);
10239
10240	  if (! reload_completed)
10241	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10242	  else
10243	    {
10244	      emit_move_insn (high[0], low[0]);
10245	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10246	    }
10247
10248	  if (count > 32)
10249	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10250	}
10251      else
10252	{
10253	  if (!rtx_equal_p (operands[0], operands[1]))
10254	    emit_move_insn (operands[0], operands[1]);
10255	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10256	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10257	}
10258    }
10259  else
10260    {
10261      if (!rtx_equal_p (operands[0], operands[1]))
10262	emit_move_insn (operands[0], operands[1]);
10263
10264      split_di (operands, 1, low, high);
10265
10266      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10267      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10268
10269      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10270	{
10271	  if (! no_new_pseudos)
10272	    scratch = gen_reg_rtx (SImode);
10273	  emit_move_insn (scratch, high[0]);
10274	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10275	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10276					  scratch));
10277	}
10278      else
10279	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10280    }
10281}
10282
10283void
10284ix86_split_lshrdi (operands, scratch)
10285     rtx *operands, scratch;
10286{
10287  rtx low[2], high[2];
10288  int count;
10289
10290  if (GET_CODE (operands[2]) == CONST_INT)
10291    {
10292      split_di (operands, 2, low, high);
10293      count = INTVAL (operands[2]) & 63;
10294
10295      if (count >= 32)
10296	{
10297	  emit_move_insn (low[0], high[1]);
10298	  emit_move_insn (high[0], const0_rtx);
10299
10300	  if (count > 32)
10301	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10302	}
10303      else
10304	{
10305	  if (!rtx_equal_p (operands[0], operands[1]))
10306	    emit_move_insn (operands[0], operands[1]);
10307	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10308	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10309	}
10310    }
10311  else
10312    {
10313      if (!rtx_equal_p (operands[0], operands[1]))
10314	emit_move_insn (operands[0], operands[1]);
10315
10316      split_di (operands, 1, low, high);
10317
10318      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10319      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10320
10321      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
10322      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10323	{
10324	  if (! no_new_pseudos)
10325	    scratch = force_reg (SImode, const0_rtx);
10326	  else
10327	    emit_move_insn (scratch, const0_rtx);
10328
10329	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10330					  scratch));
10331	}
10332      else
10333	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10334    }
10335}
10336
10337/* Helper function for the string operations below.  Test whether VARIABLE
10338   is aligned to VALUE bytes; if so, jump to the returned label.  */
10339static rtx
10340ix86_expand_aligntest (variable, value)
10341     rtx variable;
10342     int value;
10343{
10344  rtx label = gen_label_rtx ();
10345  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10346  if (GET_MODE (variable) == DImode)
10347    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10348  else
10349    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10350  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10351			   1, label);
10352  return label;
10353}
10354
10355/* Decrement COUNTREG by VALUE.  */
10356static void
10357ix86_adjust_counter (countreg, value)
10358     rtx countreg;
10359     HOST_WIDE_INT value;
10360{
10361  if (GET_MODE (countreg) == DImode)
10362    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10363  else
10364    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10365}
10366
10367/* Zero-extend the possibly-SImode EXP into a Pmode register.  */
10368rtx
10369ix86_zero_extend_to_Pmode (exp)
10370   rtx exp;
10371{
10372  rtx r;
10373  if (GET_MODE (exp) == VOIDmode)
10374    return force_reg (Pmode, exp);
10375  if (GET_MODE (exp) == Pmode)
10376    return copy_to_mode_reg (Pmode, exp);
10377  r = gen_reg_rtx (Pmode);
10378  emit_insn (gen_zero_extendsidi2 (r, exp));
10379  return r;
10380}
10381
10382/* Expand a string move (memcpy) operation.  Use i386 string operations
10383   when profitable.  ix86_expand_clrstr contains similar code.  */
10384int
10385ix86_expand_movstr (dst, src, count_exp, align_exp)
10386     rtx dst, src, count_exp, align_exp;
10387{
10388  rtx srcreg, destreg, countreg;
10389  enum machine_mode counter_mode;
10390  HOST_WIDE_INT align = 0;
10391  unsigned HOST_WIDE_INT count = 0;
10392  rtx insns;
10393
10394  start_sequence ();
10395
10396  if (GET_CODE (align_exp) == CONST_INT)
10397    align = INTVAL (align_exp);
10398
10399  /* This simple hack avoids all inlining code and simplifies code below.  */
10400  if (!TARGET_ALIGN_STRINGOPS)
10401    align = 64;
10402
10403  if (GET_CODE (count_exp) == CONST_INT)
10404    count = INTVAL (count_exp);
10405
10406  /* Figure out the proper mode for the counter.  For 32 bits it is always
10407     SImode; for 64 bits use SImode when possible, otherwise DImode.
10408     Set count to the number of bytes copied when known at compile time.  */
10409  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10410      || x86_64_zero_extended_value (count_exp))
10411    counter_mode = SImode;
10412  else
10413    counter_mode = DImode;
10414
10415  if (counter_mode != SImode && counter_mode != DImode)
10416    abort ();
10417
10418  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10419  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10420
10421  emit_insn (gen_cld ());
10422
10423  /* When optimizing for size, emit a simple rep ; movsb instruction for
10424     counts not divisible by 4.  */
10425
10426  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10427    {
10428      countreg = ix86_zero_extend_to_Pmode (count_exp);
10429      if (TARGET_64BIT)
10430	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10431				        destreg, srcreg, countreg));
10432      else
10433	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10434				  destreg, srcreg, countreg));
10435    }
10436
10437  /* For constant aligned (or small unaligned) copies use rep movsl
10438     followed by code copying the rest.  For PentiumPro ensure 8-byte
10439     alignment to allow rep movsl acceleration.  */
10440
10441  else if (count != 0
10442	   && (align >= 8
10443	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10444	       || optimize_size || count < (unsigned int) 64))
10445    {
10446      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10447      if (count & ~(size - 1))
10448	{
10449	  countreg = copy_to_mode_reg (counter_mode,
10450				       GEN_INT ((count >> (size == 4 ? 2 : 3))
10451						& (TARGET_64BIT ? -1 : 0x3fffffff)));
10452	  countreg = ix86_zero_extend_to_Pmode (countreg);
10453	  if (size == 4)
10454	    {
10455	      if (TARGET_64BIT)
10456		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10457					        destreg, srcreg, countreg));
10458	      else
10459		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10460					  destreg, srcreg, countreg));
10461	    }
10462	  else
10463	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10464					    destreg, srcreg, countreg));
10465	}
10466      if (size == 8 && (count & 0x04))
10467	emit_insn (gen_strmovsi (destreg, srcreg));
10468      if (count & 0x02)
10469	emit_insn (gen_strmovhi (destreg, srcreg));
10470      if (count & 0x01)
10471	emit_insn (gen_strmovqi (destreg, srcreg));
10472    }
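  /* Worked example (illustrative): a 15-byte copy with 4-byte alignment
     on ia32 takes the constant branch above and emits roughly

	cld
	movl	$3, %ecx	# 15 >> 2 longwords
	rep movsl
	movsw			# count & 2
	movsb			# count & 1

     with %esi/%edi already loaded from the source and destination.  */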
10473  /* The generic code, based on the glibc implementation:
10474     - align the destination to 4 bytes (8-byte alignment is used for
10475     PentiumPro, allowing accelerated copying there)
10476     - copy the data using rep movsl
10477     - copy the rest.  */
10478  else
10479    {
10480      rtx countreg2;
10481      rtx label = NULL;
10482      int desired_alignment = (TARGET_PENTIUMPRO
10483			       && (count == 0 || count >= (unsigned int) 260)
10484			       ? 8 : UNITS_PER_WORD);
10485
10486      /* In case we don't know anything about the alignment, default to the
10487         library version, since it is usually equally fast and results in
10488         shorter code.  */
10489      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10490	{
10491	  end_sequence ();
10492	  return 0;
10493	}
10494
10495      if (TARGET_SINGLE_STRINGOP)
10496	emit_insn (gen_cld ());
10497
10498      countreg2 = gen_reg_rtx (Pmode);
10499      countreg = copy_to_mode_reg (counter_mode, count_exp);
10500
10501      /* We don't use loops to align the destination or to copy parts smaller
10502         than 4 bytes, because gcc is able to optimize such code better (in
10503         case the destination or the count really is aligned, gcc is often
10504         able to predict the branches) and also it is friendlier to the
10505         hardware branch prediction.
10506
10507         Using loops is beneficial for the generic case, because we can
10508         handle small counts using the loops.  Many CPUs (such as the Athlon)
10509         have large REP prefix setup costs.
10510
10511         This is quite costly.  Maybe we can revisit this decision later or
10512         add some customizability to this code.  */
10513
10514      if (count == 0 && align < desired_alignment)
10515	{
10516	  label = gen_label_rtx ();
10517	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10518				   LEU, 0, counter_mode, 1, label);
10519	}
10520      if (align <= 1)
10521	{
10522	  rtx label = ix86_expand_aligntest (destreg, 1);
10523	  emit_insn (gen_strmovqi (destreg, srcreg));
10524	  ix86_adjust_counter (countreg, 1);
10525	  emit_label (label);
10526	  LABEL_NUSES (label) = 1;
10527	}
10528      if (align <= 2)
10529	{
10530	  rtx label = ix86_expand_aligntest (destreg, 2);
10531	  emit_insn (gen_strmovhi (destreg, srcreg));
10532	  ix86_adjust_counter (countreg, 2);
10533	  emit_label (label);
10534	  LABEL_NUSES (label) = 1;
10535	}
10536      if (align <= 4 && desired_alignment > 4)
10537	{
10538	  rtx label = ix86_expand_aligntest (destreg, 4);
10539	  emit_insn (gen_strmovsi (destreg, srcreg));
10540	  ix86_adjust_counter (countreg, 4);
10541	  emit_label (label);
10542	  LABEL_NUSES (label) = 1;
10543	}
10544
10545      if (label && desired_alignment > 4 && !TARGET_64BIT)
10546	{
10547	  emit_label (label);
10548	  LABEL_NUSES (label) = 1;
10549	  label = NULL_RTX;
10550	}
10551      if (!TARGET_SINGLE_STRINGOP)
10552	emit_insn (gen_cld ());
10553      if (TARGET_64BIT)
10554	{
10555	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10556				  GEN_INT (3)));
10557	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10558					  destreg, srcreg, countreg2));
10559	}
10560      else
10561	{
10562	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10563	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10564				    destreg, srcreg, countreg2));
10565	}
10566
10567      if (label)
10568	{
10569	  emit_label (label);
10570	  LABEL_NUSES (label) = 1;
10571	}
10572      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10573	emit_insn (gen_strmovsi (destreg, srcreg));
10574      if ((align <= 4 || count == 0) && TARGET_64BIT)
10575	{
10576	  rtx label = ix86_expand_aligntest (countreg, 4);
10577	  emit_insn (gen_strmovsi (destreg, srcreg));
10578	  emit_label (label);
10579	  LABEL_NUSES (label) = 1;
10580	}
10581      if (align > 2 && count != 0 && (count & 2))
10582	emit_insn (gen_strmovhi (destreg, srcreg));
10583      if (align <= 2 || count == 0)
10584	{
10585	  rtx label = ix86_expand_aligntest (countreg, 2);
10586	  emit_insn (gen_strmovhi (destreg, srcreg));
10587	  emit_label (label);
10588	  LABEL_NUSES (label) = 1;
10589	}
10590      if (align > 1 && count != 0 && (count & 1))
10591	emit_insn (gen_strmovqi (destreg, srcreg));
10592      if (align <= 1 || count == 0)
10593	{
10594	  rtx label = ix86_expand_aligntest (countreg, 1);
10595	  emit_insn (gen_strmovqi (destreg, srcreg));
10596	  emit_label (label);
10597	  LABEL_NUSES (label) = 1;
10598	}
10599    }
10600
10601  insns = get_insns ();
10602  end_sequence ();
10603
10604  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10605  emit_insn (insns);
10606  return 1;
10607}
10608
10609/* Expand a string clear operation (bzero).  Use i386 string operations
10610   when profitable.  ix86_expand_movstr contains similar code.  */
10611int
10612ix86_expand_clrstr (src, count_exp, align_exp)
10613     rtx src, count_exp, align_exp;
10614{
10615  rtx destreg, zeroreg, countreg;
10616  enum machine_mode counter_mode;
10617  HOST_WIDE_INT align = 0;
10618  unsigned HOST_WIDE_INT count = 0;
10619
10620  if (GET_CODE (align_exp) == CONST_INT)
10621    align = INTVAL (align_exp);
10622
10623  /* This simple hack avoids all inlining code and simplifies code below.  */
10624  if (!TARGET_ALIGN_STRINGOPS)
10625    align = 32;
10626
10627  if (GET_CODE (count_exp) == CONST_INT)
10628    count = INTVAL (count_exp);
10629  /* Figure out the proper mode for the counter.  For 32 bits it is always
10630     SImode; for 64 bits use SImode when possible, otherwise DImode.
10631     Set count to the number of bytes cleared when known at compile time.  */
10632  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10633      || x86_64_zero_extended_value (count_exp))
10634    counter_mode = SImode;
10635  else
10636    counter_mode = DImode;
10637
10638  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10639
10640  emit_insn (gen_cld ());
10641
10642  /* When optimizing for size, emit a simple rep ; stosb instruction for
10643     counts not divisible by 4.  */
10644
10645  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10646    {
10647      countreg = ix86_zero_extend_to_Pmode (count_exp);
10648      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10649      if (TARGET_64BIT)
10650	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10651				         destreg, countreg));
10652      else
10653	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10654				   destreg, countreg));
10655    }
10656  else if (count != 0
10657	   && (align >= 8
10658	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10659	       || optimize_size || count < (unsigned int) 64))
10660    {
10661      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10662      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10663      if (count & ~(size - 1))
10664	{
10665	  countreg = copy_to_mode_reg (counter_mode,
10666				       GEN_INT ((count >> (size == 4 ? 2 : 3))
10667						& (TARGET_64BIT ? -1 : 0x3fffffff)));
10668	  countreg = ix86_zero_extend_to_Pmode (countreg);
10669	  if (size == 4)
10670	    {
10671	      if (TARGET_64BIT)
10672		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10673					         destreg, countreg));
10674	      else
10675		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10676					   destreg, countreg));
10677	    }
10678	  else
10679	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10680					     destreg, countreg));
10681	}
10682      if (size == 8 && (count & 0x04))
10683	emit_insn (gen_strsetsi (destreg,
10684				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10685      if (count & 0x02)
10686	emit_insn (gen_strsethi (destreg,
10687				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10688      if (count & 0x01)
10689	emit_insn (gen_strsetqi (destreg,
10690				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10691    }
10692  else
10693    {
10694      rtx countreg2;
10695      rtx label = NULL;
10696      /* Compute desired alignment of the string operation.  */
10697      int desired_alignment = (TARGET_PENTIUMPRO
10698			       && (count == 0 || count >= (unsigned int) 260)
10699			       ? 8 : UNITS_PER_WORD);
10700
10701	      /* In case we don't know anything about the alignment, default to
10702	         the library version, since it is usually equally fast and results
10703	         in shorter code.  */
10704      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10705	return 0;
10706
10707      if (TARGET_SINGLE_STRINGOP)
10708	emit_insn (gen_cld ());
10709
10710      countreg2 = gen_reg_rtx (Pmode);
10711      countreg = copy_to_mode_reg (counter_mode, count_exp);
10712      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10713
10714      if (count == 0 && align < desired_alignment)
10715	{
10716	  label = gen_label_rtx ();
10717	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10718				   LEU, 0, counter_mode, 1, label);
10719	}
10720      if (align <= 1)
10721	{
10722	  rtx label = ix86_expand_aligntest (destreg, 1);
10723	  emit_insn (gen_strsetqi (destreg,
10724				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
10725	  ix86_adjust_counter (countreg, 1);
10726	  emit_label (label);
10727	  LABEL_NUSES (label) = 1;
10728	}
10729      if (align <= 2)
10730	{
10731	  rtx label = ix86_expand_aligntest (destreg, 2);
10732	  emit_insn (gen_strsethi (destreg,
10733				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
10734	  ix86_adjust_counter (countreg, 2);
10735	  emit_label (label);
10736	  LABEL_NUSES (label) = 1;
10737	}
10738      if (align <= 4 && desired_alignment > 4)
10739	{
10740	  rtx label = ix86_expand_aligntest (destreg, 4);
10741	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10742					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10743					     : zeroreg)));
10744	  ix86_adjust_counter (countreg, 4);
10745	  emit_label (label);
10746	  LABEL_NUSES (label) = 1;
10747	}
10748
10749      if (label && desired_alignment > 4 && !TARGET_64BIT)
10750	{
10751	  emit_label (label);
10752	  LABEL_NUSES (label) = 1;
10753	  label = NULL_RTX;
10754	}
10755
10756      if (!TARGET_SINGLE_STRINGOP)
10757	emit_insn (gen_cld ());
10758      if (TARGET_64BIT)
10759	{
10760	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10761				  GEN_INT (3)));
10762	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10763					   destreg, countreg2));
10764	}
10765      else
10766	{
10767	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10768	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10769				     destreg, countreg2));
10770	}
10771      if (label)
10772	{
10773	  emit_label (label);
10774	  LABEL_NUSES (label) = 1;
10775	}
10776
10777      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10778	emit_insn (gen_strsetsi (destreg,
10779				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10780      if (TARGET_64BIT && (align <= 4 || count == 0))
10781	{
10782	  rtx label = ix86_expand_aligntest (countreg, 4);
10783	  emit_insn (gen_strsetsi (destreg,
10784				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
10785	  emit_label (label);
10786	  LABEL_NUSES (label) = 1;
10787	}
10788      if (align > 2 && count != 0 && (count & 2))
10789	emit_insn (gen_strsethi (destreg,
10790				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10791      if (align <= 2 || count == 0)
10792	{
10793	  rtx label = ix86_expand_aligntest (countreg, 2);
10794	  emit_insn (gen_strsethi (destreg,
10795				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
10796	  emit_label (label);
10797	  LABEL_NUSES (label) = 1;
10798	}
10799      if (align > 1 && count != 0 && (count & 1))
10800	emit_insn (gen_strsetqi (destreg,
10801				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10802      if (align <= 1 || count == 0)
10803	{
10804	  rtx label = ix86_expand_aligntest (countreg, 1);
10805	  emit_insn (gen_strsetqi (destreg,
10806				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
10807	  emit_label (label);
10808	  LABEL_NUSES (label) = 1;
10809	}
10810    }
10811  return 1;
10812}
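
/* Again for reference, a C sketch (not part of the compiler) of the
   clearing strategy above: the bulk is cleared SIZE bytes at a time by
   rep;stos, then the 4/2/1-byte tail is stored separately.  */
#if 0
static void
clear_tail (char *dst, unsigned int count, int size)
{
  unsigned int i, bulk = (count / size) * size;

  for (i = 0; i < bulk; i++)	/* stands in for rep;stos */
    dst[i] = 0;
  dst += bulk;
  if (size == 8 && (count & 4))
    {
      *(int *) dst = 0;		/* strsetsi */
      dst += 4;
    }
  if (count & 2)
    {
      *(short *) dst = 0;	/* strsethi */
      dst += 2;
    }
  if (count & 1)
    *dst = 0;			/* strsetqi */
}
#endif
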
10813/* Expand strlen.  */
10814int
10815ix86_expand_strlen (out, src, eoschar, align)
10816     rtx out, src, eoschar, align;
10817{
10818  rtx addr, scratch1, scratch2, scratch3, scratch4;
10819
10820	  /* The generic case of the strlen expander is long.  Avoid expanding it
10821	     unless TARGET_INLINE_ALL_STRINGOPS.  */
10822
10823  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10824      && !TARGET_INLINE_ALL_STRINGOPS
10825      && !optimize_size
10826      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10827    return 0;
10828
10829  addr = force_reg (Pmode, XEXP (src, 0));
10830  scratch1 = gen_reg_rtx (Pmode);
10831
10832  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10833      && !optimize_size)
10834    {
10835      /* Well it seems that some optimizer does not combine a call like
10836         foo(strlen(bar), strlen(bar));
10837	         when the move and the subtraction are done here.  It does calculate
10838         the length just once when these instructions are done inside of
10839         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
10840         often used and I use one fewer register for the lifetime of
10841         output_strlen_unroll() this is better.  */
10842
10843      emit_move_insn (out, addr);
10844
10845      ix86_expand_strlensi_unroll_1 (out, align);
10846
10847      /* strlensi_unroll_1 returns the address of the zero at the end of
10848         the string, like memchr(), so compute the length by subtracting
10849         the start address.  */
10850      if (TARGET_64BIT)
10851	emit_insn (gen_subdi3 (out, out, addr));
10852      else
10853	emit_insn (gen_subsi3 (out, out, addr));
10854    }
10855  else
10856    {
10857      scratch2 = gen_reg_rtx (Pmode);
10858      scratch3 = gen_reg_rtx (Pmode);
10859      scratch4 = force_reg (Pmode, constm1_rtx);
10860
10861      emit_move_insn (scratch3, addr);
10862      eoschar = force_reg (QImode, eoschar);
10863
10864      emit_insn (gen_cld ());
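      /* With repnz;scasb the count register starts at -1 and is
         decremented once for every byte scanned, including the
         terminating zero, so afterwards scratch1 == -(len + 2); the
         length is recovered below as ~scratch1 + (-1) == len.  */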
10865      if (TARGET_64BIT)
10866	{
10867	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10868					 align, scratch4, scratch3));
10869	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10870	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10871	}
10872      else
10873	{
10874	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10875				     align, scratch4, scratch3));
10876	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10877	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10878	}
10879    }
10880  return 1;
10881}
10882
10883/* Expand the appropriate insns for doing strlen if not just doing
10884   repnz; scasb
10885
10886   out = result, initialized with the start address
10887   align_rtx = alignment of the address.
10888	   scratch = scratch register, initialized with the start address when
10889		not aligned, otherwise undefined
10890	
10891	   This is just the body.  It needs the initializations mentioned above and
10892   some address computing at the end.  These things are done in i386.md.  */
10893
10894static void
10895ix86_expand_strlensi_unroll_1 (out, align_rtx)
10896     rtx out, align_rtx;
10897{
10898  int align;
10899  rtx tmp;
10900  rtx align_2_label = NULL_RTX;
10901  rtx align_3_label = NULL_RTX;
10902  rtx align_4_label = gen_label_rtx ();
10903  rtx end_0_label = gen_label_rtx ();
10904  rtx mem;
10905  rtx tmpreg = gen_reg_rtx (SImode);
10906  rtx scratch = gen_reg_rtx (SImode);
10907
10908  align = 0;
10909  if (GET_CODE (align_rtx) == CONST_INT)
10910    align = INTVAL (align_rtx);
10911
10912  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
10913
10914  /* Is there a known alignment and is it less than 4?  */
10915  if (align < 4)
10916    {
10917      rtx scratch1 = gen_reg_rtx (Pmode);
10918      emit_move_insn (scratch1, out);
10919      /* Is there a known alignment and is it not 2? */
10920      if (align != 2)
10921	{
10922	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10923	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10924
10925	  /* Leave just the 3 lower bits.  */
10926	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10927				    NULL_RTX, 0, OPTAB_WIDEN);
10928
10929	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10930				   Pmode, 1, align_4_label);
10931	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10932				   Pmode, 1, align_2_label);
10933	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10934				   Pmode, 1, align_3_label);
10935	}
10936      else
10937        {
10938	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
10939	     check whether it is aligned to a 4-byte boundary.  */
10940
10941	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10942				    NULL_RTX, 0, OPTAB_WIDEN);
10943
10944	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10945				   Pmode, 1, align_4_label);
10946        }
10947
10948      mem = gen_rtx_MEM (QImode, out);
10949
10950      /* Now compare the bytes.  */
10951
10952	      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
10953      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10954			       QImode, 1, end_0_label);
10955
10956      /* Increment the address.  */
10957      if (TARGET_64BIT)
10958	emit_insn (gen_adddi3 (out, out, const1_rtx));
10959      else
10960	emit_insn (gen_addsi3 (out, out, const1_rtx));
10961
10962	      /* Not needed with an alignment of 2.  */
10963      if (align != 2)
10964	{
10965	  emit_label (align_2_label);
10966
10967	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10968				   end_0_label);
10969
10970	  if (TARGET_64BIT)
10971	    emit_insn (gen_adddi3 (out, out, const1_rtx));
10972	  else
10973	    emit_insn (gen_addsi3 (out, out, const1_rtx));
10974
10975	  emit_label (align_3_label);
10976	}
10977
10978      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10979			       end_0_label);
10980
10981      if (TARGET_64BIT)
10982	emit_insn (gen_adddi3 (out, out, const1_rtx));
10983      else
10984	emit_insn (gen_addsi3 (out, out, const1_rtx));
10985    }
10986
10987	  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
10988	     align this loop; it only enlarges the code and does not help to
10989	     speed it up.  */
10990  emit_label (align_4_label);
10991
10992  mem = gen_rtx_MEM (SImode, out);
10993  emit_move_insn (scratch, mem);
10994  if (TARGET_64BIT)
10995    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10996  else
10997    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10998
10999  /* This formula yields a nonzero result iff one of the bytes is zero.
11000	     This saves three branches inside the loop and many cycles.  */
11001
11002  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11003  emit_insn (gen_one_cmplsi2 (scratch, scratch));
11004  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11005  emit_insn (gen_andsi3 (tmpreg, tmpreg,
11006			 gen_int_mode (0x80808080, SImode)));
11007  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11008			   align_4_label);
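
  /* Worked example of the formula above: for scratch == 0x12003456
     (byte 2 is zero), tmpreg = scratch - 0x01010101 = 0x10ff3355;
     ~scratch = 0xedffcba9; their AND is 0x00ff0301; ANDed with
     0x80808080 this leaves 0x00800000 -- nonzero exactly because
     byte 2 of scratch was zero.  */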
11009
11010  if (TARGET_CMOVE)
11011    {
11012       rtx reg = gen_reg_rtx (SImode);
11013       rtx reg2 = gen_reg_rtx (Pmode);
11014       emit_move_insn (reg, tmpreg);
11015       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11016
11017       /* If zero is not in the first two bytes, move two bytes forward.  */
11018       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11019       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11020       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11021       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11022			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
11023						     reg,
11024						     tmpreg)));
11025       /* Emit lea manually to avoid clobbering of flags.  */
11026       emit_insn (gen_rtx_SET (SImode, reg2,
11027			       gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11028
11029       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11030       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11031       emit_insn (gen_rtx_SET (VOIDmode, out,
11032			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11033						     reg2,
11034						     out)));
11035
11036    }
11037  else
11038    {
11039       rtx end_2_label = gen_label_rtx ();
11040       /* Is zero in the first two bytes? */
11041
11042       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11043       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11044       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11045       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11046                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11047                            pc_rtx);
11048       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11049       JUMP_LABEL (tmp) = end_2_label;
11050
11051       /* Not in the first two.  Move two bytes forward.  */
11052       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11053       if (TARGET_64BIT)
11054	 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11055       else
11056	 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11057
11058       emit_label (end_2_label);
11059
11060    }
11061
11062  /* Avoid branch in fixing the byte.  */
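  /* The low byte of tmpreg now has bit 7 set iff the first byte of the
     remaining pair is the zero.  Doubling that byte moves bit 7 into the
     carry flag, and the subtract-with-borrow below turns OUT (which
     points just past the bytes already skipped) into the address of the
     zero byte: out - 3 - 1 when the zero is the first of the pair,
     out - 3 when it is the second.  */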
11063  tmpreg = gen_lowpart (QImode, tmpreg);
11064  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11065  if (TARGET_64BIT)
11066    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
11067  else
11068    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
11069
11070  emit_label (end_0_label);
11071}
11072
11073void
11074ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
11075     rtx retval, fnaddr, callarg1, callarg2, pop;
11076{
11077  rtx use = NULL, call;
11078
11079  if (pop == const0_rtx)
11080    pop = NULL;
11081  if (TARGET_64BIT && pop)
11082    abort ();
11083
11084#if TARGET_MACHO
11085  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11086    fnaddr = machopic_indirect_call_target (fnaddr);
11087#else
11088  /* Static functions and indirect calls don't need the pic register.  */
11089  if (! TARGET_64BIT && flag_pic
11090      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11091      && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11092    use_reg (&use, pic_offset_table_rtx);
11093
11094  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11095    {
11096      rtx al = gen_rtx_REG (QImode, 0);
11097      emit_move_insn (al, callarg2);
11098      use_reg (&use, al);
11099    }
11100#endif /* TARGET_MACHO */
11101
11102  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11103    {
11104      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11105      fnaddr = gen_rtx_MEM (QImode, fnaddr);
11106    }
11107
11108  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11109  if (retval)
11110    call = gen_rtx_SET (VOIDmode, retval, call);
11111  if (pop)
11112    {
11113      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11114      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11115      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11116    }
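
  /* For a callee-pop call the pattern built above is a PARALLEL of the form

	(parallel [(set (reg) (call ...))
		   (set (reg sp) (plus (reg sp) (const_int N)))])

     so the stack adjustment is recorded as part of the call itself.  */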
11117
11118  call = emit_call_insn (call);
11119  if (use)
11120    CALL_INSN_FUNCTION_USAGE (call) = use;
11121}
11122
11123
11124/* Clear stack slot assignments remembered from previous functions.
11125   This is called from INIT_EXPANDERS once before RTL is emitted for each
11126   function.  */
11127
11128static struct machine_function *
11129ix86_init_machine_status ()
11130{
11131  return ggc_alloc_cleared (sizeof (struct machine_function));
11132}
11133
11134/* Return a MEM corresponding to a stack slot with mode MODE.
11135   Allocate a new slot if necessary.
11136
11137   The RTL for a function can have several slots available: N is
11138   which slot to use.  */
11139
11140rtx
11141assign_386_stack_local (mode, n)
11142     enum machine_mode mode;
11143     int n;
11144{
11145  if (n < 0 || n >= MAX_386_STACK_LOCALS)
11146    abort ();
11147
11148  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11149    ix86_stack_locals[(int) mode][n]
11150      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11151
11152  return ix86_stack_locals[(int) mode][n];
11153}
11154
11155/* Construct the SYMBOL_REF for the tls_get_addr function.  */
11156
11157static GTY(()) rtx ix86_tls_symbol;
11158rtx
11159ix86_tls_get_addr ()
11160{
11161
11162  if (!ix86_tls_symbol)
11163    {
11164      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11165					    (TARGET_GNU_TLS && !TARGET_64BIT)
11166					    ? "___tls_get_addr"
11167					    : "__tls_get_addr");
11168    }
11169
11170  return ix86_tls_symbol;
11171}
11172
11173/* Calculate the length of the memory address in the instruction
11174   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
11175
11176static int
11177memory_address_length (addr)
11178     rtx addr;
11179{
11180  struct ix86_address parts;
11181  rtx base, index, disp;
11182  int len;
11183
11184  if (GET_CODE (addr) == PRE_DEC
11185      || GET_CODE (addr) == POST_INC
11186      || GET_CODE (addr) == PRE_MODIFY
11187      || GET_CODE (addr) == POST_MODIFY)
11188    return 0;
11189
11190  if (! ix86_decompose_address (addr, &parts))
11191    abort ();
11192
11193  base = parts.base;
11194  index = parts.index;
11195  disp = parts.disp;
11196  len = 0;
11197
11198  /* Rule of thumb:
11199       - esp as the base always wants an index,
11200       - ebp as the base always wants a displacement.  */
11201
11202  /* Register Indirect.  */
11203  if (base && !index && !disp)
11204    {
11205      /* esp (for its index) and ebp (for its displacement) need
11206	 the two-byte modrm form.  */
11207      if (addr == stack_pointer_rtx
11208	  || addr == arg_pointer_rtx
11209	  || addr == frame_pointer_rtx
11210	  || addr == hard_frame_pointer_rtx)
11211	len = 1;
11212    }
11213
11214  /* Direct Addressing.  */
11215  else if (disp && !base && !index)
11216    len = 4;
11217
11218  else
11219    {
11220      /* Find the length of the displacement constant.  */
11221      if (disp)
11222	{
11223	  if (GET_CODE (disp) == CONST_INT
11224	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11225	      && base)
11226	    len = 1;
11227	  else
11228	    len = 4;
11229	}
11230      /* ebp always wants a displacement.  */
11231      else if (base == hard_frame_pointer_rtx)
11232        len = 1;
11233
11234      /* An index requires the two-byte modrm form...  */
11235      if (index
11236	  /* ...like esp, which always wants an index.  */
11237	  || base == stack_pointer_rtx
11238	  || base == arg_pointer_rtx
11239	  || base == frame_pointer_rtx)
11240	len += 1;
11241    }
11242
11243  return len;
11244}
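
/* Worked examples for the computation above (the opcode byte and the
   first modrm byte are not counted): (%eax) -> 0; (%esp) -> 1 (SIB);
   12(%ebp) -> 1 (disp8); foo -> 4 (disp32); 4(%esp,%ebx,2) -> 2
   (SIB + disp8).  */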
11245
11246/* Compute default value for "length_immediate" attribute.  When SHORTFORM
11247	   is set, expect that the insn has an 8-bit immediate alternative.  */
11248int
11249ix86_attr_length_immediate_default (insn, shortform)
11250     rtx insn;
11251     int shortform;
11252{
11253  int len = 0;
11254  int i;
11255  extract_insn_cached (insn);
11256  for (i = recog_data.n_operands - 1; i >= 0; --i)
11257    if (CONSTANT_P (recog_data.operand[i]))
11258      {
11259	if (len)
11260	  abort ();
11261	if (shortform
11262	    && GET_CODE (recog_data.operand[i]) == CONST_INT
11263	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11264	  len = 1;
11265	else
11266	  {
11267	    switch (get_attr_mode (insn))
11268	      {
11269		case MODE_QI:
11270		  len += 1;
11271		  break;
11272		case MODE_HI:
11273		  len += 2;
11274		  break;
11275		case MODE_SI:
11276		  len += 4;
11277		  break;
11278		/* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
11279		case MODE_DI:
11280		  len += 4;
11281		  break;
11282		default:
11283		  fatal_insn ("unknown insn mode", insn);
11284	      }
11285	  }
11286      }
11287  return len;
11288}
11289/* Compute default value for "length_address" attribute.  */
11290int
11291ix86_attr_length_address_default (insn)
11292     rtx insn;
11293{
11294  int i;
11295
11296  if (get_attr_type (insn) == TYPE_LEA)
11297    {
11298      rtx set = PATTERN (insn);
11299      if (GET_CODE (set) == SET)
11300	;
11301      else if (GET_CODE (set) == PARALLEL
11302	       && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11303	set = XVECEXP (set, 0, 0);
11304      else
11305	{
11306#ifdef ENABLE_CHECKING
11307	  abort ();
11308#endif
11309	  return 0;
11310	}
11311
11312      return memory_address_length (SET_SRC (set));
11313    }
11314
11315  extract_insn_cached (insn);
11316  for (i = recog_data.n_operands - 1; i >= 0; --i)
11317    if (GET_CODE (recog_data.operand[i]) == MEM)
11318      {
11319	return memory_address_length (XEXP (recog_data.operand[i], 0));
11321      }
11322  return 0;
11323}
11324
11325/* Return the maximum number of instructions a cpu can issue.  */
11326
11327static int
11328ix86_issue_rate ()
11329{
11330  switch (ix86_cpu)
11331    {
11332    case PROCESSOR_PENTIUM:
11333    case PROCESSOR_K6:
11334      return 2;
11335
11336    case PROCESSOR_PENTIUMPRO:
11337    case PROCESSOR_PENTIUM4:
11338    case PROCESSOR_ATHLON:
11339      return 3;
11340
11341    default:
11342      return 1;
11343    }
11344}
11345
11346/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11347	   by DEP_INSN and nothing else set by DEP_INSN.  */
11348
11349static int
11350ix86_flags_dependant (insn, dep_insn, insn_type)
11351     rtx insn, dep_insn;
11352     enum attr_type insn_type;
11353{
11354  rtx set, set2;
11355
11356  /* Simplify the test for uninteresting insns.  */
11357  if (insn_type != TYPE_SETCC
11358      && insn_type != TYPE_ICMOV
11359      && insn_type != TYPE_FCMOV
11360      && insn_type != TYPE_IBR)
11361    return 0;
11362
11363  if ((set = single_set (dep_insn)) != 0)
11364    {
11365      set = SET_DEST (set);
11366      set2 = NULL_RTX;
11367    }
11368  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11369	   && XVECLEN (PATTERN (dep_insn), 0) == 2
11370	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11371	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11372    {
11373      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11374	      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11375    }
11376  else
11377    return 0;
11378
11379  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11380    return 0;
11381
11382  /* This test is true if the dependent insn reads the flags but
11383     not any other potentially set register.  */
11384  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11385    return 0;
11386
11387  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11388    return 0;
11389
11390  return 1;
11391}
11392
11393/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11394   address with operands set by DEP_INSN.  */
11395
11396static int
11397ix86_agi_dependant (insn, dep_insn, insn_type)
11398     rtx insn, dep_insn;
11399     enum attr_type insn_type;
11400{
11401  rtx addr;
11402
11403  if (insn_type == TYPE_LEA
11404      && TARGET_PENTIUM)
11405    {
11406      addr = PATTERN (insn);
11407      if (GET_CODE (addr) == SET)
11408	;
11409      else if (GET_CODE (addr) == PARALLEL
11410	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11411	addr = XVECEXP (addr, 0, 0);
11412      else
11413	abort ();
11414      addr = SET_SRC (addr);
11415    }
11416  else
11417    {
11418      int i;
11419      extract_insn_cached (insn);
11420      for (i = recog_data.n_operands - 1; i >= 0; --i)
11421	if (GET_CODE (recog_data.operand[i]) == MEM)
11422	  {
11423	    addr = XEXP (recog_data.operand[i], 0);
11424	    goto found;
11425	  }
11426      return 0;
11427    found:;
11428    }
11429
11430  return modified_in_p (addr, dep_insn);
11431}
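
/* Example of the address generation interlock detected above (Pentium):

	movl	%esi, %eax
	movl	(%eax), %ebx

   The load stalls for an extra cycle because %eax, needed for address
   generation, is written by the immediately preceding insn.  */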
11432
11433static int
11434ix86_adjust_cost (insn, link, dep_insn, cost)
11435     rtx insn, link, dep_insn;
11436     int cost;
11437{
11438  enum attr_type insn_type, dep_insn_type;
11439  enum attr_memory memory, dep_memory;
11440  rtx set, set2;
11441  int dep_insn_code_number;
11442
11443	  /* Anti and output dependencies have zero cost on all CPUs.  */
11444  if (REG_NOTE_KIND (link) != 0)
11445    return 0;
11446
11447  dep_insn_code_number = recog_memoized (dep_insn);
11448
11449  /* If we can't recognize the insns, we can't really do anything.  */
11450  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11451    return cost;
11452
11453  insn_type = get_attr_type (insn);
11454  dep_insn_type = get_attr_type (dep_insn);
11455
11456  switch (ix86_cpu)
11457    {
11458    case PROCESSOR_PENTIUM:
11459      /* Address Generation Interlock adds a cycle of latency.  */
11460      if (ix86_agi_dependant (insn, dep_insn, insn_type))
11461	cost += 1;
11462
11463      /* ??? Compares pair with jump/setcc.  */
11464      if (ix86_flags_dependant (insn, dep_insn, insn_type))
11465	cost = 0;
11466
11467	      /* Floating point stores require the value to be ready one cycle earlier.  */
11468      if (insn_type == TYPE_FMOV
11469	  && get_attr_memory (insn) == MEMORY_STORE
11470	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11471	cost += 1;
11472      break;
11473
11474    case PROCESSOR_PENTIUMPRO:
11475      memory = get_attr_memory (insn);
11476      dep_memory = get_attr_memory (dep_insn);
11477
11478      /* Since we can't represent delayed latencies of load+operation,
11479	 increase the cost here for non-imov insns.  */
11480      if (dep_insn_type != TYPE_IMOV
11481          && dep_insn_type != TYPE_FMOV
11482          && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11483	cost += 1;
11484
11485      /* INT->FP conversion is expensive.  */
11486      if (get_attr_fp_int_src (dep_insn))
11487	cost += 5;
11488
11489      /* There is one cycle extra latency between an FP op and a store.  */
11490      if (insn_type == TYPE_FMOV
11491	  && (set = single_set (dep_insn)) != NULL_RTX
11492	  && (set2 = single_set (insn)) != NULL_RTX
11493	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11494	  && GET_CODE (SET_DEST (set2)) == MEM)
11495	cost += 1;
11496
11497	      /* Show the ability of the reorder buffer to hide the latency of a
11498		 load by executing it in parallel with the previous instruction
11499		 when that instruction is not needed to compute the address.  */
11500      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11501	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11502 	{
11503	  /* Claim moves to take one cycle, as the core can issue one load
11504	     at a time and the next load can start a cycle later.  */
11505	  if (dep_insn_type == TYPE_IMOV
11506	      || dep_insn_type == TYPE_FMOV)
11507	    cost = 1;
11508	  else if (cost > 1)
11509	    cost--;
11510	}
11511      break;
11512
11513    case PROCESSOR_K6:
11514      memory = get_attr_memory (insn);
11515      dep_memory = get_attr_memory (dep_insn);
11516      /* The esp dependency is resolved before the instruction is really
11517         finished.  */
11518      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11519	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11520	return 1;
11521
11522      /* Since we can't represent delayed latencies of load+operation,
11523	 increase the cost here for non-imov insns.  */
11524      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11525	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11526
11527      /* INT->FP conversion is expensive.  */
11528      if (get_attr_fp_int_src (dep_insn))
11529	cost += 5;
11530
11531	      /* Show the ability of the reorder buffer to hide the latency of a
11532		 load by executing it in parallel with the previous instruction
11533		 when that instruction is not needed to compute the address.  */
11534      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11535	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11536 	{
11537	  /* Claim moves to take one cycle, as the core can issue one load
11538	     at a time and the next load can start a cycle later.  */
11539	  if (dep_insn_type == TYPE_IMOV
11540	      || dep_insn_type == TYPE_FMOV)
11541	    cost = 1;
11542	  else if (cost > 2)
11543	    cost -= 2;
11544	  else
11545	    cost = 1;
11546	}
11547      break;
11548
11549    case PROCESSOR_ATHLON:
11550      memory = get_attr_memory (insn);
11551      dep_memory = get_attr_memory (dep_insn);
11552
11553      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11554	{
11555	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11556	    cost += 2;
11557	  else
11558	    cost += 3;
11559        }
11560	      /* Show the ability of the reorder buffer to hide the latency of a
11561		 load by executing it in parallel with the previous instruction
11562		 when that instruction is not needed to compute the address.  */
11563      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11564	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11565 	{
11566	  /* Claim moves to take one cycle, as the core can issue one load
11567	     at a time and the next load can start a cycle later.  */
11568	  if (dep_insn_type == TYPE_IMOV
11569	      || dep_insn_type == TYPE_FMOV)
11570	    cost = 0;
11571	  else if (cost >= 3)
11572	    cost -= 3;
11573	  else
11574	    cost = 0;
11575	}
11576
11577    default:
11578      break;
11579    }
11580
11581  return cost;
11582}
11583
11584static union
11585{
11586  struct ppro_sched_data
11587  {
11588    rtx decode[3];
11589    int issued_this_cycle;
11590  } ppro;
11591} ix86_sched_data;
11592
11593static enum attr_ppro_uops
11594ix86_safe_ppro_uops (insn)
11595     rtx insn;
11596{
11597  if (recog_memoized (insn) >= 0)
11598    return get_attr_ppro_uops (insn);
11599  else
11600    return PPRO_UOPS_MANY;
11601}
11602
11603static void
11604ix86_dump_ppro_packet (dump)
11605     FILE *dump;
11606{
11607  if (ix86_sched_data.ppro.decode[0])
11608    {
11609      fprintf (dump, "PPRO packet: %d",
11610	       INSN_UID (ix86_sched_data.ppro.decode[0]));
11611      if (ix86_sched_data.ppro.decode[1])
11612	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11613      if (ix86_sched_data.ppro.decode[2])
11614	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11615      fputc ('\n', dump);
11616    }
11617}
11618
11619/* We're beginning a new block.  Initialize data structures as necessary.  */
11620
11621static void
11622ix86_sched_init (dump, sched_verbose, veclen)
11623     FILE *dump ATTRIBUTE_UNUSED;
11624     int sched_verbose ATTRIBUTE_UNUSED;
11625     int veclen ATTRIBUTE_UNUSED;
11626{
11627  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11628}
11629
11630/* Shift INSN to SLOT, and shift everything else down.  */
11631
11632static void
11633ix86_reorder_insn (insnp, slot)
11634     rtx *insnp, *slot;
11635{
11636  if (insnp != slot)
11637    {
11638      rtx insn = *insnp;
11639      do
11640	insnp[0] = insnp[1];
11641      while (++insnp != slot);
11642      *insnp = insn;
11643    }
11644}
11645
11646static void
11647ix86_sched_reorder_ppro (ready, e_ready)
11648     rtx *ready;
11649     rtx *e_ready;
11650{
11651  rtx decode[3];
11652  enum attr_ppro_uops cur_uops;
11653  int issued_this_cycle;
11654  rtx *insnp;
11655  int i;
11656
11657  /* At this point .ppro.decode contains the state of the three
11658     decoders from last "cycle".  That is, those insns that were
11659     actually independent.  But here we're scheduling for the
11660     decoder, and we may find things that are decodable in the
11661     same cycle.  */
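  /* The PPro front end has one complex decoder, which handles insns of
     up to four uops (and microcoded ones), and two simple decoders
     limited to a single uop each, so the best sustained pattern is one
     multi-uop insn followed by two one-uop insns per cycle; the searches
     below try to assemble such a triple.  */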
11662
11663  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11664  issued_this_cycle = 0;
11665
11666  insnp = e_ready;
11667  cur_uops = ix86_safe_ppro_uops (*insnp);
11668
11669  /* If the decoders are empty, and we've a complex insn at the
11670     head of the priority queue, let it issue without complaint.  */
11671  if (decode[0] == NULL)
11672    {
11673      if (cur_uops == PPRO_UOPS_MANY)
11674	{
11675	  decode[0] = *insnp;
11676	  goto ppro_done;
11677	}
11678
11679	      /* Otherwise, search for a 2-4 uop insn to issue.  */
11680      while (cur_uops != PPRO_UOPS_FEW)
11681	{
11682	  if (insnp == ready)
11683	    break;
11684	  cur_uops = ix86_safe_ppro_uops (*--insnp);
11685	}
11686
11687      /* If so, move it to the head of the line.  */
11688      if (cur_uops == PPRO_UOPS_FEW)
11689	ix86_reorder_insn (insnp, e_ready);
11690
11691      /* Issue the head of the queue.  */
11692      issued_this_cycle = 1;
11693      decode[0] = *e_ready--;
11694    }
11695
11696  /* Look for simple insns to fill in the other two slots.  */
11697  for (i = 1; i < 3; ++i)
11698    if (decode[i] == NULL)
11699      {
11700	if (ready > e_ready)
11701	  goto ppro_done;
11702
11703	insnp = e_ready;
11704	cur_uops = ix86_safe_ppro_uops (*insnp);
11705	while (cur_uops != PPRO_UOPS_ONE)
11706	  {
11707	    if (insnp == ready)
11708	      break;
11709	    cur_uops = ix86_safe_ppro_uops (*--insnp);
11710	  }
11711
11712	/* Found one.  Move it to the head of the queue and issue it.  */
11713	if (cur_uops == PPRO_UOPS_ONE)
11714	  {
11715	    ix86_reorder_insn (insnp, e_ready);
11716	    decode[i] = *e_ready--;
11717	    issued_this_cycle++;
11718	    continue;
11719	  }
11720
11721	/* ??? Didn't find one.  Ideally, here we would do a lazy split
11722	   of 2-uop insns, issue one and queue the other.  */
11723      }
11724
11725 ppro_done:
11726  if (issued_this_cycle == 0)
11727    issued_this_cycle = 1;
11728  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11729}
11730
11731	/* We are about to begin issuing insns for this clock cycle.
11732   Override the default sort algorithm to better slot instructions.  */
11733static int
11734ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11735     FILE *dump ATTRIBUTE_UNUSED;
11736     int sched_verbose ATTRIBUTE_UNUSED;
11737     rtx *ready;
11738     int *n_readyp;
11739     int clock_var ATTRIBUTE_UNUSED;
11740{
11741  int n_ready = *n_readyp;
11742  rtx *e_ready = ready + n_ready - 1;
11743
11744  /* Make sure to go ahead and initialize key items in
11745     ix86_sched_data if we are not going to bother trying to
11746     reorder the ready queue.  */
11747  if (n_ready < 2)
11748    {
11749      ix86_sched_data.ppro.issued_this_cycle = 1;
11750      goto out;
11751    }
11752
11753  switch (ix86_cpu)
11754    {
11755    default:
11756      break;
11757
11758    case PROCESSOR_PENTIUMPRO:
11759      ix86_sched_reorder_ppro (ready, e_ready);
11760      break;
11761    }
11762
11763out:
11764  return ix86_issue_rate ();
11765}
11766
11767/* We are about to issue INSN.  Return the number of insns left on the
11768   ready queue that can be issued this cycle.  */
11769
11770static int
11771ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11772     FILE *dump;
11773     int sched_verbose;
11774     rtx insn;
11775     int can_issue_more;
11776{
11777  int i;
11778  switch (ix86_cpu)
11779    {
11780    default:
11781      return can_issue_more - 1;
11782
11783    case PROCESSOR_PENTIUMPRO:
11784      {
11785	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11786
11787	if (uops == PPRO_UOPS_MANY)
11788	  {
11789	    if (sched_verbose)
11790	      ix86_dump_ppro_packet (dump);
11791	    ix86_sched_data.ppro.decode[0] = insn;
11792	    ix86_sched_data.ppro.decode[1] = NULL;
11793	    ix86_sched_data.ppro.decode[2] = NULL;
11794	    if (sched_verbose)
11795	      ix86_dump_ppro_packet (dump);
11796	    ix86_sched_data.ppro.decode[0] = NULL;
11797	  }
11798	else if (uops == PPRO_UOPS_FEW)
11799	  {
11800	    if (sched_verbose)
11801	      ix86_dump_ppro_packet (dump);
11802	    ix86_sched_data.ppro.decode[0] = insn;
11803	    ix86_sched_data.ppro.decode[1] = NULL;
11804	    ix86_sched_data.ppro.decode[2] = NULL;
11805	  }
11806	else
11807	  {
11808	    for (i = 0; i < 3; ++i)
11809	      if (ix86_sched_data.ppro.decode[i] == NULL)
11810		{
11811		  ix86_sched_data.ppro.decode[i] = insn;
11812		  break;
11813		}
11814	    if (i == 3)
11815	      abort ();
11816	    if (i == 2)
11817	      {
11818	        if (sched_verbose)
11819	          ix86_dump_ppro_packet (dump);
11820		ix86_sched_data.ppro.decode[0] = NULL;
11821		ix86_sched_data.ppro.decode[1] = NULL;
11822		ix86_sched_data.ppro.decode[2] = NULL;
11823	      }
11824	  }
11825      }
11826      return --ix86_sched_data.ppro.issued_this_cycle;
11827    }
11828}
11829
11830static int
11831ia32_use_dfa_pipeline_interface ()
11832{
11833  if (ix86_cpu == PROCESSOR_PENTIUM)
11834    return 1;
11835  return 0;
11836}
11837
11838/* How many alternative schedules to try.  This should be as wide as the
11839   scheduling freedom in the DFA, but no wider.  Making this value too
11840	   large results in extra work for the scheduler.  */
11841
11842static int
11843ia32_multipass_dfa_lookahead ()
11844{
11845  if (ix86_cpu == PROCESSOR_PENTIUM)
11846    return 2;
11847  else
11848   return 0;
11849}
11850
11851
11852/* Walk through INSNS and look for MEM references whose address is DSTREG or
11853	   SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
11854   appropriate.  */
11855
11856void
11857ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11858     rtx insns;
11859     rtx dstref, srcref, dstreg, srcreg;
11860{
11861  rtx insn;
11862
11863  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11864    if (INSN_P (insn))
11865      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11866				 dstreg, srcreg);
11867}
11868
11869/* Subroutine of above to actually do the updating by recursively walking
11870   the rtx.  */
11871
11872static void
11873ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11874     rtx x;
11875     rtx dstref, srcref, dstreg, srcreg;
11876{
11877  enum rtx_code code = GET_CODE (x);
11878  const char *format_ptr = GET_RTX_FORMAT (code);
11879  int i, j;
11880
11881  if (code == MEM && XEXP (x, 0) == dstreg)
11882    MEM_COPY_ATTRIBUTES (x, dstref);
11883  else if (code == MEM && XEXP (x, 0) == srcreg)
11884    MEM_COPY_ATTRIBUTES (x, srcref);
11885
11886  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11887    {
11888      if (*format_ptr == 'e')
11889	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11890				   dstreg, srcreg);
11891      else if (*format_ptr == 'E')
11892	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11893	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11894				     dstreg, srcreg);
11895    }
11896}
11897
11898/* Compute the alignment given to a constant that is being placed in memory.
11899   EXP is the constant and ALIGN is the alignment that the object would
11900   ordinarily have.
11901   The value of this function is used instead of that alignment to align
11902   the object.  */
11903
11904int
11905ix86_constant_alignment (exp, align)
11906     tree exp;
11907     int align;
11908{
11909  if (TREE_CODE (exp) == REAL_CST)
11910    {
11911      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11912	return 64;
11913      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11914	return 128;
11915    }
11916  else if (TREE_CODE (exp) == STRING_CST && !TARGET_NO_ALIGN_LONG_STRINGS
11917	   && TREE_STRING_LENGTH (exp) >= 31 && align < 256)
11918    return 256;
11919
11920  return align;
11921}
11922
11923/* Compute the alignment for a static variable.
11924   TYPE is the data type, and ALIGN is the alignment that
11925   the object would ordinarily have.  The value of this function is used
11926   instead of that alignment to align the object.  */
11927
11928int
11929ix86_data_alignment (type, align)
11930     tree type;
11931     int align;
11932{
11933  if (AGGREGATE_TYPE_P (type)
11934       && TYPE_SIZE (type)
11935       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11936       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11937	   || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11938    return 256;
11939
11940  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11941	     to a 16-byte boundary.  */
11942  if (TARGET_64BIT)
11943    {
11944      if (AGGREGATE_TYPE_P (type)
11945	   && TYPE_SIZE (type)
11946	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11947	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11948	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11949	return 128;
11950    }
11951
11952  if (TREE_CODE (type) == ARRAY_TYPE)
11953    {
11954      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11955	return 64;
11956      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11957	return 128;
11958    }
11959  else if (TREE_CODE (type) == COMPLEX_TYPE)
11960    {
11961
11962      if (TYPE_MODE (type) == DCmode && align < 64)
11963	return 64;
11964      if (TYPE_MODE (type) == XCmode && align < 128)
11965	return 128;
11966    }
11967  else if ((TREE_CODE (type) == RECORD_TYPE
11968	    || TREE_CODE (type) == UNION_TYPE
11969	    || TREE_CODE (type) == QUAL_UNION_TYPE)
11970	   && TYPE_FIELDS (type))
11971    {
11972      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11973	return 64;
11974      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11975	return 128;
11976    }
11977  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11978	   || TREE_CODE (type) == INTEGER_TYPE)
11979    {
11980      if (TYPE_MODE (type) == DFmode && align < 64)
11981	return 64;
11982      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11983	return 128;
11984    }
11985
11986  return align;
11987}
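
/* For example, "static char buf[40]" is a 320-bit aggregate and is
   boosted to 256-bit alignment by the first test above, while a lone
   "double" reaches the final case and gets 64-bit alignment.  */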
11988
11989/* Compute the alignment for a local variable.
11990   TYPE is the data type, and ALIGN is the alignment that
11991   the object would ordinarily have.  The value of this macro is used
11992   instead of that alignment to align the object.  */
11993
11994int
11995ix86_local_alignment (type, align)
11996     tree type;
11997     int align;
11998{
11999  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12000	     to a 16-byte boundary.  */
12001  if (TARGET_64BIT)
12002    {
12003      if (AGGREGATE_TYPE_P (type)
12004	   && TYPE_SIZE (type)
12005	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12006	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12007	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12008	return 128;
12009    }
12010  if (TREE_CODE (type) == ARRAY_TYPE)
12011    {
12012      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12013	return 64;
12014      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12015	return 128;
12016    }
12017  else if (TREE_CODE (type) == COMPLEX_TYPE)
12018    {
12019      if (TYPE_MODE (type) == DCmode && align < 64)
12020	return 64;
12021      if (TYPE_MODE (type) == XCmode && align < 128)
12022	return 128;
12023    }
12024  else if ((TREE_CODE (type) == RECORD_TYPE
12025	    || TREE_CODE (type) == UNION_TYPE
12026	    || TREE_CODE (type) == QUAL_UNION_TYPE)
12027	   && TYPE_FIELDS (type))
12028    {
12029      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12030	return 64;
12031      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12032	return 128;
12033    }
12034  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12035	   || TREE_CODE (type) == INTEGER_TYPE)
12036    {
12037
12038      if (TYPE_MODE (type) == DFmode && align < 64)
12039	return 64;
12040      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12041	return 128;
12042    }
12043  return align;
12044}
12045
12046/* Emit RTL insns to initialize the variable parts of a trampoline.
12047   FNADDR is an RTX for the address of the function's pure code.
12048   CXT is an RTX for the static chain value for the function.  */
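/* On IA-32 the ten bytes emitted below form

	b9 <cxt:4>		movl  $CXT, %ecx
	e9 <disp:4>		jmp   FNADDR

   while in 64-bit mode the sequence is

	49 bb <fnaddr:8>	movabs $FNADDR, %r11	(or 41 bb <fnaddr:4>)
	49 ba <cxt:8>		movabs $CXT, %r10
	49 ff e3		jmp   *%r11  */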
12049void
12050x86_initialize_trampoline (tramp, fnaddr, cxt)
12051     rtx tramp, fnaddr, cxt;
12052{
12053  if (!TARGET_64BIT)
12054    {
12055      /* Compute offset from the end of the jmp to the target function.  */
12056      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12057			       plus_constant (tramp, 10),
12058			       NULL_RTX, 1, OPTAB_DIRECT);
12059      emit_move_insn (gen_rtx_MEM (QImode, tramp),
12060		      gen_int_mode (0xb9, QImode));
12061      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12062      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12063		      gen_int_mode (0xe9, QImode));
12064      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12065    }
12066  else
12067    {
12068      int offset = 0;
12069	      /* Try to load the address using the shorter movl instead of movabs.
12070	         We may want to support movq for kernel mode, but the kernel does not
12071	         use trampolines at the moment.  */
12072      if (x86_64_zero_extended_value (fnaddr))
12073	{
12074	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
12075	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12076			  gen_int_mode (0xbb41, HImode));
12077	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12078			  gen_lowpart (SImode, fnaddr));
12079	  offset += 6;
12080	}
12081      else
12082	{
12083	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12084			  gen_int_mode (0xbb49, HImode));
12085	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12086			  fnaddr);
12087	  offset += 10;
12088	}
12089      /* Load static chain using movabs to r10.  */
12090      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12091		      gen_int_mode (0xba49, HImode));
12092      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12093		      cxt);
12094      offset += 10;
12095	      /* Jump to r11 (jmp *%r11).  */
12096      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12097		      gen_int_mode (0xff49, HImode));
12098      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12099		      gen_int_mode (0xe3, QImode));
12100      offset += 3;
12101      if (offset > TRAMPOLINE_SIZE)
12102	abort ();
12103    }
12104
12105#ifdef TRANSFER_FROM_TRAMPOLINE
12106  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12107		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12108#endif
12109}
12110
12111#define def_builtin(MASK, NAME, TYPE, CODE)			\
12112do {								\
12113  if ((MASK) & target_flags					\
12114      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
12115    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
12116		      NULL, NULL_TREE);				\
12117} while (0)
12118
12119struct builtin_description
12120{
12121  const unsigned int mask;
12122  const enum insn_code icode;
12123  const char *const name;
12124  const enum ix86_builtins code;
12125  const enum rtx_code comparison;
12126  const unsigned int flag;
12127};
12128
12129static const struct builtin_description bdesc_comi[] =
12130{
12131  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12132  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12133  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12134  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12135  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12136  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12137  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12138  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12139  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12140  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12141  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12142  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12143  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12144  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12145  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12146  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12147  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12148  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12149  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12150  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12151  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12152  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12153  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12154  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12155};
12156
12157static const struct builtin_description bdesc_2arg[] =
12158{
12159  /* SSE */
12160  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12161  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12162  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12163  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12164  { MASK_SSE, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12165  { MASK_SSE, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12166  { MASK_SSE, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12167  { MASK_SSE, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12168
12169  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12170  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12171  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12172  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12173  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12174  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12175  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12176  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12177  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12178  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12179  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12180  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12181  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12182  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12183  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12184  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12185  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12186  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12187  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12188  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12189
12190  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12191  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12192  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12193  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12194
12195  { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12196  { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12197  { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12198  { MASK_SSE, CODE_FOR_sse_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12199
12200  { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12201  { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12202  { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12203  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12204  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12205
12206  /* MMX */
12207  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12208  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12209  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12210  { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12211  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12212  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12213  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12214  { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12215
12216  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12217  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12218  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12219  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12220  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12221  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12222  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12223  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12224
12225  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12226  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12227  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12228
12229  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12230  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12231  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12232  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12233
12234  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12235  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12236
12237  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12238  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12239  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12240  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12241  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12242  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12243
12244  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12245  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12246  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12247  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12248
12249  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12250  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12251  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12252  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12253  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12254  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12255
12256  /* Special.  */
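  /* Entries below with a zero name field are skipped by the generic
     registration loop in ix86_init_mmx_sse_builtins; they are given
     hand-built prototypes there instead.  */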
12257  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12258  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12259  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12260
12261  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12262  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12263  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12264
12265  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12266  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12267  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12268  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12269  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12270  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12271
12272  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12273  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12274  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12275  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12276  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12277  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12278
12279  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12280  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12281  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12282  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12283
12284  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12285  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12286
12287  /* SSE2 */
12288  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12289  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12290  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12291  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12292  { MASK_SSE2, CODE_FOR_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12293  { MASK_SSE2, CODE_FOR_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12294  { MASK_SSE2, CODE_FOR_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12295  { MASK_SSE2, CODE_FOR_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12296
12297  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12298  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12299  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12300  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12301  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12302  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12303  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12304  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12305  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12306  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12307  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12308  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12309  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12310  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12311  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12312  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12313  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12314  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12315  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12316  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12317
12318  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12319  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12320  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12321  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12322
12323  { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12324  { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12325  { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12326  { MASK_SSE2, CODE_FOR_sse2_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12327
12328  { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12329  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12330  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12331
12332  /* SSE2 MMX */
12333  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12334  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12335  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12336  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12337  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12338  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12339  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12340  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12341
12342  { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12343  { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12344  { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12345  { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12346  { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12347  { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12348  { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12349  { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12350
12351  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12352  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12353  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12354  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12355
12356  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12357  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12358  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12359  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12360
12361  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12362  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12363
12364  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12365  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12366  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12367  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12368  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12369  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12370
12371  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12372  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12373  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12374  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12375
12376  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12377  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12378  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12379  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12380  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12381  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12382  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12383  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12384
12385  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12386  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12387  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12388
12389  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12390  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12391
12392  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12393  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12394  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12395  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12396  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12397  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12398
12399  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12400  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12401  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12402  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12403  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12404  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12405
12406  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12407  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12408  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12409  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12410
12411  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12412
12413  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12414  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12415  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12416  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12417
12418  /* PNI */
12419  { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12420  { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12421  { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12422  { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12423  { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12424  { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12425};
12426
12427static const struct builtin_description bdesc_1arg[] =
12428{
12429  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12430  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12431
12432  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12433  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12434  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12435
12436  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12437  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12438  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12439  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12440  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12441  { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12442
12443  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12444  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12445  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12446  { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12447
12448  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12449
12450  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12451  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12452
12453  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12454  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12455  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12456  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12457  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12458
12459  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12460
12461  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12462  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12463  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12464  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12465
12466  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12467  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12468  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12469
12470  { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12471
12472  /* PNI */
12473  { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12474  { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12475  { MASK_PNI, CODE_FOR_movddup,  0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12476};
12477
12478void
12479ix86_init_builtins ()
12480{
12481  if (TARGET_MMX)
12482    ix86_init_mmx_sse_builtins ();
12483}
12484
12485/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
12486   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
12487   builtins.  */
12488static void
12489ix86_init_mmx_sse_builtins ()
12490{
12491  const struct builtin_description * d;
12492  size_t i;
12493
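  /* A note on naming: the function-type temporaries below read
     RET_ftype_ARGS, so e.g. v4sf_ftype_v4sf_int denotes
     "V4SF function (V4SF, int)".  */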
12494  tree pchar_type_node = build_pointer_type (char_type_node);
12495  tree pcchar_type_node = build_pointer_type (
12496			     build_type_variant (char_type_node, 1, 0));
12497  tree pfloat_type_node = build_pointer_type (float_type_node);
12498  tree pcfloat_type_node = build_pointer_type (
12499			     build_type_variant (float_type_node, 1, 0));
12500  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12501  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12502  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12503
12504  /* Comparisons.  */
12505  tree int_ftype_v4sf_v4sf
12506    = build_function_type_list (integer_type_node,
12507				V4SF_type_node, V4SF_type_node, NULL_TREE);
12508  tree v4si_ftype_v4sf_v4sf
12509    = build_function_type_list (V4SI_type_node,
12510				V4SF_type_node, V4SF_type_node, NULL_TREE);
12511  /* MMX/SSE/integer conversions.  */
12512  tree int_ftype_v4sf
12513    = build_function_type_list (integer_type_node,
12514				V4SF_type_node, NULL_TREE);
12515  tree int64_ftype_v4sf
12516    = build_function_type_list (long_long_integer_type_node,
12517				V4SF_type_node, NULL_TREE);
12518  tree int_ftype_v8qi
12519    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12520  tree v4sf_ftype_v4sf_int
12521    = build_function_type_list (V4SF_type_node,
12522				V4SF_type_node, integer_type_node, NULL_TREE);
12523  tree v4sf_ftype_v4sf_int64
12524    = build_function_type_list (V4SF_type_node,
12525				V4SF_type_node, long_long_integer_type_node,
12526				NULL_TREE);
12527  tree v4sf_ftype_v4sf_v2si
12528    = build_function_type_list (V4SF_type_node,
12529				V4SF_type_node, V2SI_type_node, NULL_TREE);
12530  tree int_ftype_v4hi_int
12531    = build_function_type_list (integer_type_node,
12532				V4HI_type_node, integer_type_node, NULL_TREE);
12533  tree v4hi_ftype_v4hi_int_int
12534    = build_function_type_list (V4HI_type_node, V4HI_type_node,
12535				integer_type_node, integer_type_node,
12536				NULL_TREE);
12537  /* Miscellaneous.  */
12538  tree v8qi_ftype_v4hi_v4hi
12539    = build_function_type_list (V8QI_type_node,
12540				V4HI_type_node, V4HI_type_node, NULL_TREE);
12541  tree v4hi_ftype_v2si_v2si
12542    = build_function_type_list (V4HI_type_node,
12543				V2SI_type_node, V2SI_type_node, NULL_TREE);
12544  tree v4sf_ftype_v4sf_v4sf_int
12545    = build_function_type_list (V4SF_type_node,
12546				V4SF_type_node, V4SF_type_node,
12547				integer_type_node, NULL_TREE);
12548  tree v2si_ftype_v4hi_v4hi
12549    = build_function_type_list (V2SI_type_node,
12550				V4HI_type_node, V4HI_type_node, NULL_TREE);
12551  tree v4hi_ftype_v4hi_int
12552    = build_function_type_list (V4HI_type_node,
12553				V4HI_type_node, integer_type_node, NULL_TREE);
12554  tree v4hi_ftype_v4hi_di
12555    = build_function_type_list (V4HI_type_node,
12556				V4HI_type_node, long_long_unsigned_type_node,
12557				NULL_TREE);
12558  tree v2si_ftype_v2si_di
12559    = build_function_type_list (V2SI_type_node,
12560				V2SI_type_node, long_long_unsigned_type_node,
12561				NULL_TREE);
12562  tree void_ftype_void
12563    = build_function_type (void_type_node, void_list_node);
12564  tree void_ftype_unsigned
12565    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12566  tree void_ftype_unsigned_unsigned
12567    = build_function_type_list (void_type_node, unsigned_type_node,
12568				unsigned_type_node, NULL_TREE);
12569  tree void_ftype_pcvoid_unsigned_unsigned
12570    = build_function_type_list (void_type_node, const_ptr_type_node,
12571				unsigned_type_node, unsigned_type_node,
12572				NULL_TREE);
12573  tree unsigned_ftype_void
12574    = build_function_type (unsigned_type_node, void_list_node);
12575  tree di_ftype_void
12576    = build_function_type (long_long_unsigned_type_node, void_list_node);
12577  tree v4sf_ftype_void
12578    = build_function_type (V4SF_type_node, void_list_node);
12579  tree v2si_ftype_v4sf
12580    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12581  /* Loads/stores.  */
12582  tree void_ftype_v8qi_v8qi_pchar
12583    = build_function_type_list (void_type_node,
12584				V8QI_type_node, V8QI_type_node,
12585				pchar_type_node, NULL_TREE);
12586  tree v4sf_ftype_pcfloat
12587    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12588  /* @@@ the type is bogus: loadhps/loadlps really access a pair of packed floats, but a pointer to V2SI stands in for it.  */
12589  tree v4sf_ftype_v4sf_pv2si
12590    = build_function_type_list (V4SF_type_node,
12591				V4SF_type_node, pv2si_type_node, NULL_TREE);
12592  tree void_ftype_pv2si_v4sf
12593    = build_function_type_list (void_type_node,
12594				pv2si_type_node, V4SF_type_node, NULL_TREE);
12595  tree void_ftype_pfloat_v4sf
12596    = build_function_type_list (void_type_node,
12597				pfloat_type_node, V4SF_type_node, NULL_TREE);
12598  tree void_ftype_pdi_di
12599    = build_function_type_list (void_type_node,
12600				pdi_type_node, long_long_unsigned_type_node,
12601				NULL_TREE);
12602  tree void_ftype_pv2di_v2di
12603    = build_function_type_list (void_type_node,
12604				pv2di_type_node, V2DI_type_node, NULL_TREE);
12605  /* Normal vector unops.  */
12606  tree v4sf_ftype_v4sf
12607    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12608
12609  /* Normal vector binops.  */
12610  tree v4sf_ftype_v4sf_v4sf
12611    = build_function_type_list (V4SF_type_node,
12612				V4SF_type_node, V4SF_type_node, NULL_TREE);
12613  tree v8qi_ftype_v8qi_v8qi
12614    = build_function_type_list (V8QI_type_node,
12615				V8QI_type_node, V8QI_type_node, NULL_TREE);
12616  tree v4hi_ftype_v4hi_v4hi
12617    = build_function_type_list (V4HI_type_node,
12618				V4HI_type_node, V4HI_type_node, NULL_TREE);
12619  tree v2si_ftype_v2si_v2si
12620    = build_function_type_list (V2SI_type_node,
12621				V2SI_type_node, V2SI_type_node, NULL_TREE);
12622  tree di_ftype_di_di
12623    = build_function_type_list (long_long_unsigned_type_node,
12624				long_long_unsigned_type_node,
12625				long_long_unsigned_type_node, NULL_TREE);
12626
12627  tree v2si_ftype_v2sf
12628    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12629  tree v2sf_ftype_v2si
12630    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12631  tree v2si_ftype_v2si
12632    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12633  tree v2sf_ftype_v2sf
12634    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12635  tree v2sf_ftype_v2sf_v2sf
12636    = build_function_type_list (V2SF_type_node,
12637				V2SF_type_node, V2SF_type_node, NULL_TREE);
12638  tree v2si_ftype_v2sf_v2sf
12639    = build_function_type_list (V2SI_type_node,
12640				V2SF_type_node, V2SF_type_node, NULL_TREE);
12641  tree pint_type_node    = build_pointer_type (integer_type_node);
12642  tree pcint_type_node = build_pointer_type (
12643			     build_type_variant (integer_type_node, 1, 0));
12644  tree pdouble_type_node = build_pointer_type (double_type_node);
12645  tree pcdouble_type_node = build_pointer_type (
12646				build_type_variant (double_type_node, 1, 0));
12647  tree int_ftype_v2df_v2df
12648    = build_function_type_list (integer_type_node,
12649				V2DF_type_node, V2DF_type_node, NULL_TREE);
12650
12651  tree ti_ftype_void
12652    = build_function_type (intTI_type_node, void_list_node);
12653  tree v2di_ftype_void
12654    = build_function_type (V2DI_type_node, void_list_node);
12655  tree ti_ftype_ti_ti
12656    = build_function_type_list (intTI_type_node,
12657				intTI_type_node, intTI_type_node, NULL_TREE);
12658  tree void_ftype_pcvoid
12659    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12660  tree v2di_ftype_di
12661    = build_function_type_list (V2DI_type_node,
12662				long_long_unsigned_type_node, NULL_TREE);
12663  tree di_ftype_v2di
12664    = build_function_type_list (long_long_unsigned_type_node,
12665				V2DI_type_node, NULL_TREE);
12666  tree v4sf_ftype_v4si
12667    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12668  tree v4si_ftype_v4sf
12669    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12670  tree v2df_ftype_v4si
12671    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12672  tree v4si_ftype_v2df
12673    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12674  tree v2si_ftype_v2df
12675    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12676  tree v4sf_ftype_v2df
12677    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12678  tree v2df_ftype_v2si
12679    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12680  tree v2df_ftype_v4sf
12681    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12682  tree int_ftype_v2df
12683    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12684  tree int64_ftype_v2df
12685    = build_function_type_list (long_long_integer_type_node,
12686				V2DF_type_node, NULL_TREE);
12687  tree v2df_ftype_v2df_int
12688    = build_function_type_list (V2DF_type_node,
12689				V2DF_type_node, integer_type_node, NULL_TREE);
12690  tree v2df_ftype_v2df_int64
12691    = build_function_type_list (V2DF_type_node,
12692				V2DF_type_node, long_long_integer_type_node,
12693				NULL_TREE);
12694  tree v4sf_ftype_v4sf_v2df
12695    = build_function_type_list (V4SF_type_node,
12696				V4SF_type_node, V2DF_type_node, NULL_TREE);
12697  tree v2df_ftype_v2df_v4sf
12698    = build_function_type_list (V2DF_type_node,
12699				V2DF_type_node, V4SF_type_node, NULL_TREE);
12700  tree v2df_ftype_v2df_v2df_int
12701    = build_function_type_list (V2DF_type_node,
12702				V2DF_type_node, V2DF_type_node,
12703				integer_type_node,
12704				NULL_TREE);
12705  tree v2df_ftype_v2df_pv2si
12706    = build_function_type_list (V2DF_type_node,
12707				V2DF_type_node, pv2si_type_node, NULL_TREE);
12708  tree void_ftype_pv2si_v2df
12709    = build_function_type_list (void_type_node,
12710				pv2si_type_node, V2DF_type_node, NULL_TREE);
12711  tree void_ftype_pdouble_v2df
12712    = build_function_type_list (void_type_node,
12713				pdouble_type_node, V2DF_type_node, NULL_TREE);
12714  tree void_ftype_pint_int
12715    = build_function_type_list (void_type_node,
12716				pint_type_node, integer_type_node, NULL_TREE);
12717  tree void_ftype_v16qi_v16qi_pchar
12718    = build_function_type_list (void_type_node,
12719				V16QI_type_node, V16QI_type_node,
12720				pchar_type_node, NULL_TREE);
12721  tree v2df_ftype_pcdouble
12722    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12723  tree v2df_ftype_v2df_v2df
12724    = build_function_type_list (V2DF_type_node,
12725				V2DF_type_node, V2DF_type_node, NULL_TREE);
12726  tree v16qi_ftype_v16qi_v16qi
12727    = build_function_type_list (V16QI_type_node,
12728				V16QI_type_node, V16QI_type_node, NULL_TREE);
12729  tree v8hi_ftype_v8hi_v8hi
12730    = build_function_type_list (V8HI_type_node,
12731				V8HI_type_node, V8HI_type_node, NULL_TREE);
12732  tree v4si_ftype_v4si_v4si
12733    = build_function_type_list (V4SI_type_node,
12734				V4SI_type_node, V4SI_type_node, NULL_TREE);
12735  tree v2di_ftype_v2di_v2di
12736    = build_function_type_list (V2DI_type_node,
12737				V2DI_type_node, V2DI_type_node, NULL_TREE);
12738  tree v2di_ftype_v2df_v2df
12739    = build_function_type_list (V2DI_type_node,
12740				V2DF_type_node, V2DF_type_node, NULL_TREE);
12741  tree v2df_ftype_v2df
12742    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12743  tree v2df_ftype_double
12744    = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12745  tree v2df_ftype_double_double
12746    = build_function_type_list (V2DF_type_node,
12747				double_type_node, double_type_node, NULL_TREE);
12748  tree int_ftype_v8hi_int
12749    = build_function_type_list (integer_type_node,
12750				V8HI_type_node, integer_type_node, NULL_TREE);
12751  tree v8hi_ftype_v8hi_int_int
12752    = build_function_type_list (V8HI_type_node,
12753				V8HI_type_node, integer_type_node,
12754				integer_type_node, NULL_TREE);
12755  tree v2di_ftype_v2di_int
12756    = build_function_type_list (V2DI_type_node,
12757				V2DI_type_node, integer_type_node, NULL_TREE);
12758  tree v4si_ftype_v4si_int
12759    = build_function_type_list (V4SI_type_node,
12760				V4SI_type_node, integer_type_node, NULL_TREE);
12761  tree v8hi_ftype_v8hi_int
12762    = build_function_type_list (V8HI_type_node,
12763				V8HI_type_node, integer_type_node, NULL_TREE);
12764  tree v8hi_ftype_v8hi_v2di
12765    = build_function_type_list (V8HI_type_node,
12766				V8HI_type_node, V2DI_type_node, NULL_TREE);
12767  tree v4si_ftype_v4si_v2di
12768    = build_function_type_list (V4SI_type_node,
12769				V4SI_type_node, V2DI_type_node, NULL_TREE);
12770  tree v4si_ftype_v8hi_v8hi
12771    = build_function_type_list (V4SI_type_node,
12772				V8HI_type_node, V8HI_type_node, NULL_TREE);
12773  tree di_ftype_v8qi_v8qi
12774    = build_function_type_list (long_long_unsigned_type_node,
12775				V8QI_type_node, V8QI_type_node, NULL_TREE);
12776  tree v2di_ftype_v16qi_v16qi
12777    = build_function_type_list (V2DI_type_node,
12778				V16QI_type_node, V16QI_type_node, NULL_TREE);
12779  tree int_ftype_v16qi
12780    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12781  tree v16qi_ftype_pcchar
12782    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12783  tree void_ftype_pchar_v16qi
12784    = build_function_type_list (void_type_node,
12785			        pchar_type_node, V16QI_type_node, NULL_TREE);
12786  tree v4si_ftype_pcint
12787    = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12788  tree void_ftype_pcint_v4si
12789    = build_function_type_list (void_type_node,
12790			        pcint_type_node, V4SI_type_node, NULL_TREE);
12791  tree v2di_ftype_v2di
12792    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12793
12794  /* Add all builtins that are more or less simple operations on two
12795     operands.  */
12796  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12797    {
12798      /* Pick the type from an input operand's mode; the output (target)
12799	 operand can have a different mode for mask-generating compares.  */
12800      enum machine_mode mode;
12801      tree type;
12802
12803      if (d->name == 0)
12804	continue;
12805      mode = insn_data[d->icode].operand[1].mode;
12806
12807      switch (mode)
12808	{
12809	case V16QImode:
12810	  type = v16qi_ftype_v16qi_v16qi;
12811	  break;
12812	case V8HImode:
12813	  type = v8hi_ftype_v8hi_v8hi;
12814	  break;
12815	case V4SImode:
12816	  type = v4si_ftype_v4si_v4si;
12817	  break;
12818	case V2DImode:
12819	  type = v2di_ftype_v2di_v2di;
12820	  break;
12821	case V2DFmode:
12822	  type = v2df_ftype_v2df_v2df;
12823	  break;
12824	case TImode:
12825	  type = ti_ftype_ti_ti;
12826	  break;
12827	case V4SFmode:
12828	  type = v4sf_ftype_v4sf_v4sf;
12829	  break;
12830	case V8QImode:
12831	  type = v8qi_ftype_v8qi_v8qi;
12832	  break;
12833	case V4HImode:
12834	  type = v4hi_ftype_v4hi_v4hi;
12835	  break;
12836	case V2SImode:
12837	  type = v2si_ftype_v2si_v2si;
12838	  break;
12839	case DImode:
12840	  type = di_ftype_di_di;
12841	  break;
12842
12843	default:
12844	  abort ();
12845	}
12846
12847      /* Override for comparisons.  */
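      /* (The mask-generating compares take floating-point inputs but
	 produce an all-ones/all-zeros mask, hence the integer vector
	 result types below.)  */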
12848      if (d->icode == CODE_FOR_maskcmpv4sf3
12849	  || d->icode == CODE_FOR_maskncmpv4sf3
12850	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
12851	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12852	type = v4si_ftype_v4sf_v4sf;
12853
12854      if (d->icode == CODE_FOR_maskcmpv2df3
12855	  || d->icode == CODE_FOR_maskncmpv2df3
12856	  || d->icode == CODE_FOR_vmmaskcmpv2df3
12857	  || d->icode == CODE_FOR_vmmaskncmpv2df3)
12858	type = v2di_ftype_v2df_v2df;
12859
12860      def_builtin (d->mask, d->name, type, d->code);
12861    }
12862
12863  /* Add the remaining MMX insns with somewhat more complicated types.  */
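  /* (The MMX shift builtins take the count as a DImode scalar,
     matching the forms of psllw/pslld/psllq that shift by an MMX
     register.)  */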
12864  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12865  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12866  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12867  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12868  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12869
12870  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12871  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12872  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12873
12874  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12875  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12876
12877  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12878  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12879
12880  /* comi/ucomi insns.  */
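  /* Each of these compares two scalars and returns an int, so only the
     vector mode of the arguments (V4SF for SSE, V2DF for SSE2) varies.  */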
12881  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12882    if (d->mask == MASK_SSE2)
12883      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12884    else
12885      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12886
12887  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12888  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12889  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12890
12891  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12892  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12893  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12894  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12895  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12896  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
12897  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12898  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
12899  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12900  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12901  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
12902
12903  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12904  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12905
12906  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12907
12908  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
12909  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
12910  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
12911  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12912  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12913  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12914
12915  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12916  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12917  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12918  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12919
12920  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12921  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12922  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12923  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12924
12925  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12926
12927  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12928
12929  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12930  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12931  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12932  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12933  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12934  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12935
12936  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12937
12938  /* Original 3DNow!  */
12939  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12940  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12941  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12942  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12943  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12944  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12945  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12946  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12947  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12948  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12949  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12950  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12951  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12952  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12953  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12954  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12955  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12956  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12957  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12958  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12959
12960  /* 3DNow! extension as used in the Athlon CPU.  */
12961  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12962  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12963  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12964  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12965  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12966  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12967
12968  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12969
12970  /* SSE2 */
12971  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12972  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12973
12974  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12975  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12976  def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12977
12978  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
12979  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
12980  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
12981  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12982  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12983  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12984
12985  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12986  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12987  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12988  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12989
12990  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12991  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12992  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12993  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12994  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12995
12996  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12997  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12998  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12999  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13000
13001  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13002  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13003
13004  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13005
13006  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13007  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13008
13009  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13010  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13011  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13012  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13013  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13014
13015  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13016
13017  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13018  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13019  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13020  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13021
13022  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13023  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13024  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13025
13026  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13027  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13028  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13029  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13030
13031  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13032  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13033  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13034  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13035  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13036  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13037  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13038
13039  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13040  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13041  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13042
13043  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13044  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13045  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13046  def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13047  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13048  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13049  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13050
13051  def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13052
13053  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13054  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13055  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13056
13057  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13058  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13059  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13060
13061  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13062  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13063
13064  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13065  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13066  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13067  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13068
13069  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13070  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13071  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13072  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13073
13074  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13075  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13076
13077  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13078
13079  /* Prescott New Instructions.  */
13080  def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13081	       void_ftype_pcvoid_unsigned_unsigned,
13082	       IX86_BUILTIN_MONITOR);
13083  def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13084	       void_ftype_unsigned_unsigned,
13085	       IX86_BUILTIN_MWAIT);
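  /* Usage sketch: __builtin_ia32_monitor (addr, ext, hints) arms
     address monitoring on ADDR, and __builtin_ia32_mwait (ext, hints)
     waits for a store to the monitored range.  */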
13086  def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13087	       v4sf_ftype_v4sf,
13088	       IX86_BUILTIN_MOVSHDUP);
13089  def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13090	       v4sf_ftype_v4sf,
13091	       IX86_BUILTIN_MOVSLDUP);
13092  def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13093	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13094  def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13095	       v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13096  def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13097	       v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13098}
13099
13100/* Errors in the source file can cause expand_expr to return const0_rtx
13101   where we expect a vector.  To avoid crashing, use one of the vector
13102   clear instructions.  */
13103static rtx
13104safe_vector_operand (x, mode)
13105     rtx x;
13106     enum machine_mode mode;
13107{
13108  if (x != const0_rtx)
13109    return x;
13110  x = gen_reg_rtx (mode);
13111
13112  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13113    emit_insn (gen_mmx_clrdi (mode == DImode ? x
13114			      : gen_rtx_SUBREG (DImode, x, 0)));
13115  else
13116    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13117				: gen_rtx_SUBREG (V4SFmode, x, 0)));
13118  return x;
13119}
13120
13121/* Subroutine of ix86_expand_builtin to take care of binop insns.  */
13122
13123static rtx
13124ix86_expand_binop_builtin (icode, arglist, target)
13125     enum insn_code icode;
13126     tree arglist;
13127     rtx target;
13128{
13129  rtx pat;
13130  tree arg0 = TREE_VALUE (arglist);
13131  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13132  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13133  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13134  enum machine_mode tmode = insn_data[icode].operand[0].mode;
13135  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13136  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13137
13138  if (VECTOR_MODE_P (mode0))
13139    op0 = safe_vector_operand (op0, mode0);
13140  if (VECTOR_MODE_P (mode1))
13141    op1 = safe_vector_operand (op1, mode1);
13142
13143  if (! target
13144      || GET_MODE (target) != tmode
13145      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13146    target = gen_reg_rtx (tmode);
13147
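  /* The insn may want its second operand in TImode while the argument
     was expanded in SImode; widen it by loading the value into a V4SI
     register (a MOVD-style load) and taking the TImode lowpart.  */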
13148  if (GET_MODE (op1) == SImode && mode1 == TImode)
13149    {
13150      rtx x = gen_reg_rtx (V4SImode);
13151      emit_insn (gen_sse2_loadd (x, op1));
13152      op1 = gen_lowpart (TImode, x);
13153    }
13154
13155  /* If the actual operands have modes different from those the insn
13156     expects for its inputs, abort.  */
13157  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13158    abort ();
13159
13160  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13161    op0 = copy_to_mode_reg (mode0, op0);
13162  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13163    op1 = copy_to_mode_reg (mode1, op1);
13164
13165  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13166     yet one of the two must not be a memory.  This is normally enforced
13167     by expanders, but we didn't bother to create one here.  */
13168  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13169    op0 = copy_to_mode_reg (mode0, op0);
13170
13171  pat = GEN_FCN (icode) (target, op0, op1);
13172  if (! pat)
13173    return 0;
13174  emit_insn (pat);
13175  return target;
13176}
13177
13178/* Subroutine of ix86_expand_builtin to take care of stores.  */
13179
13180static rtx
13181ix86_expand_store_builtin (icode, arglist)
13182     enum insn_code icode;
13183     tree arglist;
13184{
13185  rtx pat;
13186  tree arg0 = TREE_VALUE (arglist);
13187  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13188  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13189  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13190  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13191  enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13192
13193  if (VECTOR_MODE_P (mode1))
13194    op1 = safe_vector_operand (op1, mode1);
13195
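  /* op0 is the destination address; wrap it in a MEM of the mode the
     store insn expects.  */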
13196  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13197  op1 = copy_to_mode_reg (mode1, op1);
13198
13199  pat = GEN_FCN (icode) (op0, op1);
13200  if (pat)
13201    emit_insn (pat);
13202  return 0;
13203}
13204
13205/* Subroutine of ix86_expand_builtin to take care of unop insns.  If DO_LOAD is nonzero, the single argument is an address to load from.  */
13206
13207static rtx
13208ix86_expand_unop_builtin (icode, arglist, target, do_load)
13209     enum insn_code icode;
13210     tree arglist;
13211     rtx target;
13212     int do_load;
13213{
13214  rtx pat;
13215  tree arg0 = TREE_VALUE (arglist);
13216  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13217  enum machine_mode tmode = insn_data[icode].operand[0].mode;
13218  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13219
13220  if (! target
13221      || GET_MODE (target) != tmode
13222      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13223    target = gen_reg_rtx (tmode);
13224  if (do_load)
13225    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13226  else
13227    {
13228      if (VECTOR_MODE_P (mode0))
13229	op0 = safe_vector_operand (op0, mode0);
13230
13231      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13232	op0 = copy_to_mode_reg (mode0, op0);
13233    }
13234
13235  pat = GEN_FCN (icode) (target, op0);
13236  if (! pat)
13237    return 0;
13238  emit_insn (pat);
13239  return target;
13240}
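
/* Illustrative sketch (an assumption for exposition): with DO_LOAD
   nonzero the single argument is an address, as in

     typedef float v4sf __attribute__ ((vector_size (16)));

     v4sf
     load_unaligned (float *p)
     {
       return __builtin_ia32_loadups (p);
     }

   so the expander wraps it in a MEM; with DO_LOAD zero the argument
   is the operand value itself, as for __builtin_ia32_rcpps.  */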
13241
13242/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13243   sqrtss, rsqrtss, rcpss.  */
13244
13245static rtx
13246ix86_expand_unop1_builtin (icode, arglist, target)
13247     enum insn_code icode;
13248     tree arglist;
13249     rtx target;
13250{
13251  rtx pat;
13252  tree arg0 = TREE_VALUE (arglist);
13253  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13254  enum machine_mode tmode = insn_data[icode].operand[0].mode;
13255  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13256
13257  if (! target
13258      || GET_MODE (target) != tmode
13259      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13260    target = gen_reg_rtx (tmode);
13261
13262  if (VECTOR_MODE_P (mode0))
13263    op0 = safe_vector_operand (op0, mode0);
13264
13265  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13266    op0 = copy_to_mode_reg (mode0, op0);
13267
13268  op1 = op0;
13269  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13270    op1 = copy_to_mode_reg (mode0, op1);
13271
13272  pat = GEN_FCN (icode) (target, op0, op1);
13273  if (! pat)
13274    return 0;
13275  emit_insn (pat);
13276  return target;
13277}
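
/* Illustrative sketch (an assumption for exposition): the vm patterns
   used by these three builtins take the source operand twice, so for

     __builtin_ia32_sqrtss (x)

   both operands are X; the result's low element is the square root of
   X's low element while the upper elements are copied from X
   unchanged, matching the hardware semantics of sqrtss.  */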
13278
13279/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
13280
13281static rtx
13282ix86_expand_sse_compare (d, arglist, target)
13283     const struct builtin_description *d;
13284     tree arglist;
13285     rtx target;
13286{
13287  rtx pat;
13288  tree arg0 = TREE_VALUE (arglist);
13289  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13290  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13291  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13292  rtx op2;
13293  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13294  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13295  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13296  enum rtx_code comparison = d->comparison;
13297
13298  if (VECTOR_MODE_P (mode0))
13299    op0 = safe_vector_operand (op0, mode0);
13300  if (VECTOR_MODE_P (mode1))
13301    op1 = safe_vector_operand (op1, mode1);
13302
13303  /* Swap operands if we have a comparison that isn't available in
13304     hardware.  */
13305  if (d->flag)
13306    {
13307      rtx tmp = gen_reg_rtx (mode1);
13308      emit_move_insn (tmp, op1);
13309      op1 = op0;
13310      op0 = tmp;
13311    }
13312
13313  if (! target
13314      || GET_MODE (target) != tmode
13315      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13316    target = gen_reg_rtx (tmode);
13317
13318  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13319    op0 = copy_to_mode_reg (mode0, op0);
13320  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13321    op1 = copy_to_mode_reg (mode1, op1);
13322
13323  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13324  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13325  if (! pat)
13326    return 0;
13327  emit_insn (pat);
13328  return target;
13329}
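
/* Illustrative sketch (an assumption for exposition): for a builtin
   such as __builtin_ia32_cmpgtps, D->FLAG is set because only the
   less-than form is encodable, so the expander effectively emits

     CMPLTPS (b, a)

   i.e. the same comparison with the operands swapped, which produces
   an identical mask.  */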
13330
13331/* Subroutine of ix86_expand_builtin to take care of comi insns.  */
13332
13333static rtx
13334ix86_expand_sse_comi (d, arglist, target)
13335     const struct builtin_description *d;
13336     tree arglist;
13337     rtx target;
13338{
13339  rtx pat;
13340  tree arg0 = TREE_VALUE (arglist);
13341  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13342  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13343  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13344  rtx op2;
13345  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13346  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13347  enum rtx_code comparison = d->comparison;
13348
13349  if (VECTOR_MODE_P (mode0))
13350    op0 = safe_vector_operand (op0, mode0);
13351  if (VECTOR_MODE_P (mode1))
13352    op1 = safe_vector_operand (op1, mode1);
13353
13354  /* Swap operands if we have a comparison that isn't available in
13355     hardware.  */
13356  if (d->flag)
13357    {
13358      rtx tmp = op1;
13359      op1 = op0;
13360      op0 = tmp;
13361    }
13362
13363  target = gen_reg_rtx (SImode);
13364  emit_move_insn (target, const0_rtx);
13365  target = gen_rtx_SUBREG (QImode, target, 0);
13366
13367  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13368    op0 = copy_to_mode_reg (mode0, op0);
13369  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13370    op1 = copy_to_mode_reg (mode1, op1);
13371
13372  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13373  pat = GEN_FCN (d->icode) (op0, op1);
13374  if (! pat)
13375    return 0;
13376  emit_insn (pat);
13377  emit_insn (gen_rtx_SET (VOIDmode,
13378			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13379			  gen_rtx_fmt_ee (comparison, QImode,
13380					  SET_DEST (pat),
13381					  const0_rtx)));
13382
13383  return SUBREG_REG (target);
13384}
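
/* Illustrative sketch (an assumption for exposition) of the RTL the
   function above emits for __builtin_ia32_comilt (a, b):

     (set (reg:CCFP flags) (compare:CCFP (reg:V4SF a) (reg:V4SF b)))
     (set (strict_low_part (subreg:QI (reg:SI result) 0))
          (lt:QI (reg:CCFP flags) (const_int 0)))

   RESULT is zeroed beforehand, so the builtin yields 1 in SImode when
   the comparison holds and 0 otherwise.  */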
13385
13386/* Expand an expression EXP that calls a built-in function,
13387   with result going to TARGET if that's convenient
13388   (and in mode MODE if that's convenient).
13389   SUBTARGET may be used as the target for computing one of EXP's operands.
13390   IGNORE is nonzero if the value is to be ignored.  */
13391
13392rtx
13393ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13394     tree exp;
13395     rtx target;
13396     rtx subtarget ATTRIBUTE_UNUSED;
13397     enum machine_mode mode ATTRIBUTE_UNUSED;
13398     int ignore ATTRIBUTE_UNUSED;
13399{
13400  const struct builtin_description *d;
13401  size_t i;
13402  enum insn_code icode;
13403  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13404  tree arglist = TREE_OPERAND (exp, 1);
13405  tree arg0, arg1, arg2;
13406  rtx op0, op1, op2, pat;
13407  enum machine_mode tmode, mode0, mode1, mode2;
13408  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13409
13410  switch (fcode)
13411    {
13412    case IX86_BUILTIN_EMMS:
13413      emit_insn (gen_emms ());
13414      return 0;
13415
13416    case IX86_BUILTIN_SFENCE:
13417      emit_insn (gen_sfence ());
13418      return 0;
13419
13420    case IX86_BUILTIN_PEXTRW:
13421    case IX86_BUILTIN_PEXTRW128:
13422      icode = (fcode == IX86_BUILTIN_PEXTRW
13423	       ? CODE_FOR_mmx_pextrw
13424	       : CODE_FOR_sse2_pextrw);
13425      arg0 = TREE_VALUE (arglist);
13426      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13427      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13428      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13429      tmode = insn_data[icode].operand[0].mode;
13430      mode0 = insn_data[icode].operand[1].mode;
13431      mode1 = insn_data[icode].operand[2].mode;
13432
13433      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13434	op0 = copy_to_mode_reg (mode0, op0);
13435      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13436	{
13437	  /* @@@ better error message */
13438	  error ("selector must be an immediate");
13439	  return gen_reg_rtx (tmode);
13440	}
13441      if (target == 0
13442	  || GET_MODE (target) != tmode
13443	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13444	target = gen_reg_rtx (tmode);
13445      pat = GEN_FCN (icode) (target, op0, op1);
13446      if (! pat)
13447	return 0;
13448      emit_insn (pat);
13449      return target;
13450
13451    case IX86_BUILTIN_PINSRW:
13452    case IX86_BUILTIN_PINSRW128:
13453      icode = (fcode == IX86_BUILTIN_PINSRW
13454	       ? CODE_FOR_mmx_pinsrw
13455	       : CODE_FOR_sse2_pinsrw);
13456      arg0 = TREE_VALUE (arglist);
13457      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13458      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13459      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13460      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13461      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13462      tmode = insn_data[icode].operand[0].mode;
13463      mode0 = insn_data[icode].operand[1].mode;
13464      mode1 = insn_data[icode].operand[2].mode;
13465      mode2 = insn_data[icode].operand[3].mode;
13466
13467      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13468	op0 = copy_to_mode_reg (mode0, op0);
13469      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13470	op1 = copy_to_mode_reg (mode1, op1);
13471      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13472	{
13473	  /* @@@ better error message */
13474	  error ("selector must be an immediate");
13475	  return const0_rtx;
13476	}
13477      if (target == 0
13478	  || GET_MODE (target) != tmode
13479	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13480	target = gen_reg_rtx (tmode);
13481      pat = GEN_FCN (icode) (target, op0, op1, op2);
13482      if (! pat)
13483	return 0;
13484      emit_insn (pat);
13485      return target;
13486
13487    case IX86_BUILTIN_MASKMOVQ:
13488    case IX86_BUILTIN_MASKMOVDQU:
13489      icode = (fcode == IX86_BUILTIN_MASKMOVQ
13490	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13491	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13492		  : CODE_FOR_sse2_maskmovdqu));
13493      /* Note the arg order is different from the operand order.  */
13494      arg1 = TREE_VALUE (arglist);
13495      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13496      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13497      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13498      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13499      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13500      mode0 = insn_data[icode].operand[0].mode;
13501      mode1 = insn_data[icode].operand[1].mode;
13502      mode2 = insn_data[icode].operand[2].mode;
13503
13504      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13505	op0 = copy_to_mode_reg (mode0, op0);
13506      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13507	op1 = copy_to_mode_reg (mode1, op1);
13508      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13509	op2 = copy_to_mode_reg (mode2, op2);
13510      pat = GEN_FCN (icode) (op0, op1, op2);
13511      if (! pat)
13512	return 0;
13513      emit_insn (pat);
13514      return 0;
13515
13516    case IX86_BUILTIN_SQRTSS:
13517      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13518    case IX86_BUILTIN_RSQRTSS:
13519      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13520    case IX86_BUILTIN_RCPSS:
13521      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13522
13523    case IX86_BUILTIN_LOADAPS:
13524      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13525
13526    case IX86_BUILTIN_LOADUPS:
13527      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13528
13529    case IX86_BUILTIN_STOREAPS:
13530      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13531
13532    case IX86_BUILTIN_STOREUPS:
13533      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13534
13535    case IX86_BUILTIN_LOADSS:
13536      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13537
13538    case IX86_BUILTIN_STORESS:
13539      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13540
13541    case IX86_BUILTIN_LOADHPS:
13542    case IX86_BUILTIN_LOADLPS:
13543    case IX86_BUILTIN_LOADHPD:
13544    case IX86_BUILTIN_LOADLPD:
13545      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13546	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13547	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13548	       : CODE_FOR_sse2_movlpd);
13549      arg0 = TREE_VALUE (arglist);
13550      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13551      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13552      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13553      tmode = insn_data[icode].operand[0].mode;
13554      mode0 = insn_data[icode].operand[1].mode;
13555      mode1 = insn_data[icode].operand[2].mode;
13556
13557      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13558	op0 = copy_to_mode_reg (mode0, op0);
13559      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13560      if (target == 0
13561	  || GET_MODE (target) != tmode
13562	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13563	target = gen_reg_rtx (tmode);
13564      pat = GEN_FCN (icode) (target, op0, op1);
13565      if (! pat)
13566	return 0;
13567      emit_insn (pat);
13568      return target;
13569
13570    case IX86_BUILTIN_STOREHPS:
13571    case IX86_BUILTIN_STORELPS:
13572    case IX86_BUILTIN_STOREHPD:
13573    case IX86_BUILTIN_STORELPD:
13574      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13575	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13576	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13577	       : CODE_FOR_sse2_movlpd);
13578      arg0 = TREE_VALUE (arglist);
13579      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13580      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13581      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13582      mode0 = insn_data[icode].operand[1].mode;
13583      mode1 = insn_data[icode].operand[2].mode;
13584
13585      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13586      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13587	op1 = copy_to_mode_reg (mode1, op1);
13588
13589      pat = GEN_FCN (icode) (op0, op0, op1);
13590      if (! pat)
13591	return 0;
13592      emit_insn (pat);
13593      return 0;
13594
13595    case IX86_BUILTIN_MOVNTPS:
13596      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13597    case IX86_BUILTIN_MOVNTQ:
13598      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13599
13600    case IX86_BUILTIN_LDMXCSR:
13601      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13602      target = assign_386_stack_local (SImode, 0);
13603      emit_move_insn (target, op0);
13604      emit_insn (gen_ldmxcsr (target));
13605      return 0;
13606
13607    case IX86_BUILTIN_STMXCSR:
13608      target = assign_386_stack_local (SImode, 0);
13609      emit_insn (gen_stmxcsr (target));
13610      return copy_to_mode_reg (SImode, target);
13611
13612    case IX86_BUILTIN_SHUFPS:
13613    case IX86_BUILTIN_SHUFPD:
13614      icode = (fcode == IX86_BUILTIN_SHUFPS
13615	       ? CODE_FOR_sse_shufps
13616	       : CODE_FOR_sse2_shufpd);
13617      arg0 = TREE_VALUE (arglist);
13618      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13619      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13620      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13621      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13622      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13623      tmode = insn_data[icode].operand[0].mode;
13624      mode0 = insn_data[icode].operand[1].mode;
13625      mode1 = insn_data[icode].operand[2].mode;
13626      mode2 = insn_data[icode].operand[3].mode;
13627
13628      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13629	op0 = copy_to_mode_reg (mode0, op0);
13630      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13631	op1 = copy_to_mode_reg (mode1, op1);
13632      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13633	{
13634	  /* @@@ better error message */
13635	  error ("mask must be an immediate");
13636	  return gen_reg_rtx (tmode);
13637	}
13638      if (target == 0
13639	  || GET_MODE (target) != tmode
13640	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13641	target = gen_reg_rtx (tmode);
13642      pat = GEN_FCN (icode) (target, op0, op1, op2);
13643      if (! pat)
13644	return 0;
13645      emit_insn (pat);
13646      return target;
13647
13648    case IX86_BUILTIN_PSHUFW:
13649    case IX86_BUILTIN_PSHUFD:
13650    case IX86_BUILTIN_PSHUFHW:
13651    case IX86_BUILTIN_PSHUFLW:
13652      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13653	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13654	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13655	       : CODE_FOR_mmx_pshufw);
13656      arg0 = TREE_VALUE (arglist);
13657      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13658      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13659      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13660      tmode = insn_data[icode].operand[0].mode;
13661      mode1 = insn_data[icode].operand[1].mode;
13662      mode2 = insn_data[icode].operand[2].mode;
13663
13664      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13665	op0 = copy_to_mode_reg (mode1, op0);
13666      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13667	{
13668	  /* @@@ better error message */
13669	  error ("mask must be an immediate");
13670	  return const0_rtx;
13671	}
13672      if (target == 0
13673	  || GET_MODE (target) != tmode
13674	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13675	target = gen_reg_rtx (tmode);
13676      pat = GEN_FCN (icode) (target, op0, op1);
13677      if (! pat)
13678	return 0;
13679      emit_insn (pat);
13680      return target;
13681
13682    case IX86_BUILTIN_PSLLDQI128:
13683    case IX86_BUILTIN_PSRLDQI128:
13684      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13685	       : CODE_FOR_sse2_lshrti3);
13686      arg0 = TREE_VALUE (arglist);
13687      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13688      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13689      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13690      tmode = insn_data[icode].operand[0].mode;
13691      mode1 = insn_data[icode].operand[1].mode;
13692      mode2 = insn_data[icode].operand[2].mode;
13693
13694      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13695	{
13696	  op0 = copy_to_reg (op0);
13697	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13698	}
13699      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13700	{
13701	  error ("shift must be an immediate");
13702	  return const0_rtx;
13703	}
13704      target = gen_reg_rtx (V2DImode);
13705      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13706      if (! pat)
13707	return 0;
13708      emit_insn (pat);
13709      return target;
13710
13711    case IX86_BUILTIN_FEMMS:
13712      emit_insn (gen_femms ());
13713      return NULL_RTX;
13714
13715    case IX86_BUILTIN_PAVGUSB:
13716      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13717
13718    case IX86_BUILTIN_PF2ID:
13719      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13720
13721    case IX86_BUILTIN_PFACC:
13722      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13723
13724    case IX86_BUILTIN_PFADD:
13725     return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13726
13727    case IX86_BUILTIN_PFCMPEQ:
13728      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13729
13730    case IX86_BUILTIN_PFCMPGE:
13731      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13732
13733    case IX86_BUILTIN_PFCMPGT:
13734      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13735
13736    case IX86_BUILTIN_PFMAX:
13737      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13738
13739    case IX86_BUILTIN_PFMIN:
13740      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13741
13742    case IX86_BUILTIN_PFMUL:
13743      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13744
13745    case IX86_BUILTIN_PFRCP:
13746      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13747
13748    case IX86_BUILTIN_PFRCPIT1:
13749      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13750
13751    case IX86_BUILTIN_PFRCPIT2:
13752      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13753
13754    case IX86_BUILTIN_PFRSQIT1:
13755      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13756
13757    case IX86_BUILTIN_PFRSQRT:
13758      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13759
13760    case IX86_BUILTIN_PFSUB:
13761      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13762
13763    case IX86_BUILTIN_PFSUBR:
13764      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13765
13766    case IX86_BUILTIN_PI2FD:
13767      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13768
13769    case IX86_BUILTIN_PMULHRW:
13770      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13771
13772    case IX86_BUILTIN_PF2IW:
13773      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13774
13775    case IX86_BUILTIN_PFNACC:
13776      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13777
13778    case IX86_BUILTIN_PFPNACC:
13779      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13780
13781    case IX86_BUILTIN_PI2FW:
13782      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13783
13784    case IX86_BUILTIN_PSWAPDSI:
13785      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13786
13787    case IX86_BUILTIN_PSWAPDSF:
13788      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13789
13790    case IX86_BUILTIN_SSE_ZERO:
13791      target = gen_reg_rtx (V4SFmode);
13792      emit_insn (gen_sse_clrv4sf (target));
13793      return target;
13794
13795    case IX86_BUILTIN_MMX_ZERO:
13796      target = gen_reg_rtx (DImode);
13797      emit_insn (gen_mmx_clrdi (target));
13798      return target;
13799
13800    case IX86_BUILTIN_CLRTI:
13801      target = gen_reg_rtx (V2DImode);
13802      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13803      return target;
13804
13805
13806    case IX86_BUILTIN_SQRTSD:
13807      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13808    case IX86_BUILTIN_LOADAPD:
13809      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13810    case IX86_BUILTIN_LOADUPD:
13811      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13812
13813    case IX86_BUILTIN_STOREAPD:
13814      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13815    case IX86_BUILTIN_STOREUPD:
13816      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13817
13818    case IX86_BUILTIN_LOADSD:
13819      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13820
13821    case IX86_BUILTIN_STORESD:
13822      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13823
13824    case IX86_BUILTIN_SETPD1:
13825      target = assign_386_stack_local (DFmode, 0);
13826      arg0 = TREE_VALUE (arglist);
13827      emit_move_insn (adjust_address (target, DFmode, 0),
13828		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13829      op0 = gen_reg_rtx (V2DFmode);
13830      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13831      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13832      return op0;
13833
13834    case IX86_BUILTIN_SETPD:
13835      target = assign_386_stack_local (V2DFmode, 0);
13836      arg0 = TREE_VALUE (arglist);
13837      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13838      emit_move_insn (adjust_address (target, DFmode, 0),
13839		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13840      emit_move_insn (adjust_address (target, DFmode, 8),
13841		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13842      op0 = gen_reg_rtx (V2DFmode);
13843      emit_insn (gen_sse2_movapd (op0, target));
13844      return op0;
13845
13846    case IX86_BUILTIN_LOADRPD:
13847      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13848					 gen_reg_rtx (V2DFmode), 1);
13849      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13850      return target;
13851
13852    case IX86_BUILTIN_LOADPD1:
13853      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13854					 gen_reg_rtx (V2DFmode), 1);
13855      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13856      return target;
13857
13858    case IX86_BUILTIN_STOREPD1:
13859      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13860    case IX86_BUILTIN_STORERPD:
13861      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13862
13863    case IX86_BUILTIN_CLRPD:
13864      target = gen_reg_rtx (V2DFmode);
13865      emit_insn (gen_sse_clrv2df (target));
13866      return target;
13867
13868    case IX86_BUILTIN_MFENCE:
13869	emit_insn (gen_sse2_mfence ());
13870	return 0;
13871    case IX86_BUILTIN_LFENCE:
13872	emit_insn (gen_sse2_lfence ());
13873	return 0;
13874
13875    case IX86_BUILTIN_CLFLUSH:
13876	arg0 = TREE_VALUE (arglist);
13877	op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13878	icode = CODE_FOR_sse2_clflush;
13879	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13880	    op0 = copy_to_mode_reg (Pmode, op0);
13881
13882	emit_insn (gen_sse2_clflush (op0));
13883	return 0;
13884
13885    case IX86_BUILTIN_MOVNTPD:
13886      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13887    case IX86_BUILTIN_MOVNTDQ:
13888      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13889    case IX86_BUILTIN_MOVNTI:
13890      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13891
13892    case IX86_BUILTIN_LOADDQA:
13893      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13894    case IX86_BUILTIN_LOADDQU:
13895      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13896    case IX86_BUILTIN_LOADD:
13897      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13898
13899    case IX86_BUILTIN_STOREDQA:
13900      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13901    case IX86_BUILTIN_STOREDQU:
13902      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13903    case IX86_BUILTIN_STORED:
13904      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13905
13906    case IX86_BUILTIN_MONITOR:
13907      arg0 = TREE_VALUE (arglist);
13908      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13909      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13910      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13911      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13912      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13913      if (!REG_P (op0))
13914	op0 = copy_to_mode_reg (SImode, op0);
13915      if (!REG_P (op1))
13916	op1 = copy_to_mode_reg (SImode, op1);
13917      if (!REG_P (op2))
13918	op2 = copy_to_mode_reg (SImode, op2);
13919      emit_insn (gen_monitor (op0, op1, op2));
13920      return 0;
13921
13922    case IX86_BUILTIN_MWAIT:
13923      arg0 = TREE_VALUE (arglist);
13924      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13925      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13926      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13927      if (!REG_P (op0))
13928	op0 = copy_to_mode_reg (SImode, op0);
13929      if (!REG_P (op1))
13930	op1 = copy_to_mode_reg (SImode, op1);
13931      emit_insn (gen_mwait (op0, op1));
13932      return 0;
13933
13934    case IX86_BUILTIN_LOADDDUP:
13935      return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
13936
13937    case IX86_BUILTIN_LDDQU:
13938      return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
13939				       1);
13940
13941    default:
13942      break;
13943    }
13944
13945  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13946    if (d->code == fcode)
13947      {
13948	/* Compares are treated specially.  */
13949	if (d->icode == CODE_FOR_maskcmpv4sf3
13950	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
13951	    || d->icode == CODE_FOR_maskncmpv4sf3
13952	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
13953	    || d->icode == CODE_FOR_maskcmpv2df3
13954	    || d->icode == CODE_FOR_vmmaskcmpv2df3
13955	    || d->icode == CODE_FOR_maskncmpv2df3
13956	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
13957	  return ix86_expand_sse_compare (d, arglist, target);
13958
13959	return ix86_expand_binop_builtin (d->icode, arglist, target);
13960      }
13961
13962  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13963    if (d->code == fcode)
13964      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13965
13966  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13967    if (d->code == fcode)
13968      return ix86_expand_sse_comi (d, arglist, target);
13969
13970  /* @@@ Should really do something sensible here.  */
13971  return 0;
13972}
13973
13974/* Store OPERAND to memory after reload has completed.  This means
13975   that we can't easily use assign_stack_local.  */
13976rtx
13977ix86_force_to_memory (mode, operand)
13978     enum machine_mode mode;
13979     rtx operand;
13980{
13981  rtx result;
13982  if (!reload_completed)
13983    abort ();
13984  if (TARGET_64BIT && TARGET_RED_ZONE)
13985    {
13986      result = gen_rtx_MEM (mode,
13987			    gen_rtx_PLUS (Pmode,
13988					  stack_pointer_rtx,
13989					  GEN_INT (-RED_ZONE_SIZE)));
13990      emit_move_insn (result, operand);
13991    }
13992  else if (TARGET_64BIT && !TARGET_RED_ZONE)
13993    {
13994      switch (mode)
13995	{
13996	case HImode:
13997	case SImode:
13998	  operand = gen_lowpart (DImode, operand);
13999	  /* FALLTHRU */
14000	case DImode:
14001	  emit_insn (
14002		      gen_rtx_SET (VOIDmode,
14003				   gen_rtx_MEM (DImode,
14004						gen_rtx_PRE_DEC (DImode,
14005							stack_pointer_rtx)),
14006				   operand));
14007	  break;
14008	default:
14009	  abort ();
14010	}
14011      result = gen_rtx_MEM (mode, stack_pointer_rtx);
14012    }
14013  else
14014    {
14015      switch (mode)
14016	{
14017	case DImode:
14018	  {
14019	    rtx operands[2];
14020	    split_di (&operand, 1, operands, operands + 1);
14021	    emit_insn (
14022			gen_rtx_SET (VOIDmode,
14023				     gen_rtx_MEM (SImode,
14024						  gen_rtx_PRE_DEC (Pmode,
14025							stack_pointer_rtx)),
14026				     operands[1]));
14027	    emit_insn (
14028			gen_rtx_SET (VOIDmode,
14029				     gen_rtx_MEM (SImode,
14030						  gen_rtx_PRE_DEC (Pmode,
14031							stack_pointer_rtx)),
14032				     operands[0]));
14033	  }
14034	  break;
14035	case HImode:
14036	  /* It is better to store HImode values as SImode.  */
14037	  if (!TARGET_PARTIAL_REG_STALL)
14038	    operand = gen_lowpart (SImode, operand);
14039	  /* FALLTHRU */
14040	case SImode:
14041	  emit_insn (
14042		      gen_rtx_SET (VOIDmode,
14043				   gen_rtx_MEM (GET_MODE (operand),
14044						gen_rtx_PRE_DEC (SImode,
14045							stack_pointer_rtx)),
14046				   operand));
14047	  break;
14048	default:
14049	  abort ();
14050	}
14051      result = gen_rtx_MEM (mode, stack_pointer_rtx);
14052    }
14053  return result;
14054}
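
/* Illustrative sketch (an assumption for exposition): on 32-bit, the
   DImode case above emits two SImode pushes, high word first,

     (set (mem:SI (pre_dec:SI sp)) (high word))
     (set (mem:SI (pre_dec:SI sp)) (low word))

   so the value lies little-endian at the new top of stack and the
   returned MEM of the original mode describes it directly.  The slot
   is released afterwards with ix86_free_from_memory.  */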
14055
14056/* Free the operand from memory.  */
14057void
14058ix86_free_from_memory (mode)
14059     enum machine_mode mode;
14060{
14061  if (!TARGET_64BIT || !TARGET_RED_ZONE)
14062    {
14063      int size;
14064
14065      if (mode == DImode || TARGET_64BIT)
14066	size = 8;
14067      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14068	size = 2;
14069      else
14070	size = 4;
14071      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
14072         to a pop or add instruction if registers are available.  */
14073      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14074			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14075					    GEN_INT (size))));
14076    }
14077}
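
/* Illustrative sketch (an assumption for exposition) of the intended
   pairing of the two functions above:

     rtx mem = ix86_force_to_memory (DImode, operand);
     ... use MEM as a memory operand while emitting insns ...
     ix86_free_from_memory (DImode);
*/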
14078
14079/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14080   QImode must go into class Q_REGS.
14081   Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and movdf
14082   to do mem-to-mem moves through integer regs.  */
14083enum reg_class
14084ix86_preferred_reload_class (x, class)
14085     rtx x;
14086     enum reg_class class;
14087{
14088  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14089    return NO_REGS;
14090  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14091    {
14092      /* SSE can't load any constant directly yet.  */
14093      if (SSE_CLASS_P (class))
14094	return NO_REGS;
14095      /* Floats can load 0 and 1.  */
14096      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14097	{
14098	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
14099	  if (MAYBE_SSE_CLASS_P (class))
14100	    return (reg_class_subset_p (class, GENERAL_REGS)
14101		    ? GENERAL_REGS : FLOAT_REGS);
14102	  else
14103	    return class;
14104	}
14105      /* General regs can load everything.  */
14106      if (reg_class_subset_p (class, GENERAL_REGS))
14107	return GENERAL_REGS;
14108      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
14109      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14110	return NO_REGS;
14111    }
14112  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14113    return NO_REGS;
14114  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14115    return Q_REGS;
14116  return class;
14117}
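
/* Illustrative example (an assumption for exposition): reloading the
   DFmode constant 1.0 into FLOAT_REGS is allowed because
   standard_80387_constant_p recognizes it (it loads with fld1), while
   an arbitrary constant such as 3.14 yields NO_REGS, forcing the
   value into the constant pool and loading it from memory.  */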
14118
14119/* If we are copying between general and FP registers, we need a memory
14120   location.  The same is true for SSE and MMX registers.
14121
14122   The macro can't work reliably when one of the CLASSES is a class containing
14123   registers from multiple units (SSE, MMX, integer).  We avoid this by never
14124   combining those units in a single alternative in the machine description.
14125   Ensure that this constraint holds to avoid surprises.
14126
14127   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14128   enforce these sanity checks.  */
14129int
14130ix86_secondary_memory_needed (class1, class2, mode, strict)
14131     enum reg_class class1, class2;
14132     enum machine_mode mode;
14133     int strict;
14134{
14135  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14136      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14137      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14138      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14139      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14140      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14141    {
14142      if (strict)
14143	abort ();
14144      else
14145	return 1;
14146    }
14147  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14148	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14149	      && (mode) != SImode)
14150	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14151	      && (mode) != SImode));
14152}
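
/* Illustrative example (an assumption for exposition): moving a
   DFmode value between SSE_REGS and GENERAL_REGS needs a memory
   intermediate, whereas an SImode move between the same classes does
   not, since a plain movd can copy 32 bits directly.  */
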
14153/* Return the cost of moving data from a register in class CLASS1 to
14154   one in class CLASS2.
14155
14156   It is not required that the cost always equal 2 when FROM is the same as TO;
14157   on some machines it is expensive to move between registers if they are not
14158   general registers.  */
14159int
14160ix86_register_move_cost (mode, class1, class2)
14161     enum machine_mode mode;
14162     enum reg_class class1, class2;
14163{
14164  /* If we require secondary memory, compute the cost of the store followed
14165     by the load.  To avoid bad register allocation choices, we need this
14166     to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
14167
14168  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14169    {
14170      int cost = 1;
14171
14172      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14173		   MEMORY_MOVE_COST (mode, class1, 1));
14174      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14175		   MEMORY_MOVE_COST (mode, class2, 1));
14176
14177      /* When copying from a general purpose register we may emit multiple
14178         stores followed by a single load, causing a memory size mismatch
14179         stall.  Count this as an arbitrarily high cost of 20.  */
14180      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14181	cost += 20;
14182
14183      /* In the case of FP/MMX moves, the registers actually overlap, and we
14184	 have to switch modes in order to treat them differently.  */
14185      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14186          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14187	cost += 20;
14188
14189      return cost;
14190    }
14191
14192  /* Moves between SSE/MMX and integer unit are expensive.  */
14193  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14194      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14195    return ix86_cost->mmxsse_to_integer;
14196  if (MAYBE_FLOAT_CLASS_P (class1))
14197    return ix86_cost->fp_move;
14198  if (MAYBE_SSE_CLASS_P (class1))
14199    return ix86_cost->sse_move;
14200  if (MAYBE_MMX_CLASS_P (class1))
14201    return ix86_cost->mmx_move;
14202  return 2;
14203}
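
/* Illustrative worked example (an assumption for exposition): for a
   DFmode move from GENERAL_REGS to FLOAT_REGS the function above
   returns roughly

     1 + MAX (int load, int store) + MAX (fp load, fp store) + 20

   where the final 20 is charged because the value occupies two
   integer registers but a single FP register, so two narrow stores
   feed one wide load.  */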
14204
14205/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
14206int
14207ix86_hard_regno_mode_ok (regno, mode)
14208     int regno;
14209     enum machine_mode mode;
14210{
14211  /* Flags, and only flags, can hold CCmode values.  */
14212  if (CC_REGNO_P (regno))
14213    return GET_MODE_CLASS (mode) == MODE_CC;
14214  if (GET_MODE_CLASS (mode) == MODE_CC
14215      || GET_MODE_CLASS (mode) == MODE_RANDOM
14216      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14217    return 0;
14218  if (FP_REGNO_P (regno))
14219    return VALID_FP_MODE_P (mode);
14220  if (SSE_REGNO_P (regno))
14221    return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14222  if (MMX_REGNO_P (regno))
14223    return (TARGET_MMX
14224	    ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14225  /* We handle both integers and floats in the general purpose registers.
14226     In the future we should be able to handle vector modes as well.  */
14227  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14228    return 0;
14229  /* Take care with QImode values: they can live in non-QI regs, but
14230     they then cause partial register stalls.  */
14231  if (regno < 4 || mode != QImode || TARGET_64BIT)
14232    return 1;
14233  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14234}
14235
14236/* Return the cost of moving data of mode M between a
14237   register and memory.  A value of 2 is the default; this cost is
14238   relative to those in `REGISTER_MOVE_COST'.
14239
14240   If moving between registers and memory is more expensive than
14241   between two registers, you should define this macro to express the
14242   relative cost.
14243
14244   Also model the increased cost of moving QImode values held in
14245   non-Q_REGS classes.
14246 */
14247int
14248ix86_memory_move_cost (mode, class, in)
14249     enum machine_mode mode;
14250     enum reg_class class;
14251     int in;
14252{
14253  if (FLOAT_CLASS_P (class))
14254    {
14255      int index;
14256      switch (mode)
14257	{
14258	  case SFmode:
14259	    index = 0;
14260	    break;
14261	  case DFmode:
14262	    index = 1;
14263	    break;
14264	  case XFmode:
14265	  case TFmode:
14266	    index = 2;
14267	    break;
14268	  default:
14269	    return 100;
14270	}
14271      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14272    }
14273  if (SSE_CLASS_P (class))
14274    {
14275      int index;
14276      switch (GET_MODE_SIZE (mode))
14277	{
14278	  case 4:
14279	    index = 0;
14280	    break;
14281	  case 8:
14282	    index = 1;
14283	    break;
14284	  case 16:
14285	    index = 2;
14286	    break;
14287	  default:
14288	    return 100;
14289	}
14290      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14291    }
14292  if (MMX_CLASS_P (class))
14293    {
14294      int index;
14295      switch (GET_MODE_SIZE (mode))
14296	{
14297	  case 4:
14298	    index = 0;
14299	    break;
14300	  case 8:
14301	    index = 1;
14302	    break;
14303	  default:
14304	    return 100;
14305	}
14306      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14307    }
14308  switch (GET_MODE_SIZE (mode))
14309    {
14310      case 1:
14311	if (in)
14312	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14313		  : ix86_cost->movzbl_load);
14314	else
14315	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14316		  : ix86_cost->int_store[0] + 4);
14317	break;
14318      case 2:
14319	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14320      default:
14321	/* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
14322	if (mode == TFmode)
14323	  mode = XFmode;
14324	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14325		* ((int) GET_MODE_SIZE (mode)
14326		   + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14327    }
14328}
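
/* Illustrative worked example (an assumption for exposition): for a
   16-byte integer-class move on ia32 (UNITS_PER_WORD == 4) the
   default case above charges (16 + 4 - 1) / 4 = 4 word moves, each
   at the int_load[2] or int_store[2] rate.  */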
14329
14330#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14331static void
14332ix86_svr3_asm_out_constructor (symbol, priority)
14333     rtx symbol;
14334     int priority ATTRIBUTE_UNUSED;
14335{
14336  init_section ();
14337  fputs ("\tpushl $", asm_out_file);
14338  assemble_name (asm_out_file, XSTR (symbol, 0));
14339  fputc ('\n', asm_out_file);
14340}
14341#endif
14342
14343#if TARGET_MACHO
14344
14345static int current_machopic_label_num;
14346
14347/* Given a symbol name and its associated stub, write out the
14348   definition of the stub.  */
14349
14350void
14351machopic_output_stub (file, symb, stub)
14352     FILE *file;
14353     const char *symb, *stub;
14354{
14355  unsigned int length;
14356  char *binder_name, *symbol_name, lazy_ptr_name[32];
14357  int label = ++current_machopic_label_num;
14358
14359  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
14360  symb = (*targetm.strip_name_encoding) (symb);
14361
14362  length = strlen (stub);
14363  binder_name = alloca (length + 32);
14364  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14365
14366  length = strlen (symb);
14367  symbol_name = alloca (length + 32);
14368  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14369
14370  sprintf (lazy_ptr_name, "L%d$lz", label);
14371
14372  if (MACHOPIC_PURE)
14373    machopic_picsymbol_stub_section ();
14374  else
14375    machopic_symbol_stub_section ();
14376
14377  fprintf (file, "%s:\n", stub);
14378  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14379
14380  if (MACHOPIC_PURE)
14381    {
14382      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14383      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14384      fprintf (file, "\tjmp %%edx\n");
14385    }
14386  else
14387    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14388
14389  fprintf (file, "%s:\n", binder_name);
14390
14391  if (MACHOPIC_PURE)
14392    {
14393      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14394      fprintf (file, "\tpushl %%eax\n");
14395    }
14396  else
14397    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14398
14399  fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14400
14401  machopic_lazy_symbol_ptr_section ();
14402  fprintf (file, "%s:\n", lazy_ptr_name);
14403  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14404  fprintf (file, "\t.long %s\n", binder_name);
14405}
14406#endif /* TARGET_MACHO */
14407
14408/* Order the registers for register allocator.  */
14409
14410void
14411x86_order_regs_for_local_alloc ()
14412{
14413   int pos = 0;
14414   int i;
14415
14416   /* First allocate the local general purpose registers.  */
14417   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14418     if (GENERAL_REGNO_P (i) && call_used_regs[i])
14419	reg_alloc_order [pos++] = i;
14420
14421   /* Global general purpose registers.  */
14422   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14423     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14424	reg_alloc_order [pos++] = i;
14425
14426   /* x87 registers come first in case we are doing FP math
14427      using them.  */
14428   if (!TARGET_SSE_MATH)
14429     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14430       reg_alloc_order [pos++] = i;
14431
14432   /* SSE registers.  */
14433   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14434     reg_alloc_order [pos++] = i;
14435   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14436     reg_alloc_order [pos++] = i;
14437
14438   /* x87 registers.  */
14439   if (TARGET_SSE_MATH)
14440     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14441       reg_alloc_order [pos++] = i;
14442
14443   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14444     reg_alloc_order [pos++] = i;
14445
14446   /* Initialize the rest of the array, as some registers are never
14447      allocated at all.  */
14448   while (pos < FIRST_PSEUDO_REGISTER)
14449     reg_alloc_order [pos++] = 0;
14450}
14451
14452/* Returns an expression indicating where the this parameter is
14453   located on entry to the FUNCTION.  */
14454
14455static rtx
14456x86_this_parameter (function)
14457     tree function;
14458{
14459  tree type = TREE_TYPE (function);
14460
14461  if (TARGET_64BIT)
14462    {
14463      int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14464      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14465    }
14466
14467  if (ix86_fntype_regparm (type) > 0)
14468    {
14469      tree parm;
14470
14471      parm = TYPE_ARG_TYPES (type);
14472      /* Figure out whether or not the function has a variable number of
14473	 arguments.  */
14474      for (; parm; parm = TREE_CHAIN (parm))
14475	if (TREE_VALUE (parm) == void_type_node)
14476	  break;
14477      /* If not, the this parameter is in %eax.  */
14478      if (parm)
14479	return gen_rtx_REG (SImode, 0);
14480    }
14481
14482  if (aggregate_value_p (TREE_TYPE (type)))
14483    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14484  else
14485    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14486}
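
/* Illustrative examples (assumptions for exposition) of the above on
   ia32: for a prototyped method compiled with a nonzero regparm
   count, `this' arrives in %eax; otherwise it sits at 4(%esp), or at
   8(%esp) when the return value is an aggregate passed through a
   hidden pointer occupying the first stack slot.  */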
14487
14488/* Determine whether x86_output_mi_thunk can succeed.  */
14489
14490static bool
14491x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14492     tree thunk ATTRIBUTE_UNUSED;
14493     HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14494     HOST_WIDE_INT vcall_offset;
14495     tree function;
14496{
14497  /* 64-bit can handle anything.  */
14498  if (TARGET_64BIT)
14499    return true;
14500
14501  /* For 32-bit, everything's fine if we have one free register.  */
14502  if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14503    return true;
14504
14505  /* Need a free register for vcall_offset.  */
14506  if (vcall_offset)
14507    return false;
14508
14509  /* Need a free register for GOT references.  */
14510  if (flag_pic && !(*targetm.binds_local_p) (function))
14511    return false;
14512
14513  /* Otherwise ok.  */
14514  return true;
14515}
14516
14517/* Output the assembler code for a thunk function.  THUNK_DECL is the
14518   declaration for the thunk function itself, FUNCTION is the decl for
14519   the target function.  DELTA is an immediate constant offset to be
14520   added to THIS.  If VCALL_OFFSET is non-zero, the word at
14521   *(*this + vcall_offset) should be added to THIS.  */
14522
14523static void
14524x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14525     FILE *file ATTRIBUTE_UNUSED;
14526     tree thunk ATTRIBUTE_UNUSED;
14527     HOST_WIDE_INT delta;
14528     HOST_WIDE_INT vcall_offset;
14529     tree function;
14530{
14531  rtx xops[3];
14532  rtx this = x86_this_parameter (function);
14533  rtx this_reg, tmp;
14534
14535  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
14536     pull it in now and let DELTA benefit.  */
14537  if (REG_P (this))
14538    this_reg = this;
14539  else if (vcall_offset)
14540    {
14541      /* Put the this parameter into %eax.  */
14542      xops[0] = this;
14543      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14544      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14545    }
14546  else
14547    this_reg = NULL_RTX;
14548
14549  /* Adjust the this parameter by a fixed constant.  */
14550  if (delta)
14551    {
14552      xops[0] = GEN_INT (delta);
14553      xops[1] = this_reg ? this_reg : this;
14554      if (TARGET_64BIT)
14555	{
14556	  if (!x86_64_general_operand (xops[0], DImode))
14557	    {
14558	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14559	      xops[1] = tmp;
14560	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14561	      xops[0] = tmp;
14562	      xops[1] = this;
14563	    }
14564	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14565	}
14566      else
14567	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14568    }
14569
14570  /* Adjust the this parameter by a value stored in the vtable.  */
14571  if (vcall_offset)
14572    {
14573      if (TARGET_64BIT)
14574	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14575      else
14576	tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14577
14578      xops[0] = gen_rtx_MEM (Pmode, this_reg);
14579      xops[1] = tmp;
14580      if (TARGET_64BIT)
14581	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14582      else
14583	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14584
14585      /* Adjust the this parameter.  */
14586      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14587      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14588	{
14589	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14590	  xops[0] = GEN_INT (vcall_offset);
14591	  xops[1] = tmp2;
14592	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14593	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14594	}
14595      xops[1] = this_reg;
14596      if (TARGET_64BIT)
14597	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14598      else
14599	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14600    }
14601
14602  /* If necessary, drop THIS back to its stack slot.  */
14603  if (this_reg && this_reg != this)
14604    {
14605      xops[0] = this_reg;
14606      xops[1] = this;
14607      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14608    }
14609
14610  xops[0] = XEXP (DECL_RTL (function), 0);
14611  if (TARGET_64BIT)
14612    {
14613      if (!flag_pic || (*targetm.binds_local_p) (function))
14614	output_asm_insn ("jmp\t%P0", xops);
14615      else
14616	{
14617	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
14618	  tmp = gen_rtx_CONST (Pmode, tmp);
14619	  tmp = gen_rtx_MEM (QImode, tmp);
14620	  xops[0] = tmp;
14621	  output_asm_insn ("jmp\t%A0", xops);
14622	}
14623    }
14624  else
14625    {
14626      if (!flag_pic || (*targetm.binds_local_p) (function))
14627	output_asm_insn ("jmp\t%P0", xops);
14628      else
14629	{
14630	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14631	  output_set_got (tmp);
14632
14633	  xops[1] = tmp;
14634	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14635	  output_asm_insn ("jmp\t{*}%1", xops);
14636	}
14637    }
14638}
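
/* Illustrative sketch (an assumption for exposition) of the ia32
   non-PIC output for DELTA == -4, VCALL_OFFSET == 0, with `this'
   living on the stack:

	addl $-4, 4(%esp)
	jmp f

   i.e. adjust the incoming `this' in place and tail-jump to the
   target function F.  */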
14639
14640int
14641x86_field_alignment (field, computed)
14642     tree field;
14643     int computed;
14644{
14645  enum machine_mode mode;
14646  tree type = TREE_TYPE (field);
14647
14648  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14649    return computed;
14650  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14651		    ? get_inner_array_type (type) : type);
14652  if (mode == DFmode || mode == DCmode
14653      || GET_MODE_CLASS (mode) == MODE_INT
14654      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14655    return MIN (32, computed);
14656  return computed;
14657}
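
/* Illustrative example (an assumption for exposition): on ia32
   without -malign-double, a double field as in

     struct s { char c; double d; };

   has its alignment capped at 32 bits by the function above, so D is
   placed at offset 4 rather than 8.  */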
14658
14659/* Output assembler code to FILE to increment profiler label # LABELNO
14660   for profiling a function entry.  */
14661void
14662x86_function_profiler (file, labelno)
14663     FILE *file;
14664     int labelno;
14665{
14666  if (TARGET_64BIT)
14667    if (flag_pic)
14668      {
14669#ifndef NO_PROFILE_COUNTERS
14670	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
14671#endif
14672	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14673      }
14674    else
14675      {
14676#ifndef NO_PROFILE_COUNTERS
14677	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14678#endif
14679	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14680      }
14681  else if (flag_pic)
14682    {
14683#ifndef NO_PROFILE_COUNTERS
14684      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14685	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14686#endif
14687      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14688    }
14689  else
14690    {
14691#ifndef NO_PROFILE_COUNTERS
14692      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14693	       PROFILE_COUNT_REGISTER);
14694#endif
14695      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14696    }
14697}
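
/* Illustrative sketch (an assumption for exposition; MCOUNT_NAME,
   LPREFIX and PROFILE_COUNT_REGISTER are configuration dependent) of
   the ia32 non-PIC output for label number 1:

	movl	$<LPREFIX>P1,%edx
	call	mcount

   where %edx stands in for PROFILE_COUNT_REGISTER and mcount for
   MCOUNT_NAME; the movl of the counter label is omitted when
   NO_PROFILE_COUNTERS is defined.  */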
14698
14699/* Implement machine specific optimizations.
14700   At the moment we implement a single transformation: the AMD Athlon runs
14701   faster when RET is not the destination of a conditional jump nor directly
14702   preceded by another jump instruction.  We avoid the penalty by inserting
14703   a NOP just before the RET instruction in such cases.  */
14704void
14705x86_machine_dependent_reorg (first)
14706     rtx first ATTRIBUTE_UNUSED;
14707{
14708  edge e;
14709
14710  if (!TARGET_ATHLON || !optimize || optimize_size)
14711    return;
14712  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14713  {
14714    basic_block bb = e->src;
14715    rtx ret = bb->end;
14716    rtx prev;
14717    bool insert = false;
14718
14719    if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14720      continue;
14721    prev = prev_nonnote_insn (ret);
14722    if (prev && GET_CODE (prev) == CODE_LABEL)
14723      {
14724	edge e;
14725	for (e = bb->pred; e; e = e->pred_next)
14726	  if (EDGE_FREQUENCY (e) && e->src->index > 0
14727	      && !(e->flags & EDGE_FALLTHRU))
14728	    insert = 1;
14729      }
14730    if (!insert)
14731      {
14732	prev = prev_real_insn (ret);
14733	if (prev && GET_CODE (prev) == JUMP_INSN
14734	    && any_condjump_p (prev))
14735	  insert = 1;
14736      }
14737    if (insert)
14738      emit_insn_before (gen_nop (), ret);
14739  }
14740}
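
/* Illustrative sketch (an assumption for exposition) of the
   transformation above:

     before:			after:
	jne	L2			jne	L2
	ret				nop
					ret

   The NOP keeps the RET from immediately following the conditional
   jump, avoiding the Athlon branch prediction penalty.  */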
14741
14742#include "gt-i386.h"
14743