/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 117408 2003-07-11 04:29:35Z kan $ */


#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  3,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  2,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  2,					/* cost of FSQRT instruction.  */
};
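
/* How these tables are consumed (an illustrative sketch, not code from
   this file): the active table is always reached through `ix86_cost', so
   pricing a multiply by a constant with N bits set looks roughly like

     cost = ix86_cost->mult_init + nbits * ix86_cost->mult_bit;

   mirroring the "cost of starting a multiply" and "cost of multiply per
   each bit set" entries above.  The field names here are assumptions for
   illustration; the authoritative declaration of struct processor_costs
   lives in i386.h.  */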

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  23,					/* cost of FADD and FSUB insns.  */
  27,					/* cost of FMUL instruction.  */
  88,					/* cost of FDIV instruction.  */
  22,					/* cost of FABS instruction.  */
  24,					/* cost of FCHS instruction.  */
  122,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  8,					/* cost of FADD and FSUB insns.  */
  16,					/* cost of FMUL instruction.  */
  73,					/* cost of FDIV instruction.  */
  3,					/* cost of FABS instruction.  */
  3,					/* cost of FCHS instruction.  */
  83,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  3,					/* cost of FADD and FSUB insns.  */
  3,					/* cost of FMUL instruction.  */
  39,					/* cost of FDIV instruction.  */
  1,					/* cost of FABS instruction.  */
  1,					/* cost of FCHS instruction.  */
  70,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  4,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* cost of FADD and FSUB insns.  */
  5,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  5,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  42,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  24,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  8,					/* variable shift costs */
  8,					/* constant shift costs */
  30,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  112,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* cost of FADD and FSUB insns.  */
  7,					/* cost of FMUL instruction.  */
  43,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  43,					/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
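
/* These bitmasks are consumed by TARGET_* convenience macros that test
   them against the CPU being scheduled for.  A sketch of the pattern
   (the exact macro names are assumptions; see i386.h for the real ones):

     #define CPUMASK (1 << ix86_cpu)
     #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)

   so a tuning decision such as "emit leave in the epilogue" reduces to a
   single mask test; CPUMASK is used the same way with
   x86_accumulate_outgoing_args in override_options below.  */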

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0)
   (when asked via an `=' command) if we said it was in DWARF regno
   11, but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

/* The encoding characters for the four TLS models present in ELF.  */

static char const tls_model_chars[] = " GLil";

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

/* Define the structure for the machine field in struct function.  */
struct machine_function GTY(())
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  const char *some_ld_name;
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};
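
/* Reading the diagram above (an illustrative gloss; the authoritative
   arithmetic is in ix86_compute_frame_layout): to_allocate is the single
   stack adjustment the prologue performs, roughly

     to_allocate = padding1 + va_arg_size + frame + padding2;

   while the three *_offset fields record where each pointer lands
   relative to ARG_POINTER.  */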

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;

static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
				       int, int, FILE *));
static const char *get_some_local_dynamic_name PARAMS ((void));
static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
static rtx maybe_get_pool_constant PARAMS ((rtx));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx get_thread_pointer PARAMS ((void));
static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static struct machine_function * ix86_init_machine_status PARAMS ((void));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));
static void ix86_init_mmx_sse_builtins PARAMS ((void));
static rtx x86_this_parameter PARAMS ((tree));
static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
					 HOST_WIDE_INT, tree));
static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
					     HOST_WIDE_INT, tree));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
static bool ix86_cannot_force_const_mem PARAMS ((rtx));

static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
static const char *ix86_strip_name_encoding PARAMS ((const char *))
     ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					  rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
static int ix86_save_reg PARAMS ((unsigned int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
static int ix86_fntype_regparm PARAMS ((tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int ix86_value_regno PARAMS ((enum machine_mode));
static bool contains_128bit_aligned_vector_p PARAMS ((tree));

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class,
   except that gcc will use an SF or DFmode move instead of DImode to
   avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
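
/* An illustrative classification (a sketch, not code from this file):
   under the x86-64 ABI a structure such as

     struct example { long l; double d; };

   occupies two 64bit parts and classifies as
   { X86_64_INTEGER_CLASS, X86_64_SSE_CLASS }, so `l' is passed in a
   general-purpose register and `d' in an SSE register.  See
   classify_argument below for the real algorithm.  */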

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

struct gcc_target targetm = TARGET_INITIALIZER;

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
    };
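
  /* Reading one row of the table above (an illustrative gloss): the
     athlon entry {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1} selects the
     athlon cost table, enables and disables no extra target flags,
     aligns loops and functions to 16 bytes and jumps to 64 bytes (each
     with a max skip of 7), and uses a branch cost of 1.  */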

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

  /* By default our XFmode is the 80-bit extended format.  If we use
     TFmode instead, it's also the 80-bit format, but with padding.  */
  real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
  real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_cpu = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	break;
      }
  if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
    x86_prefetch_sse = true;
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetic");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetic");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetic");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}

void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line option
     specifying them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { NULL,        0, 0, false, false, false, NULL }
};
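
/* An illustrative use of these attributes (a sketch, not code from this
   file):

     int __attribute__((stdcall)) f (int);
     int __attribute__((cdecl)) g (int);
     int __attribute__((regparm (2))) h (int a, int b);

   f's callee pops its arguments, g's caller does, and h receives its
   first two integer arguments in registers instead of on the stack.  */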

/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}
1454
1455/* Return 0 if the attributes for two types are incompatible, 1 if they
1456   are compatible, and 2 if they are nearly compatible (which causes a
1457   warning to be generated).  */
1458
1459static int
1460ix86_comp_type_attributes (type1, type2)
1461     tree type1;
1462     tree type2;
1463{
1464  /* Check for mismatch of non-default calling convention.  */
1465  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1466
1467  if (TREE_CODE (type1) != FUNCTION_TYPE)
1468    return 1;
1469
1470  /* Check for mismatched return types (cdecl vs stdcall).  */
1471  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1472      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1473    return 0;
1474  return 1;
1475}
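
/* Example (added for illustration): with -mrtd in effect, RTDSTR above
   is "cdecl", so assigning `int __attribute__ ((cdecl)) f (int);' to a
   plain `int (*p) (int);' makes the two lookup_attribute results differ
   and the function returns 0, flagging the calling conventions as
   incompatible.  */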
1476
1477/* Return the regparm value for a function with the indicated TYPE.  */
1478
1479static int
1480ix86_fntype_regparm (type)
1481     tree type;
1482{
1483  tree attr;
1484
1485  attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1486  if (attr)
1487    return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1488  else
1489    return ix86_regparm;
1490}
1491
1492/* Value is the number of bytes of arguments automatically
1493   popped when returning from a subroutine call.
1494   FUNDECL is the declaration node of the function (as a tree),
1495   FUNTYPE is the data type of the function (as a tree),
1496   or for a library call it is an identifier node for the subroutine name.
1497   SIZE is the number of bytes of arguments passed on the stack.
1498
1499   On the 80386, the RTD insn may be used to pop them if the number
1500     of args is fixed, but if the number is variable then the caller
1501     must pop them all.  RTD can't be used for library calls now
1502     because the library is compiled with the Unix compiler.
1503   Use of RTD is a selectable option, since it is incompatible with
1504   standard Unix calling sequences.  If the option is not selected,
1505   the caller must always pop the args.
1506
1507   The attribute stdcall is equivalent to RTD on a per module basis.  */
1508
1509int
1510ix86_return_pops_args (fundecl, funtype, size)
1511     tree fundecl;
1512     tree funtype;
1513     int size;
1514{
1515  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1516
1517    /* Cdecl functions override -mrtd, and never pop the stack.  */
1518  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1519
1520    /* Stdcall functions will pop the stack if not variable args.  */
1521    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1522      rtd = 1;
1523
1524    if (rtd
1525        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1526	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1527		== void_type_node)))
1528      return size;
1529  }
1530
1531  /* Lose any fake structure return argument if it is passed on the stack.  */
1532  if (aggregate_value_p (TREE_TYPE (funtype))
1533      && !TARGET_64BIT)
1534    {
1535      int nregs = ix86_fntype_regparm (funtype);
1536
1537      if (!nregs)
1538	return GET_MODE_SIZE (Pmode);
1539    }
1540
1541  return 0;
1542}
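
/* Worked example (illustrative): for `void __attribute__ ((stdcall))
   f (int a, int b)' the argument list is fixed, so the code above
   returns SIZE (8 bytes here) and the callee pops with `ret $8'.  A
   stdcall function declared with `...' falls through and returns 0,
   leaving the pop to the caller.  */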
1543
1544/* Argument support functions.  */
1545
1546/* Return true when register REGNO may be used to pass function parameters.  */
1547bool
1548ix86_function_arg_regno_p (regno)
1549     int regno;
1550{
1551  int i;
1552  if (!TARGET_64BIT)
1553    return (regno < REGPARM_MAX
1554	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1555  if (SSE_REGNO_P (regno) && TARGET_SSE)
1556    return true;
1557  /* RAX is used as hidden argument to va_arg functions.  */
1558  if (!regno)
1559    return true;
1560  for (i = 0; i < REGPARM_MAX; i++)
1561    if (regno == x86_64_int_parameter_registers[i])
1562      return true;
1563  return false;
1564}
1565
1566/* Initialize a variable CUM of type CUMULATIVE_ARGS
1567   for a call to a function whose data type is FNTYPE.
1568   For a library call, FNTYPE is 0.  */
1569
1570void
1571init_cumulative_args (cum, fntype, libname)
1572     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
1573     tree fntype;		/* tree ptr for function decl */
1574     rtx libname;		/* SYMBOL_REF of library name or 0 */
1575{
1576  static CUMULATIVE_ARGS zero_cum;
1577  tree param, next_param;
1578
1579  if (TARGET_DEBUG_ARG)
1580    {
1581      fprintf (stderr, "\ninit_cumulative_args (");
1582      if (fntype)
1583	fprintf (stderr, "fntype code = %s, ret code = %s",
1584		 tree_code_name[(int) TREE_CODE (fntype)],
1585		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1586      else
1587	fprintf (stderr, "no fntype");
1588
1589      if (libname)
1590	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1591    }
1592
1593  *cum = zero_cum;
1594
1595  /* Set up the number of registers to use for passing arguments.  */
1596  cum->nregs = ix86_regparm;
1597  cum->sse_nregs = SSE_REGPARM_MAX;
1598  if (fntype && !TARGET_64BIT)
1599    {
1600      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1601
1602      if (attr)
1603	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1604    }
1605  cum->maybe_vaarg = false;
1606
1607  /* Determine if this function has variable arguments.  This is
1608     indicated by the last argument being 'void_type_node' if there
1609     are no variable arguments.  If there are variable arguments, then
1610     we won't pass anything in registers.  */
1611
1612  if (cum->nregs)
1613    {
1614      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1615	   param != 0; param = next_param)
1616	{
1617	  next_param = TREE_CHAIN (param);
1618	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1619	    {
1620	      if (!TARGET_64BIT)
1621		cum->nregs = 0;
1622	      cum->maybe_vaarg = true;
1623	    }
1624	}
1625    }
1626  if ((!fntype && !libname)
1627      || (fntype && !TYPE_ARG_TYPES (fntype)))
1628    cum->maybe_vaarg = 1;
1629
1630  if (TARGET_DEBUG_ARG)
1631    fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1632
1633  return;
1634}
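
/* Illustrative example: on i386 with -mregparm=3, a varargs prototype
   such as `int f (int, int, ...)' does not end in void_type_node, so
   the loop above clears cum->nregs and every argument goes on the
   stack; the fixed-arity `int g (int, int)' keeps nregs == 3.  */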
1635
1636/* x86-64 register passing implementation.  See the x86-64 ABI for details.
1637   The goal of this code is to classify each eightbyte of an incoming argument
1638   by register class and assign registers accordingly.  */
1639
1640/* Return the union class of CLASS1 and CLASS2.
1641   See the x86-64 PS ABI for details.  */
1642
1643static enum x86_64_reg_class
1644merge_classes (class1, class2)
1645     enum x86_64_reg_class class1, class2;
1646{
1647  /* Rule #1: If both classes are equal, this is the resulting class.  */
1648  if (class1 == class2)
1649    return class1;
1650
1651  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1652     the other class.  */
1653  if (class1 == X86_64_NO_CLASS)
1654    return class2;
1655  if (class2 == X86_64_NO_CLASS)
1656    return class1;
1657
1658  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
1659  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1660    return X86_64_MEMORY_CLASS;
1661
1662  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
1663  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1664      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1665    return X86_64_INTEGERSI_CLASS;
1666  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1667      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1668    return X86_64_INTEGER_CLASS;
1669
1670  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
1671  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1672      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1673    return X86_64_MEMORY_CLASS;
1674
1675  /* Rule #6: Otherwise class SSE is used.  */
1676  return X86_64_SSE_CLASS;
1677}
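
/* Worked example (illustrative): in `struct { int i; float f; }' both
   fields share one eightbyte.  The int classifies as INTEGERSI and the
   float, sitting at bit offset 32, as SSE; rule #4 merges them to
   INTEGER, so the whole struct is passed in one general purpose
   register.  */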
1678
1679/* Classify the argument of type TYPE and mode MODE.
1680   CLASSES will be filled by the register class used to pass each word
1681   of the operand.  The number of words is returned.  In case the parameter
1682   should be passed in memory, 0 is returned. As a special case for zero
1683   sized containers, classes[0] will be NO_CLASS and 1 is returned.
1684
1685   BIT_OFFSET is used internally for handling records and specifies the
1686   offset in bits modulo 256 to avoid overflow cases.
1687
1688   See the x86-64 PS ABI for details.
1689*/
1690
1691static int
1692classify_argument (mode, type, classes, bit_offset)
1693     enum machine_mode mode;
1694     tree type;
1695     enum x86_64_reg_class classes[MAX_CLASSES];
1696     int bit_offset;
1697{
1698  int bytes =
1699    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1700  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1701
1702  /* Variable sized entities are always passed/returned in memory.  */
1703  if (bytes < 0)
1704    return 0;
1705
1706  if (type && AGGREGATE_TYPE_P (type))
1707    {
1708      int i;
1709      tree field;
1710      enum x86_64_reg_class subclasses[MAX_CLASSES];
1711
1712      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
1713      if (bytes > 16)
1714	return 0;
1715
1716      for (i = 0; i < words; i++)
1717	classes[i] = X86_64_NO_CLASS;
1718
1719      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
1720	 signal the memory class, so handle it as a special case.  */
1721      if (!words)
1722	{
1723	  classes[0] = X86_64_NO_CLASS;
1724	  return 1;
1725	}
1726
1727      /* Classify each field of record and merge classes.  */
1728      if (TREE_CODE (type) == RECORD_TYPE)
1729	{
1730	  /* For classes, first merge in the fields of the base classes.  */
1731	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1732	    {
1733	      tree bases = TYPE_BINFO_BASETYPES (type);
1734	      int n_bases = TREE_VEC_LENGTH (bases);
1735	      int i;
1736
1737	      for (i = 0; i < n_bases; ++i)
1738		{
1739		   tree binfo = TREE_VEC_ELT (bases, i);
1740		   int num, j;
1741		   int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1742		   tree type = BINFO_TYPE (binfo);
1743
1744		   num = classify_argument (TYPE_MODE (type),
1745					    type, subclasses,
1746					    (offset + bit_offset) % 256);
1747		   if (!num)
1748		     return 0;
1749		   for (j = 0; j < num; j++)	/* J, not I: I indexes the bases.  */
1750		     {
1751		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
1752		       classes[j + pos] =
1753			 merge_classes (subclasses[j], classes[j + pos]);
1754		     }
1755		}
1756	    }
1757	  /* And now merge the fields of the structure.  */
1758	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1759	    {
1760	      if (TREE_CODE (field) == FIELD_DECL)
1761		{
1762		  int num;
1763
1764		  /* Bitfields are always classified as integer.  Handle them
1765		     early, since later code would consider them to be
1766		     misaligned integers.  */
1767		  if (DECL_BIT_FIELD (field))
1768		    {
1769		      for (i = int_bit_position (field) / 8 / 8;
1770			   i < (int_bit_position (field)
1771			        + tree_low_cst (DECL_SIZE (field), 0)
1772			       	+ 63) / 8 / 8; i++)
1773			classes[i] =
1774			  merge_classes (X86_64_INTEGER_CLASS,
1775					 classes[i]);
1776		    }
1777		  else
1778		    {
1779		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1780					       TREE_TYPE (field), subclasses,
1781					       (int_bit_position (field)
1782						+ bit_offset) % 256);
1783		      if (!num)
1784			return 0;
1785		      for (i = 0; i < num; i++)
1786			{
1787			  int pos =
1788			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1789			  classes[i + pos] =
1790			    merge_classes (subclasses[i], classes[i + pos]);
1791			}
1792		    }
1793		}
1794	    }
1795	}
1796      /* Arrays are handled as small records.  */
1797      else if (TREE_CODE (type) == ARRAY_TYPE)
1798	{
1799	  int num;
1800	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1801				   TREE_TYPE (type), subclasses, bit_offset);
1802	  if (!num)
1803	    return 0;
1804
1805	  /* The partial classes are now full classes.  */
1806	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1807	    subclasses[0] = X86_64_SSE_CLASS;
1808	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1809	    subclasses[0] = X86_64_INTEGER_CLASS;
1810
1811	  for (i = 0; i < words; i++)
1812	    classes[i] = subclasses[i % num];
1813	}
1814      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
1815      else if (TREE_CODE (type) == UNION_TYPE
1816	       || TREE_CODE (type) == QUAL_UNION_TYPE)
1817	{
1818	  /* For classes, first merge in the fields of the base classes.  */
1819	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1820	    {
1821	      tree bases = TYPE_BINFO_BASETYPES (type);
1822	      int n_bases = TREE_VEC_LENGTH (bases);
1823	      int i;
1824
1825	      for (i = 0; i < n_bases; ++i)
1826		{
1827		   tree binfo = TREE_VEC_ELT (bases, i);
1828		   int num, j;
1829		   int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1830		   tree type = BINFO_TYPE (binfo);
1831
1832		   num = classify_argument (TYPE_MODE (type),
1833					    type, subclasses,
1834					    (offset + (bit_offset % 64)) % 256);
1835		   if (!num)
1836		     return 0;
1837		   for (j = 0; j < num; j++)	/* J, not I: I indexes the bases.  */
1838		     {
1839		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
1840		       classes[j + pos] =
1841			 merge_classes (subclasses[j], classes[j + pos]);
1842		     }
1843		}
1844	    }
1845	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1846	    {
1847	      if (TREE_CODE (field) == FIELD_DECL)
1848		{
1849		  int num;
1850		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1851					   TREE_TYPE (field), subclasses,
1852					   bit_offset);
1853		  if (!num)
1854		    return 0;
1855		  for (i = 0; i < num; i++)
1856		    classes[i] = merge_classes (subclasses[i], classes[i]);
1857		}
1858	    }
1859	}
1860      else
1861	abort ();
1862
1863      /* Final merger cleanup.  */
1864      for (i = 0; i < words; i++)
1865	{
1866	  /* If one class is MEMORY, everything should be passed in
1867	     memory.  */
1868	  if (classes[i] == X86_64_MEMORY_CLASS)
1869	    return 0;
1870
1871	  /* The X86_64_SSEUP_CLASS should be always preceded by
1872	     X86_64_SSE_CLASS.  */
1873	  if (classes[i] == X86_64_SSEUP_CLASS
1874	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1875	    classes[i] = X86_64_SSE_CLASS;
1876
1877	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
1878	  if (classes[i] == X86_64_X87UP_CLASS
1879	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1880	    classes[i] = X86_64_SSE_CLASS;
1881	}
1882      return words;
1883    }
1884
1885  /* Compute the alignment needed.  We align all types to their natural
1886     boundaries, with the exception of XFmode, which is aligned to 128 bits.  */
1887  if (mode != VOIDmode && mode != BLKmode)
1888    {
1889      int mode_alignment = GET_MODE_BITSIZE (mode);
1890
1891      if (mode == XFmode)
1892	mode_alignment = 128;
1893      else if (mode == XCmode)
1894	mode_alignment = 256;
1895      /* Misaligned fields are always returned in memory.  */
1896      if (bit_offset % mode_alignment)
1897	return 0;
1898    }
1899
1900  /* Classification of atomic types.  */
1901  switch (mode)
1902    {
1903    case DImode:
1904    case SImode:
1905    case HImode:
1906    case QImode:
1907    case CSImode:
1908    case CHImode:
1909    case CQImode:
1910      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1911	classes[0] = X86_64_INTEGERSI_CLASS;
1912      else
1913	classes[0] = X86_64_INTEGER_CLASS;
1914      return 1;
1915    case CDImode:
1916    case TImode:
1917      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1918      return 2;
1919    case CTImode:
1920      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1921      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1922      return 4;
1923    case SFmode:
1924      if (!(bit_offset % 64))
1925	classes[0] = X86_64_SSESF_CLASS;
1926      else
1927	classes[0] = X86_64_SSE_CLASS;
1928      return 1;
1929    case DFmode:
1930      classes[0] = X86_64_SSEDF_CLASS;
1931      return 1;
1932    case TFmode:
1933      classes[0] = X86_64_X87_CLASS;
1934      classes[1] = X86_64_X87UP_CLASS;
1935      return 2;
1936    case TCmode:
1937      classes[0] = X86_64_X87_CLASS;
1938      classes[1] = X86_64_X87UP_CLASS;
1939      classes[2] = X86_64_X87_CLASS;
1940      classes[3] = X86_64_X87UP_CLASS;
1941      return 4;
1942    case DCmode:
1943      classes[0] = X86_64_SSEDF_CLASS;
1944      classes[1] = X86_64_SSEDF_CLASS;
1945      return 2;
1946    case SCmode:
1947      classes[0] = X86_64_SSE_CLASS;
1948      return 1;
1949    case V4SFmode:
1950    case V4SImode:
1951    case V16QImode:
1952    case V8HImode:
1953    case V2DFmode:
1954    case V2DImode:
1955      classes[0] = X86_64_SSE_CLASS;
1956      classes[1] = X86_64_SSEUP_CLASS;
1957      return 2;
1958    case V2SFmode:
1959    case V2SImode:
1960    case V4HImode:
1961    case V8QImode:
1962      return 0;
1963    case BLKmode:
1964    case VOIDmode:
1965      return 0;
1966    default:
1967      abort ();
1968    }
1969}
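
/* Worked example (illustrative): `struct { double x; double y; }' is 16
   bytes, i.e. two eightbytes.  Each DFmode field classifies as SSEDF,
   so the function returns 2 with classes = { SSEDF, SSEDF } and the
   struct ends up in two SSE registers, e.g. %xmm0 and %xmm1.  */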
1970
1971/* Examine the argument and set the number of registers required in each
1972   class.  Return 0 iff the parameter should be passed in memory.  */
1973static int
1974examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1975     enum machine_mode mode;
1976     tree type;
1977     int *int_nregs, *sse_nregs;
1978     int in_return;
1979{
1980  enum x86_64_reg_class class[MAX_CLASSES];
1981  int n = classify_argument (mode, type, class, 0);
1982
1983  *int_nregs = 0;
1984  *sse_nregs = 0;
1985  if (!n)
1986    return 0;
1987  for (n--; n >= 0; n--)
1988    switch (class[n])
1989      {
1990      case X86_64_INTEGER_CLASS:
1991      case X86_64_INTEGERSI_CLASS:
1992	(*int_nregs)++;
1993	break;
1994      case X86_64_SSE_CLASS:
1995      case X86_64_SSESF_CLASS:
1996      case X86_64_SSEDF_CLASS:
1997	(*sse_nregs)++;
1998	break;
1999      case X86_64_NO_CLASS:
2000      case X86_64_SSEUP_CLASS:
2001	break;
2002      case X86_64_X87_CLASS:
2003      case X86_64_X87UP_CLASS:
2004	if (!in_return)
2005	  return 0;
2006	break;
2007      case X86_64_MEMORY_CLASS:
2008	abort ();
2009      }
2010  return 1;
2011}
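
/* Illustrative example: for `struct { long l; double d; }' the classes
   are { INTEGER, SSEDF }, so *INT_NREGS and *SSE_NREGS both become 1
   and the function returns 1; the caller then checks the two counts
   against the registers it has left.  */
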
2012/* Construct container for the argument used by GCC interface.  See
2013   FUNCTION_ARG for the detailed description.  */
2014static rtx
2015construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2016     enum machine_mode mode;
2017     tree type;
2018     int in_return;
2019     int nintregs, nsseregs;
2020     const int * intreg;
2021     int sse_regno;
2022{
2023  enum machine_mode tmpmode;
2024  int bytes =
2025    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2026  enum x86_64_reg_class class[MAX_CLASSES];
2027  int n;
2028  int i;
2029  int nexps = 0;
2030  int needed_sseregs, needed_intregs;
2031  rtx exp[MAX_CLASSES];
2032  rtx ret;
2033
2034  n = classify_argument (mode, type, class, 0);
2035  if (TARGET_DEBUG_ARG)
2036    {
2037      if (!n)
2038	fprintf (stderr, "Memory class\n");
2039      else
2040	{
2041	  fprintf (stderr, "Classes:");
2042	  for (i = 0; i < n; i++)
2043	    {
2044	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2045	    }
2046	   fprintf (stderr, "\n");
2047	}
2048    }
2049  if (!n)
2050    return NULL;
2051  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2052    return NULL;
2053  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2054    return NULL;
2055
2056  /* First construct the simple cases.  Avoid SCmode, since we want to use
2057     a single register to pass this type.  */
2058  if (n == 1 && mode != SCmode)
2059    switch (class[0])
2060      {
2061      case X86_64_INTEGER_CLASS:
2062      case X86_64_INTEGERSI_CLASS:
2063	return gen_rtx_REG (mode, intreg[0]);
2064      case X86_64_SSE_CLASS:
2065      case X86_64_SSESF_CLASS:
2066      case X86_64_SSEDF_CLASS:
2067	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2068      case X86_64_X87_CLASS:
2069	return gen_rtx_REG (mode, FIRST_STACK_REG);
2070      case X86_64_NO_CLASS:
2071	/* Zero sized array, struct or class.  */
2072	return NULL;
2073      default:
2074	abort ();
2075      }
2076  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2077    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2078  if (n == 2
2079      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2080    return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2081  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2082      && class[1] == X86_64_INTEGER_CLASS
2083      && (mode == CDImode || mode == TImode)
2084      && intreg[0] + 1 == intreg[1])
2085    return gen_rtx_REG (mode, intreg[0]);
2086  if (n == 4
2087      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2088      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2089    return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2090
2091  /* Otherwise figure out the entries of the PARALLEL.  */
2092  for (i = 0; i < n; i++)
2093    {
2094      switch (class[i])
2095        {
2096	  case X86_64_NO_CLASS:
2097	    break;
2098	  case X86_64_INTEGER_CLASS:
2099	  case X86_64_INTEGERSI_CLASS:
2100	    /* Merge TImodes on aligned occasions here too.  */
2101	    if (i * 8 + 8 > bytes)
2102	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2103	    else if (class[i] == X86_64_INTEGERSI_CLASS)
2104	      tmpmode = SImode;
2105	    else
2106	      tmpmode = DImode;
2107	    /* We've requested a size (e.g. 24 bits) for which no integer mode exists.  Use DImode.  */
2108	    if (tmpmode == BLKmode)
2109	      tmpmode = DImode;
2110	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2111					       gen_rtx_REG (tmpmode, *intreg),
2112					       GEN_INT (i*8));
2113	    intreg++;
2114	    break;
2115	  case X86_64_SSESF_CLASS:
2116	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2117					       gen_rtx_REG (SFmode,
2118							    SSE_REGNO (sse_regno)),
2119					       GEN_INT (i*8));
2120	    sse_regno++;
2121	    break;
2122	  case X86_64_SSEDF_CLASS:
2123	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2124					       gen_rtx_REG (DFmode,
2125							    SSE_REGNO (sse_regno)),
2126					       GEN_INT (i*8));
2127	    sse_regno++;
2128	    break;
2129	  case X86_64_SSE_CLASS:
2130	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2131	      tmpmode = TImode;
2132	    else
2133	      tmpmode = DImode;
2134	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2135					       gen_rtx_REG (tmpmode,
2136							    SSE_REGNO (sse_regno)),
2137					       GEN_INT (i*8));
2138	    if (tmpmode == TImode)
2139	      i++;
2140	    sse_regno++;
2141	    break;
2142	  default:
2143	    abort ();
2144	}
2145    }
2146  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2147  for (i = 0; i < nexps; i++)
2148    XVECEXP (ret, 0, i) = exp [i];
2149  return ret;
2150}
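
/* Illustrative example: for `struct { long l; double d; }' passed as
   the first argument, the loop above builds

       (parallel [(expr_list (reg:DI di) (const_int 0))
                  (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first eightbyte goes in %rdi and the second in %xmm0, each
   entry tagged with its byte offset within the structure.  */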
2151
2152/* Update the data in CUM to advance over an argument
2153   of mode MODE and data type TYPE.
2154   (TYPE is null for libcalls where that information may not be available.)  */
2155
2156void
2157function_arg_advance (cum, mode, type, named)
2158     CUMULATIVE_ARGS *cum;	/* current arg information */
2159     enum machine_mode mode;	/* current arg mode */
2160     tree type;			/* type of the argument or 0 if lib support */
2161     int named;			/* whether or not the argument was named */
2162{
2163  int bytes =
2164    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2165  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2166
2167  if (TARGET_DEBUG_ARG)
2168    fprintf (stderr,
2169	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2170	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2171  if (TARGET_64BIT)
2172    {
2173      int int_nregs, sse_nregs;
2174      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2175	cum->words += words;
2176      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2177	{
2178	  cum->nregs -= int_nregs;
2179	  cum->sse_nregs -= sse_nregs;
2180	  cum->regno += int_nregs;
2181	  cum->sse_regno += sse_nregs;
2182	}
2183      else
2184	cum->words += words;
2185    }
2186  else
2187    {
2188      if (TARGET_SSE && mode == TImode)
2189	{
2190	  cum->sse_words += words;
2191	  cum->sse_nregs -= 1;
2192	  cum->sse_regno += 1;
2193	  if (cum->sse_nregs <= 0)
2194	    {
2195	      cum->sse_nregs = 0;
2196	      cum->sse_regno = 0;
2197	    }
2198	}
2199      else
2200	{
2201	  cum->words += words;
2202	  cum->nregs -= words;
2203	  cum->regno += words;
2204
2205	  if (cum->nregs <= 0)
2206	    {
2207	      cum->nregs = 0;
2208	      cum->regno = 0;
2209	    }
2210	}
2211    }
2212  return;
2213}
2214
2215/* Define where to put the arguments to a function.
2216   Value is zero to push the argument on the stack,
2217   or a hard register in which to store the argument.
2218
2219   MODE is the argument's machine mode.
2220   TYPE is the data type of the argument (as a tree).
2221    This is null for libcalls where that information may
2222    not be available.
2223   CUM is a variable of type CUMULATIVE_ARGS which gives info about
2224    the preceding args and about the function being called.
2225   NAMED is nonzero if this argument is a named parameter
2226    (otherwise it is an extra parameter matching an ellipsis).  */
2227
2228rtx
2229function_arg (cum, mode, type, named)
2230     CUMULATIVE_ARGS *cum;	/* current arg information */
2231     enum machine_mode mode;	/* current arg mode */
2232     tree type;			/* type of the argument or 0 if lib support */
2233     int named;			/* != 0 for normal args, == 0 for ... args */
2234{
2235  rtx ret   = NULL_RTX;
2236  int bytes =
2237    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2238  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2239
2240  /* Handle a hidden AL argument containing the number of registers for
2241     varargs x86-64 functions.  For the i386 ABI just return constm1_rtx to
2242     avoid any AL settings.  */
2243  if (mode == VOIDmode)
2244    {
2245      if (TARGET_64BIT)
2246	return GEN_INT (cum->maybe_vaarg
2247			? (cum->sse_nregs < 0
2248			   ? SSE_REGPARM_MAX
2249			   : cum->sse_regno)
2250			: -1);
2251      else
2252	return constm1_rtx;
2253    }
2254  if (TARGET_64BIT)
2255    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2256			       &x86_64_int_parameter_registers [cum->regno],
2257			       cum->sse_regno);
2258  else
2259    switch (mode)
2260      {
2261	/* For now, pass fp/complex values on the stack.  */
2262      default:
2263	break;
2264
2265      case BLKmode:
2266	if (bytes < 0)
2267	  break;
2268	/* FALLTHRU */
2269      case DImode:
2270      case SImode:
2271      case HImode:
2272      case QImode:
2273	if (words <= cum->nregs)
2274	  ret = gen_rtx_REG (mode, cum->regno);
2275	break;
2276      case TImode:
2277	if (cum->sse_nregs)
2278	  ret = gen_rtx_REG (mode, cum->sse_regno);
2279	break;
2280      }
2281
2282  if (TARGET_DEBUG_ARG)
2283    {
2284      fprintf (stderr,
2285	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2286	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2287
2288      if (ret)
2289	print_simple_rtl (stderr, ret);
2290      else
2291	fprintf (stderr, ", stack");
2292
2293      fprintf (stderr, " )\n");
2294    }
2295
2296  return ret;
2297}
2298
2299/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2300   passing ABI.  */
2301static bool
2302contains_128bit_aligned_vector_p (type)
2303     tree type;
2304{
2305  enum machine_mode mode = TYPE_MODE (type);
2306  if (SSE_REG_MODE_P (mode)
2307      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2308    return true;
2309  if (TYPE_ALIGN (type) < 128)
2310    return false;
2311
2312  if (AGGREGATE_TYPE_P (type))
2313    {
2314      /* Walk the aggregates recursively.  */
2315      if (TREE_CODE (type) == RECORD_TYPE
2316	  || TREE_CODE (type) == UNION_TYPE
2317	  || TREE_CODE (type) == QUAL_UNION_TYPE)
2318	{
2319	  tree field;
2320
2321	  if (TYPE_BINFO (type) != NULL
2322	      && TYPE_BINFO_BASETYPES (type) != NULL)
2323	    {
2324	      tree bases = TYPE_BINFO_BASETYPES (type);
2325	      int n_bases = TREE_VEC_LENGTH (bases);
2326	      int i;
2327
2328	      for (i = 0; i < n_bases; ++i)
2329		{
2330		  tree binfo = TREE_VEC_ELT (bases, i);
2331		  tree type = BINFO_TYPE (binfo);
2332
2333		  if (contains_128bit_aligned_vector_p (type))
2334		    return true;
2335		}
2336	    }
2337	  /* And now check the fields of the structure.  */
2338	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2339	    {
2340	      if (TREE_CODE (field) == FIELD_DECL
2341		  && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2342		return true;
2343	    }
2344	}
2345      /* Just for use if some languages pass arrays by value.  */
2346      else if (TREE_CODE (type) == ARRAY_TYPE)
2347	{
2348	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2349	    return true;
2350	}
2351      else
2352	abort ();
2353    }
2354  return false;
2355}
2356
2357/* A C expression that indicates when an argument must be passed by
2358   reference.  If nonzero for an argument, a copy of that argument is
2359   made in memory and a pointer to the argument is passed instead of
2360   the argument itself.  The pointer is passed in whatever way is
2361   appropriate for passing a pointer to that type.  */
2362
2363int
2364function_arg_pass_by_reference (cum, mode, type, named)
2365     CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2366     enum machine_mode mode ATTRIBUTE_UNUSED;
2367     tree type;
2368     int named ATTRIBUTE_UNUSED;
2369{
2370  if (!TARGET_64BIT)
2371    return 0;
2372
2373  if (type && int_size_in_bytes (type) == -1)
2374    {
2375      if (TARGET_DEBUG_ARG)
2376	fprintf (stderr, "function_arg_pass_by_reference\n");
2377      return 1;
2378    }
2379
2380  return 0;
2381}
2382
2383/* Gives the alignment boundary, in bits, of an argument with the specified mode
2384   and type.   */
2385
2386int
2387ix86_function_arg_boundary (mode, type)
2388     enum machine_mode mode;
2389     tree type;
2390{
2391  int align;
2392  if (type)
2393    align = TYPE_ALIGN (type);
2394  else
2395    align = GET_MODE_ALIGNMENT (mode);
2396  if (align < PARM_BOUNDARY)
2397    align = PARM_BOUNDARY;
2398  if (!TARGET_64BIT)
2399    {
2400      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
2401	 make an exception for SSE modes since these require 128bit
2402	 alignment.
2403
2404	 The handling here differs from field_alignment.  ICC aligns MMX
2405	 arguments to 4 byte boundaries, while structure fields are aligned
2406	 to 8 byte boundaries.  */
2407      if (!type)
2408	{
2409	  if (!SSE_REG_MODE_P (mode))
2410	    align = PARM_BOUNDARY;
2411	}
2412      else
2413	{
2414	  if (!contains_128bit_aligned_vector_p (type))
2415	    align = PARM_BOUNDARY;
2416	}
2417      if (align != PARM_BOUNDARY && !TARGET_SSE)
2418	abort ();
2419    }
2420  if (align > 128)
2421    align = 128;
2422  return align;
2423}
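
/* Illustrative examples: on i386 an `__m128' argument yields 128 here
   (TARGET_SSE is required for that), while a `double' argument is
   lowered back to PARM_BOUNDARY (32 bits) even though DFmode's natural
   alignment is 64.  */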
2424
2425/* Return true if N is a possible register number of function value.  */
2426bool
2427ix86_function_value_regno_p (regno)
2428     int regno;
2429{
2430  if (!TARGET_64BIT)
2431    {
2432      return ((regno) == 0
2433	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2434	      || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2435    }
2436  return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2437	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2438	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2439}
2440
2441/* Define how to find the value returned by a function.
2442   VALTYPE is the data type of the value (as a tree).
2443   If the precise function being called is known, FUNC is its FUNCTION_DECL;
2444   otherwise, FUNC is 0.  */
2445rtx
2446ix86_function_value (valtype)
2447     tree valtype;
2448{
2449  if (TARGET_64BIT)
2450    {
2451      rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2452				     REGPARM_MAX, SSE_REGPARM_MAX,
2453				     x86_64_int_return_registers, 0);
2454      /* For zero sized structures, construct_container returns NULL, but we
2455         need to keep the rest of the compiler happy by returning a meaningful value.  */
2456      if (!ret)
2457	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2458      return ret;
2459    }
2460  else
2461    return gen_rtx_REG (TYPE_MODE (valtype),
2462			ix86_value_regno (TYPE_MODE (valtype)));
2463}
2464
2465/* Return false iff type is returned in memory.  */
2466int
2467ix86_return_in_memory (type)
2468     tree type;
2469{
2470  int needed_intregs, needed_sseregs;
2471  if (TARGET_64BIT)
2472    {
2473      return !examine_argument (TYPE_MODE (type), type, 1,
2474				&needed_intregs, &needed_sseregs);
2475    }
2476  else
2477    {
2478      if (TYPE_MODE (type) == BLKmode
2479	  || (VECTOR_MODE_P (TYPE_MODE (type))
2480	      && int_size_in_bytes (type) == 8)
2481	  || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2482	      && TYPE_MODE (type) != TFmode
2483	      && !VECTOR_MODE_P (TYPE_MODE (type))))
2484	return 1;
2485      return 0;
2486    }
2487}
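
/* Illustrative examples: on x86-64, `struct { long a, b; }' classifies
   as two INTEGER eightbytes, so this returns 0 and the value comes back
   in registers; on i386 the same struct has BLKmode, so the function
   returns 1 and a hidden return pointer is used instead.  */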
2488
2489/* Define how to find the value returned by a library function
2490   assuming the value has mode MODE.  */
2491rtx
2492ix86_libcall_value (mode)
2493   enum machine_mode mode;
2494{
2495  if (TARGET_64BIT)
2496    {
2497      switch (mode)
2498	{
2499	  case SFmode:
2500	  case SCmode:
2501	  case DFmode:
2502	  case DCmode:
2503	    return gen_rtx_REG (mode, FIRST_SSE_REG);
2504	  case TFmode:
2505	  case TCmode:
2506	    return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2507	  default:
2508	    return gen_rtx_REG (mode, 0);
2509	}
2510    }
2511  else
2512   return gen_rtx_REG (mode, ix86_value_regno (mode));
2513}
2514
2515/* Given a mode, return the register to use for a return value.  */
2516
2517static int
2518ix86_value_regno (mode)
2519     enum machine_mode mode;
2520{
2521  if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2522    return FIRST_FLOAT_REG;
2523  if (mode == TImode || VECTOR_MODE_P (mode))
2524    return FIRST_SSE_REG;
2525  return 0;
2526}
2527
2528/* Create the va_list data type.  */
2529
2530tree
2531ix86_build_va_list ()
2532{
2533  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2534
2535  /* For i386 we use plain pointer to argument area.  */
2536  if (!TARGET_64BIT)
2537    return build_pointer_type (char_type_node);
2538
2539  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2540  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2541
2542  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2543		      unsigned_type_node);
2544  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2545		      unsigned_type_node);
2546  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2547		      ptr_type_node);
2548  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2549		      ptr_type_node);
2550
2551  DECL_FIELD_CONTEXT (f_gpr) = record;
2552  DECL_FIELD_CONTEXT (f_fpr) = record;
2553  DECL_FIELD_CONTEXT (f_ovf) = record;
2554  DECL_FIELD_CONTEXT (f_sav) = record;
2555
2556  TREE_CHAIN (record) = type_decl;
2557  TYPE_NAME (record) = type_decl;
2558  TYPE_FIELDS (record) = f_gpr;
2559  TREE_CHAIN (f_gpr) = f_fpr;
2560  TREE_CHAIN (f_fpr) = f_ovf;
2561  TREE_CHAIN (f_ovf) = f_sav;
2562
2563  layout_type (record);
2564
2565  /* The correct type is an array type of one element.  */
2566  return build_array_type (record, build_index_type (size_zero_node));
2567}
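
/* For reference (an added note): the record built above corresponds to
   the C-level type mandated by the x86-64 ABI:

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];

   gp_offset and fp_offset index into reg_save_area, while
   overflow_arg_area walks the stack-passed arguments.  */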
2568
2569/* Perform any actions needed for a function that is receiving a
2570   variable number of arguments.
2571
2572   CUM is as above.
2573
2574   MODE and TYPE are the mode and type of the current parameter.
2575
2576   PRETEND_SIZE is a variable that should be set to the amount of stack
2577   that must be pushed by the prolog to pretend that our caller pushed
2578   it.
2579
2580   Normally, this macro will push all remaining incoming registers on the
2581   stack and set PRETEND_SIZE to the length of the registers pushed.  */
2582
2583void
2584ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2585     CUMULATIVE_ARGS *cum;
2586     enum machine_mode mode;
2587     tree type;
2588     int *pretend_size ATTRIBUTE_UNUSED;
2589     int no_rtl;
2590
2591{
2592  CUMULATIVE_ARGS next_cum;
2593  rtx save_area = NULL_RTX, mem;
2594  rtx label;
2595  rtx label_ref;
2596  rtx tmp_reg;
2597  rtx nsse_reg;
2598  int set;
2599  tree fntype;
2600  int stdarg_p;
2601  int i;
2602
2603  if (!TARGET_64BIT)
2604    return;
2605
2606  /* Indicate to allocate space on the stack for the varargs save area.  */
2607  ix86_save_varrargs_registers = 1;
2608
2609  cfun->stack_alignment_needed = 128;
2610
2611  fntype = TREE_TYPE (current_function_decl);
2612  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2613	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2614		  != void_type_node));
2615
2616  /* For varargs, we do not want to skip the dummy va_dcl argument.
2617     For stdargs, we do want to skip the last named argument.  */
2618  next_cum = *cum;
2619  if (stdarg_p)
2620    function_arg_advance (&next_cum, mode, type, 1);
2621
2622  if (!no_rtl)
2623    save_area = frame_pointer_rtx;
2624
2625  set = get_varargs_alias_set ();
2626
2627  for (i = next_cum.regno; i < ix86_regparm; i++)
2628    {
2629      mem = gen_rtx_MEM (Pmode,
2630			 plus_constant (save_area, i * UNITS_PER_WORD));
2631      set_mem_alias_set (mem, set);
2632      emit_move_insn (mem, gen_rtx_REG (Pmode,
2633					x86_64_int_parameter_registers[i]));
2634    }
2635
2636  if (next_cum.sse_nregs)
2637    {
2638      /* Now emit code to save SSE registers.  The AX parameter contains the
2639	 number of SSE parameter registers used to call this function.  We use
2640	 the sse_prologue_save insn template that produces a computed jump across
2641	 the SSE saves.  We need some preparation work to get this working.  */
2642
2643      label = gen_label_rtx ();
2644      label_ref = gen_rtx_LABEL_REF (Pmode, label);
2645
2646      /* Compute the address to jump to:
2647         label - eax*4 + nnamed_sse_arguments*4  (each save insn is 4 bytes)  */
2648      tmp_reg = gen_reg_rtx (Pmode);
2649      nsse_reg = gen_reg_rtx (Pmode);
2650      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2651      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2652			      gen_rtx_MULT (Pmode, nsse_reg,
2653					    GEN_INT (4))));
2654      if (next_cum.sse_regno)
2655	emit_move_insn
2656	  (nsse_reg,
2657	   gen_rtx_CONST (DImode,
2658			  gen_rtx_PLUS (DImode,
2659					label_ref,
2660					GEN_INT (next_cum.sse_regno * 4))));
2661      else
2662	emit_move_insn (nsse_reg, label_ref);
2663      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2664
2665      /* Compute the address of the memory block we save into.  We always use
2666	 a pointer pointing 127 bytes after the first byte to store; this is
2667	 needed to keep the instruction size limited to 4 bytes.  */
2668      tmp_reg = gen_reg_rtx (Pmode);
2669      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2670			      plus_constant (save_area,
2671					     8 * REGPARM_MAX + 127)));
2672      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2673      set_mem_alias_set (mem, set);
2674      set_mem_align (mem, BITS_PER_WORD);
2675
2676      /* And finally do the dirty job!  */
2677      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2678					GEN_INT (next_cum.sse_regno), label));
2679    }
2680
2681}
2682
2683/* Implement va_start.  */
2684
2685void
2686ix86_va_start (valist, nextarg)
2687     tree valist;
2688     rtx nextarg;
2689{
2690  HOST_WIDE_INT words, n_gpr, n_fpr;
2691  tree f_gpr, f_fpr, f_ovf, f_sav;
2692  tree gpr, fpr, ovf, sav, t;
2693
2694  /* Only 64bit target needs something special.  */
2695  if (!TARGET_64BIT)
2696    {
2697      std_expand_builtin_va_start (valist, nextarg);
2698      return;
2699    }
2700
2701  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2702  f_fpr = TREE_CHAIN (f_gpr);
2703  f_ovf = TREE_CHAIN (f_fpr);
2704  f_sav = TREE_CHAIN (f_ovf);
2705
2706  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2707  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2708  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2709  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2710  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2711
2712  /* Count number of gp and fp argument registers used.  */
2713  words = current_function_args_info.words;
2714  n_gpr = current_function_args_info.regno;
2715  n_fpr = current_function_args_info.sse_regno;
2716
2717  if (TARGET_DEBUG_ARG)
2718    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2719	     (int) words, (int) n_gpr, (int) n_fpr);
2720
2721  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2722	     build_int_2 (n_gpr * 8, 0));
2723  TREE_SIDE_EFFECTS (t) = 1;
2724  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2725
2726  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2727	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2728  TREE_SIDE_EFFECTS (t) = 1;
2729  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2730
2731  /* Find the overflow area.  */
2732  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2733  if (words != 0)
2734    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2735	       build_int_2 (words * UNITS_PER_WORD, 0));
2736  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2737  TREE_SIDE_EFFECTS (t) = 1;
2738  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2739
2740  /* Find the register save area.
2741     The function prologue saves it right above the stack frame.  */
2742  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2743  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2744  TREE_SIDE_EFFECTS (t) = 1;
2745  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2746}
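
/* Worked example (illustrative): in `int f (int x, ...)' the single
   named argument consumes one integer register, so va_start sets
   gp_offset = 1 * 8 = 8, fp_offset = 0 * 16 + 8 * REGPARM_MAX = 48,
   points overflow_arg_area at the first stack-passed word, and points
   reg_save_area at the block saved by the prologue.  */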
2747
2748/* Implement va_arg.  */
2749rtx
2750ix86_va_arg (valist, type)
2751     tree valist, type;
2752{
2753  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2754  tree f_gpr, f_fpr, f_ovf, f_sav;
2755  tree gpr, fpr, ovf, sav, t;
2756  int size, rsize;
2757  rtx lab_false, lab_over = NULL_RTX;
2758  rtx addr_rtx, r;
2759  rtx container;
2760  int indirect_p = 0;
2761
2762  /* Only 64bit target needs something special.  */
2763  if (!TARGET_64BIT)
2764    {
2765      return std_expand_builtin_va_arg (valist, type);
2766    }
2767
2768  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2769  f_fpr = TREE_CHAIN (f_gpr);
2770  f_ovf = TREE_CHAIN (f_fpr);
2771  f_sav = TREE_CHAIN (f_ovf);
2772
2773  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2774  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2775  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2776  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2777  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2778
2779  size = int_size_in_bytes (type);
2780  if (size == -1)
2781    {
2782      /* Passed by reference.  */
2783      indirect_p = 1;
2784      type = build_pointer_type (type);
2785      size = int_size_in_bytes (type);
2786    }
2787  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2788
2789  container = construct_container (TYPE_MODE (type), type, 0,
2790				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2791  /*
2792   * Pull the value out of the saved registers ...
2793   */
2794
2795  addr_rtx = gen_reg_rtx (Pmode);
2796
2797  if (container)
2798    {
2799      rtx int_addr_rtx, sse_addr_rtx;
2800      int needed_intregs, needed_sseregs;
2801      int need_temp;
2802
2803      lab_over = gen_label_rtx ();
2804      lab_false = gen_label_rtx ();
2805
2806      examine_argument (TYPE_MODE (type), type, 0,
2807		        &needed_intregs, &needed_sseregs);
2808
2809
2810      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2811		   || TYPE_ALIGN (type) > 128);
2812
2813      /* In case we are passing a structure, verify that it is a consecutive
2814         block in the register save area.  If not we need to do moves.  */
2815      if (!need_temp && !REG_P (container))
2816	{
2817	  /* Verify that all registers are strictly consecutive.  */
2818	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2819	    {
2820	      int i;
2821
2822	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2823		{
2824		  rtx slot = XVECEXP (container, 0, i);
2825		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2826		      || INTVAL (XEXP (slot, 1)) != i * 16)
2827		    need_temp = 1;
2828		}
2829	    }
2830	  else
2831	    {
2832	      int i;
2833
2834	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2835		{
2836		  rtx slot = XVECEXP (container, 0, i);
2837		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2838		      || INTVAL (XEXP (slot, 1)) != i * 8)
2839		    need_temp = 1;
2840		}
2841	    }
2842	}
2843      if (!need_temp)
2844	{
2845	  int_addr_rtx = addr_rtx;
2846	  sse_addr_rtx = addr_rtx;
2847	}
2848      else
2849	{
2850	  int_addr_rtx = gen_reg_rtx (Pmode);
2851	  sse_addr_rtx = gen_reg_rtx (Pmode);
2852	}
2853      /* First ensure that we fit completely in registers.  */
2854      if (needed_intregs)
2855	{
2856	  emit_cmp_and_jump_insns (expand_expr
2857				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2858				   GEN_INT ((REGPARM_MAX - needed_intregs +
2859					     1) * 8), GE, const1_rtx, SImode,
2860				   1, lab_false);
2861	}
2862      if (needed_sseregs)
2863	{
2864	  emit_cmp_and_jump_insns (expand_expr
2865				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2866				   GEN_INT ((SSE_REGPARM_MAX -
2867					     needed_sseregs + 1) * 16 +
2868					    REGPARM_MAX * 8), GE, const1_rtx,
2869				   SImode, 1, lab_false);
2870	}
2871
2872      /* Compute index to start of area used for integer regs.  */
2873      if (needed_intregs)
2874	{
2875	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2876	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2877	  if (r != int_addr_rtx)
2878	    emit_move_insn (int_addr_rtx, r);
2879	}
2880      if (needed_sseregs)
2881	{
2882	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2883	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2884	  if (r != sse_addr_rtx)
2885	    emit_move_insn (sse_addr_rtx, r);
2886	}
2887      if (need_temp)
2888	{
2889	  int i;
2890	  rtx mem;
2891	  rtx x;
2892
2893	  /* Never use the memory itself, as it has the alias set.  */
2894	  x = XEXP (assign_temp (type, 0, 1, 0), 0);
2895	  mem = gen_rtx_MEM (BLKmode, x);
2896	  force_operand (x, addr_rtx);
2897	  set_mem_alias_set (mem, get_varargs_alias_set ());
2898	  set_mem_align (mem, BITS_PER_UNIT);
2899
2900	  for (i = 0; i < XVECLEN (container, 0); i++)
2901	    {
2902	      rtx slot = XVECEXP (container, 0, i);
2903	      rtx reg = XEXP (slot, 0);
2904	      enum machine_mode mode = GET_MODE (reg);
2905	      rtx src_addr;
2906	      rtx src_mem;
2907	      int src_offset;
2908	      rtx dest_mem;
2909
2910	      if (SSE_REGNO_P (REGNO (reg)))
2911		{
2912		  src_addr = sse_addr_rtx;
2913		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2914		}
2915	      else
2916		{
2917		  src_addr = int_addr_rtx;
2918		  src_offset = REGNO (reg) * 8;
2919		}
2920	      src_mem = gen_rtx_MEM (mode, src_addr);
2921	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
2922	      src_mem = adjust_address (src_mem, mode, src_offset);
2923	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2924	      emit_move_insn (dest_mem, src_mem);
2925	    }
2926	}
2927
2928      if (needed_intregs)
2929	{
2930	  t =
2931	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2932		   build_int_2 (needed_intregs * 8, 0));
2933	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2934	  TREE_SIDE_EFFECTS (t) = 1;
2935	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2936	}
2937      if (needed_sseregs)
2938	{
2939	  t =
2940	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2941		   build_int_2 (needed_sseregs * 16, 0));
2942	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2943	  TREE_SIDE_EFFECTS (t) = 1;
2944	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2945	}
2946
2947      emit_jump_insn (gen_jump (lab_over));
2948      emit_barrier ();
2949      emit_label (lab_false);
2950    }
2951
2952  /* ... otherwise out of the overflow area.  */
2953
2954  /* Care for on-stack alignment if needed.  */
2955  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2956    t = ovf;
2957  else
2958    {
2959      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2960      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2961      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2962    }
2963  t = save_expr (t);
2964
2965  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2966  if (r != addr_rtx)
2967    emit_move_insn (addr_rtx, r);
2968
2969  t =
2970    build (PLUS_EXPR, TREE_TYPE (t), t,
2971	   build_int_2 (rsize * UNITS_PER_WORD, 0));
2972  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2973  TREE_SIDE_EFFECTS (t) = 1;
2974  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2975
2976  if (container)
2977    emit_label (lab_over);
2978
2979  if (indirect_p)
2980    {
2981      r = gen_rtx_MEM (Pmode, addr_rtx);
2982      set_mem_alias_set (r, get_varargs_alias_set ());
2983      emit_move_insn (addr_rtx, r);
2984    }
2985
2986  return addr_rtx;
2987}
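
/* The RTL emitted above implements, for an integer argument, roughly
   this C sketch (illustrative only; "ap" stands for the va_list):

       if (ap->gp_offset <= 8 * (REGPARM_MAX - needed_intregs))
         {
           addr = ap->reg_save_area + ap->gp_offset;
           ap->gp_offset += 8 * needed_intregs;
         }
       else
         {
           addr = ap->overflow_arg_area;
           ap->overflow_arg_area += 8 * rsize;
         }

   with an extra round-up of overflow_arg_area first when the type needs
   more than 64-bit alignment.  */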
2988
2989/* Return nonzero if OP is either an i387 or SSE fp register.  */
2990int
2991any_fp_register_operand (op, mode)
2992     rtx op;
2993     enum machine_mode mode ATTRIBUTE_UNUSED;
2994{
2995  return ANY_FP_REG_P (op);
2996}
2997
2998/* Return nonzero if OP is an i387 fp register.  */
2999int
3000fp_register_operand (op, mode)
3001     rtx op;
3002     enum machine_mode mode ATTRIBUTE_UNUSED;
3003{
3004  return FP_REG_P (op);
3005}
3006
3007/* Return nonzero if OP is a non-fp register_operand.  */
3008int
3009register_and_not_any_fp_reg_operand (op, mode)
3010     rtx op;
3011     enum machine_mode mode;
3012{
3013  return register_operand (op, mode) && !ANY_FP_REG_P (op);
3014}
3015
3016/* Return nonzero if OP is a register operand other than an
3017   i387 fp register.  */
3018int
3019register_and_not_fp_reg_operand (op, mode)
3020     rtx op;
3021     enum machine_mode mode;
3022{
3023  return register_operand (op, mode) && !FP_REG_P (op);
3024}
3025
3026/* Return nonzero if OP is a general operand representable on x86_64.  */
3027
3028int
3029x86_64_general_operand (op, mode)
3030     rtx op;
3031     enum machine_mode mode;
3032{
3033  if (!TARGET_64BIT)
3034    return general_operand (op, mode);
3035  if (nonimmediate_operand (op, mode))
3036    return 1;
3037  return x86_64_sign_extended_value (op);
3038}
3039
3040/* Return nonzero if OP is a general operand representable on x86_64
3041   as either a sign extended or zero extended constant.  */
3042
3043int
3044x86_64_szext_general_operand (op, mode)
3045     rtx op;
3046     enum machine_mode mode;
3047{
3048  if (!TARGET_64BIT)
3049    return general_operand (op, mode);
3050  if (nonimmediate_operand (op, mode))
3051    return 1;
3052  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3053}
3054
3055/* Return nonzero if OP is a nonmemory operand representable on x86_64.  */
3056
3057int
3058x86_64_nonmemory_operand (op, mode)
3059     rtx op;
3060     enum machine_mode mode;
3061{
3062  if (!TARGET_64BIT)
3063    return nonmemory_operand (op, mode);
3064  if (register_operand (op, mode))
3065    return 1;
3066  return x86_64_sign_extended_value (op);
3067}
3068
3069/* Return nonzero if OP is a nonmemory operand acceptable by movabs patterns.  */
3070
3071int
3072x86_64_movabs_operand (op, mode)
3073     rtx op;
3074     enum machine_mode mode;
3075{
3076  if (!TARGET_64BIT || !flag_pic)
3077    return nonmemory_operand (op, mode);
3078  if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3079    return 1;
3080  if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3081    return 1;
3082  return 0;
3083}
3084
3085/* Return nonzero if OPNUM's MEM should be matched
3086   in movabs* patterns.  */
3087
3088int
3089ix86_check_movabs (insn, opnum)
3090     rtx insn;
3091     int opnum;
3092{
3093  rtx set, mem;
3094
3095  set = PATTERN (insn);
3096  if (GET_CODE (set) == PARALLEL)
3097    set = XVECEXP (set, 0, 0);
3098  if (GET_CODE (set) != SET)
3099    abort ();
3100  mem = XEXP (set, opnum);
3101  while (GET_CODE (mem) == SUBREG)
3102    mem = SUBREG_REG (mem);
3103  if (GET_CODE (mem) != MEM)
3104    abort ();
3105  return (volatile_ok || !MEM_VOLATILE_P (mem));
3106}
3107
3108/* Return nonzero if OP is a nonmemory operand representable on x86_64 as a sign or zero extended constant.  */
3109
3110int
3111x86_64_szext_nonmemory_operand (op, mode)
3112     rtx op;
3113     enum machine_mode mode;
3114{
3115  if (!TARGET_64BIT)
3116    return nonmemory_operand (op, mode);
3117  if (register_operand (op, mode))
3118    return 1;
3119  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3120}
3121
3122/* Return nonzero if OP is an immediate operand representable on x86_64.  */
3123
3124int
3125x86_64_immediate_operand (op, mode)
3126     rtx op;
3127     enum machine_mode mode;
3128{
3129  if (!TARGET_64BIT)
3130    return immediate_operand (op, mode);
3131  return x86_64_sign_extended_value (op);
3132}
3133
3134/* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended value.  */
3135
3136int
3137x86_64_zext_immediate_operand (op, mode)
3138     rtx op;
3139     enum machine_mode mode ATTRIBUTE_UNUSED;
3140{
3141  return x86_64_zero_extended_value (op);
3142}
3143
3144/* Return nonzero if OP is (const_int 1), else return zero.  */
3145
3146int
3147const_int_1_operand (op, mode)
3148     rtx op;
3149     enum machine_mode mode ATTRIBUTE_UNUSED;
3150{
3151  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3152}
3153
3154/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3155   for shift & compare patterns, as shifting by 0 does not change flags),
3156   else return zero.  */
3157
3158int
3159const_int_1_31_operand (op, mode)
3160     rtx op;
3161     enum machine_mode mode ATTRIBUTE_UNUSED;
3162{
3163  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3164}
3165
3166/* Returns 1 if OP is either a symbol reference or a sum of a symbol
3167   reference and a constant.  */
3168
3169int
3170symbolic_operand (op, mode)
3171     register rtx op;
3172     enum machine_mode mode ATTRIBUTE_UNUSED;
3173{
3174  switch (GET_CODE (op))
3175    {
3176    case SYMBOL_REF:
3177    case LABEL_REF:
3178      return 1;
3179
3180    case CONST:
3181      op = XEXP (op, 0);
3182      if (GET_CODE (op) == SYMBOL_REF
3183	  || GET_CODE (op) == LABEL_REF
3184	  || (GET_CODE (op) == UNSPEC
3185	      && (XINT (op, 1) == UNSPEC_GOT
3186		  || XINT (op, 1) == UNSPEC_GOTOFF
3187		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
3188	return 1;
3189      if (GET_CODE (op) != PLUS
3190	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
3191	return 0;
3192
3193      op = XEXP (op, 0);
3194      if (GET_CODE (op) == SYMBOL_REF
3195	  || GET_CODE (op) == LABEL_REF)
3196	return 1;
3197      /* Only @GOTOFF gets offsets.  */
3198      if (GET_CODE (op) != UNSPEC
3199	  || XINT (op, 1) != UNSPEC_GOTOFF)
3200	return 0;
3201
3202      op = XVECEXP (op, 0, 0);
3203      if (GET_CODE (op) == SYMBOL_REF
3204	  || GET_CODE (op) == LABEL_REF)
3205	return 1;
3206      return 0;
3207
3208    default:
3209      return 0;
3210    }
3211}
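
/* Added illustration (commentary not in the original source):
   symbolic_operand accepts, for example,

     (symbol_ref "foo")
     (const (plus (symbol_ref "foo") (const_int 4)))
     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))

   while an offsetted @GOT or @GOTPCREL reference falls through the
   "Only @GOTOFF gets offsets" check above and is rejected.  */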
3212
3213/* Return true if the operand contains a @GOT or @GOTOFF reference.  */
3214
3215int
3216pic_symbolic_operand (op, mode)
3217     register rtx op;
3218     enum machine_mode mode ATTRIBUTE_UNUSED;
3219{
3220  if (GET_CODE (op) != CONST)
3221    return 0;
3222  op = XEXP (op, 0);
3223  if (TARGET_64BIT)
3224    {
3225      if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3226	return 1;
3227    }
3228  else
3229    {
3230      if (GET_CODE (op) == UNSPEC)
3231	return 1;
3232      if (GET_CODE (op) != PLUS
3233	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
3234	return 0;
3235      op = XEXP (op, 0);
3236      if (GET_CODE (op) == UNSPEC)
3237	return 1;
3238    }
3239  return 0;
3240}
3241
3242/* Return true if OP is a symbolic operand that resolves locally.  */
3243
3244static int
3245local_symbolic_operand (op, mode)
3246     rtx op;
3247     enum machine_mode mode ATTRIBUTE_UNUSED;
3248{
3249  if (GET_CODE (op) == CONST
3250      && GET_CODE (XEXP (op, 0)) == PLUS
3251      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3252    op = XEXP (XEXP (op, 0), 0);
3253
3254  if (GET_CODE (op) == LABEL_REF)
3255    return 1;
3256
3257  if (GET_CODE (op) != SYMBOL_REF)
3258    return 0;
3259
3260  /* These we've been told are local by varasm and encode_section_info
3261     respectively.  */
3262  if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3263    return 1;
3264
3265  /* There is, however, a not insubstantial body of code in the rest of
3266     the compiler that assumes it can just stick the results of
3267     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
3268/* ??? This is a hack.  Should update the body of the compiler to
3269   always create a DECL and invoke targetm.encode_section_info.  */
3270  if (strncmp (XSTR (op, 0), internal_label_prefix,
3271	       internal_label_prefix_len) == 0)
3272    return 1;
3273
3274  return 0;
3275}
3276
3277/* Test for various thread-local symbols.  See ix86_encode_section_info. */
3278
3279int
3280tls_symbolic_operand (op, mode)
3281     register rtx op;
3282     enum machine_mode mode ATTRIBUTE_UNUSED;
3283{
3284  const char *symbol_str;
3285
3286  if (GET_CODE (op) != SYMBOL_REF)
3287    return 0;
3288  symbol_str = XSTR (op, 0);
3289
3290  if (symbol_str[0] != '%')
3291    return 0;
3292  return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3293}
3294
3295static int
3296tls_symbolic_operand_1 (op, kind)
3297     rtx op;
3298     enum tls_model kind;
3299{
3300  const char *symbol_str;
3301
3302  if (GET_CODE (op) != SYMBOL_REF)
3303    return 0;
3304  symbol_str = XSTR (op, 0);
3305
3306  return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3307}
3308
3309int
3310global_dynamic_symbolic_operand (op, mode)
3311     register rtx op;
3312     enum machine_mode mode ATTRIBUTE_UNUSED;
3313{
3314  return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3315}
3316
3317int
3318local_dynamic_symbolic_operand (op, mode)
3319     register rtx op;
3320     enum machine_mode mode ATTRIBUTE_UNUSED;
3321{
3322  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3323}
3324
3325int
3326initial_exec_symbolic_operand (op, mode)
3327     register rtx op;
3328     enum machine_mode mode ATTRIBUTE_UNUSED;
3329{
3330  return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3331}
3332
3333int
3334local_exec_symbolic_operand (op, mode)
3335     register rtx op;
3336     enum machine_mode mode ATTRIBUTE_UNUSED;
3337{
3338  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3339}
3340
3341/* Test for a valid operand for a call instruction.  Don't allow the
3342   arg pointer register or virtual regs since they may decay into
3343   reg + const, which the patterns can't handle.  */
3344
3345int
3346call_insn_operand (op, mode)
3347     rtx op;
3348     enum machine_mode mode ATTRIBUTE_UNUSED;
3349{
3350  /* Disallow indirect through a virtual register.  This leads to
3351     compiler aborts when trying to eliminate them.  */
3352  if (GET_CODE (op) == REG
3353      && (op == arg_pointer_rtx
3354	  || op == frame_pointer_rtx
3355	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3356	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3357    return 0;
3358
3359  /* Disallow `call 1234'.  Due to varying assembler lameness this
3360     gets either rejected or translated to `call .+1234'.  */
3361  if (GET_CODE (op) == CONST_INT)
3362    return 0;
3363
3364  /* Explicitly allow SYMBOL_REF even if pic.  */
3365  if (GET_CODE (op) == SYMBOL_REF)
3366    return 1;
3367
3368  /* Otherwise we can allow any general_operand in the address.  */
3369  return general_operand (op, Pmode);
3370}
3371
3372int
3373constant_call_address_operand (op, mode)
3374     rtx op;
3375     enum machine_mode mode ATTRIBUTE_UNUSED;
3376{
3377  if (GET_CODE (op) == CONST
3378      && GET_CODE (XEXP (op, 0)) == PLUS
3379      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3380    op = XEXP (XEXP (op, 0), 0);
3381  return GET_CODE (op) == SYMBOL_REF;
3382}
3383
3384/* Match exactly zero and one.  */
3385
3386int
3387const0_operand (op, mode)
3388     register rtx op;
3389     enum machine_mode mode;
3390{
3391  return op == CONST0_RTX (mode);
3392}
3393
3394int
3395const1_operand (op, mode)
3396     register rtx op;
3397     enum machine_mode mode ATTRIBUTE_UNUSED;
3398{
3399  return op == const1_rtx;
3400}
3401
3402/* Match 2, 4, or 8.  Used for leal multiplicands.  */
3403
3404int
3405const248_operand (op, mode)
3406     register rtx op;
3407     enum machine_mode mode ATTRIBUTE_UNUSED;
3408{
3409  return (GET_CODE (op) == CONST_INT
3410	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3411}
3412
3413/* True if this is a constant appropriate for an increment or decrement.  */
3414
3415int
3416incdec_operand (op, mode)
3417     register rtx op;
3418     enum machine_mode mode ATTRIBUTE_UNUSED;
3419{
3420  /* On Pentium 4, the inc and dec operations cause an extra dependency on
3421     the flags register, since the carry flag is not set.  */
3422  if (TARGET_PENTIUM4 && !optimize_size)
3423    return 0;
3424  return op == const1_rtx || op == constm1_rtx;
3425}
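
/* Added note (not in the original source): when this predicate rejects
   (const_int 1) on Pentium 4, the add patterns emit "addl $1, %eax"
   instead of "incl %eax"; unlike inc/dec, add rewrites the whole flags
   register and so avoids the partial flags dependency noted above.  */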
3426
3427/* Return nonzero if OP is acceptable as an operand of the DImode shift
3428   expander.  */
3429
3430int
3431shiftdi_operand (op, mode)
3432     rtx op;
3433     enum machine_mode mode ATTRIBUTE_UNUSED;
3434{
3435  if (TARGET_64BIT)
3436    return nonimmediate_operand (op, mode);
3437  else
3438    return register_operand (op, mode);
3439}
3440
3441/* Return false if this is the stack pointer, or any other fake
3442   register eliminable to the stack pointer.  Otherwise, this is
3443   a register operand.
3444
3445   This is used to prevent esp from being used as an index register,
3446   which would only happen in pathological cases.  */
3447
3448int
3449reg_no_sp_operand (op, mode)
3450     register rtx op;
3451     enum machine_mode mode;
3452{
3453  rtx t = op;
3454  if (GET_CODE (t) == SUBREG)
3455    t = SUBREG_REG (t);
3456  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3457    return 0;
3458
3459  return register_operand (op, mode);
3460}
3461
3462int
3463mmx_reg_operand (op, mode)
3464     register rtx op;
3465     enum machine_mode mode ATTRIBUTE_UNUSED;
3466{
3467  return MMX_REG_P (op);
3468}
3469
3470/* Return false if this is any eliminable register.  Otherwise
3471   general_operand.  */
3472
3473int
3474general_no_elim_operand (op, mode)
3475     register rtx op;
3476     enum machine_mode mode;
3477{
3478  rtx t = op;
3479  if (GET_CODE (t) == SUBREG)
3480    t = SUBREG_REG (t);
3481  if (t == arg_pointer_rtx || t == frame_pointer_rtx
3482      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3483      || t == virtual_stack_dynamic_rtx)
3484    return 0;
3485  if (REG_P (t)
3486      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3487      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3488    return 0;
3489
3490  return general_operand (op, mode);
3491}
3492
3493/* Return false if this is any eliminable register.  Otherwise
3494   register_operand or const_int.  */
3495
3496int
3497nonmemory_no_elim_operand (op, mode)
3498     register rtx op;
3499     enum machine_mode mode;
3500{
3501  rtx t = op;
3502  if (GET_CODE (t) == SUBREG)
3503    t = SUBREG_REG (t);
3504  if (t == arg_pointer_rtx || t == frame_pointer_rtx
3505      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3506      || t == virtual_stack_dynamic_rtx)
3507    return 0;
3508
3509  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3510}
3511
3512/* Return false if this is any eliminable register or stack register,
3513   otherwise work like register_operand.  */
3514
3515int
3516index_register_operand (op, mode)
3517     register rtx op;
3518     enum machine_mode mode;
3519{
3520  rtx t = op;
3521  if (GET_CODE (t) == SUBREG)
3522    t = SUBREG_REG (t);
3523  if (!REG_P (t))
3524    return 0;
3525  if (t == arg_pointer_rtx
3526      || t == frame_pointer_rtx
3527      || t == virtual_incoming_args_rtx
3528      || t == virtual_stack_vars_rtx
3529      || t == virtual_stack_dynamic_rtx
3530      || REGNO (t) == STACK_POINTER_REGNUM)
3531    return 0;
3532
3533  return general_operand (op, mode);
3534}
3535
3536/* Return true if op is a Q_REGS class register.  */
3537
3538int
3539q_regs_operand (op, mode)
3540     register rtx op;
3541     enum machine_mode mode;
3542{
3543  if (mode != VOIDmode && GET_MODE (op) != mode)
3544    return 0;
3545  if (GET_CODE (op) == SUBREG)
3546    op = SUBREG_REG (op);
3547  return ANY_QI_REG_P (op);
3548}
3549
3550/* Return true if op is a NON_Q_REGS class register.  */
3551
3552int
3553non_q_regs_operand (op, mode)
3554     register rtx op;
3555     enum machine_mode mode;
3556{
3557  if (mode != VOIDmode && GET_MODE (op) != mode)
3558    return 0;
3559  if (GET_CODE (op) == SUBREG)
3560    op = SUBREG_REG (op);
3561  return NON_QI_REG_P (op);
3562}
3563
3564/* Return 1 when OP is an operand acceptable for a standard SSE move.  */
3565int
3566vector_move_operand (op, mode)
3567     rtx op;
3568     enum machine_mode mode;
3569{
3570  if (nonimmediate_operand (op, mode))
3571    return 1;
3572  if (GET_MODE (op) != mode && mode != VOIDmode)
3573    return 0;
3574  return (op == CONST0_RTX (GET_MODE (op)));
3575}
3576
3577/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3578   insns.  */
3579int
3580sse_comparison_operator (op, mode)
3581     rtx op;
3582     enum machine_mode mode ATTRIBUTE_UNUSED;
3583{
3584  enum rtx_code code = GET_CODE (op);
3585  switch (code)
3586    {
3587    /* Operations supported directly.  */
3588    case EQ:
3589    case LT:
3590    case LE:
3591    case UNORDERED:
3592    case NE:
3593    case UNGE:
3594    case UNGT:
3595    case ORDERED:
3596      return 1;
3597    /* These are equivalent to the ones above for non-IEEE comparisons.  */
3598    case UNEQ:
3599    case UNLT:
3600    case UNLE:
3601    case LTGT:
3602    case GE:
3603    case GT:
3604      return !TARGET_IEEE_FP;
3605    default:
3606      return 0;
3607    }
3608}
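
/* Added note (not in the original source): the eight directly supported
   codes map onto the cmpss/cmpps immediates eq, lt, le, unord, neq,
   nlt (UNGE), nle (UNGT) and ord; the second group is accepted only
   when !TARGET_IEEE_FP, i.e. when we may ignore NaN ordering.  */
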
3609/* Return 1 if OP is a valid comparison operator in a valid mode.  */
3610int
3611ix86_comparison_operator (op, mode)
3612     register rtx op;
3613     enum machine_mode mode;
3614{
3615  enum machine_mode inmode;
3616  enum rtx_code code = GET_CODE (op);
3617  if (mode != VOIDmode && GET_MODE (op) != mode)
3618    return 0;
3619  if (GET_RTX_CLASS (code) != '<')
3620    return 0;
3621  inmode = GET_MODE (XEXP (op, 0));
3622
3623  if (inmode == CCFPmode || inmode == CCFPUmode)
3624    {
3625      enum rtx_code second_code, bypass_code;
3626      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3627      return (bypass_code == NIL && second_code == NIL);
3628    }
3629  switch (code)
3630    {
3631    case EQ: case NE:
3632      return 1;
3633    case LT: case GE:
3634      if (inmode == CCmode || inmode == CCGCmode
3635	  || inmode == CCGOCmode || inmode == CCNOmode)
3636	return 1;
3637      return 0;
3638    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3639      if (inmode == CCmode)
3640	return 1;
3641      return 0;
3642    case GT: case LE:
3643      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3644	return 1;
3645      return 0;
3646    default:
3647      return 0;
3648    }
3649}
3650
3651/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */
3652
3653int
3654fcmov_comparison_operator (op, mode)
3655    register rtx op;
3656    enum machine_mode mode;
3657{
3658  enum machine_mode inmode;
3659  enum rtx_code code = GET_CODE (op);
3660  if (mode != VOIDmode && GET_MODE (op) != mode)
3661    return 0;
3662  if (GET_RTX_CLASS (code) != '<')
3663    return 0;
3664  inmode = GET_MODE (XEXP (op, 0));
3665  if (inmode == CCFPmode || inmode == CCFPUmode)
3666    {
3667      enum rtx_code second_code, bypass_code;
3668      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3669      if (bypass_code != NIL || second_code != NIL)
3670	return 0;
3671      code = ix86_fp_compare_code_to_integer (code);
3672    }
3673  /* The i387 supports only a limited set of condition codes.  */
3674  switch (code)
3675    {
3676    case LTU: case GTU: case LEU: case GEU:
3677      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3678	return 1;
3679      return 0;
3680    case ORDERED: case UNORDERED:
3681    case EQ: case NE:
3682      return 1;
3683    default:
3684      return 0;
3685    }
3686}
3687
3688/* Return 1 if OP is a binary operator that can be promoted to a wider mode.  */
3689
3690int
3691promotable_binary_operator (op, mode)
3692     register rtx op;
3693     enum machine_mode mode ATTRIBUTE_UNUSED;
3694{
3695  switch (GET_CODE (op))
3696    {
3697    case MULT:
3698      /* Modern CPUs have the same latency for HImode and SImode multiplies,
3699         but the 386 and 486 do HImode multiplies faster.  */
3700      return ix86_cpu > PROCESSOR_I486;
3701    case PLUS:
3702    case AND:
3703    case IOR:
3704    case XOR:
3705    case ASHIFT:
3706      return 1;
3707    default:
3708      return 0;
3709    }
3710}
3711
3712/* Nearly general operand, but accept any const_double, since we wish
3713   to be able to drop them into memory rather than have them get pulled
3714   into registers.  */
3715
3716int
3717cmp_fp_expander_operand (op, mode)
3718     register rtx op;
3719     enum machine_mode mode;
3720{
3721  if (mode != VOIDmode && mode != GET_MODE (op))
3722    return 0;
3723  if (GET_CODE (op) == CONST_DOUBLE)
3724    return 1;
3725  return general_operand (op, mode);
3726}
3727
3728/* Match an SImode or HImode register for a zero_extract.  */
3729
3730int
3731ext_register_operand (op, mode)
3732     register rtx op;
3733     enum machine_mode mode ATTRIBUTE_UNUSED;
3734{
3735  int regno;
3736  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3737      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3738    return 0;
3739
3740  if (!register_operand (op, VOIDmode))
3741    return 0;
3742
3743  /* Be careful to accept only registers having upper parts.  */
3744  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3745  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3746}
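
/* Added note (not in the original source): in this port's register
   numbering, hard regs 0..3 are %eax, %edx, %ecx and %ebx - the only
   registers with addressable high-byte parts (%ah and friends) - which
   is what the "regno < 4" test above accepts; pseudos pass too, on the
   assumption that constraints narrow them to Q_REGS later.  */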
3747
3748/* Return 1 if this is a valid binary floating-point operation.
3749   OP is the expression matched, and MODE is its mode.  */
3750
3751int
3752binary_fp_operator (op, mode)
3753    register rtx op;
3754    enum machine_mode mode;
3755{
3756  if (mode != VOIDmode && mode != GET_MODE (op))
3757    return 0;
3758
3759  switch (GET_CODE (op))
3760    {
3761    case PLUS:
3762    case MINUS:
3763    case MULT:
3764    case DIV:
3765      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3766
3767    default:
3768      return 0;
3769    }
3770}
3771
3772int
3773mult_operator (op, mode)
3774    register rtx op;
3775    enum machine_mode mode ATTRIBUTE_UNUSED;
3776{
3777  return GET_CODE (op) == MULT;
3778}
3779
3780int
3781div_operator (op, mode)
3782    register rtx op;
3783    enum machine_mode mode ATTRIBUTE_UNUSED;
3784{
3785  return GET_CODE (op) == DIV;
3786}
3787
3788int
3789arith_or_logical_operator (op, mode)
3790      rtx op;
3791      enum machine_mode mode;
3792{
3793  return ((mode == VOIDmode || GET_MODE (op) == mode)
3794          && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3795              || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3796}
3797
3798/* Returns 1 if OP is a memory operand with a displacement.  */
3799
3800int
3801memory_displacement_operand (op, mode)
3802     register rtx op;
3803     enum machine_mode mode;
3804{
3805  struct ix86_address parts;
3806
3807  if (! memory_operand (op, mode))
3808    return 0;
3809
3810  if (! ix86_decompose_address (XEXP (op, 0), &parts))
3811    abort ();
3812
3813  return parts.disp != NULL_RTX;
3814}
3815
3816/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3817   re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3818
3819   ??? It seems likely that this will only work because cmpsi is an
3820   expander, and no actual insns use this.  */
3821
3822int
3823cmpsi_operand (op, mode)
3824      rtx op;
3825      enum machine_mode mode;
3826{
3827  if (nonimmediate_operand (op, mode))
3828    return 1;
3829
3830  if (GET_CODE (op) == AND
3831      && GET_MODE (op) == SImode
3832      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3833      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3834      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3835      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3836      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3837      && GET_CODE (XEXP (op, 1)) == CONST_INT)
3838    return 1;
3839
3840  return 0;
3841}
3842
3843/* Returns 1 if OP is a memory operand that cannot be represented by
3844   the ModR/M byte alone (i.e. one needing extra address bytes).  */
3845
3846int
3847long_memory_operand (op, mode)
3848     register rtx op;
3849     enum machine_mode mode;
3850{
3851  if (! memory_operand (op, mode))
3852    return 0;
3853
3854  return memory_address_length (op) != 0;
3855}
3856
3857/* Return nonzero if the rtx is known to be aligned.  */
3858
3859int
3860aligned_operand (op, mode)
3861     rtx op;
3862     enum machine_mode mode;
3863{
3864  struct ix86_address parts;
3865
3866  if (!general_operand (op, mode))
3867    return 0;
3868
3869  /* Registers and immediate operands are always "aligned".  */
3870  if (GET_CODE (op) != MEM)
3871    return 1;
3872
3873  /* Don't even try to do any aligned optimizations with volatiles.  */
3874  if (MEM_VOLATILE_P (op))
3875    return 0;
3876
3877  op = XEXP (op, 0);
3878
3879  /* Pushes and pops are only valid on the stack pointer.  */
3880  if (GET_CODE (op) == PRE_DEC
3881      || GET_CODE (op) == POST_INC)
3882    return 1;
3883
3884  /* Decode the address.  */
3885  if (! ix86_decompose_address (op, &parts))
3886    abort ();
3887
3888  if (parts.base && GET_CODE (parts.base) == SUBREG)
3889    parts.base = SUBREG_REG (parts.base);
3890  if (parts.index && GET_CODE (parts.index) == SUBREG)
3891    parts.index = SUBREG_REG (parts.index);
3892
3893  /* Look for some component that isn't known to be aligned.  */
3894  if (parts.index)
3895    {
3896      if (parts.scale < 4
3897	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3898	return 0;
3899    }
3900  if (parts.base)
3901    {
3902      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3903	return 0;
3904    }
3905  if (parts.disp)
3906    {
3907      if (GET_CODE (parts.disp) != CONST_INT
3908	  || (INTVAL (parts.disp) & 3) != 0)
3909	return 0;
3910    }
3911
3912  /* Didn't find one -- this must be an aligned address.  */
3913  return 1;
3914}
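
/* Worked example (added commentary, not in the original source): for
   (mem (plus (reg %ebp) (const_int 8))) the displacement 8 passes the
   (INTVAL & 3) == 0 test, so the operand is aligned provided %ebp
   carries REGNO_POINTER_ALIGN >= 32; a displacement of 6 would fail
   the test above and 0 would be returned.  */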
3915
3916/* Return true if the constant is something that can be loaded with
3917   a special instruction.  Only handle 0.0 and 1.0; others are less
3918   worthwhile.  */
3919
3920int
3921standard_80387_constant_p (x)
3922     rtx x;
3923{
3924  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3925    return -1;
3926  /* Note that the 80387 supports other constants, such as pi, that we
3927     should support too.  On some machines these are much slower to load
3928     as a standard constant than to load from doubles in memory.  */
3929  if (x == CONST0_RTX (GET_MODE (x)))
3930    return 1;
3931  if (x == CONST1_RTX (GET_MODE (x)))
3932    return 2;
3933  return 0;
3934}
3935
3936/* Return 1 if X is an FP constant we can load into an SSE register
3937   without using memory.  */
3938int
3939standard_sse_constant_p (x)
3940     rtx x;
3941{
3942  if (x == const0_rtx)
3943    return 1;
3944  return (x == CONST0_RTX (GET_MODE (x)));
3945}
3946
3947/* Returns 1 if OP contains a symbol reference.  */
3948
3949int
3950symbolic_reference_mentioned_p (op)
3951     rtx op;
3952{
3953  register const char *fmt;
3954  register int i;
3955
3956  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3957    return 1;
3958
3959  fmt = GET_RTX_FORMAT (GET_CODE (op));
3960  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3961    {
3962      if (fmt[i] == 'E')
3963	{
3964	  register int j;
3965
3966	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3967	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3968	      return 1;
3969	}
3970
3971      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3972	return 1;
3973    }
3974
3975  return 0;
3976}
3977
3978/* Return 1 if it is appropriate to emit `ret' instructions in the
3979   body of a function.  Do this only if the epilogue is simple, needing a
3980   couple of insns.  Prior to reloading, we can't tell how many registers
3981   must be saved, so return 0 then.  Return 0 if there is no frame
3982   marker to de-allocate.
3983
3984   If NON_SAVING_SETJMP is defined and true, then it is not possible
3985   for the epilogue to be simple, so return 0.  This is a special case
3986   since NON_SAVING_SETJMP will not cause regs_ever_live to change
3987   until final, but jump_optimize may need to know sooner if a
3988   `return' is OK.  */
3989
3990int
3991ix86_can_use_return_insn_p ()
3992{
3993  struct ix86_frame frame;
3994
3995#ifdef NON_SAVING_SETJMP
3996  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3997    return 0;
3998#endif
3999
4000  if (! reload_completed || frame_pointer_needed)
4001    return 0;
4002
4003  /* Don't allow more than 32768 bytes of popped args, since that's
4004     all we can do with one instruction.  */
4005  if (current_function_pops_args
4006      && current_function_args_size >= 32768)
4007    return 0;
4008
4009  ix86_compute_frame_layout (&frame);
4010  return frame.to_allocate == 0 && frame.nregs == 0;
4011}
4012
4013/* Return 1 if VALUE can be stored in the sign extended immediate field.  */
4014int
4015x86_64_sign_extended_value (value)
4016     rtx value;
4017{
4018  switch (GET_CODE (value))
4019    {
4020      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4021         to be at least 32, and thus all acceptable constants are
4022         represented as CONST_INTs.  */
4023      case CONST_INT:
4024	if (HOST_BITS_PER_WIDE_INT == 32)
4025	  return 1;
4026	else
4027	  {
4028	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4029	    return trunc_int_for_mode (val, SImode) == val;
4030	  }
4031	break;
4032
4033      /* For certain code models, the symbolic references are known to fit;
4034	 in the CM_SMALL_PIC model we know they fit if local to the shared
4035	 library.  Don't count TLS SYMBOL_REFs here, since they should fit
4036	 only inside the UNSPECs handled below.  */
4037      case SYMBOL_REF:
4038	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4039
4040      /* For certain code models, the code is near as well.  */
4041      case LABEL_REF:
4042	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4043		|| ix86_cmodel == CM_KERNEL);
4044
4045      /* We also may accept the offsetted memory references in certain special
4046         cases.  */
4047      case CONST:
4048	if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4049	  switch (XINT (XEXP (value, 0), 1))
4050	    {
4051	    case UNSPEC_GOTPCREL:
4052	    case UNSPEC_DTPOFF:
4053	    case UNSPEC_GOTNTPOFF:
4054	    case UNSPEC_NTPOFF:
4055	      return 1;
4056	    default:
4057	      break;
4058	    }
4059	if (GET_CODE (XEXP (value, 0)) == PLUS)
4060	  {
4061	    rtx op1 = XEXP (XEXP (value, 0), 0);
4062	    rtx op2 = XEXP (XEXP (value, 0), 1);
4063	    HOST_WIDE_INT offset;
4064
4065	    if (ix86_cmodel == CM_LARGE)
4066	      return 0;
4067	    if (GET_CODE (op2) != CONST_INT)
4068	      return 0;
4069	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
4070	    switch (GET_CODE (op1))
4071	      {
4072		case SYMBOL_REF:
4073		  /* For CM_SMALL assume that the latest object is 16MB below
4074		     the end of the 31-bit boundary.  We may also accept pretty
4075		     large negative constants, knowing that all objects are
4076		     in the positive half of the address space.  */
4077		  if (ix86_cmodel == CM_SMALL
4078		      && offset < 16*1024*1024
4079		      && trunc_int_for_mode (offset, SImode) == offset)
4080		    return 1;
4081		  /* For CM_KERNEL we know that all objects reside in the
4082		     negative half of the 32-bit address space.  We must not
4083		     accept negative offsets, since they may push us just past
4084		     the boundary, while we may accept pretty large positive ones.  */
4085		  if (ix86_cmodel == CM_KERNEL
4086		      && offset > 0
4087		      && trunc_int_for_mode (offset, SImode) == offset)
4088		    return 1;
4089		  break;
4090		case LABEL_REF:
4091		  /* These conditions are similar to SYMBOL_REF ones, just the
4092		     constraints for code models differ.  */
4093		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4094		      && offset < 16*1024*1024
4095		      && trunc_int_for_mode (offset, SImode) == offset)
4096		    return 1;
4097		  if (ix86_cmodel == CM_KERNEL
4098		      && offset > 0
4099		      && trunc_int_for_mode (offset, SImode) == offset)
4100		    return 1;
4101		  break;
4102		case UNSPEC:
4103		  switch (XINT (op1, 1))
4104		    {
4105		    case UNSPEC_DTPOFF:
4106		    case UNSPEC_NTPOFF:
4107		      if (offset > 0
4108			  && trunc_int_for_mode (offset, SImode) == offset)
4109			return 1;
4110		    }
4111		  break;
4112		default:
4113		  return 0;
4114	      }
4115	  }
4116	return 0;
4117      default:
4118	return 0;
4119    }
4120}
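
/* Added examples (not in the original source): (const_int 0x7fffffff)
   fits the sign-extended 32-bit immediate field, while
   (const_int 0x80000000) does not - sign-extension would set the upper
   32 bits - so the latter needs a movabs or a zero-extending move.  */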
4121
4122/* Return 1 if VALUE can be stored in the zero extended immediate field.  */
4123int
4124x86_64_zero_extended_value (value)
4125     rtx value;
4126{
4127  switch (GET_CODE (value))
4128    {
4129      case CONST_DOUBLE:
4130	if (HOST_BITS_PER_WIDE_INT == 32)
4131	  return  (GET_MODE (value) == VOIDmode
4132		   && !CONST_DOUBLE_HIGH (value));
4133	else
4134	  return 0;
4135      case CONST_INT:
4136	if (HOST_BITS_PER_WIDE_INT == 32)
4137	  return INTVAL (value) >= 0;
4138	else
4139	  return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4140	break;
4141
4142      /* For certain code models, the symbolic references are known to fit.  */
4143      case SYMBOL_REF:
4144	return ix86_cmodel == CM_SMALL;
4145
4146      /* For certain code models, the code is near as well.  */
4147      case LABEL_REF:
4148	return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4149
4150      /* We also may accept the offsetted memory references in certain special
4151         cases.  */
4152      case CONST:
4153	if (GET_CODE (XEXP (value, 0)) == PLUS)
4154	  {
4155	    rtx op1 = XEXP (XEXP (value, 0), 0);
4156	    rtx op2 = XEXP (XEXP (value, 0), 1);
4157
4158	    if (ix86_cmodel == CM_LARGE)
4159	      return 0;
4160	    switch (GET_CODE (op1))
4161	      {
4162		case SYMBOL_REF:
4163		    return 0;
4164		  /* For the small code model we may accept pretty large
4165		     positive offsets, since one bit is available for free.
4166		     Negative offsets are limited by the size of the NULL
4167		     pointer area specified by the ABI.  */
4168		  if (ix86_cmodel == CM_SMALL
4169		      && GET_CODE (op2) == CONST_INT
4170		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4171		      && (trunc_int_for_mode (INTVAL (op2), SImode)
4172			  == INTVAL (op2)))
4173		    return 1;
4174	          /* ??? For the kernel, we may accept adjustment of
4175		     -0x10000000, since we know that it will just convert
4176		     negative address space to positive, but perhaps this
4177		     is not worthwhile.  */
4178		  break;
4179		case LABEL_REF:
4180		  /* These conditions are similar to SYMBOL_REF ones, just the
4181		     constraints for code models differ.  */
4182		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4183		      && GET_CODE (op2) == CONST_INT
4184		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4185		      && (trunc_int_for_mode (INTVAL (op2), SImode)
4186			  == INTVAL (op2)))
4187		    return 1;
4188		  break;
4189		default:
4190		  return 0;
4191	      }
4192	  }
4193	return 0;
4194      default:
4195	return 0;
4196    }
4197}
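
/* Added example (not in the original source): (const_int 0xffffffff)
   is accepted - "movl $-1, %eax" zero-extends into the full %rax -
   while any CONST_INT with bits set above bit 31 is rejected.  */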
4198
4199/* Value should be nonzero if functions must have frame pointers.
4200   Zero means the frame pointer need not be set up (and parms may
4201   be accessed via the stack pointer) in functions that seem suitable.  */
4202
4203int
4204ix86_frame_pointer_required ()
4205{
4206  /* If we accessed previous frames, then the generated code expects
4207     to be able to access the saved ebp value in our frame.  */
4208  if (cfun->machine->accesses_prev_frame)
4209    return 1;
4210
4211  /* Several x86 OSes need a frame pointer for other reasons,
4212     usually pertaining to setjmp.  */
4213  if (SUBTARGET_FRAME_POINTER_REQUIRED)
4214    return 1;
4215
4216  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4217     the frame pointer by default.  Turn it back on now if we've not
4218     got a leaf function.  */
4219  if (TARGET_OMIT_LEAF_FRAME_POINTER
4220      && (!current_function_is_leaf))
4221    return 1;
4222
4223  if (current_function_profile)
4224    return 1;
4225
4226  return 0;
4227}
4228
4229/* Record that the current function accesses previous call frames.  */
4230
4231void
4232ix86_setup_frame_addresses ()
4233{
4234  cfun->machine->accesses_prev_frame = 1;
4235}
4236
4237#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4238# define USE_HIDDEN_LINKONCE 1
4239#else
4240# define USE_HIDDEN_LINKONCE 0
4241#endif
4242
4243static int pic_labels_used;
4244
4245/* Fills in the label name that should be used for a pc thunk for
4246   the given register.  */
4247
4248static void
4249get_pc_thunk_name (name, regno)
4250     char name[32];
4251     unsigned int regno;
4252{
4253  if (USE_HIDDEN_LINKONCE)
4254    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4255  else
4256    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4257}
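
/* Added example (not in the original source): with USE_HIDDEN_LINKONCE
   the thunk for %ebx (regno 3, reg_names[3] == "bx") is named
   "__i686.get_pc_thunk.bx"; otherwise an internal "LPR" label is
   generated instead.  */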
4258
4259
4260/* This function emits the -fpic pc thunks: each loads its register
4261   with the return address of the caller and then returns.  */
4262
4263void
4264ix86_asm_file_end (file)
4265     FILE *file;
4266{
4267  rtx xops[2];
4268  int regno;
4269
4270  for (regno = 0; regno < 8; ++regno)
4271    {
4272      char name[32];
4273
4274      if (! ((pic_labels_used >> regno) & 1))
4275	continue;
4276
4277      get_pc_thunk_name (name, regno);
4278
4279      if (USE_HIDDEN_LINKONCE)
4280	{
4281	  tree decl;
4282
4283	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
4284			     error_mark_node);
4285	  TREE_PUBLIC (decl) = 1;
4286	  TREE_STATIC (decl) = 1;
4287	  DECL_ONE_ONLY (decl) = 1;
4288
4289	  (*targetm.asm_out.unique_section) (decl, 0);
4290	  named_section (decl, NULL, 0);
4291
4292	  (*targetm.asm_out.globalize_label) (file, name);
4293	  fputs ("\t.hidden\t", file);
4294	  assemble_name (file, name);
4295	  fputc ('\n', file);
4296	  ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4297	}
4298      else
4299	{
4300	  text_section ();
4301	  ASM_OUTPUT_LABEL (file, name);
4302	}
4303
4304      xops[0] = gen_rtx_REG (SImode, regno);
4305      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4306      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4307      output_asm_insn ("ret", xops);
4308    }
4309}
4310
4311/* Emit code for the SET_GOT patterns.  */
4312
4313const char *
4314output_set_got (dest)
4315     rtx dest;
4316{
4317  rtx xops[3];
4318
4319  xops[0] = dest;
4320  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4321
4322  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4323    {
4324      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4325
4326      if (!flag_pic)
4327	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4328      else
4329	output_asm_insn ("call\t%a2", xops);
4330
4331#if TARGET_MACHO
4332      /* Output the "canonical" label name ("Lxx$pb") here too.  This
4333         is what will be referred to by the Mach-O PIC subsystem.  */
4334      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4335#endif
4336      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4337				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4338
4339      if (flag_pic)
4340	output_asm_insn ("pop{l}\t%0", xops);
4341    }
4342  else
4343    {
4344      char name[32];
4345      get_pc_thunk_name (name, REGNO (dest));
4346      pic_labels_used |= 1 << REGNO (dest);
4347
4348      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4349      xops[2] = gen_rtx_MEM (QImode, xops[2]);
4350      output_asm_insn ("call\t%X2", xops);
4351    }
4352
4353  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4354    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4355  else if (!TARGET_MACHO)
4356    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4357
4358  return "";
4359}
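
/* Added illustration (not in the original source): without deep branch
   prediction the -fpic sequence above comes out roughly as

	call	.L2
     .L2:
	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   while with deep branch prediction the call/pop pair is replaced by a
   call to the register's __i686.get_pc_thunk thunk.  */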
4360
4361/* Generate a "push" pattern for input ARG.  */
4362
4363static rtx
4364gen_push (arg)
4365     rtx arg;
4366{
4367  return gen_rtx_SET (VOIDmode,
4368		      gen_rtx_MEM (Pmode,
4369				   gen_rtx_PRE_DEC (Pmode,
4370						    stack_pointer_rtx)),
4371		      arg);
4372}
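
/* Added example (not in the original source):
   gen_push (hard_frame_pointer_rtx) builds
     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI bp))
   on 32-bit targets, which the move patterns emit as "pushl %ebp".  */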
4373
4374/* Return >= 0 if there is an unused call-clobbered register available
4375   for the entire function.  */
4376
4377static unsigned int
4378ix86_select_alt_pic_regnum ()
4379{
4380  if (current_function_is_leaf && !current_function_profile)
4381    {
4382      int i;
4383      for (i = 2; i >= 0; --i)
4384        if (!regs_ever_live[i])
4385	  return i;
4386    }
4387
4388  return INVALID_REGNUM;
4389}
4390
4391/* Return 1 if we need to save REGNO.  */
4392static int
4393ix86_save_reg (regno, maybe_eh_return)
4394     unsigned int regno;
4395     int maybe_eh_return;
4396{
4397  if (pic_offset_table_rtx
4398      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4399      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4400	  || current_function_profile
4401	  || current_function_calls_eh_return
4402	  || current_function_uses_const_pool))
4403    {
4404      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4405	return 0;
4406      return 1;
4407    }
4408
4409  if (current_function_calls_eh_return && maybe_eh_return)
4410    {
4411      unsigned i;
4412      for (i = 0; ; i++)
4413	{
4414	  unsigned test = EH_RETURN_DATA_REGNO (i);
4415	  if (test == INVALID_REGNUM)
4416	    break;
4417	  if (test == regno)
4418	    return 1;
4419	}
4420    }
4421
4422  return (regs_ever_live[regno]
4423	  && !call_used_regs[regno]
4424	  && !fixed_regs[regno]
4425	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4426}
4427
4428/* Return number of registers to be saved on the stack.  */
4429
4430static int
4431ix86_nsaved_regs ()
4432{
4433  int nregs = 0;
4434  int regno;
4435
4436  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4437    if (ix86_save_reg (regno, true))
4438      nregs++;
4439  return nregs;
4440}
4441
4442/* Return the offset between two registers, one to be eliminated, and the other
4443   its replacement, at the start of a routine.  */
4444
4445HOST_WIDE_INT
4446ix86_initial_elimination_offset (from, to)
4447     int from;
4448     int to;
4449{
4450  struct ix86_frame frame;
4451  ix86_compute_frame_layout (&frame);
4452
4453  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4454    return frame.hard_frame_pointer_offset;
4455  else if (from == FRAME_POINTER_REGNUM
4456	   && to == HARD_FRAME_POINTER_REGNUM)
4457    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4458  else
4459    {
4460      if (to != STACK_POINTER_REGNUM)
4461	abort ();
4462      else if (from == ARG_POINTER_REGNUM)
4463	return frame.stack_pointer_offset;
4464      else if (from != FRAME_POINTER_REGNUM)
4465	abort ();
4466      else
4467	return frame.stack_pointer_offset - frame.frame_pointer_offset;
4468    }
4469}
4470
4471/* Fill in the ix86_frame structure for the currently compiled function.  */
4472
4473static void
4474ix86_compute_frame_layout (frame)
4475     struct ix86_frame *frame;
4476{
4477  HOST_WIDE_INT total_size;
4478  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4479  int offset;
4480  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4481  HOST_WIDE_INT size = get_frame_size ();
4482
4483  frame->nregs = ix86_nsaved_regs ();
4484  total_size = size;
4485
4486  /* Skip return address and saved base pointer.  */
4487  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4488
4489  frame->hard_frame_pointer_offset = offset;
4490
4491  /* Do some sanity checking of stack_alignment_needed and
4492     preferred_alignment, since the i386 port is the only one using
4493     these features, and they may break easily.  */
4494
4495  if (size && !stack_alignment_needed)
4496    abort ();
4497  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4498    abort ();
4499  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4500    abort ();
4501  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4502    abort ();
4503
4504  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4505    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4506
4507  /* Register save area */
4508  offset += frame->nregs * UNITS_PER_WORD;
4509
4510  /* Va-arg area */
4511  if (ix86_save_varrargs_registers)
4512    {
4513      offset += X86_64_VARARGS_SIZE;
4514      frame->va_arg_size = X86_64_VARARGS_SIZE;
4515    }
4516  else
4517    frame->va_arg_size = 0;
4518
4519  /* Align start of frame for local function.  */
4520  frame->padding1 = ((offset + stack_alignment_needed - 1)
4521		     & -stack_alignment_needed) - offset;
4522
4523  offset += frame->padding1;
4524
4525  /* Frame pointer points here.  */
4526  frame->frame_pointer_offset = offset;
4527
4528  offset += size;
4529
4530  /* Add outgoing arguments area.  Can be skipped if we eliminated
4531     all the function calls as dead code.  */
4532  if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4533    {
4534      offset += current_function_outgoing_args_size;
4535      frame->outgoing_arguments_size = current_function_outgoing_args_size;
4536    }
4537  else
4538    frame->outgoing_arguments_size = 0;
4539
4540  /* Align stack boundary.  Only needed if we're calling another function
4541     or using alloca.  */
4542  if (!current_function_is_leaf || current_function_calls_alloca)
4543    frame->padding2 = ((offset + preferred_alignment - 1)
4544		       & -preferred_alignment) - offset;
4545  else
4546    frame->padding2 = 0;
4547
4548  offset += frame->padding2;
4549
4550  /* We've reached the end of the stack frame.  */
4551  frame->stack_pointer_offset = offset;
4552
4553  /* The size the prologue needs to allocate.  */
4554  frame->to_allocate =
4555    (size + frame->padding1 + frame->padding2
4556     + frame->outgoing_arguments_size + frame->va_arg_size);
4557
4558  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4559      && current_function_is_leaf)
4560    {
4561      frame->red_zone_size = frame->to_allocate;
4562      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4563	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4564    }
4565  else
4566    frame->red_zone_size = 0;
4567  frame->to_allocate -= frame->red_zone_size;
4568  frame->stack_pointer_offset -= frame->red_zone_size;
4569#if 0
4570  fprintf (stderr, "nregs: %i\n", frame->nregs);
4571  fprintf (stderr, "size: %i\n", size);
4572  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4573  fprintf (stderr, "padding1: %i\n", frame->padding1);
4574  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4575  fprintf (stderr, "padding2: %i\n", frame->padding2);
4576  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4577  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4578  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4579  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4580	   frame->hard_frame_pointer_offset);
4581  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4582#endif
4583}
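
/* Added sketch (not in the original source) of the frame laid out
   above, from higher to lower addresses:

     return address
     saved %ebp (if frame_pointer_needed)   <- hard_frame_pointer_offset
     saved registers (nregs words)
     va-arg save area (64-bit varargs only)
     padding1                               <- frame_pointer_offset
     local variables (size bytes)
     outgoing arguments
     padding2                               <- stack_pointer_offset,
                                               less any red zone  */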
4584
4585/* Emit code to save registers in the prologue.  */
4586
4587static void
4588ix86_emit_save_regs ()
4589{
4590  register int regno;
4591  rtx insn;
4592
4593  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4594    if (ix86_save_reg (regno, true))
4595      {
4596	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4597	RTX_FRAME_RELATED_P (insn) = 1;
4598      }
4599}
4600
4601/* Emit code to save registers using MOV insns.  The first register
4602   is saved to POINTER + OFFSET.  */
4603static void
4604ix86_emit_save_regs_using_mov (pointer, offset)
4605     rtx pointer;
4606     HOST_WIDE_INT offset;
4607{
4608  int regno;
4609  rtx insn;
4610
4611  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4612    if (ix86_save_reg (regno, true))
4613      {
4614	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4615					       Pmode, offset),
4616			       gen_rtx_REG (Pmode, regno));
4617	RTX_FRAME_RELATED_P (insn) = 1;
4618	offset += UNITS_PER_WORD;
4619      }
4620}
4621
4622/* Expand the prologue into a bunch of separate insns.  */
4623
4624void
4625ix86_expand_prologue ()
4626{
4627  rtx insn;
4628  bool pic_reg_used;
4629  struct ix86_frame frame;
4630  int use_mov = 0;
4631  HOST_WIDE_INT allocate;
4632
4633  if (!optimize_size)
4634    {
4635      use_fast_prologue_epilogue
4636	 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4637      if (TARGET_PROLOGUE_USING_MOVE)
4638        use_mov = use_fast_prologue_epilogue;
4639    }
4640  ix86_compute_frame_layout (&frame);
4641
4642  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
4643     slower on all targets.  Also sdb doesn't like it.  */
4644
4645  if (frame_pointer_needed)
4646    {
4647      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4648      RTX_FRAME_RELATED_P (insn) = 1;
4649
4650      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4651      RTX_FRAME_RELATED_P (insn) = 1;
4652    }
4653
4654  allocate = frame.to_allocate;
4655  /* When we are dealing with only a single register and an empty frame,
4656     push is equivalent to the mov+add sequence.  */
4657  if (allocate == 0 && frame.nregs <= 1)
4658    use_mov = 0;
4659
4660  if (!use_mov)
4661    ix86_emit_save_regs ();
4662  else
4663    allocate += frame.nregs * UNITS_PER_WORD;
4664
4665  if (allocate == 0)
4666    ;
4667  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4668    {
4669      insn = emit_insn (gen_pro_epilogue_adjust_stack
4670			(stack_pointer_rtx, stack_pointer_rtx,
4671			 GEN_INT (-allocate)));
4672      RTX_FRAME_RELATED_P (insn) = 1;
4673    }
4674  else
4675    {
4676      /* ??? Is this only valid for Win32?  */
4677
4678      rtx arg0, sym;
4679
4680      if (TARGET_64BIT)
4681	abort ();
4682
4683      arg0 = gen_rtx_REG (SImode, 0);
4684      emit_move_insn (arg0, GEN_INT (allocate));
4685
4686      sym = gen_rtx_MEM (FUNCTION_MODE,
4687			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4688      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4689
4690      CALL_INSN_FUNCTION_USAGE (insn)
4691	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4692			     CALL_INSN_FUNCTION_USAGE (insn));
4693
4694      /* Don't allow the scheduling pass to move insns across the
4695         __alloca call.  */
4696      emit_insn (gen_blockage (const0_rtx));
4697    }
4698  if (use_mov)
4699    {
4700      if (!frame_pointer_needed || !frame.to_allocate)
4701        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4702      else
4703        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4704				       -frame.nregs * UNITS_PER_WORD);
4705    }
4706
4707#ifdef SUBTARGET_PROLOGUE
4708  SUBTARGET_PROLOGUE;
4709#endif
4710
4711  pic_reg_used = false;
4712  if (pic_offset_table_rtx
4713      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4714	  || current_function_profile))
4715    {
4716      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4717
4718      if (alt_pic_reg_used != INVALID_REGNUM)
4719	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4720
4721      pic_reg_used = true;
4722    }
4723
4724  if (pic_reg_used)
4725    {
4726      insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4727
4728      /* Even with accurate pre-reload life analysis, we can wind up
4729	 deleting all references to the pic register after reload.
4730	 Consider the case where cross-jumping unifies two sides of a
4731	 branch controlled by a comparison with the only read from a
4732	 global; in that case, allow the set_got to be deleted, though
4733	 we're too late to do anything about the ebx save in the prologue.  */
4734      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4735    }
4736
4737  /* Prevent function calls from being scheduled before the call to mcount.
4738     In the pic_reg_used case, make sure that the got load isn't deleted.  */
4739  if (current_function_profile)
4740    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4741}
4742
4743/* Emit code to restore saved registers using MOV insns.  First register
4744   is restored from POINTER + OFFSET.  */
4745static void
4746ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4747     rtx pointer;
4748     int offset;
4749     int maybe_eh_return;
4750{
4751  int regno;
4752
4753  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4754    if (ix86_save_reg (regno, maybe_eh_return))
4755      {
4756	emit_move_insn (gen_rtx_REG (Pmode, regno),
4757			adjust_address (gen_rtx_MEM (Pmode, pointer),
4758					Pmode, offset));
4759	offset += UNITS_PER_WORD;
4760      }
4761}
4762
4763/* Restore function stack, frame, and registers.  */
4764
4765void
4766ix86_expand_epilogue (style)
4767     int style;
4768{
4769  int regno;
4770  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4771  struct ix86_frame frame;
4772  HOST_WIDE_INT offset;
4773
4774  ix86_compute_frame_layout (&frame);
4775
4776  /* Calculate start of saved registers relative to ebp.  Special care
4777     must be taken for the normal return case of a function using
4778     eh_return: the eax and edx registers are marked as saved, but not
4779     restored along this path.  */
4780  offset = frame.nregs;
4781  if (current_function_calls_eh_return && style != 2)
4782    offset -= 2;
4783  offset *= -UNITS_PER_WORD;
4784
4785  /* If we're only restoring one register and sp is not valid, then
4786     use a move instruction to restore the register, since it's
4787     less work than reloading sp and popping the register.
4788
4789     The default code results in a stack adjustment using an add/lea
4790     instruction, while this code results in a LEAVE instruction (or its
4791     discrete equivalent), so it is profitable in some other cases as
4792     well, especially when there are no registers to restore.  We also
4793     use this code when TARGET_USE_LEAVE and there is exactly one
4794     register to pop.  This heuristic may need some tuning in the future.  */
4795  if ((!sp_valid && frame.nregs <= 1)
4796      || (TARGET_EPILOGUE_USING_MOVE
4797	  && use_fast_prologue_epilogue
4798	  && (frame.nregs > 1 || frame.to_allocate))
4799      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4800      || (frame_pointer_needed && TARGET_USE_LEAVE
4801	  && use_fast_prologue_epilogue && frame.nregs == 1)
4802      || current_function_calls_eh_return)
4803    {
4804      /* Restore registers.  We can use ebp or esp to address the memory
4805	 locations.  If both are available, default to ebp, since offsets
4806	 are known to be small.  The only exception is esp pointing directly
4807	 to the end of the block of saved registers, where we may simplify
4808	 the addressing mode.  */
4809
4810      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4811	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4812					  frame.to_allocate, style == 2);
4813      else
4814	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4815					  offset, style == 2);
4816
4817      /* eh_return epilogues need %ecx added to the stack pointer.  */
4818      if (style == 2)
4819	{
4820	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4821
4822	  if (frame_pointer_needed)
4823	    {
4824	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4825	      tmp = plus_constant (tmp, UNITS_PER_WORD);
4826	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4827
4828	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4829	      emit_move_insn (hard_frame_pointer_rtx, tmp);
4830
4831	      emit_insn (gen_pro_epilogue_adjust_stack
4832			 (stack_pointer_rtx, sa, const0_rtx));
4833	    }
4834	  else
4835	    {
4836	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4837	      tmp = plus_constant (tmp, (frame.to_allocate
4838                                         + frame.nregs * UNITS_PER_WORD));
4839	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4840	    }
4841	}
4842      else if (!frame_pointer_needed)
4843	emit_insn (gen_pro_epilogue_adjust_stack
4844		   (stack_pointer_rtx, stack_pointer_rtx,
4845		    GEN_INT (frame.to_allocate
4846			     + frame.nregs * UNITS_PER_WORD)));
4847      /* If not an i386, mov & pop is faster than "leave".  */
4848      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4849	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4850      else
4851	{
4852	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4853						    hard_frame_pointer_rtx,
4854						    const0_rtx));
4855	  if (TARGET_64BIT)
4856	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4857	  else
4858	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4859	}
4860    }
4861  else
4862    {
4863      /* The first step is to deallocate the stack frame so that we can
4864	 pop the registers.  */
4865      if (!sp_valid)
4866	{
4867	  if (!frame_pointer_needed)
4868	    abort ();
4869          emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4870						    hard_frame_pointer_rtx,
4871						    GEN_INT (offset)));
4872	}
4873      else if (frame.to_allocate)
4874	emit_insn (gen_pro_epilogue_adjust_stack
4875		   (stack_pointer_rtx, stack_pointer_rtx,
4876		    GEN_INT (frame.to_allocate)));
4877
4878      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4879	if (ix86_save_reg (regno, false))
4880	  {
4881	    if (TARGET_64BIT)
4882	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4883	    else
4884	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4885	  }
4886      if (frame_pointer_needed)
4887	{
4888	  /* Leave results in shorter dependency chains on CPUs that are
4889	     able to grok it fast.  */
4890	  if (TARGET_USE_LEAVE)
4891	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4892	  else if (TARGET_64BIT)
4893	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4894	  else
4895	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4896	}
4897    }
4898
4899  /* Sibcall epilogues don't want a return instruction.  */
4900  if (style == 0)
4901    return;
4902
4903  if (current_function_pops_args && current_function_args_size)
4904    {
4905      rtx popc = GEN_INT (current_function_pops_args);
4906
4907      /* The i386 can only pop 64K bytes.  If asked to pop more, pop the
4908	 return address, do an explicit add, and jump indirectly to the
4909	 caller.  */
4910
4911      if (current_function_pops_args >= 65536)
4912	{
4913	  rtx ecx = gen_rtx_REG (SImode, 2);
4914
4915	  /* There is no "pascal" calling convention in the 64-bit ABI.  */
4916	  if (TARGET_64BIT)
4917	    abort ();
4918
4919	  emit_insn (gen_popsi1 (ecx));
4920	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4921	  emit_jump_insn (gen_return_indirect_internal (ecx));
4922	}
4923      else
4924	emit_jump_insn (gen_return_pop_internal (popc));
4925    }
4926  else
4927    emit_jump_insn (gen_return_internal ());
4928}
4929
4930/* Undo changes made while outputting the function; restore the PIC reg.  */
4931
4932static void
4933ix86_output_function_epilogue (file, size)
4934     FILE *file ATTRIBUTE_UNUSED;
4935     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4936{
4937  if (pic_offset_table_rtx)
4938    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4939}
4940
4941/* Extract the parts of an RTL expression that is a valid memory address
4942   for an instruction.  Return 0 if the structure of the address is
4943   grossly off.  Return -1 if the address contains ASHIFT, so it is not
4944   strictly valid, but still used for computing length of lea instruction.
4945   */
4946
4947static int
4948ix86_decompose_address (addr, out)
4949     register rtx addr;
4950     struct ix86_address *out;
4951{
4952  rtx base = NULL_RTX;
4953  rtx index = NULL_RTX;
4954  rtx disp = NULL_RTX;
4955  HOST_WIDE_INT scale = 1;
4956  rtx scale_rtx = NULL_RTX;
4957  int retval = 1;
4958
4959  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4960    base = addr;
4961  else if (GET_CODE (addr) == PLUS)
4962    {
4963      rtx op0 = XEXP (addr, 0);
4964      rtx op1 = XEXP (addr, 1);
4965      enum rtx_code code0 = GET_CODE (op0);
4966      enum rtx_code code1 = GET_CODE (op1);
4967
4968      if (code0 == REG || code0 == SUBREG)
4969	{
4970	  if (code1 == REG || code1 == SUBREG)
4971	    index = op0, base = op1;	/* index + base */
4972	  else
4973	    base = op0, disp = op1;	/* base + displacement */
4974	}
4975      else if (code0 == MULT)
4976	{
4977	  index = XEXP (op0, 0);
4978	  scale_rtx = XEXP (op0, 1);
4979	  if (code1 == REG || code1 == SUBREG)
4980	    base = op1;			/* index*scale + base */
4981	  else
4982	    disp = op1;			/* index*scale + disp */
4983	}
4984      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4985	{
4986	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
4987	  scale_rtx = XEXP (XEXP (op0, 0), 1);
4988	  base = XEXP (op0, 1);
4989	  disp = op1;
4990	}
4991      else if (code0 == PLUS)
4992	{
4993	  index = XEXP (op0, 0);	/* index + base + disp */
4994	  base = XEXP (op0, 1);
4995	  disp = op1;
4996	}
4997      else
4998	return 0;
4999    }
5000  else if (GET_CODE (addr) == MULT)
5001    {
5002      index = XEXP (addr, 0);		/* index*scale */
5003      scale_rtx = XEXP (addr, 1);
5004    }
5005  else if (GET_CODE (addr) == ASHIFT)
5006    {
5007      rtx tmp;
5008
5009      /* We're called for lea too, which implements ashift on occasion.  */
5010      index = XEXP (addr, 0);
5011      tmp = XEXP (addr, 1);
5012      if (GET_CODE (tmp) != CONST_INT)
5013	return 0;
5014      scale = INTVAL (tmp);
5015      if ((unsigned HOST_WIDE_INT) scale > 3)
5016	return 0;
5017      scale = 1 << scale;
5018      retval = -1;
5019    }
5020  else
5021    disp = addr;			/* displacement */
5022
5023  /* Extract the integral value of scale.  */
5024  if (scale_rtx)
5025    {
5026      if (GET_CODE (scale_rtx) != CONST_INT)
5027	return 0;
5028      scale = INTVAL (scale_rtx);
5029    }
5030
5031  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
5032  if (base && index && scale == 1
5033      && (index == arg_pointer_rtx || index == frame_pointer_rtx
5034          || index == stack_pointer_rtx))
5035    {
5036      rtx tmp = base;
5037      base = index;
5038      index = tmp;
5039    }
5040
5041  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
5042  if ((base == hard_frame_pointer_rtx
5043       || base == frame_pointer_rtx
5044       || base == arg_pointer_rtx) && !disp)
5045    disp = const0_rtx;
5046
5047  /* Special case: on K6, [%esi] causes the instruction to be vector
5048     decoded.  Avoid this by transforming to [%esi+0].  */
5049  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5050      && base && !index && !disp
5051      && REG_P (base)
5052      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5053    disp = const0_rtx;
5054
5055  /* Special case: encode reg+reg instead of reg*2.  */
5056  if (!base && index && scale && scale == 2)
5057    base = index, scale = 1;
5058
5059  /* Special case: scaling cannot be encoded without base or displacement.  */
5060  if (!base && !disp && index && scale != 1)
5061    disp = const0_rtx;
5062
5063  out->base = base;
5064  out->index = index;
5065  out->disp = disp;
5066  out->scale = scale;
5067
5068  return retval;
5069}
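
/* A worked example (for illustration only, not used by the compiler):
   the canonical RTL address

     (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx))
	   (const_int 12))

   decomposes into base = %ebx, index = %eax, scale = 4 and disp = 12,
   that is, the x86 operand 12(%ebx,%eax,4), with a return value of 1.
   An (ashift (reg %eax) (const_int 2)) address instead yields
   index = %eax, scale = 4 and a return value of -1, marking a form
   that only lea accepts.  */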
5070
5071/* Return cost of the memory address x.
5072   For i386, it is better to use a complex address than let gcc copy
5073   the address into a reg and make a new pseudo.  But not if the address
5074   requires two regs - that would mean more pseudos with longer
5075   lifetimes.  */
5076int
5077ix86_address_cost (x)
5078     rtx x;
5079{
5080  struct ix86_address parts;
5081  int cost = 1;
5082
5083  if (!ix86_decompose_address (x, &parts))
5084    abort ();
5085
5086  if (parts.base && GET_CODE (parts.base) == SUBREG)
5087    parts.base = SUBREG_REG (parts.base);
5088  if (parts.index && GET_CODE (parts.index) == SUBREG)
5089    parts.index = SUBREG_REG (parts.index);
5090
5091  /* More complex memory references are better.  */
5092  if (parts.disp && parts.disp != const0_rtx)
5093    cost--;
5094
5095  /* Attempt to minimize number of registers in the address.  */
5096  if ((parts.base
5097       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5098      || (parts.index
5099	  && (!REG_P (parts.index)
5100	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5101    cost++;
5102
5103  if (parts.base
5104      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5105      && parts.index
5106      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5107      && parts.base != parts.index)
5108    cost++;
5109
5110  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5111     since its predecode logic can't detect the length of such instructions
5112     and decoding degenerates to vector decoding.  Increase the cost of such
5113     addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
5114     to split such addresses, or even to refuse them entirely.
5115
5116     The following addressing modes are affected:
5117      [base+scale*index]
5118      [scale*index+disp]
5119      [base+index]
5120
5121     The first and last cases may be avoidable by explicitly coding a zero
5122     displacement in the memory address, but I don't have an AMD-K6 machine
5123     handy to check this theory.  */
5124
5125  if (TARGET_K6
5126      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5127	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5128	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5129    cost += 10;
5130
5131  return cost;
5132}
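
/* A worked example (for illustration only, ignoring the K6 penalty):
   an address such as 8(%ebx,%eax,2) built from hard registers costs 0,
   since the nonzero displacement earns the discount and no register
   penalty applies; a reg+reg address built from two distinct pseudos
   with no displacement costs 3, the base cost of 1 plus both register
   penalties.  */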
5133
5134/* If X is a machine specific address (i.e. a symbol or label being
5135   referenced as a displacement from the GOT implemented using an
5136   UNSPEC), then return the base term.  Otherwise return X.  */
5137
5138rtx
5139ix86_find_base_term (x)
5140     rtx x;
5141{
5142  rtx term;
5143
5144  if (TARGET_64BIT)
5145    {
5146      if (GET_CODE (x) != CONST)
5147	return x;
5148      term = XEXP (x, 0);
5149      if (GET_CODE (term) == PLUS
5150	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
5151	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5152	term = XEXP (term, 0);
5153      if (GET_CODE (term) != UNSPEC
5154	  || XINT (term, 1) != UNSPEC_GOTPCREL)
5155	return x;
5156
5157      term = XVECEXP (term, 0, 0);
5158
5159      if (GET_CODE (term) != SYMBOL_REF
5160	  && GET_CODE (term) != LABEL_REF)
5161	return x;
5162
5163      return term;
5164    }
5165
5166  if (GET_CODE (x) != PLUS
5167      || XEXP (x, 0) != pic_offset_table_rtx
5168      || GET_CODE (XEXP (x, 1)) != CONST)
5169    return x;
5170
5171  term = XEXP (XEXP (x, 1), 0);
5172
5173  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5174    term = XEXP (term, 0);
5175
5176  if (GET_CODE (term) != UNSPEC
5177      || XINT (term, 1) != UNSPEC_GOTOFF)
5178    return x;
5179
5180  term = XVECEXP (term, 0, 0);
5181
5182  if (GET_CODE (term) != SYMBOL_REF
5183      && GET_CODE (term) != LABEL_REF)
5184    return x;
5185
5186  return term;
5187}
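
/* For illustration: in 32-bit PIC code the GOTOFF reference

     (plus pic_offset_table_rtx
	   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))

   yields (symbol_ref "x") as the base term; anything that does not
   match this shape (or, for 64-bit, the GOTPCREL shape) is returned
   unchanged.  */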
5188
5189/* Determine if a given RTX is a valid constant.  We already know this
5190   satisfies CONSTANT_P.  */
5191
5192bool
5193legitimate_constant_p (x)
5194     rtx x;
5195{
5196  rtx inner;
5197
5198  switch (GET_CODE (x))
5199    {
5200    case SYMBOL_REF:
5201      /* TLS symbols are not constant.  */
5202      if (tls_symbolic_operand (x, Pmode))
5203	return false;
5204      break;
5205
5206    case CONST:
5207      inner = XEXP (x, 0);
5208
5209      /* Offsets of TLS symbols are never valid.
5210	 Discourage CSE from creating them.  */
5211      if (GET_CODE (inner) == PLUS
5212	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5213	return false;
5214
5215      /* Only some unspecs are valid as "constants".  */
5216      if (GET_CODE (inner) == UNSPEC)
5217	switch (XINT (inner, 1))
5218	  {
5219	  case UNSPEC_TPOFF:
5220	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5221	  default:
5222	    return false;
5223	  }
5224      break;
5225
5226    default:
5227      break;
5228    }
5229
5230  /* Otherwise we handle everything else in the move patterns.  */
5231  return true;
5232}
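
/* For illustration: a plain (symbol_ref "x") is a legitimate
   constant, but if "x" is thread-local both the symbol itself and an
   offset form such as (const (plus (symbol_ref "x") (const_int 4)))
   are rejected, which keeps CSE from creating TLS offsets.  */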
5233
5234/* Determine if it's legal to put X into the constant pool.  This
5235   is not possible for the address of thread-local symbols, which
5236   is checked above.  */
5237
5238static bool
5239ix86_cannot_force_const_mem (x)
5240     rtx x;
5241{
5242  return !legitimate_constant_p (x);
5243}
5244
5245/* Determine if a given RTX is a valid constant address.  */
5246
5247bool
5248constant_address_p (x)
5249     rtx x;
5250{
5251  switch (GET_CODE (x))
5252    {
5253    case LABEL_REF:
5254    case CONST_INT:
5255      return true;
5256
5257    case CONST_DOUBLE:
5258      return TARGET_64BIT;
5259
5260    case CONST:
5261      /* For Mach-O, really believe the CONST.  */
5262      if (TARGET_MACHO)
5263	return true;
5264      /* Otherwise fall through.  */
5265    case SYMBOL_REF:
5266      return !flag_pic && legitimate_constant_p (x);
5267
5268    default:
5269      return false;
5270    }
5271}
5272
5273/* Nonzero if the constant value X is a legitimate general operand
5274   when generating PIC code.  It is given that flag_pic is on and
5275   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
5276
5277bool
5278legitimate_pic_operand_p (x)
5279     rtx x;
5280{
5281  rtx inner;
5282
5283  switch (GET_CODE (x))
5284    {
5285    case CONST:
5286      inner = XEXP (x, 0);
5287
5288      /* Only some unspecs are valid as "constants".  */
5289      if (GET_CODE (inner) == UNSPEC)
5290	switch (XINT (inner, 1))
5291	  {
5292	  case UNSPEC_TPOFF:
5293	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5294	  default:
5295	    return false;
5296	  }
5297      /* FALLTHRU */
5298
5299    case SYMBOL_REF:
5300    case LABEL_REF:
5301      return legitimate_pic_address_disp_p (x);
5302
5303    default:
5304      return true;
5305    }
5306}
5307
5308/* Determine if a given CONST RTX is a valid memory displacement
5309   in PIC mode.  */
5310
5311int
5312legitimate_pic_address_disp_p (disp)
5313     register rtx disp;
5314{
5315  bool saw_plus;
5316
5317  /* In 64bit mode we can allow direct addresses of symbols and labels
5318     when they are not dynamic symbols.  */
5319  if (TARGET_64BIT)
5320    {
5321      /* TLS references should always be enclosed in UNSPEC.  */
5322      if (tls_symbolic_operand (disp, GET_MODE (disp)))
5323	return 0;
5324      if (GET_CODE (disp) == SYMBOL_REF
5325	  && ix86_cmodel == CM_SMALL_PIC
5326	  && (CONSTANT_POOL_ADDRESS_P (disp)
5327	      || SYMBOL_REF_FLAG (disp)))
5328	return 1;
5329      if (GET_CODE (disp) == LABEL_REF)
5330	return 1;
5331      if (GET_CODE (disp) == CONST
5332	  && GET_CODE (XEXP (disp, 0)) == PLUS
5333	  && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5334	       && ix86_cmodel == CM_SMALL_PIC
5335	       && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5336		   || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5337	      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5338	  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5339	  && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5340	  && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5341	return 1;
5342    }
5343  if (GET_CODE (disp) != CONST)
5344    return 0;
5345  disp = XEXP (disp, 0);
5346
5347  if (TARGET_64BIT)
5348    {
5349      /* It is not safe to allow PLUS expressions here; they could exceed
5350         the allowed reach of the GOT.  We should not need these anyway.  */
5351      if (GET_CODE (disp) != UNSPEC
5352	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
5353	return 0;
5354
5355      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5356	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5357	return 0;
5358      return 1;
5359    }
5360
5361  saw_plus = false;
5362  if (GET_CODE (disp) == PLUS)
5363    {
5364      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5365	return 0;
5366      disp = XEXP (disp, 0);
5367      saw_plus = true;
5368    }
5369
5370  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
5371  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5372    {
5373      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5374          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5375        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5376          {
5377            const char *sym_name = XSTR (XEXP (disp, 1), 0);
5378            if (strstr (sym_name, "$pb") != 0)
5379              return 1;
5380          }
5381    }
5382
5383  if (GET_CODE (disp) != UNSPEC)
5384    return 0;
5385
5386  switch (XINT (disp, 1))
5387    {
5388    case UNSPEC_GOT:
5389      if (saw_plus)
5390	return false;
5391      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5392    case UNSPEC_GOTOFF:
5393      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5394    case UNSPEC_GOTTPOFF:
5395    case UNSPEC_GOTNTPOFF:
5396    case UNSPEC_INDNTPOFF:
5397      if (saw_plus)
5398	return false;
5399      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5400    case UNSPEC_NTPOFF:
5401      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5402    case UNSPEC_DTPOFF:
5403      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5404    }
5405
5406  return 0;
5407}
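
/* For illustration: in 32-bit PIC code the displacement
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) is accepted when
   "x" is a local symbol, even with a constant offset added; by
   contrast an @GOT reference with an offset, such as
   (const (plus (unspec [(symbol_ref "x")] UNSPEC_GOT) (const_int 4))),
   is rejected because saw_plus is set for the UNSPEC_GOT case.  */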
5408
5409/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5410   memory address for an instruction.  The MODE argument is the machine mode
5411   for the MEM expression that wants to use this address.
5412
5413   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
5414   convert common non-canonical forms to canonical form so that they will
5415   be recognized.  */
5416
5417int
5418legitimate_address_p (mode, addr, strict)
5419     enum machine_mode mode;
5420     register rtx addr;
5421     int strict;
5422{
5423  struct ix86_address parts;
5424  rtx base, index, disp;
5425  HOST_WIDE_INT scale;
5426  const char *reason = NULL;
5427  rtx reason_rtx = NULL_RTX;
5428
5429  if (TARGET_DEBUG_ADDR)
5430    {
5431      fprintf (stderr,
5432	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5433	       GET_MODE_NAME (mode), strict);
5434      debug_rtx (addr);
5435    }
5436
5437  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5438    {
5439      if (TARGET_DEBUG_ADDR)
5440	fprintf (stderr, "Success.\n");
5441      return TRUE;
5442    }
5443
5444  if (ix86_decompose_address (addr, &parts) <= 0)
5445    {
5446      reason = "decomposition failed";
5447      goto report_error;
5448    }
5449
5450  base = parts.base;
5451  index = parts.index;
5452  disp = parts.disp;
5453  scale = parts.scale;
5454
5455  /* Validate base register.
5456
5457     Don't allow SUBREGs here; they can lead to spill failures when the base
5458     is one word out of a two word structure, which is represented internally
5459     as a DImode int.  */
5460
5461  if (base)
5462    {
5463      rtx reg;
5464      reason_rtx = base;
5465
5466      if (GET_CODE (base) == SUBREG)
5467	reg = SUBREG_REG (base);
5468      else
5469	reg = base;
5470
5471      if (GET_CODE (reg) != REG)
5472	{
5473	  reason = "base is not a register";
5474	  goto report_error;
5475	}
5476
5477      if (GET_MODE (base) != Pmode)
5478	{
5479	  reason = "base is not in Pmode";
5480	  goto report_error;
5481	}
5482
5483      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5484	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5485	{
5486	  reason = "base is not valid";
5487	  goto report_error;
5488	}
5489    }
5490
5491  /* Validate index register.
5492
5493     Don't allow SUBREGs here; they can lead to spill failures when the index
5494     is one word out of a two word structure, which is represented internally
5495     as a DImode int.  */
5496
5497  if (index)
5498    {
5499      rtx reg;
5500      reason_rtx = index;
5501
5502      if (GET_CODE (index) == SUBREG)
5503	reg = SUBREG_REG (index);
5504      else
5505	reg = index;
5506
5507      if (GET_CODE (reg) != REG)
5508	{
5509	  reason = "index is not a register";
5510	  goto report_error;
5511	}
5512
5513      if (GET_MODE (index) != Pmode)
5514	{
5515	  reason = "index is not in Pmode";
5516	  goto report_error;
5517	}
5518
5519      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5520	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5521	{
5522	  reason = "index is not valid";
5523	  goto report_error;
5524	}
5525    }
5526
5527  /* Validate scale factor.  */
5528  if (scale != 1)
5529    {
5530      reason_rtx = GEN_INT (scale);
5531      if (!index)
5532	{
5533	  reason = "scale without index";
5534	  goto report_error;
5535	}
5536
5537      if (scale != 2 && scale != 4 && scale != 8)
5538	{
5539	  reason = "scale is not a valid multiplier";
5540	  goto report_error;
5541	}
5542    }
5543
5544  /* Validate displacement.  */
5545  if (disp)
5546    {
5547      reason_rtx = disp;
5548
5549      if (GET_CODE (disp) == CONST
5550	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5551	switch (XINT (XEXP (disp, 0), 1))
5552	  {
5553	  case UNSPEC_GOT:
5554	  case UNSPEC_GOTOFF:
5555	  case UNSPEC_GOTPCREL:
5556	    if (!flag_pic)
5557	      abort ();
5558	    goto is_legitimate_pic;
5559
5560	  case UNSPEC_GOTTPOFF:
5561	  case UNSPEC_GOTNTPOFF:
5562	  case UNSPEC_INDNTPOFF:
5563	  case UNSPEC_NTPOFF:
5564	  case UNSPEC_DTPOFF:
5565	    break;
5566
5567	  default:
5568	    reason = "invalid address unspec";
5569	    goto report_error;
5570	  }
5571
5572      else if (flag_pic && (SYMBOLIC_CONST (disp)
5573#if TARGET_MACHO
5574			    && !machopic_operand_p (disp)
5575#endif
5576			    ))
5577	{
5578	is_legitimate_pic:
5579	  if (TARGET_64BIT && (index || base))
5580	    {
5581	      /* foo@dtpoff(%rX) is ok.  */
5582	      if (GET_CODE (disp) != CONST
5583		  || GET_CODE (XEXP (disp, 0)) != PLUS
5584		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5585		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5586		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5587		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5588		{
5589		  reason = "non-constant pic memory reference";
5590		  goto report_error;
5591		}
5592	    }
5593	  else if (! legitimate_pic_address_disp_p (disp))
5594	    {
5595	      reason = "displacement is an invalid pic construct";
5596	      goto report_error;
5597	    }
5598
5599          /* This code used to verify that a symbolic pic displacement
5600	     includes the pic_offset_table_rtx register.
5601
5602	     While this is a good idea, unfortunately these constructs may
5603	     be created by the "adds using lea" optimization for incorrect
5604	     code like:
5605
5606	     int a;
5607	     int foo(int i)
5608	       {
5609	         return *(&a+i);
5610	       }
5611
5612	     This code is nonsensical, but it results in addressing the
5613	     GOT table with pic_offset_table_rtx as the base.  We can't
5614	     easily refuse it, since it gets matched by the "addsi3"
5615	     pattern, which is later split into an lea when the output
5616	     register differs from the input.  While this could be handled
5617	     by a separate addsi pattern for this case that never results
5618	     in an lea, disabling this test seems to be the easier and
5619	     correct fix for the crash.  */
5620	}
5621      else if (!CONSTANT_ADDRESS_P (disp))
5622	{
5623	  reason = "displacement is not constant";
5624	  goto report_error;
5625	}
5626      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5627	{
5628	  reason = "displacement is out of range";
5629	  goto report_error;
5630	}
5631      else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5632	{
5633	  reason = "displacement is a const_double";
5634	  goto report_error;
5635	}
5636    }
5637
5638  /* Everything looks valid.  */
5639  if (TARGET_DEBUG_ADDR)
5640    fprintf (stderr, "Success.\n");
5641  return TRUE;
5642
5643 report_error:
5644  if (TARGET_DEBUG_ADDR)
5645    {
5646      fprintf (stderr, "Error: %s\n", reason);
5647      debug_rtx (reason_rtx);
5648    }
5649  return FALSE;
5650}
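
/* For illustration: the address
   (plus (mult (reg %eax) (const_int 3)) (const_int 8)) survives
   decomposition, since any CONST_INT scale is extracted, but is
   rejected here with "scale is not a valid multiplier" because the
   hardware only encodes scales of 1, 2, 4 and 8.  */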
5651
5652/* Return a unique alias set for the GOT.  */
5653
5654static HOST_WIDE_INT
5655ix86_GOT_alias_set ()
5656{
5657  static HOST_WIDE_INT set = -1;
5658  if (set == -1)
5659    set = new_alias_set ();
5660  return set;
5661}
5662
5663/* Return a legitimate reference for ORIG (an address) using the
5664   register REG.  If REG is 0, a new pseudo is generated.
5665
5666   There are two types of references that must be handled:
5667
5668   1. Global data references must load the address from the GOT, via
5669      the PIC reg.  An insn is emitted to do this load, and the reg is
5670      returned.
5671
5672   2. Static data references, constant pool addresses, and code labels
5673      compute the address as an offset from the GOT, whose base is in
5674      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
5675      differentiate them from global data objects.  The returned
5676      address is the PIC reg + an unspec constant.
5677
5678   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5679   reg also appears in the address.  */
5680
5681rtx
5682legitimize_pic_address (orig, reg)
5683     rtx orig;
5684     rtx reg;
5685{
5686  rtx addr = orig;
5687  rtx new = orig;
5688  rtx base;
5689
5690#if TARGET_MACHO
5691  if (reg == 0)
5692    reg = gen_reg_rtx (Pmode);
5693  /* Use the generic Mach-O PIC machinery.  */
5694  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5695#endif
5696
5697  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5698    new = addr;
5699  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5700    {
5701      /* This symbol may be referenced via a displacement from the PIC
5702	 base address (@GOTOFF).  */
5703
5704      if (reload_in_progress)
5705	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5706      new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5707      new = gen_rtx_CONST (Pmode, new);
5708      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5709
5710      if (reg != 0)
5711	{
5712	  emit_move_insn (reg, new);
5713	  new = reg;
5714	}
5715    }
5716  else if (GET_CODE (addr) == SYMBOL_REF)
5717    {
5718      if (TARGET_64BIT)
5719	{
5720	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5721	  new = gen_rtx_CONST (Pmode, new);
5722	  new = gen_rtx_MEM (Pmode, new);
5723	  RTX_UNCHANGING_P (new) = 1;
5724	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5725
5726	  if (reg == 0)
5727	    reg = gen_reg_rtx (Pmode);
5728	  /* Use gen_movsi directly; otherwise the address is loaded
5729	     into a register for CSE.  We don't want to CSE these addresses;
5730	     instead we CSE the addresses loaded from the GOT table, so skip this.  */
5731	  emit_insn (gen_movsi (reg, new));
5732	  new = reg;
5733	}
5734      else
5735	{
5736	  /* This symbol must be referenced via a load from the
5737	     Global Offset Table (@GOT).  */
5738
5739	  if (reload_in_progress)
5740	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5741	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5742	  new = gen_rtx_CONST (Pmode, new);
5743	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5744	  new = gen_rtx_MEM (Pmode, new);
5745	  RTX_UNCHANGING_P (new) = 1;
5746	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5747
5748	  if (reg == 0)
5749	    reg = gen_reg_rtx (Pmode);
5750	  emit_move_insn (reg, new);
5751	  new = reg;
5752	}
5753    }
5754  else
5755    {
5756      if (GET_CODE (addr) == CONST)
5757	{
5758	  addr = XEXP (addr, 0);
5759
5760	  /* We must match stuff we generated earlier.  Assume the only
5761	     unspecs that can get here are ours.  Not that we could do
5762	     anything with them anyway...  */
5763	  if (GET_CODE (addr) == UNSPEC
5764	      || (GET_CODE (addr) == PLUS
5765		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5766	    return orig;
5767	  if (GET_CODE (addr) != PLUS)
5768	    abort ();
5769	}
5770      if (GET_CODE (addr) == PLUS)
5771	{
5772	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5773
5774	  /* Check first to see if this is a constant offset from a @GOTOFF
5775	     symbol reference.  */
5776	  if (local_symbolic_operand (op0, Pmode)
5777	      && GET_CODE (op1) == CONST_INT)
5778	    {
5779	      if (!TARGET_64BIT)
5780		{
5781		  if (reload_in_progress)
5782		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5783		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5784					UNSPEC_GOTOFF);
5785		  new = gen_rtx_PLUS (Pmode, new, op1);
5786		  new = gen_rtx_CONST (Pmode, new);
5787		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5788
5789		  if (reg != 0)
5790		    {
5791		      emit_move_insn (reg, new);
5792		      new = reg;
5793		    }
5794		}
5795	      else
5796		{
5797		  if (INTVAL (op1) < -16*1024*1024
5798		      || INTVAL (op1) >= 16*1024*1024)
5799		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5800		}
5801	    }
5802	  else
5803	    {
5804	      base = legitimize_pic_address (XEXP (addr, 0), reg);
5805	      new  = legitimize_pic_address (XEXP (addr, 1),
5806					     base == reg ? NULL_RTX : reg);
5807
5808	      if (GET_CODE (new) == CONST_INT)
5809		new = plus_constant (base, INTVAL (new));
5810	      else
5811		{
5812		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5813		    {
5814		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5815		      new = XEXP (new, 1);
5816		    }
5817		  new = gen_rtx_PLUS (Pmode, base, new);
5818		}
5819	    }
5820	}
5821    }
5822  return new;
5823}
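
/* For illustration: in 32-bit PIC code a local symbol "x" becomes
   the @GOTOFF form

     (plus pic_offset_table_rtx
	   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))

   computed relative to the PIC register, while a global symbol is
   instead loaded through the GOT, roughly "movl x@GOT(%ebx), %reg",
   and the register holding the loaded address is returned.  */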
5824
5825static void
5826ix86_encode_section_info (decl, first)
5827     tree decl;
5828     int first ATTRIBUTE_UNUSED;
5829{
5830  bool local_p = (*targetm.binds_local_p) (decl);
5831  rtx rtl, symbol;
5832
5833  rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5834  if (GET_CODE (rtl) != MEM)
5835    return;
5836  symbol = XEXP (rtl, 0);
5837  if (GET_CODE (symbol) != SYMBOL_REF)
5838    return;
5839
5840  /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5841     symbol so that we may access it directly in the GOT.  */
5842
5843  if (flag_pic)
5844    SYMBOL_REF_FLAG (symbol) = local_p;
5845
5846  /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5847     "local dynamic", "initial exec" or "local exec" TLS models
5848     respectively.  */
5849
5850  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5851    {
5852      const char *symbol_str;
5853      char *newstr;
5854      size_t len;
5855      enum tls_model kind = decl_tls_model (decl);
5856
5857      if (TARGET_64BIT && ! flag_pic)
5858	{
5859	  /* x86-64 doesn't allow non-pic code for shared libraries,
5860	     so don't generate GD/LD TLS models for non-pic code.  */
5861	  switch (kind)
5862	    {
5863	    case TLS_MODEL_GLOBAL_DYNAMIC:
5864	      kind = TLS_MODEL_INITIAL_EXEC; break;
5865	    case TLS_MODEL_LOCAL_DYNAMIC:
5866	      kind = TLS_MODEL_LOCAL_EXEC; break;
5867	    default:
5868	      break;
5869	    }
5870	}
5871
5872      symbol_str = XSTR (symbol, 0);
5873
5874      if (symbol_str[0] == '%')
5875	{
5876	  if (symbol_str[1] == tls_model_chars[kind])
5877	    return;
5878	  symbol_str += 2;
5879	}
5880      len = strlen (symbol_str) + 1;
5881      newstr = alloca (len + 2);
5882
5883      newstr[0] = '%';
5884      newstr[1] = tls_model_chars[kind];
5885      memcpy (newstr + 2, symbol_str, len);
5886
5887      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5888    }
5889}
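
/* For illustration (following the %[GLil] scheme described above): a
   thread-local variable "foo" accessed with the initial-exec model
   has its assembler name rewritten to "%ifoo"; the encoding is
   stripped again by ix86_strip_name_encoding below before the name
   is printed.  */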
5890
5891/* Undo the above when printing symbol names.  */
5892
5893static const char *
5894ix86_strip_name_encoding (str)
5895     const char *str;
5896{
5897  if (str[0] == '%')
5898    str += 2;
5899  if (str [0] == '*')
5900    str += 1;
5901  return str;
5902}
5903
5904/* Load the thread pointer into a register.  */
5905
5906static rtx
5907get_thread_pointer ()
5908{
5909  rtx tp;
5910
5911  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5912  tp = gen_rtx_MEM (Pmode, tp);
5913  RTX_UNCHANGING_P (tp) = 1;
5914  set_mem_alias_set (tp, ix86_GOT_alias_set ());
5915  tp = force_reg (Pmode, tp);
5916
5917  return tp;
5918}
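
/* For illustration: the UNSPEC_TP memory built here is printed by
   print_operand_address below as a segment reference, %gs:0 for
   32-bit code or %fs:0 for 64-bit code, which is where the OS keeps
   the thread pointer.  */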
5919
5920/* Try machine-dependent ways of modifying an illegitimate address
5921   to be legitimate.  If we find one, return the new, valid address.
5922   This macro is used in only one place: `memory_address' in explow.c.
5923
5924   OLDX is the address as it was before break_out_memory_refs was called.
5925   In some cases it is useful to look at this to decide what needs to be done.
5926
5927   MODE and WIN are passed so that this macro can use
5928   GO_IF_LEGITIMATE_ADDRESS.
5929
5930   It is always safe for this macro to do nothing.  It exists to recognize
5931   opportunities to optimize the output.
5932
5933   For the 80386, we handle X+REG by loading X into a register R and
5934   using R+REG.  R will go in a general reg and indexing will be used.
5935   However, if REG is a broken-out memory address or multiplication,
5936   nothing needs to be done because REG can certainly go in a general reg.
5937
5938   When -fpic is used, special handling is needed for symbolic references.
5939   See comments by legitimize_pic_address in i386.c for details.  */
5940
5941rtx
5942legitimize_address (x, oldx, mode)
5943     register rtx x;
5944     register rtx oldx ATTRIBUTE_UNUSED;
5945     enum machine_mode mode;
5946{
5947  int changed = 0;
5948  unsigned log;
5949
5950  if (TARGET_DEBUG_ADDR)
5951    {
5952      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5953	       GET_MODE_NAME (mode));
5954      debug_rtx (x);
5955    }
5956
5957  log = tls_symbolic_operand (x, mode);
5958  if (log)
5959    {
5960      rtx dest, base, off, pic;
5961      int type;
5962
5963      switch (log)
5964        {
5965        case TLS_MODEL_GLOBAL_DYNAMIC:
5966	  dest = gen_reg_rtx (Pmode);
5967	  if (TARGET_64BIT)
5968	    {
5969	      rtx rax = gen_rtx_REG (Pmode, 0), insns;
5970
5971	      start_sequence ();
5972	      emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5973	      insns = get_insns ();
5974	      end_sequence ();
5975
5976	      emit_libcall_block (insns, dest, rax, x);
5977	    }
5978	  else
5979	    emit_insn (gen_tls_global_dynamic_32 (dest, x));
5980	  break;
5981
5982        case TLS_MODEL_LOCAL_DYNAMIC:
5983	  base = gen_reg_rtx (Pmode);
5984	  if (TARGET_64BIT)
5985	    {
5986	      rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5987
5988	      start_sequence ();
5989	      emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5990	      insns = get_insns ();
5991	      end_sequence ();
5992
5993	      note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5994	      note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5995	      emit_libcall_block (insns, base, rax, note);
5996	    }
5997	  else
5998	    emit_insn (gen_tls_local_dynamic_base_32 (base));
5999
6000	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6001	  off = gen_rtx_CONST (Pmode, off);
6002
6003	  return gen_rtx_PLUS (Pmode, base, off);
6004
6005        case TLS_MODEL_INITIAL_EXEC:
6006	  if (TARGET_64BIT)
6007	    {
6008	      pic = NULL;
6009	      type = UNSPEC_GOTNTPOFF;
6010	    }
6011	  else if (flag_pic)
6012	    {
6013	      if (reload_in_progress)
6014		regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6015	      pic = pic_offset_table_rtx;
6016	      type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6017	    }
6018	  else if (!TARGET_GNU_TLS)
6019	    {
6020	      pic = gen_reg_rtx (Pmode);
6021	      emit_insn (gen_set_got (pic));
6022	      type = UNSPEC_GOTTPOFF;
6023	    }
6024	  else
6025	    {
6026	      pic = NULL;
6027	      type = UNSPEC_INDNTPOFF;
6028	    }
6029
6030	  base = get_thread_pointer ();
6031
6032	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6033	  off = gen_rtx_CONST (Pmode, off);
6034	  if (pic)
6035	    off = gen_rtx_PLUS (Pmode, pic, off);
6036	  off = gen_rtx_MEM (Pmode, off);
6037	  RTX_UNCHANGING_P (off) = 1;
6038	  set_mem_alias_set (off, ix86_GOT_alias_set ());
6039	  dest = gen_reg_rtx (Pmode);
6040
6041	  if (TARGET_64BIT || TARGET_GNU_TLS)
6042	    {
6043	      emit_move_insn (dest, off);
6044	      return gen_rtx_PLUS (Pmode, base, dest);
6045	    }
6046	  else
6047	    emit_insn (gen_subsi3 (dest, base, off));
6048	  break;
6049
6050        case TLS_MODEL_LOCAL_EXEC:
6051	  base = get_thread_pointer ();
6052
6053	  off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6054				(TARGET_64BIT || TARGET_GNU_TLS)
6055				? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6056	  off = gen_rtx_CONST (Pmode, off);
6057
6058	  if (TARGET_64BIT || TARGET_GNU_TLS)
6059	    return gen_rtx_PLUS (Pmode, base, off);
6060	  else
6061	    {
6062	      dest = gen_reg_rtx (Pmode);
6063	      emit_insn (gen_subsi3 (dest, base, off));
6064	    }
6065	  break;
6066
6067	default:
6068	  abort ();
6069        }
6070
6071      return dest;
6072    }
6073
6074  if (flag_pic && SYMBOLIC_CONST (x))
6075    return legitimize_pic_address (x, 0);
6076
6077  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6078  if (GET_CODE (x) == ASHIFT
6079      && GET_CODE (XEXP (x, 1)) == CONST_INT
6080      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6081    {
6082      changed = 1;
6083      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6084			GEN_INT (1 << log));
6085    }
6086
6087  if (GET_CODE (x) == PLUS)
6088    {
6089      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6090
6091      if (GET_CODE (XEXP (x, 0)) == ASHIFT
6092	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6093	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6094	{
6095	  changed = 1;
6096	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
6097				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6098				      GEN_INT (1 << log));
6099	}
6100
6101      if (GET_CODE (XEXP (x, 1)) == ASHIFT
6102	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6103	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6104	{
6105	  changed = 1;
6106	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
6107				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6108				      GEN_INT (1 << log));
6109	}
6110
6111      /* Put multiply first if it isn't already.  */
6112      if (GET_CODE (XEXP (x, 1)) == MULT)
6113	{
6114	  rtx tmp = XEXP (x, 0);
6115	  XEXP (x, 0) = XEXP (x, 1);
6116	  XEXP (x, 1) = tmp;
6117	  changed = 1;
6118	}
6119
6120      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6121	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
6122	 created by virtual register instantiation, register elimination, and
6123	 similar optimizations.  */
6124      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6125	{
6126	  changed = 1;
6127	  x = gen_rtx_PLUS (Pmode,
6128			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
6129					  XEXP (XEXP (x, 1), 0)),
6130			    XEXP (XEXP (x, 1), 1));
6131	}
6132
6133      /* Canonicalize
6134	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6135	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
6136      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6137	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6138	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6139	       && CONSTANT_P (XEXP (x, 1)))
6140	{
6141	  rtx constant;
6142	  rtx other = NULL_RTX;
6143
6144	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6145	    {
6146	      constant = XEXP (x, 1);
6147	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6148	    }
6149	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6150	    {
6151	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6152	      other = XEXP (x, 1);
6153	    }
6154	  else
6155	    constant = 0;
6156
6157	  if (constant)
6158	    {
6159	      changed = 1;
6160	      x = gen_rtx_PLUS (Pmode,
6161				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6162					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
6163				plus_constant (other, INTVAL (constant)));
6164	    }
6165	}
6166
6167      if (changed && legitimate_address_p (mode, x, FALSE))
6168	return x;
6169
6170      if (GET_CODE (XEXP (x, 0)) == MULT)
6171	{
6172	  changed = 1;
6173	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6174	}
6175
6176      if (GET_CODE (XEXP (x, 1)) == MULT)
6177	{
6178	  changed = 1;
6179	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6180	}
6181
6182      if (changed
6183	  && GET_CODE (XEXP (x, 1)) == REG
6184	  && GET_CODE (XEXP (x, 0)) == REG)
6185	return x;
6186
6187      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6188	{
6189	  changed = 1;
6190	  x = legitimize_pic_address (x, 0);
6191	}
6192
6193      if (changed && legitimate_address_p (mode, x, FALSE))
6194	return x;
6195
6196      if (GET_CODE (XEXP (x, 0)) == REG)
6197	{
6198	  register rtx temp = gen_reg_rtx (Pmode);
6199	  register rtx val  = force_operand (XEXP (x, 1), temp);
6200	  if (val != temp)
6201	    emit_move_insn (temp, val);
6202
6203	  XEXP (x, 1) = temp;
6204	  return x;
6205	}
6206
6207      else if (GET_CODE (XEXP (x, 1)) == REG)
6208	{
6209	  register rtx temp = gen_reg_rtx (Pmode);
6210	  register rtx val  = force_operand (XEXP (x, 0), temp);
6211	  if (val != temp)
6212	    emit_move_insn (temp, val);
6213
6214	  XEXP (x, 0) = temp;
6215	  return x;
6216	}
6217    }
6218
6219  return x;
6220}
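
/* A worked example (for illustration only): given the non-canonical
   address (plus (reg %eax) (ashift (reg %ebx) (const_int 2))), the
   shift is first rewritten as a multiply and the multiply is then
   moved to the front, producing the canonical

     (plus (mult (reg %ebx) (const_int 4)) (reg %eax))

   which GO_IF_LEGITIMATE_ADDRESS accepts as (%eax,%ebx,4).  */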
6221
6222/* Print an integer constant expression in assembler syntax.  Addition
6223   and subtraction are the only arithmetic that may appear in these
6224   expressions.  FILE is the stdio stream to write to, X is the rtx, and
6225   CODE is the operand print code from the output string.  */
6226
6227static void
6228output_pic_addr_const (file, x, code)
6229     FILE *file;
6230     rtx x;
6231     int code;
6232{
6233  char buf[256];
6234
6235  switch (GET_CODE (x))
6236    {
6237    case PC:
6238      if (flag_pic)
6239	putc ('.', file);
6240      else
6241	abort ();
6242      break;
6243
6244    case SYMBOL_REF:
6245      assemble_name (file, XSTR (x, 0));
6246      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6247	fputs ("@PLT", file);
6248      break;
6249
6250    case LABEL_REF:
6251      x = XEXP (x, 0);
6252      /* FALLTHRU */
6253    case CODE_LABEL:
6254      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6255      assemble_name (asm_out_file, buf);
6256      break;
6257
6258    case CONST_INT:
6259      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6260      break;
6261
6262    case CONST:
6263      /* This used to output parentheses around the expression,
6264	 but that does not work on the 386 (either ATT or BSD assembler).  */
6265      output_pic_addr_const (file, XEXP (x, 0), code);
6266      break;
6267
6268    case CONST_DOUBLE:
6269      if (GET_MODE (x) == VOIDmode)
6270	{
6271	  /* We can use %d if the number is <32 bits and positive.  */
6272	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6273	    fprintf (file, "0x%lx%08lx",
6274		     (unsigned long) CONST_DOUBLE_HIGH (x),
6275		     (unsigned long) CONST_DOUBLE_LOW (x));
6276	  else
6277	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6278	}
6279      else
6280	/* We can't handle floating point constants;
6281	   PRINT_OPERAND must handle them.  */
6282	output_operand_lossage ("floating constant misused");
6283      break;
6284
6285    case PLUS:
6286      /* Some assemblers need integer constants to appear first.  */
6287      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6288	{
6289	  output_pic_addr_const (file, XEXP (x, 0), code);
6290	  putc ('+', file);
6291	  output_pic_addr_const (file, XEXP (x, 1), code);
6292	}
6293      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6294	{
6295	  output_pic_addr_const (file, XEXP (x, 1), code);
6296	  putc ('+', file);
6297	  output_pic_addr_const (file, XEXP (x, 0), code);
6298	}
6299      else
6300	abort ();
6301      break;
6302
6303    case MINUS:
6304      if (!TARGET_MACHO)
6305	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6306      output_pic_addr_const (file, XEXP (x, 0), code);
6307      putc ('-', file);
6308      output_pic_addr_const (file, XEXP (x, 1), code);
6309      if (!TARGET_MACHO)
6310	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6311      break;
6312
6313     case UNSPEC:
6314       if (XVECLEN (x, 0) != 1)
6315	 abort ();
6316       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6317       switch (XINT (x, 1))
6318	{
6319	case UNSPEC_GOT:
6320	  fputs ("@GOT", file);
6321	  break;
6322	case UNSPEC_GOTOFF:
6323	  fputs ("@GOTOFF", file);
6324	  break;
6325	case UNSPEC_GOTPCREL:
6326	  fputs ("@GOTPCREL(%rip)", file);
6327	  break;
6328	case UNSPEC_GOTTPOFF:
6329	  /* FIXME: This might be @TPOFF in Sun ld too.  */
6330	  fputs ("@GOTTPOFF", file);
6331	  break;
6332	case UNSPEC_TPOFF:
6333	  fputs ("@TPOFF", file);
6334	  break;
6335	case UNSPEC_NTPOFF:
6336	  if (TARGET_64BIT)
6337	    fputs ("@TPOFF", file);
6338	  else
6339	    fputs ("@NTPOFF", file);
6340	  break;
6341	case UNSPEC_DTPOFF:
6342	  fputs ("@DTPOFF", file);
6343	  break;
6344	case UNSPEC_GOTNTPOFF:
6345	  if (TARGET_64BIT)
6346	    fputs ("@GOTTPOFF(%rip)", file);
6347	  else
6348	    fputs ("@GOTNTPOFF", file);
6349	  break;
6350	case UNSPEC_INDNTPOFF:
6351	  fputs ("@INDNTPOFF", file);
6352	  break;
6353	default:
6354	  output_operand_lossage ("invalid UNSPEC as operand");
6355	  break;
6356	}
6357       break;
6358
6359    default:
6360      output_operand_lossage ("invalid expression as operand");
6361    }
6362}
6363
6364/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6365   We need to handle our special PIC relocations.  */
6366
6367void
6368i386_dwarf_output_addr_const (file, x)
6369     FILE *file;
6370     rtx x;
6371{
6372#ifdef ASM_QUAD
6373  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6374#else
6375  if (TARGET_64BIT)
6376    abort ();
6377  fprintf (file, "%s", ASM_LONG);
6378#endif
6379  if (flag_pic)
6380    output_pic_addr_const (file, x, '\0');
6381  else
6382    output_addr_const (file, x);
6383  fputc ('\n', file);
6384}
6385
6386/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6387   We need to emit DTP-relative relocations.  */
6388
6389void
6390i386_output_dwarf_dtprel (file, size, x)
6391     FILE *file;
6392     int size;
6393     rtx x;
6394{
6395  fputs (ASM_LONG, file);
6396  output_addr_const (file, x);
6397  fputs ("@DTPOFF", file);
6398  switch (size)
6399    {
6400    case 4:
6401      break;
6402    case 8:
6403      fputs (", 0", file);
6404      break;
6405    default:
6406      abort ();
6407    }
6408}
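
/* For illustration, assuming ASM_LONG is "\t.long\t": a 4-byte
   DTP-relative reference to "foo" comes out as

	.long	foo@DTPOFF

   while the 8-byte form appends ", 0" to supply the upper 32 bits.  */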
6409
6410/* In the name of slightly smaller debug output, and to cater to
6411   general assembler lossage, recognize PIC+GOTOFF and turn it back
6412   into a direct symbol reference.  */
6413
6414rtx
6415i386_simplify_dwarf_addr (orig_x)
6416     rtx orig_x;
6417{
6418  rtx x = orig_x, y;
6419
6420  if (GET_CODE (x) == MEM)
6421    x = XEXP (x, 0);
6422
6423  if (TARGET_64BIT)
6424    {
6425      if (GET_CODE (x) != CONST
6426	  || GET_CODE (XEXP (x, 0)) != UNSPEC
6427	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6428	  || GET_CODE (orig_x) != MEM)
6429	return orig_x;
6430      return XVECEXP (XEXP (x, 0), 0, 0);
6431    }
6432
6433  if (GET_CODE (x) != PLUS
6434      || GET_CODE (XEXP (x, 1)) != CONST)
6435    return orig_x;
6436
6437  if (GET_CODE (XEXP (x, 0)) == REG
6438      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6439    /* %ebx + GOT/GOTOFF */
6440    y = NULL;
6441  else if (GET_CODE (XEXP (x, 0)) == PLUS)
6442    {
6443      /* %ebx + %reg * scale + GOT/GOTOFF */
6444      y = XEXP (x, 0);
6445      if (GET_CODE (XEXP (y, 0)) == REG
6446	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6447	y = XEXP (y, 1);
6448      else if (GET_CODE (XEXP (y, 1)) == REG
6449	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6450	y = XEXP (y, 0);
6451      else
6452	return orig_x;
6453      if (GET_CODE (y) != REG
6454	  && GET_CODE (y) != MULT
6455	  && GET_CODE (y) != ASHIFT)
6456	return orig_x;
6457    }
6458  else
6459    return orig_x;
6460
6461  x = XEXP (XEXP (x, 1), 0);
6462  if (GET_CODE (x) == UNSPEC
6463      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6464	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6465    {
6466      if (y)
6467	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6468      return XVECEXP (x, 0, 0);
6469    }
6470
6471  if (GET_CODE (x) == PLUS
6472      && GET_CODE (XEXP (x, 0)) == UNSPEC
6473      && GET_CODE (XEXP (x, 1)) == CONST_INT
6474      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6475	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6476	      && GET_CODE (orig_x) != MEM)))
6477    {
6478      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6479      if (y)
6480	return gen_rtx_PLUS (Pmode, y, x);
6481      return x;
6482    }
6483
6484  return orig_x;
6485}
6486
6487static void
6488put_condition_code (code, mode, reverse, fp, file)
6489     enum rtx_code code;
6490     enum machine_mode mode;
6491     int reverse, fp;
6492     FILE *file;
6493{
6494  const char *suffix;
6495
6496  if (mode == CCFPmode || mode == CCFPUmode)
6497    {
6498      enum rtx_code second_code, bypass_code;
6499      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6500      if (bypass_code != NIL || second_code != NIL)
6501	abort ();
6502      code = ix86_fp_compare_code_to_integer (code);
6503      mode = CCmode;
6504    }
6505  if (reverse)
6506    code = reverse_condition (code);
6507
6508  switch (code)
6509    {
6510    case EQ:
6511      suffix = "e";
6512      break;
6513    case NE:
6514      suffix = "ne";
6515      break;
6516    case GT:
6517      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6518	abort ();
6519      suffix = "g";
6520      break;
6521    case GTU:
6522      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6523	 Those same assemblers have the same but opposite lossage on cmov.  */
6524      if (mode != CCmode)
6525	abort ();
6526      suffix = fp ? "nbe" : "a";
6527      break;
6528    case LT:
6529      if (mode == CCNOmode || mode == CCGOCmode)
6530	suffix = "s";
6531      else if (mode == CCmode || mode == CCGCmode)
6532	suffix = "l";
6533      else
6534	abort ();
6535      break;
6536    case LTU:
6537      if (mode != CCmode)
6538	abort ();
6539      suffix = "b";
6540      break;
6541    case GE:
6542      if (mode == CCNOmode || mode == CCGOCmode)
6543	suffix = "ns";
6544      else if (mode == CCmode || mode == CCGCmode)
6545	suffix = "ge";
6546      else
6547	abort ();
6548      break;
6549    case GEU:
6550      /* ??? As above.  */
6551      if (mode != CCmode)
6552	abort ();
6553      suffix = fp ? "nb" : "ae";
6554      break;
6555    case LE:
6556      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6557	abort ();
6558      suffix = "le";
6559      break;
6560    case LEU:
6561      if (mode != CCmode)
6562	abort ();
6563      suffix = "be";
6564      break;
6565    case UNORDERED:
6566      suffix = fp ? "u" : "p";
6567      break;
6568    case ORDERED:
6569      suffix = fp ? "nu" : "np";
6570      break;
6571    default:
6572      abort ();
6573    }
6574  fputs (suffix, file);
6575}
6576
6577void
6578print_reg (x, code, file)
6579     rtx x;
6580     int code;
6581     FILE *file;
6582{
6583  if (REGNO (x) == ARG_POINTER_REGNUM
6584      || REGNO (x) == FRAME_POINTER_REGNUM
6585      || REGNO (x) == FLAGS_REG
6586      || REGNO (x) == FPSR_REG)
6587    abort ();
6588
6589  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6590    putc ('%', file);
6591
6592  if (code == 'w' || MMX_REG_P (x))
6593    code = 2;
6594  else if (code == 'b')
6595    code = 1;
6596  else if (code == 'k')
6597    code = 4;
6598  else if (code == 'q')
6599    code = 8;
6600  else if (code == 'y')
6601    code = 3;
6602  else if (code == 'h')
6603    code = 0;
6604  else
6605    code = GET_MODE_SIZE (GET_MODE (x));
6606
6607  /* Irritatingly, AMD extended registers use a different naming convention
6608     from the normal registers.  */
6609  if (REX_INT_REG_P (x))
6610    {
6611      if (!TARGET_64BIT)
6612	abort ();
6613      switch (code)
6614	{
6615	  case 0:
6616	    error ("extended registers have no high halves");
6617	    break;
6618	  case 1:
6619	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6620	    break;
6621	  case 2:
6622	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6623	    break;
6624	  case 4:
6625	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6626	    break;
6627	  case 8:
6628	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6629	    break;
6630	  default:
6631	    error ("unsupported operand size for extended register");
6632	    break;
6633	}
6634      return;
6635    }
6636  switch (code)
6637    {
6638    case 3:
6639      if (STACK_TOP_P (x))
6640	{
6641	  fputs ("st(0)", file);
6642	  break;
6643	}
6644      /* FALLTHRU */
6645    case 8:
6646    case 4:
6647    case 12:
6648      if (! ANY_FP_REG_P (x))
6649	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6650      /* FALLTHRU */
6651    case 16:
6652    case 2:
6653      fputs (hi_reg_name[REGNO (x)], file);
6654      break;
6655    case 1:
6656      fputs (qi_reg_name[REGNO (x)], file);
6657      break;
6658    case 0:
6659      fputs (qi_high_reg_name[REGNO (x)], file);
6660      break;
6661    default:
6662      abort ();
6663    }
6664}
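
/* For illustration: for (reg:SI 0), i.e. %eax, the size codes select
   the nested register names: 'b' prints al, 'h' prints ah, 'w' prints
   ax and 'k' prints eax, each with a leading '%' in AT&T output.  An
   extended register such as r8 prints as r8b/r8w/r8d/r8 instead.  */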
6665
6666/* Locate some local-dynamic symbol still in use by this function
6667   so that we can print its name in some tls_local_dynamic_base
6668   pattern.  */
6669
6670static const char *
6671get_some_local_dynamic_name ()
6672{
6673  rtx insn;
6674
6675  if (cfun->machine->some_ld_name)
6676    return cfun->machine->some_ld_name;
6677
6678  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6679    if (INSN_P (insn)
6680	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6681      return cfun->machine->some_ld_name;
6682
6683  abort ();
6684}
6685
6686static int
6687get_some_local_dynamic_name_1 (px, data)
6688     rtx *px;
6689     void *data ATTRIBUTE_UNUSED;
6690{
6691  rtx x = *px;
6692
6693  if (GET_CODE (x) == SYMBOL_REF
6694      && local_dynamic_symbolic_operand (x, Pmode))
6695    {
6696      cfun->machine->some_ld_name = XSTR (x, 0);
6697      return 1;
6698    }
6699
6700  return 0;
6701}
6702
6703/* Meaning of CODE:
6704   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6705   C -- print opcode suffix for set/cmov insn.
6706   c -- like C, but print reversed condition
6707   F,f -- likewise, but for floating-point.
6708   O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6709        nothing
6710   R -- print the prefix for register names.
6711   z -- print the opcode suffix for the size of the current operand.
6712   * -- print a star (in certain assembler syntax)
6713   A -- print an absolute memory reference.
6714   w -- print the operand as if it's a "word" (HImode) even if it isn't.
6715   s -- print a shift double count, followed by the assembler's argument
6716	delimiter.
6717   b -- print the QImode name of the register for the indicated operand.
6718	%b0 would print %al if operands[0] is reg 0.
6719   w --  likewise, print the HImode name of the register.
6720   k --  likewise, print the SImode name of the register.
6721   q --  likewise, print the DImode name of the register.
6722   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6723   y -- print "st(0)" instead of "st" as a register.
6724   D -- print condition for SSE cmp instruction.
6725   P -- if PIC, print an @PLT suffix.
6726   X -- don't print any sort of PIC '@' suffix for a symbol.
6727   & -- print some in-use local-dynamic symbol name.
6728 */
6729
6730void
6731print_operand (file, x, code)
6732     FILE *file;
6733     rtx x;
6734     int code;
6735{
6736  if (code)
6737    {
6738      switch (code)
6739	{
6740	case '*':
6741	  if (ASSEMBLER_DIALECT == ASM_ATT)
6742	    putc ('*', file);
6743	  return;
6744
6745	case '&':
6746	  assemble_name (file, get_some_local_dynamic_name ());
6747	  return;
6748
6749	case 'A':
6750	  if (ASSEMBLER_DIALECT == ASM_ATT)
6751	    putc ('*', file);
6752	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
6753	    {
6754	      /* Intel syntax.  For absolute addresses, registers should not
6755		 be surrounded by brackets.  */
6756	      if (GET_CODE (x) != REG)
6757		{
6758		  putc ('[', file);
6759		  PRINT_OPERAND (file, x, 0);
6760		  putc (']', file);
6761		  return;
6762		}
6763	    }
6764	  else
6765	    abort ();
6766
6767	  PRINT_OPERAND (file, x, 0);
6768	  return;
6769
6770
6771	case 'L':
6772	  if (ASSEMBLER_DIALECT == ASM_ATT)
6773	    putc ('l', file);
6774	  return;
6775
6776	case 'W':
6777	  if (ASSEMBLER_DIALECT == ASM_ATT)
6778	    putc ('w', file);
6779	  return;
6780
6781	case 'B':
6782	  if (ASSEMBLER_DIALECT == ASM_ATT)
6783	    putc ('b', file);
6784	  return;
6785
6786	case 'Q':
6787	  if (ASSEMBLER_DIALECT == ASM_ATT)
6788	    putc ('l', file);
6789	  return;
6790
6791	case 'S':
6792	  if (ASSEMBLER_DIALECT == ASM_ATT)
6793	    putc ('s', file);
6794	  return;
6795
6796	case 'T':
6797	  if (ASSEMBLER_DIALECT == ASM_ATT)
6798	    putc ('t', file);
6799	  return;
6800
6801	case 'z':
6802	  /* 387 opcodes don't get size suffixes if the operands are
6803	     registers.  */
6804	  if (STACK_REG_P (x))
6805	    return;
6806
6807	  /* Likewise if using Intel opcodes.  */
6808	  if (ASSEMBLER_DIALECT == ASM_INTEL)
6809	    return;
6810
6811	  /* Derive the opcode suffix from the size of the operand.  */
6812	  switch (GET_MODE_SIZE (GET_MODE (x)))
6813	    {
6814	    case 2:
6815#ifdef HAVE_GAS_FILDS_FISTS
6816	      putc ('s', file);
6817#endif
6818	      return;
6819
6820	    case 4:
6821	      if (GET_MODE (x) == SFmode)
6822		{
6823		  putc ('s', file);
6824		  return;
6825		}
6826	      else
6827		putc ('l', file);
6828	      return;
6829
6830	    case 12:
6831	    case 16:
6832	      putc ('t', file);
6833	      return;
6834
6835	    case 8:
6836	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6837		{
6838#ifdef GAS_MNEMONICS
6839		  putc ('q', file);
6840#else
6841		  putc ('l', file);
6842		  putc ('l', file);
6843#endif
6844		}
6845	      else
6846	        putc ('l', file);
6847	      return;
6848
6849	    default:
6850	      abort ();
6851	    }
6852
6853	case 'b':
6854	case 'w':
6855	case 'k':
6856	case 'q':
6857	case 'h':
6858	case 'y':
6859	case 'X':
6860	case 'P':
6861	  break;
6862
6863	case 's':
6864	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6865	    {
6866	      PRINT_OPERAND (file, x, 0);
6867	      putc (',', file);
6868	    }
6869	  return;
6870
6871	case 'D':
6872	  /* A little bit of braindamage here.  The SSE compare instructions
6873	     use completely different names for the comparisons than the
6874	     fp conditional moves do.  */
6875	  switch (GET_CODE (x))
6876	    {
6877	    case EQ:
6878	    case UNEQ:
6879	      fputs ("eq", file);
6880	      break;
6881	    case LT:
6882	    case UNLT:
6883	      fputs ("lt", file);
6884	      break;
6885	    case LE:
6886	    case UNLE:
6887	      fputs ("le", file);
6888	      break;
6889	    case UNORDERED:
6890	      fputs ("unord", file);
6891	      break;
6892	    case NE:
6893	    case LTGT:
6894	      fputs ("neq", file);
6895	      break;
6896	    case UNGE:
6897	    case GE:
6898	      fputs ("nlt", file);
6899	      break;
6900	    case UNGT:
6901	    case GT:
6902	      fputs ("nle", file);
6903	      break;
6904	    case ORDERED:
6905	      fputs ("ord", file);
6906	      break;
6907	    default:
6908	      abort ();
6909	      break;
6910	    }
6911	  return;
6912	case 'O':
6913#ifdef CMOV_SUN_AS_SYNTAX
6914	  if (ASSEMBLER_DIALECT == ASM_ATT)
6915	    {
6916	      switch (GET_MODE (x))
6917		{
6918		case HImode: putc ('w', file); break;
6919		case SImode:
6920		case SFmode: putc ('l', file); break;
6921		case DImode:
6922		case DFmode: putc ('q', file); break;
6923		default: abort ();
6924		}
6925	      putc ('.', file);
6926	    }
6927#endif
6928	  return;
6929	case 'C':
6930	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6931	  return;
6932	case 'F':
6933#ifdef CMOV_SUN_AS_SYNTAX
6934	  if (ASSEMBLER_DIALECT == ASM_ATT)
6935	    putc ('.', file);
6936#endif
6937	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6938	  return;
6939
6940	  /* Like above, but print the reversed condition.  */
6941	case 'c':
6942	  /* Check to see if argument to %c is really a constant
6943	     and not a condition code which needs to be reversed.  */
6944	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6945	    {
6946	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6947	      return;
6948	    }
6949	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6950	  return;
6951	case 'f':
6952#ifdef CMOV_SUN_AS_SYNTAX
6953	  if (ASSEMBLER_DIALECT == ASM_ATT)
6954	    putc ('.', file);
6955#endif
6956	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6957	  return;
6958	case '+':
6959	  {
6960	    rtx x;
6961
6962	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6963	      return;
6964
6965	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6966	    if (x)
6967	      {
6968		int pred_val = INTVAL (XEXP (x, 0));
6969
6970		if (pred_val < REG_BR_PROB_BASE * 45 / 100
6971		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
6972		  {
6973		    int taken = pred_val > REG_BR_PROB_BASE / 2;
6974		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
6975
6976		    /* Emit hints only where the default branch prediction
6977		       heuristics would fail.  */
6978		    if (taken != cputaken)
6979		      {
6980			/* We use 3e (DS) prefix for taken branches and
6981			   2e (CS) prefix for not taken branches.  */
6982			if (taken)
6983			  fputs ("ds ; ", file);
6984			else
6985			  fputs ("cs ; ", file);
6986		      }
6987		  }
6988	      }
6989	    return;
6990	  }
6991	default:
6992	    output_operand_lossage ("invalid operand code `%c'", code);
6993	}
6994    }
6995
6996  if (GET_CODE (x) == REG)
6997    {
6998      PRINT_REG (x, code, file);
6999    }
7000
7001  else if (GET_CODE (x) == MEM)
7002    {
7003      /* No `byte ptr' prefix for call instructions.  */
7004      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7005	{
7006	  const char * size;
7007	  switch (GET_MODE_SIZE (GET_MODE (x)))
7008	    {
7009	    case 1: size = "BYTE"; break;
7010	    case 2: size = "WORD"; break;
7011	    case 4: size = "DWORD"; break;
7012	    case 8: size = "QWORD"; break;
7013	    case 12: size = "XWORD"; break;
7014	    case 16: size = "XMMWORD"; break;
7015	    default:
7016	      abort ();
7017	    }
7018
7019	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
7020	  if (code == 'b')
7021	    size = "BYTE";
7022	  else if (code == 'w')
7023	    size = "WORD";
7024	  else if (code == 'k')
7025	    size = "DWORD";
7026
7027	  fputs (size, file);
7028	  fputs (" PTR ", file);
7029	}
7030
7031      x = XEXP (x, 0);
7032      if (flag_pic && CONSTANT_ADDRESS_P (x))
7033	output_pic_addr_const (file, x, code);
7034      /* Avoid (%rip) for call operands.  */
7035      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7036	       && GET_CODE (x) != CONST_INT)
7037	output_addr_const (file, x);
7038      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7039	output_operand_lossage ("invalid constraints for operand");
7040      else
7041	output_address (x);
7042    }
7043
7044  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7045    {
7046      REAL_VALUE_TYPE r;
7047      long l;
7048
7049      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7050      REAL_VALUE_TO_TARGET_SINGLE (r, l);
7051
7052      if (ASSEMBLER_DIALECT == ASM_ATT)
7053	putc ('$', file);
7054      fprintf (file, "0x%lx", l);
7055    }
7056
7057  /* These float cases don't actually occur as immediate operands.  */
7058  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7059    {
7060      char dstr[30];
7061
7062      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7063      fprintf (file, "%s", dstr);
7064    }
7065
7066  else if (GET_CODE (x) == CONST_DOUBLE
7067	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7068    {
7069      char dstr[30];
7070
7071      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7072      fprintf (file, "%s", dstr);
7073    }
7074
7075  else
7076    {
7077      if (code != 'P')
7078	{
7079	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7080	    {
7081	      if (ASSEMBLER_DIALECT == ASM_ATT)
7082		putc ('$', file);
7083	    }
7084	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7085		   || GET_CODE (x) == LABEL_REF)
7086	    {
7087	      if (ASSEMBLER_DIALECT == ASM_ATT)
7088		putc ('$', file);
7089	      else
7090		fputs ("OFFSET FLAT:", file);
7091	    }
7092	}
7093      if (GET_CODE (x) == CONST_INT)
7094	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7095      else if (flag_pic)
7096	output_pic_addr_const (file, x, code);
7097      else
7098	output_addr_const (file, x);
7099    }
7100}
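
/* Illustrative note (an addition, not from the original sources): for
   a QImode MEM operand under the Intel dialect the code above prints
   "BYTE PTR [...]", and the explicit size overrides behave as

	%b0  ->  BYTE PTR [...]
	%w0  ->  WORD PTR [...]
	%k0  ->  DWORD PTR [...]

   while the '+' code may prefix a conditional jump with "ds ; "
   (hint: taken) or "cs ; " (hint: not taken) when the profile
   disagrees with the CPU's static prediction.  */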
7101
7102/* Print a memory operand whose address is ADDR.  */
7103
7104void
7105print_operand_address (file, addr)
7106     FILE *file;
7107     register rtx addr;
7108{
7109  struct ix86_address parts;
7110  rtx base, index, disp;
7111  int scale;
7112
7113  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7114    {
7115      if (ASSEMBLER_DIALECT == ASM_INTEL)
7116	fputs ("DWORD PTR ", file);
7117      if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7118	putc ('%', file);
7119      if (TARGET_64BIT)
7120	fputs ("fs:0", file);
7121      else
7122	fputs ("gs:0", file);
7123      return;
7124    }
7125
7126  if (! ix86_decompose_address (addr, &parts))
7127    abort ();
7128
7129  base = parts.base;
7130  index = parts.index;
7131  disp = parts.disp;
7132  scale = parts.scale;
7133
7134  if (!base && !index)
7135    {
7136      /* A displacement-only address requires special attention.  */
7137
7138      if (GET_CODE (disp) == CONST_INT)
7139	{
7140	  if (ASSEMBLER_DIALECT == ASM_INTEL)
7141	    {
7142	      if (USER_LABEL_PREFIX[0] == 0)
7143		putc ('%', file);
7144	      fputs ("ds:", file);
7145	    }
7146	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7147	}
7148      else if (flag_pic)
7149	output_pic_addr_const (file, addr, 0);
7150      else
7151	output_addr_const (file, addr);
7152
7153      /* Use the one byte shorter RIP relative addressing in 64bit mode.  */
7154      if (TARGET_64BIT
7155	  && ((GET_CODE (addr) == SYMBOL_REF
7156	       && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7157	      || GET_CODE (addr) == LABEL_REF
7158	      || (GET_CODE (addr) == CONST
7159		  && GET_CODE (XEXP (addr, 0)) == PLUS
7160		  && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7161		      || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7162		  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7163	fputs ("(%rip)", file);
7164    }
7165  else
7166    {
7167      if (ASSEMBLER_DIALECT == ASM_ATT)
7168	{
7169	  if (disp)
7170	    {
7171	      if (flag_pic)
7172		output_pic_addr_const (file, disp, 0);
7173	      else if (GET_CODE (disp) == LABEL_REF)
7174		output_asm_label (disp);
7175	      else
7176		output_addr_const (file, disp);
7177	    }
7178
7179	  putc ('(', file);
7180	  if (base)
7181	    PRINT_REG (base, 0, file);
7182	  if (index)
7183	    {
7184	      putc (',', file);
7185	      PRINT_REG (index, 0, file);
7186	      if (scale != 1)
7187		fprintf (file, ",%d", scale);
7188	    }
7189	  putc (')', file);
7190	}
7191      else
7192	{
7193	  rtx offset = NULL_RTX;
7194
7195	  if (disp)
7196	    {
7197	      /* Pull out the offset of a symbol; print any symbol itself.  */
7198	      if (GET_CODE (disp) == CONST
7199		  && GET_CODE (XEXP (disp, 0)) == PLUS
7200		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7201		{
7202		  offset = XEXP (XEXP (disp, 0), 1);
7203		  disp = gen_rtx_CONST (VOIDmode,
7204					XEXP (XEXP (disp, 0), 0));
7205		}
7206
7207	      if (flag_pic)
7208		output_pic_addr_const (file, disp, 0);
7209	      else if (GET_CODE (disp) == LABEL_REF)
7210		output_asm_label (disp);
7211	      else if (GET_CODE (disp) == CONST_INT)
7212		offset = disp;
7213	      else
7214		output_addr_const (file, disp);
7215	    }
7216
7217	  putc ('[', file);
7218	  if (base)
7219	    {
7220	      PRINT_REG (base, 0, file);
7221	      if (offset)
7222		{
7223		  if (INTVAL (offset) >= 0)
7224		    putc ('+', file);
7225		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7226		}
7227	    }
7228	  else if (offset)
7229	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7230	  else
7231	    putc ('0', file);
7232
7233	  if (index)
7234	    {
7235	      putc ('+', file);
7236	      PRINT_REG (index, 0, file);
7237	      if (scale != 1)
7238		fprintf (file, "*%d", scale);
7239	    }
7240	  putc (']', file);
7241	}
7242    }
7243}
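
/* Illustrative example (added): for base = %ebx, index = %esi,
   scale = 4 and disp = 8, the AT&T branch above prints

	8(%ebx,%esi,4)

   while the Intel branch prints

	[ebx+8+esi*4]

   (any size prefix such as "DWORD PTR" comes from print_operand).  */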
7244
7245bool
7246output_addr_const_extra (file, x)
7247     FILE *file;
7248     rtx x;
7249{
7250  rtx op;
7251
7252  if (GET_CODE (x) != UNSPEC)
7253    return false;
7254
7255  op = XVECEXP (x, 0, 0);
7256  switch (XINT (x, 1))
7257    {
7258    case UNSPEC_GOTTPOFF:
7259      output_addr_const (file, op);
7260      /* FIXME: This might be @TPOFF in Sun ld.  */
7261      fputs ("@GOTTPOFF", file);
7262      break;
7263    case UNSPEC_TPOFF:
7264      output_addr_const (file, op);
7265      fputs ("@TPOFF", file);
7266      break;
7267    case UNSPEC_NTPOFF:
7268      output_addr_const (file, op);
7269      if (TARGET_64BIT)
7270	fputs ("@TPOFF", file);
7271      else
7272	fputs ("@NTPOFF", file);
7273      break;
7274    case UNSPEC_DTPOFF:
7275      output_addr_const (file, op);
7276      fputs ("@DTPOFF", file);
7277      break;
7278    case UNSPEC_GOTNTPOFF:
7279      output_addr_const (file, op);
7280      if (TARGET_64BIT)
7281	fputs ("@GOTTPOFF(%rip)", file);
7282      else
7283	fputs ("@GOTNTPOFF", file);
7284      break;
7285    case UNSPEC_INDNTPOFF:
7286      output_addr_const (file, op);
7287      fputs ("@INDNTPOFF", file);
7288      break;
7289
7290    default:
7291      return false;
7292    }
7293
7294  return true;
7295}
7296
7297/* Split one or more DImode RTL references into pairs of SImode
7298   references.  The RTL can be REG, offsettable MEM, integer constant, or
7299   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
7300   split and "num" is its length.  lo_half and hi_half are output arrays
7301   that parallel "operands".  */
7302
7303void
7304split_di (operands, num, lo_half, hi_half)
7305     rtx operands[];
7306     int num;
7307     rtx lo_half[], hi_half[];
7308{
7309  while (num--)
7310    {
7311      rtx op = operands[num];
7312
7313      /* simplify_subreg refuses to split volatile memory references,
7314         but we still have to handle them.  */
7315      if (GET_CODE (op) == MEM)
7316	{
7317	  lo_half[num] = adjust_address (op, SImode, 0);
7318	  hi_half[num] = adjust_address (op, SImode, 4);
7319	}
7320      else
7321	{
7322	  lo_half[num] = simplify_gen_subreg (SImode, op,
7323					      GET_MODE (op) == VOIDmode
7324					      ? DImode : GET_MODE (op), 0);
7325	  hi_half[num] = simplify_gen_subreg (SImode, op,
7326					      GET_MODE (op) == VOIDmode
7327					      ? DImode : GET_MODE (op), 4);
7328	}
7329    }
7330}
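
/* Sketch of the effect of split_di (added for illustration): for a
   single DImode constant such as (const_int 0x100000002), lo_half[0]
   becomes (const_int 2) and hi_half[0] becomes (const_int 1); a
   (reg:DI) or MEM is split into the SImode subwords at byte offsets
   0 and 4, matching the little-endian layout used above.  */
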
7331/* Split one or more TImode RTL references into pairs of DImode
7332   references.  The RTL can be REG, offsettable MEM, integer constant, or
7333   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
7334   split and "num" is its length.  lo_half and hi_half are output arrays
7335   that parallel "operands".  */
7336
7337void
7338split_ti (operands, num, lo_half, hi_half)
7339     rtx operands[];
7340     int num;
7341     rtx lo_half[], hi_half[];
7342{
7343  while (num--)
7344    {
7345      rtx op = operands[num];
7346
7347      /* simplify_subreg refuses to split volatile memory references, but we
7348         still have to handle them.  */
7349      if (GET_CODE (op) == MEM)
7350	{
7351	  lo_half[num] = adjust_address (op, DImode, 0);
7352	  hi_half[num] = adjust_address (op, DImode, 8);
7353	}
7354      else
7355	{
7356	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7357	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7358	}
7359    }
7360}
7361
7362/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7363   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
7364   is the expression of the binary operation.  The output may either be
7365   emitted here, or returned to the caller, like all output_* functions.
7366
7367   There is no guarantee that the operands are the same mode, as they
7368   might be within FLOAT or FLOAT_EXTEND expressions.  */
7369
7370#ifndef SYSV386_COMPAT
7371/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
7372   wants to fix the assemblers because that causes incompatibility
7373   with gcc.  No-one wants to fix gcc because that causes
7374   incompatibility with assemblers...  You can use the option of
7375   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
7376#define SYSV386_COMPAT 1
7377#endif
7378
7379const char *
7380output_387_binary_op (insn, operands)
7381     rtx insn;
7382     rtx *operands;
7383{
7384  static char buf[30];
7385  const char *p;
7386  const char *ssep;
7387  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7388
7389#ifdef ENABLE_CHECKING
7390  /* Even if we do not want to check the inputs, this documents input
7391  /* Even if we do not want to check the inputs, this documents the input
7392     constraints, which helps in understanding the following code.  */
7393      && ((REG_P (operands[1])
7394	   && REGNO (operands[0]) == REGNO (operands[1])
7395	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7396	  || (REG_P (operands[2])
7397	      && REGNO (operands[0]) == REGNO (operands[2])
7398	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7399      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7400    ; /* ok */
7401  else if (!is_sse)
7402    abort ();
7403#endif
7404
7405  switch (GET_CODE (operands[3]))
7406    {
7407    case PLUS:
7408      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7409	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7410	p = "fiadd";
7411      else
7412	p = "fadd";
7413      ssep = "add";
7414      break;
7415
7416    case MINUS:
7417      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7418	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7419	p = "fisub";
7420      else
7421	p = "fsub";
7422      ssep = "sub";
7423      break;
7424
7425    case MULT:
7426      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7427	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7428	p = "fimul";
7429      else
7430	p = "fmul";
7431      ssep = "mul";
7432      break;
7433
7434    case DIV:
7435      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7436	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7437	p = "fidiv";
7438      else
7439	p = "fdiv";
7440      ssep = "div";
7441      break;
7442
7443    default:
7444      abort ();
7445    }
7446
7447  if (is_sse)
7448   {
7449      strcpy (buf, ssep);
7450      if (GET_MODE (operands[0]) == SFmode)
7451	strcat (buf, "ss\t{%2, %0|%0, %2}");
7452      else
7453	strcat (buf, "sd\t{%2, %0|%0, %2}");
7454      return buf;
7455   }
7456  strcpy (buf, p);
7457
7458  switch (GET_CODE (operands[3]))
7459    {
7460    case MULT:
7461    case PLUS:
7462      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7463	{
7464	  rtx temp = operands[2];
7465	  operands[2] = operands[1];
7466	  operands[1] = temp;
7467	}
7468
7469      /* We now know that operands[0] == operands[1].  */
7470
7471      if (GET_CODE (operands[2]) == MEM)
7472	{
7473	  p = "%z2\t%2";
7474	  break;
7475	}
7476
7477      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7478	{
7479	  if (STACK_TOP_P (operands[0]))
7480	    /* How is it that we are storing to a dead operand[2]?
7481	       Well, presumably operands[1] is dead too.  We can't
7482	       store the result to st(0) as st(0) gets popped on this
7483	       instruction.  Instead store to operands[2] (which I
7484	       think has to be st(1)).  st(1) will be popped later.
7485	       gcc <= 2.8.1 didn't have this check and generated
7486	       assembly code that the Unixware assembler rejected.  */
7487	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7488	  else
7489	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7490	  break;
7491	}
7492
7493      if (STACK_TOP_P (operands[0]))
7494	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7495      else
7496	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
7497      break;
7498
7499    case MINUS:
7500    case DIV:
7501      if (GET_CODE (operands[1]) == MEM)
7502	{
7503	  p = "r%z1\t%1";
7504	  break;
7505	}
7506
7507      if (GET_CODE (operands[2]) == MEM)
7508	{
7509	  p = "%z2\t%2";
7510	  break;
7511	}
7512
7513      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7514	{
7515#if SYSV386_COMPAT
7516	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7517	     derived assemblers, confusingly reverse the direction of
7518	     the operation for fsub{r} and fdiv{r} when the
7519	     destination register is not st(0).  The Intel assembler
7520	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
7521	     figure out what the hardware really does.  */
7522	  if (STACK_TOP_P (operands[0]))
7523	    p = "{p\t%0, %2|rp\t%2, %0}";
7524	  else
7525	    p = "{rp\t%2, %0|p\t%0, %2}";
7526#else
7527	  if (STACK_TOP_P (operands[0]))
7528	    /* As above for fmul/fadd, we can't store to st(0).  */
7529	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7530	  else
7531	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7532#endif
7533	  break;
7534	}
7535
7536      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7537	{
7538#if SYSV386_COMPAT
7539	  if (STACK_TOP_P (operands[0]))
7540	    p = "{rp\t%0, %1|p\t%1, %0}";
7541	  else
7542	    p = "{p\t%1, %0|rp\t%0, %1}";
7543#else
7544	  if (STACK_TOP_P (operands[0]))
7545	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
7546	  else
7547	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
7548#endif
7549	  break;
7550	}
7551
7552      if (STACK_TOP_P (operands[0]))
7553	{
7554	  if (STACK_TOP_P (operands[1]))
7555	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7556	  else
7557	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
7558	  break;
7559	}
7560      else if (STACK_TOP_P (operands[1]))
7561	{
7562#if SYSV386_COMPAT
7563	  p = "{\t%1, %0|r\t%0, %1}";
7564#else
7565	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
7566#endif
7567	}
7568      else
7569	{
7570#if SYSV386_COMPAT
7571	  p = "{r\t%2, %0|\t%0, %2}";
7572#else
7573	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
7574#endif
7575	}
7576      break;
7577
7578    default:
7579      abort ();
7580    }
7581
7582  strcat (buf, p);
7583  return buf;
7584}
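
/* Worked example (illustrative): for (set (reg:SF xmm0)
   (plus:SF (reg:SF xmm0) (reg:SF xmm1))) the SSE path above returns
   "addss\t{%2, %0|%0, %2}", i.e. "addss %xmm1, %xmm0" in AT&T syntax.
   On the 387 side, a PLUS whose operands[2] dies takes one of the
   popping "faddp" templates instead, as selected above.  */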
7585
7586/* Output code to initialize the control word copies used by the
7587   trunc?f?i patterns.  NORMAL is set to the current control word, while
7588   ROUND_DOWN is set to a control word that rounds towards zero.  */
7589void
7590emit_i387_cw_initialization (normal, round_down)
7591     rtx normal, round_down;
7592{
7593  rtx reg = gen_reg_rtx (HImode);
7594
7595  emit_insn (gen_x86_fnstcw_1 (normal));
7596  emit_move_insn (reg, normal);
7597  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7598      && !TARGET_64BIT)
7599    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7600  else
7601    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7602  emit_move_insn (round_down, reg);
7603}
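
/* Background note (added for clarity, not from the original sources):
   bits 10-11 of the 387 control word form the rounding-control field,
   so OR-ing in 0xc00 sets RC = 11, i.e. round toward zero, as the
   trunc?f?i patterns require.  The movsi_insv_1 path writes the value
   through the high byte instead of using a 16-bit OR, which is cheaper
   on CPUs that do not suffer partial register stalls (the precision
   control bits it may disturb do not affect the later fistp).  */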
7604
7605/* Output code for INSN to convert a float to a signed int.  OPERANDS
7606   are the insn operands.  The output may be [HSD]Imode and the input
7607   operand may be [SDX]Fmode.  */
7608
7609const char *
7610output_fix_trunc (insn, operands)
7611     rtx insn;
7612     rtx *operands;
7613{
7614  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7615  int dimode_p = GET_MODE (operands[0]) == DImode;
7616
7617  /* Jump through a hoop or two for DImode, since the hardware has no
7618     non-popping instruction.  We used to do this a different way, but
7619     that was somewhat fragile and broke with post-reload splitters.  */
7620  if (dimode_p && !stack_top_dies)
7621    output_asm_insn ("fld\t%y1", operands);
7622
7623  if (!STACK_TOP_P (operands[1]))
7624    abort ();
7625
7626  if (GET_CODE (operands[0]) != MEM)
7627    abort ();
7628
7629  output_asm_insn ("fldcw\t%3", operands);
7630  if (stack_top_dies || dimode_p)
7631    output_asm_insn ("fistp%z0\t%0", operands);
7632  else
7633    output_asm_insn ("fist%z0\t%0", operands);
7634  output_asm_insn ("fldcw\t%2", operands);
7635
7636  return "";
7637}
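
/* For illustration, the sequence emitted above is

	fldcw	%3	(switch to the truncating control word)
	fist(p)%z0 %0	(store the integer, popping if the value dies
			 or for DImode)
	fldcw	%2	(restore the original control word)

   preceded by "fld %y1" in the DImode case when the stack top would
   not otherwise die, since the DImode store always pops.  */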
7638
7639/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
7640   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
7641   when fucom should be used.  */
7642
7643const char *
7644output_fp_compare (insn, operands, eflags_p, unordered_p)
7645     rtx insn;
7646     rtx *operands;
7647     int eflags_p, unordered_p;
7648{
7649  int stack_top_dies;
7650  rtx cmp_op0 = operands[0];
7651  rtx cmp_op1 = operands[1];
7652  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7653
7654  if (eflags_p == 2)
7655    {
7656      cmp_op0 = cmp_op1;
7657      cmp_op1 = operands[2];
7658    }
7659  if (is_sse)
7660    {
7661      if (GET_MODE (operands[0]) == SFmode)
7662	if (unordered_p)
7663	  return "ucomiss\t{%1, %0|%0, %1}";
7664	else
7665	  return "comiss\t{%1, %0|%0, %1}";
7666      else
7667	if (unordered_p)
7668	  return "ucomisd\t{%1, %0|%0, %1}";
7669	else
7670	  return "comisd\t{%1, %0|%0, %1}";
7671    }
7672
7673  if (! STACK_TOP_P (cmp_op0))
7674    abort ();
7675
7676  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7677
7678  if (STACK_REG_P (cmp_op1)
7679      && stack_top_dies
7680      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7681      && REGNO (cmp_op1) != FIRST_STACK_REG)
7682    {
7683      /* If both the top of the 387 stack and the other operand (also a
7684	 stack register) die, then this must be a `fcompp' float
7685	 compare.  */
7686
7687      if (eflags_p == 1)
7688	{
7689	  /* There is no double popping fcomi variant.  Fortunately,
7690	     eflags is immune from the fstp's cc clobbering.  */
7691	  if (unordered_p)
7692	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7693	  else
7694	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7695	  return "fstp\t%y0";
7696	}
7697      else
7698	{
7699	  if (eflags_p == 2)
7700	    {
7701	      if (unordered_p)
7702		return "fucompp\n\tfnstsw\t%0";
7703	      else
7704		return "fcompp\n\tfnstsw\t%0";
7705	    }
7706	  else
7707	    {
7708	      if (unordered_p)
7709		return "fucompp";
7710	      else
7711		return "fcompp";
7712	    }
7713	}
7714    }
7715  else
7716    {
7717      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
7718
7719      static const char * const alt[24] =
7720      {
7721	"fcom%z1\t%y1",
7722	"fcomp%z1\t%y1",
7723	"fucom%z1\t%y1",
7724	"fucomp%z1\t%y1",
7725
7726	"ficom%z1\t%y1",
7727	"ficomp%z1\t%y1",
7728	NULL,
7729	NULL,
7730
7731	"fcomi\t{%y1, %0|%0, %y1}",
7732	"fcomip\t{%y1, %0|%0, %y1}",
7733	"fucomi\t{%y1, %0|%0, %y1}",
7734	"fucomip\t{%y1, %0|%0, %y1}",
7735
7736	NULL,
7737	NULL,
7738	NULL,
7739	NULL,
7740
7741	"fcom%z2\t%y2\n\tfnstsw\t%0",
7742	"fcomp%z2\t%y2\n\tfnstsw\t%0",
7743	"fucom%z2\t%y2\n\tfnstsw\t%0",
7744	"fucomp%z2\t%y2\n\tfnstsw\t%0",
7745
7746	"ficom%z2\t%y2\n\tfnstsw\t%0",
7747	"ficomp%z2\t%y2\n\tfnstsw\t%0",
7748	NULL,
7749	NULL
7750      };
7751
7752      int mask;
7753      const char *ret;
7754
7755      mask  = eflags_p << 3;
7756      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7757      mask |= unordered_p << 1;
7758      mask |= stack_top_dies;
7759
7760      if (mask >= 24)
7761	abort ();
7762      ret = alt[mask];
7763      if (ret == NULL)
7764	abort ();
7765
7766      return ret;
7767    }
7768}
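
/* Example of the mask encoding above (illustrative): a popping fcomi
   compare of two FP stack registers has eflags_p == 1, a non-integer
   operand, unordered_p == 0 and a dying stack top, so
   mask == (1 << 3) | 1 == 9 and alt[9] is "fcomip\t{%y1, %0|%0, %y1}".  */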
7769
7770void
7771ix86_output_addr_vec_elt (file, value)
7772     FILE *file;
7773     int value;
7774{
7775  const char *directive = ASM_LONG;
7776
7777  if (TARGET_64BIT)
7778    {
7779#ifdef ASM_QUAD
7780      directive = ASM_QUAD;
7781#else
7782      abort ();
7783#endif
7784    }
7785
7786  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7787}
7788
7789void
7790ix86_output_addr_diff_elt (file, value, rel)
7791     FILE *file;
7792     int value, rel;
7793{
7794  if (TARGET_64BIT)
7795    fprintf (file, "%s%s%d-%s%d\n",
7796	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
7797  else if (HAVE_AS_GOTOFF_IN_DATA)
7798    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7799#if TARGET_MACHO
7800  else if (TARGET_MACHO)
7801    fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7802	     machopic_function_base_name () + 1);
7803#endif
7804  else
7805    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7806		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7807}
7808
7809/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7810   for the target.  */
7811
7812void
7813ix86_expand_clear (dest)
7814     rtx dest;
7815{
7816  rtx tmp;
7817
7818  /* We play register width games, which are only valid after reload.  */
7819  if (!reload_completed)
7820    abort ();
7821
7822  /* Avoid HImode and its attendant prefix byte.  */
7823  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7824    dest = gen_rtx_REG (SImode, REGNO (dest));
7825
7826  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7827
7828  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
7829  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7830    {
7831      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7832      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7833    }
7834
7835  emit_insn (tmp);
7836}
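
/* Rationale with byte counts (illustrative): "xorl %eax, %eax" is a
   2-byte encoding while "movl $0, %eax" takes 5 bytes, but the xor
   clobbers the flags, hence the explicit CLOBBER of hard register 17
   (the flags register) attached above for the xor form.  */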
7837
7838/* X is an unchanging MEM.  If it is a constant pool reference, return
7839   the constant pool rtx, else NULL.  */
7840
7841static rtx
7842maybe_get_pool_constant (x)
7843     rtx x;
7844{
7845  x = XEXP (x, 0);
7846
7847  if (flag_pic && ! TARGET_64BIT)
7848    {
7849      if (GET_CODE (x) != PLUS)
7850	return NULL_RTX;
7851      if (XEXP (x, 0) != pic_offset_table_rtx)
7852	return NULL_RTX;
7853      x = XEXP (x, 1);
7854      if (GET_CODE (x) != CONST)
7855	return NULL_RTX;
7856      x = XEXP (x, 0);
7857      if (GET_CODE (x) != UNSPEC)
7858	return NULL_RTX;
7859      if (XINT (x, 1) != UNSPEC_GOTOFF)
7860	return NULL_RTX;
7861      x = XVECEXP (x, 0, 0);
7862    }
7863
7864  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7865    return get_pool_constant (x);
7866
7867  return NULL_RTX;
7868}
7869
7870void
7871ix86_expand_move (mode, operands)
7872     enum machine_mode mode;
7873     rtx operands[];
7874{
7875  int strict = (reload_in_progress || reload_completed);
7876  rtx insn, op0, op1, tmp;
7877
7878  op0 = operands[0];
7879  op1 = operands[1];
7880
7881  if (tls_symbolic_operand (op1, Pmode))
7882    {
7883      op1 = legitimize_address (op1, op1, VOIDmode);
7884      if (GET_CODE (op0) == MEM)
7885	{
7886	  tmp = gen_reg_rtx (mode);
7887	  emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7888	  op1 = tmp;
7889	}
7890    }
7891  else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7892    {
7893#if TARGET_MACHO
7894      if (MACHOPIC_PURE)
7895	{
7896	  rtx temp = ((reload_in_progress
7897		       || ((op0 && GET_CODE (op0) == REG)
7898			   && mode == Pmode))
7899		      ? op0 : gen_reg_rtx (Pmode));
7900	  op1 = machopic_indirect_data_reference (op1, temp);
7901	  op1 = machopic_legitimize_pic_address (op1, mode,
7902						 temp == op1 ? 0 : temp);
7903	}
7904      else
7905	{
7906	  if (MACHOPIC_INDIRECT)
7907	    op1 = machopic_indirect_data_reference (op1, 0);
7908	}
7909      if (op0 != op1)
7910	{
7911	  insn = gen_rtx_SET (VOIDmode, op0, op1);
7912	  emit_insn (insn);
7913	}
7914      return;
7915#endif /* TARGET_MACHO */
7916      if (GET_CODE (op0) == MEM)
7917	op1 = force_reg (Pmode, op1);
7918      else
7919	{
7920	  rtx temp = op0;
7921	  if (GET_CODE (temp) != REG)
7922	    temp = gen_reg_rtx (Pmode);
7923	  temp = legitimize_pic_address (op1, temp);
7924	  if (temp == op0)
7925	    return;
7926	  op1 = temp;
7927	}
7928    }
7929  else
7930    {
7931      if (GET_CODE (op0) == MEM
7932	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7933	      || !push_operand (op0, mode))
7934	  && GET_CODE (op1) == MEM)
7935	op1 = force_reg (mode, op1);
7936
7937      if (push_operand (op0, mode)
7938	  && ! general_no_elim_operand (op1, mode))
7939	op1 = copy_to_mode_reg (mode, op1);
7940
7941      /* Force large constants in 64bit compilation into a register
7942	 to get them CSEed.  */
7943      if (TARGET_64BIT && mode == DImode
7944	  && immediate_operand (op1, mode)
7945	  && !x86_64_zero_extended_value (op1)
7946	  && !register_operand (op0, mode)
7947	  && optimize && !reload_completed && !reload_in_progress)
7948	op1 = copy_to_mode_reg (mode, op1);
7949
7950      if (FLOAT_MODE_P (mode))
7951	{
7952	  /* If we are loading a floating point constant to a register,
7953	     force the value to memory now, since we'll get better code
7954	     out the back end.  */
7955
7956	  if (strict)
7957	    ;
7958	  else if (GET_CODE (op1) == CONST_DOUBLE)
7959	    {
7960	      op1 = validize_mem (force_const_mem (mode, op1));
7961	      if (!register_operand (op0, mode))
7962		{
7963		  rtx temp = gen_reg_rtx (mode);
7964		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
7965		  emit_move_insn (op0, temp);
7966		  return;
7967		}
7968	    }
7969	}
7970    }
7971
7972  insn = gen_rtx_SET (VOIDmode, op0, op1);
7973
7974  emit_insn (insn);
7975}
7976
7977void
7978ix86_expand_vector_move (mode, operands)
7979     enum machine_mode mode;
7980     rtx operands[];
7981{
7982  /* Force constants other than zero into memory.  We do not know how
7983     the instructions used to build constants modify the upper 64 bits
7984     of the register; once we have that information we may be able
7985     to handle some of them more efficiently.  */
7986  if ((reload_in_progress | reload_completed) == 0
7987      && register_operand (operands[0], mode)
7988      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
7989    {
7990      operands[1] = force_const_mem (mode, operands[1]);
7991      emit_move_insn (operands[0], operands[1]);
7992      return;
7993    }
7994
7995  /* Make operand1 a register if it isn't already.  */
7996  if (!no_new_pseudos
7997      && !register_operand (operands[0], mode)
7998      && !register_operand (operands[1], mode))
7999    {
8000      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8001      emit_move_insn (operands[0], temp);
8002      return;
8003    }
8004
8005  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8006}
8007
8008/* Attempt to expand a binary operator.  Make the expansion closer to the
8009   actual machine than just general_operand, which would allow 3 separate
8010   memory references (one output, two input) in a single insn.  */
8011
8012void
8013ix86_expand_binary_operator (code, mode, operands)
8014     enum rtx_code code;
8015     enum machine_mode mode;
8016     rtx operands[];
8017{
8018  int matching_memory;
8019  rtx src1, src2, dst, op, clob;
8020
8021  dst = operands[0];
8022  src1 = operands[1];
8023  src2 = operands[2];
8024
8025  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8026  if (GET_RTX_CLASS (code) == 'c'
8027      && (rtx_equal_p (dst, src2)
8028	  || immediate_operand (src1, mode)))
8029    {
8030      rtx temp = src1;
8031      src1 = src2;
8032      src2 = temp;
8033    }
8034
8035  /* If the destination is memory, and we do not have matching source
8036     operands, do things in registers.  */
8037  matching_memory = 0;
8038  if (GET_CODE (dst) == MEM)
8039    {
8040      if (rtx_equal_p (dst, src1))
8041	matching_memory = 1;
8042      else if (GET_RTX_CLASS (code) == 'c'
8043	       && rtx_equal_p (dst, src2))
8044	matching_memory = 2;
8045      else
8046	dst = gen_reg_rtx (mode);
8047    }
8048
8049  /* Both source operands cannot be in memory.  */
8050  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8051    {
8052      if (matching_memory != 2)
8053	src2 = force_reg (mode, src2);
8054      else
8055	src1 = force_reg (mode, src1);
8056    }
8057
8058  /* If the operation is not commutative, source 1 cannot be a constant
8059     or non-matching memory.  */
8060  if ((CONSTANT_P (src1)
8061       || (!matching_memory && GET_CODE (src1) == MEM))
8062      && GET_RTX_CLASS (code) != 'c')
8063    src1 = force_reg (mode, src1);
8064
8065  /* If optimizing, copy to regs to improve CSE */
8066  if (optimize && ! no_new_pseudos)
8067    {
8068      if (GET_CODE (dst) == MEM)
8069	dst = gen_reg_rtx (mode);
8070      if (GET_CODE (src1) == MEM)
8071	src1 = force_reg (mode, src1);
8072      if (GET_CODE (src2) == MEM)
8073	src2 = force_reg (mode, src2);
8074    }
8075
8076  /* Emit the instruction.  */
8077
8078  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8079  if (reload_in_progress)
8080    {
8081      /* Reload doesn't know about the flags register, and doesn't know that
8082         it doesn't want to clobber it.  We can only do this with PLUS.  */
8083      if (code != PLUS)
8084	abort ();
8085      emit_insn (op);
8086    }
8087  else
8088    {
8089      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8090      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8091    }
8092
8093  /* Fix up the destination if needed.  */
8094  if (dst != operands[0])
8095    emit_move_insn (operands[0], dst);
8096}
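
/* Worked example (illustrative): at -O0, expanding PLUS on
   (mem:SI A) = (mem:SI A) + (mem:SI B) finds matching_memory == 1,
   forces the second memory operand into a register, and emits

	(parallel [(set (mem:SI A) (plus:SI (mem:SI A) (reg:SI tmp)))
		   (clobber (reg:CC 17))])

   which is exactly the shape ix86_binary_operator_ok below accepts.
   When optimizing, the operands are first copied into registers to
   improve CSE, and the result is moved back to memory at the end.  */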
8097
8098/* Return TRUE or FALSE depending on whether the binary operator meets the
8099   appropriate constraints.  */
8100
8101int
8102ix86_binary_operator_ok (code, mode, operands)
8103     enum rtx_code code;
8104     enum machine_mode mode ATTRIBUTE_UNUSED;
8105     rtx operands[3];
8106{
8107  /* Both source operands cannot be in memory.  */
8108  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8109    return 0;
8110  /* If the operation is not commutable, source 1 cannot be a constant.  */
8111  /* If the operation is not commutative, source 1 cannot be a constant.  */
8112    return 0;
8113  /* If the destination is memory, we must have a matching source operand.  */
8114  if (GET_CODE (operands[0]) == MEM
8115      && ! (rtx_equal_p (operands[0], operands[1])
8116	    || (GET_RTX_CLASS (code) == 'c'
8117		&& rtx_equal_p (operands[0], operands[2]))))
8118    return 0;
8119  /* If the operation is not commutable and the source 1 is memory, we must
8120  /* If the operation is not commutative and source 1 is memory, we must
8121     have a matching destination.  */
8122      && GET_RTX_CLASS (code) != 'c'
8123      && ! rtx_equal_p (operands[0], operands[1]))
8124    return 0;
8125  return 1;
8126}
8127
8128/* Attempt to expand a unary operator.  Make the expansion closer to the
8129   actual machine than just general_operand, which would allow 2 separate
8130   memory references (one output, one input) in a single insn.  */
8131
8132void
8133ix86_expand_unary_operator (code, mode, operands)
8134     enum rtx_code code;
8135     enum machine_mode mode;
8136     rtx operands[];
8137{
8138  int matching_memory;
8139  rtx src, dst, op, clob;
8140
8141  dst = operands[0];
8142  src = operands[1];
8143
8144  /* If the destination is memory, and we do not have matching source
8145     operands, do things in registers.  */
8146  matching_memory = 0;
8147  if (GET_CODE (dst) == MEM)
8148    {
8149      if (rtx_equal_p (dst, src))
8150	matching_memory = 1;
8151      else
8152	dst = gen_reg_rtx (mode);
8153    }
8154
8155  /* When source operand is memory, destination must match.  */
8156  if (!matching_memory && GET_CODE (src) == MEM)
8157    src = force_reg (mode, src);
8158
8159  /* If optimizing, copy to regs to improve CSE */
8160  if (optimize && ! no_new_pseudos)
8161    {
8162      if (GET_CODE (dst) == MEM)
8163	dst = gen_reg_rtx (mode);
8164      if (GET_CODE (src) == MEM)
8165	src = force_reg (mode, src);
8166    }
8167
8168  /* Emit the instruction.  */
8169
8170  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8171  if (reload_in_progress || code == NOT)
8172    {
8173      /* Reload doesn't know about the flags register, and doesn't know that
8174         it doesn't want to clobber it.  */
8175      if (code != NOT)
8176        abort ();
8177      emit_insn (op);
8178    }
8179  else
8180    {
8181      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8182      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8183    }
8184
8185  /* Fix up the destination if needed.  */
8186  if (dst != operands[0])
8187    emit_move_insn (operands[0], dst);
8188}
8189
8190/* Return TRUE or FALSE depending on whether the unary operator meets the
8191   appropriate constraints.  */
8192
8193int
8194ix86_unary_operator_ok (code, mode, operands)
8195     enum rtx_code code ATTRIBUTE_UNUSED;
8196     enum machine_mode mode ATTRIBUTE_UNUSED;
8197     rtx operands[2] ATTRIBUTE_UNUSED;
8198{
8199  /* If one of operands is memory, source and destination must match.  */
8200  if ((GET_CODE (operands[0]) == MEM
8201       || GET_CODE (operands[1]) == MEM)
8202      && ! rtx_equal_p (operands[0], operands[1]))
8203    return FALSE;
8204  return TRUE;
8205}
8206
8207/* Return TRUE or FALSE depending on whether the first SET in INSN
8208   has source and destination with matching CC modes, and that the
8209   CC mode is at least as constrained as REQ_MODE.  */
8210
8211int
8212ix86_match_ccmode (insn, req_mode)
8213     rtx insn;
8214     enum machine_mode req_mode;
8215{
8216  rtx set;
8217  enum machine_mode set_mode;
8218
8219  set = PATTERN (insn);
8220  if (GET_CODE (set) == PARALLEL)
8221    set = XVECEXP (set, 0, 0);
8222  if (GET_CODE (set) != SET)
8223    abort ();
8224  if (GET_CODE (SET_SRC (set)) != COMPARE)
8225    abort ();
8226
8227  set_mode = GET_MODE (SET_DEST (set));
8228  switch (set_mode)
8229    {
8230    case CCNOmode:
8231      if (req_mode != CCNOmode
8232	  && (req_mode != CCmode
8233	      || XEXP (SET_SRC (set), 1) != const0_rtx))
8234	return 0;
8235      break;
8236    case CCmode:
8237      if (req_mode == CCGCmode)
8238	return 0;
8239      /* FALLTHRU */
8240    case CCGCmode:
8241      if (req_mode == CCGOCmode || req_mode == CCNOmode)
8242	return 0;
8243      /* FALLTHRU */
8244    case CCGOCmode:
8245      if (req_mode == CCZmode)
8246	return 0;
8247      /* FALLTHRU */
8248    case CCZmode:
8249      break;
8250
8251    default:
8252      abort ();
8253    }
8254
8255  return (GET_MODE (SET_SRC (set)) == set_mode);
8256}
8257
8258/* Generate insn patterns to do an integer compare of OPERANDS.  */
8259
8260static rtx
8261ix86_expand_int_compare (code, op0, op1)
8262     enum rtx_code code;
8263     rtx op0, op1;
8264{
8265  enum machine_mode cmpmode;
8266  rtx tmp, flags;
8267
8268  cmpmode = SELECT_CC_MODE (code, op0, op1);
8269  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8270
8271  /* This is very simple, but making the interface the same as in the
8272     FP case makes the rest of the code easier.  */
8273  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8274  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8275
8276  /* Return the test that should be put into the flags user, i.e.
8277     the bcc, scc, or cmov instruction.  */
8278  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8279}
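
/* For example (illustrative): comparing two SImode registers with LT
   emits

	(set (reg:CCGC 17) (compare:CCGC (reg:SI a) (reg:SI b)))

   and returns (lt (reg:CCGC 17) (const_int 0)) for the branch or setcc
   expander to consume; SELECT_CC_MODE resolves to ix86_cc_mode below,
   which picks CCGCmode here because op1 is not const0_rtx.  */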
8280
8281/* Figure out whether to use ordered or unordered fp comparisons.
8282   Return the appropriate mode to use.  */
8283
8284enum machine_mode
8285ix86_fp_compare_mode (code)
8286     enum rtx_code code ATTRIBUTE_UNUSED;
8287{
8288  /* ??? In order to make all comparisons reversible, we do all comparisons
8289     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
8290     all forms of trapping and nontrapping comparisons, we can make inequality
8291     comparisons trapping again, since that results in better code when using
8292     FCOM based compares.  */
8293  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8294}
8295
8296enum machine_mode
8297ix86_cc_mode (code, op0, op1)
8298     enum rtx_code code;
8299     rtx op0, op1;
8300{
8301  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8302    return ix86_fp_compare_mode (code);
8303  switch (code)
8304    {
8305      /* Only zero flag is needed.  */
8306    case EQ:			/* ZF=0 */
8307    case NE:			/* ZF!=0 */
8308      return CCZmode;
8309      /* Codes needing carry flag.  */
8310    case GEU:			/* CF=0 */
8311    case GTU:			/* CF=0 & ZF=0 */
8312    case LTU:			/* CF=1 */
8313    case LEU:			/* CF=1 | ZF=1 */
8314      return CCmode;
8315      /* Codes possibly doable only with sign flag when
8316         comparing against zero.  */
8317    case GE:			/* SF=OF   or   SF=0 */
8318    case LT:			/* SF<>OF  or   SF=1 */
8319      if (op1 == const0_rtx)
8320	return CCGOCmode;
8321      else
8322	/* For other cases Carry flag is not required.  */
8323	return CCGCmode;
8324      /* Codes doable only with sign flag when comparing
8325         against zero, but for which we have no jump instruction, so we
8326         need to use relational tests against overflow, which thus needs
8327         to be zero.  */
8328    case GT:			/* ZF=0 & SF=OF */
8329    case LE:			/* ZF=1 | SF<>OF */
8330      if (op1 == const0_rtx)
8331	return CCNOmode;
8332      else
8333	return CCGCmode;
8334      /* The strcmp pattern does (use flags), and combine may ask us for a
8335	 proper mode.  */
8336    case USE:
8337      return CCmode;
8338    default:
8339      abort ();
8340    }
8341}
8342
8343/* Return true if we should use an FCOMI instruction for this fp comparison.  */
8344
8345int
8346ix86_use_fcomi_compare (code)
8347     enum rtx_code code ATTRIBUTE_UNUSED;
8348{
8349  enum rtx_code swapped_code = swap_condition (code);
8350  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8351	  || (ix86_fp_comparison_cost (swapped_code)
8352	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
8353}
8354
8355/* Swap, force into registers, or otherwise massage the two operands
8356   to a fp comparison.  The operands are updated in place; the new
8357   comparison code is returned.  */
8358
8359static enum rtx_code
8360ix86_prepare_fp_compare_args (code, pop0, pop1)
8361     enum rtx_code code;
8362     rtx *pop0, *pop1;
8363{
8364  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8365  rtx op0 = *pop0, op1 = *pop1;
8366  enum machine_mode op_mode = GET_MODE (op0);
8367  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8368
8369  /* All of the unordered compare instructions only work on registers.
8370     The same is true of the XFmode compare instructions.  The same is
8371     true of the fcomi compare instructions.  */
8372
8373  if (!is_sse
8374      && (fpcmp_mode == CCFPUmode
8375	  || op_mode == XFmode
8376	  || op_mode == TFmode
8377	  || ix86_use_fcomi_compare (code)))
8378    {
8379      op0 = force_reg (op_mode, op0);
8380      op1 = force_reg (op_mode, op1);
8381    }
8382  else
8383    {
8384      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
8385	 things around if they appear profitable, otherwise force op0
8386	 into a register.  */
8387
8388      if (standard_80387_constant_p (op0) == 0
8389	  || (GET_CODE (op0) == MEM
8390	      && ! (standard_80387_constant_p (op1) == 0
8391		    || GET_CODE (op1) == MEM)))
8392	{
8393	  rtx tmp;
8394	  tmp = op0, op0 = op1, op1 = tmp;
8395	  code = swap_condition (code);
8396	}
8397
8398      if (GET_CODE (op0) != REG)
8399	op0 = force_reg (op_mode, op0);
8400
8401      if (CONSTANT_P (op1))
8402	{
8403	  if (standard_80387_constant_p (op1))
8404	    op1 = force_reg (op_mode, op1);
8405	  else
8406	    op1 = validize_mem (force_const_mem (op_mode, op1));
8407	}
8408    }
8409
8410  /* Try to rearrange the comparison to make it cheaper.  */
8411  if (ix86_fp_comparison_cost (code)
8412      > ix86_fp_comparison_cost (swap_condition (code))
8413      && (GET_CODE (op1) == REG || !no_new_pseudos))
8414    {
8415      rtx tmp;
8416      tmp = op0, op0 = op1, op1 = tmp;
8417      code = swap_condition (code);
8418      if (GET_CODE (op0) != REG)
8419	op0 = force_reg (op_mode, op0);
8420    }
8421
8422  *pop0 = op0;
8423  *pop1 = op1;
8424  return code;
8425}
8426
8427/* Convert comparison codes we use to represent FP comparison to integer
8428   code that will result in proper branch.  Return UNKNOWN if no such code
8429   is available.  */
8430static enum rtx_code
8431ix86_fp_compare_code_to_integer (code)
8432     enum rtx_code code;
8433{
8434  switch (code)
8435    {
8436    case GT:
8437      return GTU;
8438    case GE:
8439      return GEU;
8440    case ORDERED:
8441    case UNORDERED:
8442      return code;
8443      break;
8444    case UNEQ:
8445      return EQ;
8446      break;
8447    case UNLT:
8448      return LTU;
8449      break;
8450    case UNLE:
8451      return LEU;
8452      break;
8453    case LTGT:
8454      return NE;
8455      break;
8456    default:
8457      return UNKNOWN;
8458    }
8459}
8460
8461/* Split comparison code CODE into comparisons we can do using branch
8462   instructions.  BYPASS_CODE is the comparison code for the branch that
8463   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
8464   is not required, its value is set to NIL.
8465   We never require more than two branches.  */
8466static void
8467ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8468     enum rtx_code code, *bypass_code, *first_code, *second_code;
8469{
8470  *first_code = code;
8471  *bypass_code = NIL;
8472  *second_code = NIL;
8473
8474  /* The fcomi comparison sets flags as follows:
8475
8476     cmp    ZF PF CF
8477     >      0  0  0
8478     <      0  0  1
8479     =      1  0  0
8480     un     1  1  1 */
8481
8482  switch (code)
8483    {
8484    case GT:			/* GTU - CF=0 & ZF=0 */
8485    case GE:			/* GEU - CF=0 */
8486    case ORDERED:		/* PF=0 */
8487    case UNORDERED:		/* PF=1 */
8488    case UNEQ:			/* EQ - ZF=1 */
8489    case UNLT:			/* LTU - CF=1 */
8490    case UNLE:			/* LEU - CF=1 | ZF=1 */
8491    case LTGT:			/* EQ - ZF=0 */
8492      break;
8493    case LT:			/* LTU - CF=1 - fails on unordered */
8494      *first_code = UNLT;
8495      *bypass_code = UNORDERED;
8496      break;
8497    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
8498      *first_code = UNLE;
8499      *bypass_code = UNORDERED;
8500      break;
8501    case EQ:			/* EQ - ZF=1 - fails on unordered */
8502      *first_code = UNEQ;
8503      *bypass_code = UNORDERED;
8504      break;
8505    case NE:			/* NE - ZF=0 - fails on unordered */
8506      *first_code = LTGT;
8507      *second_code = UNORDERED;
8508      break;
8509    case UNGE:			/* GEU - CF=0 - fails on unordered */
8510      *first_code = GE;
8511      *second_code = UNORDERED;
8512      break;
8513    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
8514      *first_code = GT;
8515      *second_code = UNORDERED;
8516      break;
8517    default:
8518      abort ();
8519    }
8520  if (!TARGET_IEEE_FP)
8521    {
8522      *second_code = NIL;
8523      *bypass_code = NIL;
8524    }
8525}
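
/* Example of the splitting above (illustrative): under TARGET_IEEE_FP,
   LT - which would be wrong for unordered operands - becomes
   first_code = UNLT guarded by bypass_code = UNORDERED (branch around
   the test on NaN), while NE becomes first_code = LTGT plus
   second_code = UNORDERED, so the branch is also taken when either
   operand is a NaN, as != semantics require.  */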
8526
8527/* Return the cost of a comparison done with fcom + arithmetic operations
8528   on AX.  All following functions use the number of instructions as a cost
8529   metric.  In the future this should be tweaked to compute bytes for
8530   optimize_size and take into account instruction performance on various CPUs.  */
8531static int
8532ix86_fp_comparison_arithmetics_cost (code)
8533     enum rtx_code code;
8534{
8535  if (!TARGET_IEEE_FP)
8536    return 4;
8537  /* The cost of code output by ix86_expand_fp_compare.  */
8538  switch (code)
8539    {
8540    case UNLE:
8541    case UNLT:
8542    case LTGT:
8543    case GT:
8544    case GE:
8545    case UNORDERED:
8546    case ORDERED:
8547    case UNEQ:
8548      return 4;
8549      break;
8550    case LT:
8551    case NE:
8552    case EQ:
8553    case UNGE:
8554      return 5;
8555      break;
8556    case LE:
8557    case UNGT:
8558      return 6;
8559      break;
8560    default:
8561      abort ();
8562    }
8563}
8564
8565/* Return cost of comparison done using fcomi operation.
8566   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8567static int
8568ix86_fp_comparison_fcomi_cost (code)
8569     enum rtx_code code;
8570{
8571  enum rtx_code bypass_code, first_code, second_code;
8572  /* Return an arbitrarily high cost when the instruction is not
8573     supported - this prevents gcc from using it.  */
8574  if (!TARGET_CMOVE)
8575    return 1024;
8576  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8577  return (bypass_code != NIL || second_code != NIL) + 2;
8578}
8579
8580/* Return cost of comparison done using sahf operation.
8581   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8582static int
8583ix86_fp_comparison_sahf_cost (code)
8584     enum rtx_code code;
8585{
8586  enum rtx_code bypass_code, first_code, second_code;
8587  /* Return an arbitrarily high cost when the instruction is not
8588     preferred - this prevents gcc from using it.  */
8589  if (!TARGET_USE_SAHF && !optimize_size)
8590    return 1024;
8591  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8592  return (bypass_code != NIL || second_code != NIL) + 3;
8593}
8594
8595/* Compute cost of the comparison done using any method.
8596   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8597static int
8598ix86_fp_comparison_cost (code)
8599     enum rtx_code code;
8600{
8601  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8602  int min;
8603
8604  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8605  sahf_cost = ix86_fp_comparison_sahf_cost (code);
8606
8607  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8608  if (min > sahf_cost)
8609    min = sahf_cost;
8610  if (min > fcomi_cost)
8611    min = fcomi_cost;
8612  return min;
8613}
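
/* Worked numbers (illustrative): for EQ under TARGET_IEEE_FP, the
   arithmetics cost is 5, the sahf sequence costs one extra branch
   plus 3 = 4 (when sahf is preferred), and fcomi costs one extra
   branch plus 2 = 3 (when TARGET_CMOVE), so the fcomi sequence wins
   and is what ix86_expand_fp_compare below will emit.  */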
8614
8615/* Generate insn patterns to do a floating point compare of OPERANDS.  */
8616
8617static rtx
8618ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8619     enum rtx_code code;
8620     rtx op0, op1, scratch;
8621     rtx *second_test;
8622     rtx *bypass_test;
8623{
8624  enum machine_mode fpcmp_mode, intcmp_mode;
8625  rtx tmp, tmp2;
8626  int cost = ix86_fp_comparison_cost (code);
8627  enum rtx_code bypass_code, first_code, second_code;
8628
8629  fpcmp_mode = ix86_fp_compare_mode (code);
8630  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8631
8632  if (second_test)
8633    *second_test = NULL_RTX;
8634  if (bypass_test)
8635    *bypass_test = NULL_RTX;
8636
8637  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8638
8639  /* Do fcomi/sahf based test when profitable.  */
8640  if ((bypass_code == NIL || bypass_test)
8641      && (second_code == NIL || second_test)
8642      && ix86_fp_comparison_arithmetics_cost (code) > cost)
8643    {
8644      if (TARGET_CMOVE)
8645	{
8646	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8647	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8648			     tmp);
8649	  emit_insn (tmp);
8650	}
8651      else
8652	{
8653	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8654	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8655	  if (!scratch)
8656	    scratch = gen_reg_rtx (HImode);
8657	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8658	  emit_insn (gen_x86_sahf_1 (scratch));
8659	}
8660
8661      /* The FP codes work out to act like unsigned.  */
8662      intcmp_mode = fpcmp_mode;
8663      code = first_code;
8664      if (bypass_code != NIL)
8665	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8666				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
8667				       const0_rtx);
8668      if (second_code != NIL)
8669	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8670				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
8671				       const0_rtx);
8672    }
8673  else
8674    {
8675      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
8676      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8677      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8678      if (!scratch)
8679	scratch = gen_reg_rtx (HImode);
8680      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8681
8682      /* In the unordered case, we have to check C2 for NaN's, which
8683	 doesn't happen to work out to anything nice combination-wise.
8684	 So do some bit twiddling on the value we've got in AH to come
8685	 up with an appropriate set of condition codes.  */
8686
8687      intcmp_mode = CCNOmode;
8688      switch (code)
8689	{
8690	case GT:
8691	case UNGT:
8692	  if (code == GT || !TARGET_IEEE_FP)
8693	    {
8694	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8695	      code = EQ;
8696	    }
8697	  else
8698	    {
8699	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8700	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8701	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8702	      intcmp_mode = CCmode;
8703	      code = GEU;
8704	    }
8705	  break;
8706	case LT:
8707	case UNLT:
8708	  if (code == LT && TARGET_IEEE_FP)
8709	    {
8710	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8711	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8712	      intcmp_mode = CCmode;
8713	      code = EQ;
8714	    }
8715	  else
8716	    {
8717	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8718	      code = NE;
8719	    }
8720	  break;
8721	case GE:
8722	case UNGE:
8723	  if (code == GE || !TARGET_IEEE_FP)
8724	    {
8725	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8726	      code = EQ;
8727	    }
8728	  else
8729	    {
8730	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8731	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8732					     GEN_INT (0x01)));
8733	      code = NE;
8734	    }
8735	  break;
8736	case LE:
8737	case UNLE:
8738	  if (code == LE && TARGET_IEEE_FP)
8739	    {
8740	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8741	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8742	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8743	      intcmp_mode = CCmode;
8744	      code = LTU;
8745	    }
8746	  else
8747	    {
8748	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8749	      code = NE;
8750	    }
8751	  break;
8752	case EQ:
8753	case UNEQ:
8754	  if (code == EQ && TARGET_IEEE_FP)
8755	    {
8756	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8757	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8758	      intcmp_mode = CCmode;
8759	      code = EQ;
8760	    }
8761	  else
8762	    {
8763	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8764	      code = NE;
8765	      break;
8766	    }
8767	  break;
8768	case NE:
8769	case LTGT:
8770	  if (code == NE && TARGET_IEEE_FP)
8771	    {
8772	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8773	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8774					     GEN_INT (0x40)));
8775	      code = NE;
8776	    }
8777	  else
8778	    {
8779	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8780	      code = EQ;
8781	    }
8782	  break;
8783
8784	case UNORDERED:
8785	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8786	  code = NE;
8787	  break;
8788	case ORDERED:
8789	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8790	  code = EQ;
8791	  break;
8792
8793	default:
8794	  abort ();
8795	}
8796    }
8797
8798  /* Return the test that should be put into the flags user, i.e.
8799     the bcc, scc, or cmov instruction.  */
8800  return gen_rtx_fmt_ee (code, VOIDmode,
8801			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8802			 const0_rtx);
8803}
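
/* Note on the AH bit twiddling above (added for clarity): after
   "fnstsw %ax" the 387 condition codes sit in AH as C0 = 0x01,
   C2 = 0x04 and C3 = 0x40, so the mask 0x45 tests all three at once.
   E.g. the non-IEEE GT case emits "testb $0x45, %ah" and then tests
   for EQ, i.e. the branch is taken only when C0, C2 and C3 are all
   clear, which after fcom means strictly greater.  */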
8804
8805rtx
8806ix86_expand_compare (code, second_test, bypass_test)
8807     enum rtx_code code;
8808     rtx *second_test, *bypass_test;
8809{
8810  rtx op0, op1, ret;
8811  op0 = ix86_compare_op0;
8812  op1 = ix86_compare_op1;
8813
8814  if (second_test)
8815    *second_test = NULL_RTX;
8816  if (bypass_test)
8817    *bypass_test = NULL_RTX;
8818
8819  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8820    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8821				  second_test, bypass_test);
8822  else
8823    ret = ix86_expand_int_compare (code, op0, op1);
8824
8825  return ret;
8826}
8827
8828/* Return true if the CODE will result in nontrivial jump sequence.  */
8829bool
8830ix86_fp_jump_nontrivial_p (code)
8831    enum rtx_code code;
8832{
8833  enum rtx_code bypass_code, first_code, second_code;
8834  if (!TARGET_CMOVE)
8835    return true;
8836  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8837  return bypass_code != NIL || second_code != NIL;
8838}
8839
8840void
8841ix86_expand_branch (code, label)
8842     enum rtx_code code;
8843     rtx label;
8844{
8845  rtx tmp;
8846
8847  switch (GET_MODE (ix86_compare_op0))
8848    {
8849    case QImode:
8850    case HImode:
8851    case SImode:
8852      simple:
8853      tmp = ix86_expand_compare (code, NULL, NULL);
8854      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8855				  gen_rtx_LABEL_REF (VOIDmode, label),
8856				  pc_rtx);
8857      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8858      return;
8859
8860    case SFmode:
8861    case DFmode:
8862    case XFmode:
8863    case TFmode:
8864      {
8865	rtvec vec;
8866	int use_fcomi;
8867	enum rtx_code bypass_code, first_code, second_code;
8868
8869	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8870					     &ix86_compare_op1);
8871
8872	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8873
8874	/* Check whether we will use the natural sequence with one jump.  If
8875	   so, we can expand the jump early.  Otherwise delay expansion by
8876	   creating a compound insn so as not to confuse the optimizers.  */
8877	if (bypass_code == NIL && second_code == NIL
8878	    && TARGET_CMOVE)
8879	  {
8880	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8881				  gen_rtx_LABEL_REF (VOIDmode, label),
8882				  pc_rtx, NULL_RTX);
8883	  }
8884	else
8885	  {
8886	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
8887				  ix86_compare_op0, ix86_compare_op1);
8888	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8889					gen_rtx_LABEL_REF (VOIDmode, label),
8890					pc_rtx);
8891	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8892
8893	    use_fcomi = ix86_use_fcomi_compare (code);
8894	    vec = rtvec_alloc (3 + !use_fcomi);
8895	    RTVEC_ELT (vec, 0) = tmp;
8896	    RTVEC_ELT (vec, 1)
8897	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8898	    RTVEC_ELT (vec, 2)
8899	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8900	    if (! use_fcomi)
8901	      RTVEC_ELT (vec, 3)
8902		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8903
8904	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8905	  }
8906	return;
8907      }
8908
8909    case DImode:
8910      if (TARGET_64BIT)
8911	goto simple;
8912      /* Expand DImode branch into multiple compare+branch.  */
8913      {
8914	rtx lo[2], hi[2], label2;
8915	enum rtx_code code1, code2, code3;
8916
8917	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8918	  {
8919	    tmp = ix86_compare_op0;
8920	    ix86_compare_op0 = ix86_compare_op1;
8921	    ix86_compare_op1 = tmp;
8922	    code = swap_condition (code);
8923	  }
8924	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8925	split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8926
8927	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8928	   avoid two branches.  This costs one extra insn, so disable when
8929	   optimizing for size.  */
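	/* For example, on a 32-bit target "a == b" in DImode becomes roughly

		xorl	hi(b), hi(a)
		xorl	lo(b), lo(a)
		orl	hi(a), lo(a)
		je	label

	   - a single branch instead of two compare-and-branch pairs.  */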
8930
8931	if ((code == EQ || code == NE)
8932	    && (!optimize_size
8933	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
8934	  {
8935	    rtx xor0, xor1;
8936
8937	    xor1 = hi[0];
8938	    if (hi[1] != const0_rtx)
8939	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8940				   NULL_RTX, 0, OPTAB_WIDEN);
8941
8942	    xor0 = lo[0];
8943	    if (lo[1] != const0_rtx)
8944	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8945				   NULL_RTX, 0, OPTAB_WIDEN);
8946
8947	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8948				NULL_RTX, 0, OPTAB_WIDEN);
8949
8950	    ix86_compare_op0 = tmp;
8951	    ix86_compare_op1 = const0_rtx;
8952	    ix86_expand_branch (code, label);
8953	    return;
8954	  }
8955
8956	/* Otherwise, if we are doing less-than or greater-or-equal and
8957	   op1 is a constant whose low word is zero, we can just examine
8958	   the high word.  */
8959
8960	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8961	  switch (code)
8962	    {
8963	    case LT: case LTU: case GE: case GEU:
8964	      ix86_compare_op0 = hi[0];
8965	      ix86_compare_op1 = hi[1];
8966	      ix86_expand_branch (code, label);
8967	      return;
8968	    default:
8969	      break;
8970	    }
8971
8972	/* Otherwise, we need two or three jumps.  */
8973
8974	label2 = gen_label_rtx ();
8975
8976	code1 = code;
8977	code2 = swap_condition (code);
8978	code3 = unsigned_condition (code);
8979
8980	switch (code)
8981	  {
8982	  case LT: case GT: case LTU: case GTU:
8983	    break;
8984
8985	  case LE:   code1 = LT;  code2 = GT;  break;
8986	  case GE:   code1 = GT;  code2 = LT;  break;
8987	  case LEU:  code1 = LTU; code2 = GTU; break;
8988	  case GEU:  code1 = GTU; code2 = LTU; break;
8989
8990	  case EQ:   code1 = NIL; code2 = NE;  break;
8991	  case NE:   code2 = NIL; break;
8992
8993	  default:
8994	    abort ();
8995	  }
8996
8997	/*
8998	 * a < b =>
8999	 *    if (hi(a) < hi(b)) goto true;
9000	 *    if (hi(a) > hi(b)) goto false;
9001	 *    if (lo(a) < lo(b)) goto true;
9002	 *  false:
9003	 */
9004
9005	ix86_compare_op0 = hi[0];
9006	ix86_compare_op1 = hi[1];
9007
9008	if (code1 != NIL)
9009	  ix86_expand_branch (code1, label);
9010	if (code2 != NIL)
9011	  ix86_expand_branch (code2, label2);
9012
9013	ix86_compare_op0 = lo[0];
9014	ix86_compare_op1 = lo[1];
9015	ix86_expand_branch (code3, label);
9016
9017	if (code2 != NIL)
9018	  emit_label (label2);
9019	return;
9020      }
9021
9022    default:
9023      abort ();
9024    }
9025}
9026
9027/* Split a branch based on a floating point condition.  */
9028void
9029ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9030     enum rtx_code code;
9031     rtx op1, op2, target1, target2, tmp;
9032{
9033  rtx second, bypass;
9034  rtx label = NULL_RTX;
9035  rtx condition;
9036  int bypass_probability = -1, second_probability = -1, probability = -1;
9037  rtx i;
9038
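  /* Canonicalize so that the fall-through edge is TARGET2: if the caller
     passed pc_rtx as TARGET1 instead, swap the targets and reverse the
     condition, preserving unordered semantics.  */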
9039  if (target2 != pc_rtx)
9040    {
9041      rtx tmp = target2;
9042      code = reverse_condition_maybe_unordered (code);
9043      target2 = target1;
9044      target1 = tmp;
9045    }
9046
9047  condition = ix86_expand_fp_compare (code, op1, op2,
9048				      tmp, &second, &bypass);
9049
9050  if (split_branch_probability >= 0)
9051    {
9052      /* Distribute the probabilities across the jumps.
9053	 Assume that the BYPASS and SECOND jumps always test
9054	 for UNORDERED.  */
9055      probability = split_branch_probability;
9056
9057      /* A value of 1 is low enough that the probability does not
9058	 need to be updated.  Later we may run some experiments and see
9059	 whether unordered values are more frequent in practice.  */
9060      if (bypass)
9061	bypass_probability = 1;
9062      if (second)
9063	second_probability = 1;
9064    }
9065  if (bypass != NULL_RTX)
9066    {
9067      label = gen_label_rtx ();
9068      i = emit_jump_insn (gen_rtx_SET
9069			  (VOIDmode, pc_rtx,
9070			   gen_rtx_IF_THEN_ELSE (VOIDmode,
9071						 bypass,
9072						 gen_rtx_LABEL_REF (VOIDmode,
9073								    label),
9074						 pc_rtx)));
9075      if (bypass_probability >= 0)
9076	REG_NOTES (i)
9077	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
9078			       GEN_INT (bypass_probability),
9079			       REG_NOTES (i));
9080    }
9081  i = emit_jump_insn (gen_rtx_SET
9082		      (VOIDmode, pc_rtx,
9083		       gen_rtx_IF_THEN_ELSE (VOIDmode,
9084					     condition, target1, target2)));
9085  if (probability >= 0)
9086    REG_NOTES (i)
9087      = gen_rtx_EXPR_LIST (REG_BR_PROB,
9088			   GEN_INT (probability),
9089			   REG_NOTES (i));
9090  if (second != NULL_RTX)
9091    {
9092      i = emit_jump_insn (gen_rtx_SET
9093			  (VOIDmode, pc_rtx,
9094			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9095						 target2)));
9096      if (second_probability >= 0)
9097	REG_NOTES (i)
9098	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
9099			       GEN_INT (second_probability),
9100			       REG_NOTES (i));
9101    }
9102  if (label != NULL_RTX)
9103    emit_label (label);
9104}
9105
9106int
9107ix86_expand_setcc (code, dest)
9108     enum rtx_code code;
9109     rtx dest;
9110{
9111  rtx ret, tmp, tmpreg;
9112  rtx second_test, bypass_test;
9113
9114  if (GET_MODE (ix86_compare_op0) == DImode
9115      && !TARGET_64BIT)
9116    return 0; /* FAIL */
9117
9118  if (GET_MODE (dest) != QImode)
9119    abort ();
9120
9121  ret = ix86_expand_compare (code, &second_test, &bypass_test);
9122  PUT_MODE (ret, QImode);
9123
9124  tmp = dest;
9125  tmpreg = dest;
9126
9127  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9128  if (bypass_test || second_test)
9129    {
9130      rtx test = second_test;
9131      int bypass = 0;
9132      rtx tmp2 = gen_reg_rtx (QImode);
9133      if (bypass_test)
9134	{
9135	  if (second_test)
9136	    abort ();
9137	  test = bypass_test;
9138	  bypass = 1;
9139	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9140	}
9141      PUT_MODE (test, QImode);
9142      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9143
9144      if (bypass)
9145	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9146      else
9147	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9148    }
9149
9150  return 1; /* DONE */
9151}
9152
9153int
9154ix86_expand_int_movcc (operands)
9155     rtx operands[];
9156{
9157  enum rtx_code code = GET_CODE (operands[1]), compare_code;
9158  rtx compare_seq, compare_op;
9159  rtx second_test, bypass_test;
9160  enum machine_mode mode = GET_MODE (operands[0]);
9161
9162  /* When the compare code is not LTU or GEU, we can not use the sbbl case.
9163     When the comparison is done against an immediate, we can convert it to
9164     LTU or GEU by altering the integer.  */
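  /* E.g. the unsigned test "x <= 41" (LEU) becomes "x < 42" (LTU) and
     "x > 41" (GTU) becomes "x >= 42" (GEU); both LTU and GEU map directly
     onto the carry flag, which the sbb sequences below exploit.  */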
9165
9166  if ((code == LEU || code == GTU)
9167      && GET_CODE (ix86_compare_op1) == CONST_INT
9168      && mode != HImode
9169      && INTVAL (ix86_compare_op1) != -1
9170      /* For x86-64, the immediate field in the instruction is 32-bit
9171	 signed, so we can't increment a DImode value above 0x7fffffff.  */
9172      && (!TARGET_64BIT
9173	  || GET_MODE (ix86_compare_op0) != DImode
9174	  || INTVAL (ix86_compare_op1) != 0x7fffffff)
9175      && GET_CODE (operands[2]) == CONST_INT
9176      && GET_CODE (operands[3]) == CONST_INT)
9177    {
9178      if (code == LEU)
9179	code = LTU;
9180      else
9181	code = GEU;
9182      ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
9183				       GET_MODE (ix86_compare_op0));
9184    }
9185
9186  start_sequence ();
9187  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9188  compare_seq = get_insns ();
9189  end_sequence ();
9190
9191  compare_code = GET_CODE (compare_op);
9192
9193  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9194     HImode insns, we'd be swallowed in word prefix ops.  */
9195
9196  if (mode != HImode
9197      && (mode != DImode || TARGET_64BIT)
9198      && GET_CODE (operands[2]) == CONST_INT
9199      && GET_CODE (operands[3]) == CONST_INT)
9200    {
9201      rtx out = operands[0];
9202      HOST_WIDE_INT ct = INTVAL (operands[2]);
9203      HOST_WIDE_INT cf = INTVAL (operands[3]);
9204      HOST_WIDE_INT diff;
9205
9206      if ((compare_code == LTU || compare_code == GEU)
9207	  && !second_test && !bypass_test)
9208	{
9209	  /* Detect overlap between destination and compare sources.  */
9210	  rtx tmp = out;
9211
9212	  /* To simplify the rest of the code, restrict to the GEU case.  */
9213	  if (compare_code == LTU)
9214	    {
9215	      HOST_WIDE_INT tmp = ct;
9216	      ct = cf;
9217	      cf = tmp;
9218	      compare_code = reverse_condition (compare_code);
9219	      code = reverse_condition (code);
9220	    }
9221	  diff = ct - cf;
9222
9223	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9224	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
9225	    tmp = gen_reg_rtx (mode);
9226
9227	  emit_insn (compare_seq);
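	  /* The insn emitted here is "sbb tmp,tmp", i.e. tmp = -CF: all
	     ones when the carry flag is set (the GEU condition is false)
	     and zero when it is clear, giving a branchless 0/-1 mask.  */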
9228	  if (mode == DImode)
9229	    emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9230	  else
9231	    emit_insn (gen_x86_movsicc_0_m1 (tmp));
9232
9233	  if (diff == 1)
9234	    {
9235	      /*
9236	       * cmpl op0,op1
9237	       * sbbl dest,dest
9238	       * [addl dest, ct]
9239	       *
9240	       * Size 5 - 8.
9241	       */
9242	      if (ct)
9243	       	tmp = expand_simple_binop (mode, PLUS,
9244					   tmp, GEN_INT (ct),
9245					   tmp, 1, OPTAB_DIRECT);
9246	    }
9247	  else if (cf == -1)
9248	    {
9249	      /*
9250	       * cmpl op0,op1
9251	       * sbbl dest,dest
9252	       * orl $ct, dest
9253	       *
9254	       * Size 8.
9255	       */
9256	      tmp = expand_simple_binop (mode, IOR,
9257					 tmp, GEN_INT (ct),
9258					 tmp, 1, OPTAB_DIRECT);
9259	    }
9260	  else if (diff == -1 && ct)
9261	    {
9262	      /*
9263	       * cmpl op0,op1
9264	       * sbbl dest,dest
9265	       * notl dest
9266	       * [addl dest, cf]
9267	       *
9268	       * Size 8 - 11.
9269	       */
9270	      tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9271	      if (cf)
9272	       	tmp = expand_simple_binop (mode, PLUS,
9273					   tmp, GEN_INT (cf),
9274					   tmp, 1, OPTAB_DIRECT);
9275	    }
9276	  else
9277	    {
9278	      /*
9279	       * cmpl op0,op1
9280	       * sbbl dest,dest
9281	       * [notl dest]
9282	       * andl cf - ct, dest
9283	       * [addl dest, ct]
9284	       *
9285	       * Size 8 - 11.
9286	       */
9287
9288	      if (cf == 0)
9289		{
9290		  cf = ct;
9291		  ct = 0;
9292		  tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9293		}
9294
9295	      tmp = expand_simple_binop (mode, AND,
9296					 tmp,
9297					 gen_int_mode (cf - ct, mode),
9298					 tmp, 1, OPTAB_DIRECT);
9299	      if (ct)
9300	       	tmp = expand_simple_binop (mode, PLUS,
9301					   tmp, GEN_INT (ct),
9302					   tmp, 1, OPTAB_DIRECT);
9303	    }
9304
9305	  if (tmp != out)
9306	    emit_move_insn (out, tmp);
9307
9308	  return 1; /* DONE */
9309	}
9310
9311      diff = ct - cf;
9312      if (diff < 0)
9313	{
9314	  HOST_WIDE_INT tmp;
9315	  tmp = ct, ct = cf, cf = tmp;
9316	  diff = -diff;
9317	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9318	    {
9319	      /* We may be reversing an unordered compare to a normal compare,
9320		 which is not valid in general (we may convert a non-trapping
9321		 condition into a trapping one); however, on i386 we currently
9322		 emit all comparisons unordered.  */
9323	      compare_code = reverse_condition_maybe_unordered (compare_code);
9324	      code = reverse_condition_maybe_unordered (code);
9325	    }
9326	  else
9327	    {
9328	      compare_code = reverse_condition (compare_code);
9329	      code = reverse_condition (code);
9330	    }
9331	}
9332
9333      compare_code = NIL;
9334      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9335	  && GET_CODE (ix86_compare_op1) == CONST_INT)
9336	{
9337	  if (ix86_compare_op1 == const0_rtx
9338	      && (code == LT || code == GE))
9339	    compare_code = code;
9340	  else if (ix86_compare_op1 == constm1_rtx)
9341	    {
9342	      if (code == LE)
9343		compare_code = LT;
9344	      else if (code == GT)
9345		compare_code = GE;
9346	    }
9347	}
9348
9349      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
9350      if (compare_code != NIL
9351	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9352	  && (cf == -1 || ct == -1))
9353	{
9354	  /* If the lea code below could be used, only optimize
9355	     if it results in a 2-insn sequence.  */
9356
9357	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9358		 || diff == 3 || diff == 5 || diff == 9)
9359	      || (compare_code == LT && ct == -1)
9360	      || (compare_code == GE && cf == -1))
9361	    {
9362	      /*
9363	       * notl op1	(if necessary)
9364	       * sarl $31, op1
9365	       * orl cf, op1
9366	       */
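	      /* E.g. "dest = (x < 0) ? -1 : 5" needs no notl and becomes
	       *    sarl $31, x
	       *    orl $5, x
	       * since the arithmetic shift yields the 0/-1 mask directly.  */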
9367	      if (ct != -1)
9368		{
9369		  cf = ct;
9370	  	  ct = -1;
9371		  code = reverse_condition (code);
9372		}
9373
9374	      out = emit_store_flag (out, code, ix86_compare_op0,
9375				     ix86_compare_op1, VOIDmode, 0, -1);
9376
9377	      out = expand_simple_binop (mode, IOR,
9378					 out, GEN_INT (cf),
9379					 out, 1, OPTAB_DIRECT);
9380	      if (out != operands[0])
9381		emit_move_insn (operands[0], out);
9382
9383	      return 1; /* DONE */
9384	    }
9385	}
9386
9387      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9388	   || diff == 3 || diff == 5 || diff == 9)
9389	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9390	{
9391	  /*
9392	   * xorl dest,dest
9393	   * cmpl op1,op2
9394	   * setcc dest
9395	   * lea cf(dest*(ct-cf)),dest
9396	   *
9397	   * Size 14.
9398	   *
9399	   * This also catches the degenerate setcc-only case.
9400	   */
9401
9402	  rtx tmp;
9403	  int nops;
9404
9405	  out = emit_store_flag (out, code, ix86_compare_op0,
9406				 ix86_compare_op1, VOIDmode, 0, 1);
9407
9408	  nops = 0;
9409	  /* On x86_64 the lea instruction operates on Pmode, so we need
9410	     the arithmetic done in the proper mode to match.  */
9411	  if (diff == 1)
9412	    tmp = copy_rtx (out);
9413	  else
9414	    {
9415	      rtx out1;
9416	      out1 = copy_rtx (out);
9417	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9418	      nops++;
9419	      if (diff & 1)
9420		{
9421		  tmp = gen_rtx_PLUS (mode, tmp, out1);
9422		  nops++;
9423		}
9424	    }
9425	  if (cf != 0)
9426	    {
9427	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9428	      nops++;
9429	    }
9430	  if (tmp != out
9431	      && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9432	    {
9433	      if (nops == 1)
9434		out = force_operand (tmp, copy_rtx (out));
9435	      else
9436		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9437	    }
9438	  if (out != operands[0])
9439	    emit_move_insn (operands[0], copy_rtx (out));
9440
9441	  return 1; /* DONE */
9442	}
9443
9444      /*
9445       * General case:			Jumpful:
9446       *   xorl dest,dest		cmpl op1, op2
9447       *   cmpl op1, op2		movl ct, dest
9448       *   setcc dest			jcc 1f
9449       *   decl dest			movl cf, dest
9450       *   andl (cf-ct),dest		1:
9451       *   addl ct,dest
9452       *
9453       * Size 20.			Size 14.
9454       *
9455       * This is reasonably steep, but branch mispredict costs are
9456       * high on modern cpus, so consider failing only if optimizing
9457       * for space.
9458       *
9459       * %%% Parameterize branch_cost on the tuning architecture, then
9460       * use that.  The 80386 couldn't care less about mispredicts.
9461       */
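      /* Worked example for "dest = (a < b) ? 10 : 20": setcc yields 1/0,
	 decl turns that into 0/-1, andl $10 (cf-ct) gives 0/10, and
	 addl $10 produces the final 10/20.  */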
9462
9463      if (!optimize_size && !TARGET_CMOVE)
9464	{
9465	  if (cf == 0)
9466	    {
9467	      cf = ct;
9468	      ct = 0;
9469	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9470		/* We may be reversing an unordered compare to a normal
9471		   compare, which is not valid in general (we may convert a
9472		   non-trapping condition into a trapping one); however, on
9473		   i386 we currently emit all comparisons unordered.  */
9474		code = reverse_condition_maybe_unordered (code);
9475	      else
9476		{
9477		  code = reverse_condition (code);
9478		  if (compare_code != NIL)
9479		    compare_code = reverse_condition (compare_code);
9480		}
9481	    }
9482
9483	  if (compare_code != NIL)
9484	    {
9485	      /* notl op1	(if needed)
9486		 sarl $31, op1
9487		 andl (cf-ct), op1
9488		 addl ct, op1
9489
9490		 For x < 0 (resp. x <= -1) there will be no notl,
9491		 so if possible swap the constants to get rid of the
9492		 complement.
9493		 True/false will be -1/0 while code below (store flag
9494		 followed by decrement) is 0/-1, so the constants need
9495		 to be exchanged once more.  */
9496
9497	      if (compare_code == GE || !cf)
9498		{
9499	  	  code = reverse_condition (code);
9500		  compare_code = LT;
9501		}
9502	      else
9503		{
9504		  HOST_WIDE_INT tmp = cf;
9505	  	  cf = ct;
9506		  ct = tmp;
9507		}
9508
9509	      out = emit_store_flag (out, code, ix86_compare_op0,
9510				     ix86_compare_op1, VOIDmode, 0, -1);
9511	    }
9512	  else
9513	    {
9514	      out = emit_store_flag (out, code, ix86_compare_op0,
9515				     ix86_compare_op1, VOIDmode, 0, 1);
9516
9517	      out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9518					 out, 1, OPTAB_DIRECT);
9519	    }
9520
9521	  out = expand_simple_binop (mode, AND, out,
9522				     gen_int_mode (cf - ct, mode),
9523				     out, 1, OPTAB_DIRECT);
9524	  if (ct)
9525	    out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9526				       out, 1, OPTAB_DIRECT);
9527	  if (out != operands[0])
9528	    emit_move_insn (operands[0], out);
9529
9530	  return 1; /* DONE */
9531	}
9532    }
9533
9534  if (!TARGET_CMOVE)
9535    {
9536      /* Try a few more things with specific constants and a variable.  */
9537
9538      optab op;
9539      rtx var, orig_out, out, tmp;
9540
9541      if (optimize_size)
9542	return 0; /* FAIL */
9543
9544      /* If one of the two operands is an interesting constant, recurse to
9545	 load a 0/-1 mask and combine the variable with a logical operation.  */
9546
9547      if (GET_CODE (operands[2]) == CONST_INT)
9548	{
9549	  var = operands[3];
9550	  if (INTVAL (operands[2]) == 0)
9551	    operands[3] = constm1_rtx, op = and_optab;
9552	  else if (INTVAL (operands[2]) == -1)
9553	    operands[3] = const0_rtx, op = ior_optab;
9554	  else
9555	    return 0; /* FAIL */
9556	}
9557      else if (GET_CODE (operands[3]) == CONST_INT)
9558	{
9559	  var = operands[2];
9560	  if (INTVAL (operands[3]) == 0)
9561	    operands[2] = constm1_rtx, op = and_optab;
9562	  else if (INTVAL (operands[3]) == -1)
9563	    operands[2] = const0_rtx, op = ior_optab;
9564	  else
9565	    return 0; /* FAIL */
9566	}
9567      else
9568        return 0; /* FAIL */
9569
9570      orig_out = operands[0];
9571      tmp = gen_reg_rtx (mode);
9572      operands[0] = tmp;
9573
9574      /* Recurse to get the constant loaded.  */
9575      if (ix86_expand_int_movcc (operands) == 0)
9576        return 0; /* FAIL */
9577
9578      /* Mask in the interesting variable.  */
9579      out = expand_binop (mode, op, var, tmp, orig_out, 0,
9580			  OPTAB_WIDEN);
9581      if (out != orig_out)
9582	emit_move_insn (orig_out, out);
9583
9584      return 1; /* DONE */
9585    }
9586
9587  /*
9588   * For comparison with above,
9589   *
9590   * movl cf,dest
9591   * movl ct,tmp
9592   * cmpl op1,op2
9593   * cmovcc tmp,dest
9594   *
9595   * Size 15.
9596   */
9597
9598  if (! nonimmediate_operand (operands[2], mode))
9599    operands[2] = force_reg (mode, operands[2]);
9600  if (! nonimmediate_operand (operands[3], mode))
9601    operands[3] = force_reg (mode, operands[3]);
9602
9603  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9604    {
9605      rtx tmp = gen_reg_rtx (mode);
9606      emit_move_insn (tmp, operands[3]);
9607      operands[3] = tmp;
9608    }
9609  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9610    {
9611      rtx tmp = gen_reg_rtx (mode);
9612      emit_move_insn (tmp, operands[2]);
9613      operands[2] = tmp;
9614    }
9615  if (! register_operand (operands[2], VOIDmode)
9616      && ! register_operand (operands[3], VOIDmode))
9617    operands[2] = force_reg (mode, operands[2]);
9618
9619  emit_insn (compare_seq);
9620  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9621			  gen_rtx_IF_THEN_ELSE (mode,
9622						compare_op, operands[2],
9623						operands[3])));
9624  if (bypass_test)
9625    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9626			    gen_rtx_IF_THEN_ELSE (mode,
9627				  bypass_test,
9628				  operands[3],
9629				  operands[0])));
9630  if (second_test)
9631    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9632			    gen_rtx_IF_THEN_ELSE (mode,
9633				  second_test,
9634				  operands[2],
9635				  operands[0])));
9636
9637  return 1; /* DONE */
9638}
9639
9640int
9641ix86_expand_fp_movcc (operands)
9642     rtx operands[];
9643{
9644  enum rtx_code code;
9645  rtx tmp;
9646  rtx compare_op, second_test, bypass_test;
9647
9648  /* For SF/DFmode conditional moves based on comparisons
9649     in the same mode, we may want to use SSE min/max instructions.  */
9650  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9651       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9652      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9653      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
9654      && (!TARGET_IEEE_FP
9655	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9656      /* We may be called from the post-reload splitter.  */
9657      && (!REG_P (operands[0])
9658	  || SSE_REG_P (operands[0])
9659	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9660    {
9661      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9662      code = GET_CODE (operands[1]);
9663
9664      /* See if we have a (cross) match between the comparison operands
9665         and the conditional move operands.  */
9666      if (rtx_equal_p (operands[2], op1))
9667	{
9668	  rtx tmp = op0;
9669	  op0 = op1;
9670	  op1 = tmp;
9671	  code = reverse_condition_maybe_unordered (code);
9672	}
9673      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9674	{
9675	  /* Check for min operation.  */
9676	  if (code == LT)
9677	    {
9678	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9679	       if (memory_operand (op0, VOIDmode))
9680		 op0 = force_reg (GET_MODE (operands[0]), op0);
9681	       if (GET_MODE (operands[0]) == SFmode)
9682		 emit_insn (gen_minsf3 (operands[0], op0, op1));
9683	       else
9684		 emit_insn (gen_mindf3 (operands[0], op0, op1));
9685	       return 1;
9686	    }
9687	  /* Check for max operation.  */
9688	  if (code == GT)
9689	    {
9690	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9691	       if (memory_operand (op0, VOIDmode))
9692		 op0 = force_reg (GET_MODE (operands[0]), op0);
9693	       if (GET_MODE (operands[0]) == SFmode)
9694		 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9695	       else
9696		 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9697	       return 1;
9698	    }
9699	}
9700      /* Arrange for the condition to be an sse_comparison_operator.  In
9701	 non-IEEE mode, try to canonicalize the destination operand to be
9702	 first in the comparison - this helps reload to avoid extra
9703	 moves.  */
9704      if (!sse_comparison_operator (operands[1], VOIDmode)
9705	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9706	{
9707	  rtx tmp = ix86_compare_op0;
9708	  ix86_compare_op0 = ix86_compare_op1;
9709	  ix86_compare_op1 = tmp;
9710	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9711					VOIDmode, ix86_compare_op0,
9712					ix86_compare_op1);
9713	}
9714      /* Similarly, try to arrange for the result to be the first operand
9715	 of the conditional move.  We also don't support the NE comparison
9716	 on SSE, so try to avoid it.  */
9717      if ((rtx_equal_p (operands[0], operands[3])
9718	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9719	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9720	{
9721	  rtx tmp = operands[2];
9722	  operands[2] = operands[3];
9723	  operands[3] = tmp;
9724	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9725					  (GET_CODE (operands[1])),
9726					VOIDmode, ix86_compare_op0,
9727					ix86_compare_op1);
9728	}
9729      if (GET_MODE (operands[0]) == SFmode)
9730	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9731				    operands[2], operands[3],
9732				    ix86_compare_op0, ix86_compare_op1));
9733      else
9734	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9735				    operands[2], operands[3],
9736				    ix86_compare_op0, ix86_compare_op1));
9737      return 1;
9738    }
9739
9743  code = GET_CODE (operands[1]);
9744  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9745
9746  /* The floating point conditional move instructions don't directly
9747     support signed integer comparisons.  */
9748
9749  if (!fcmov_comparison_operator (compare_op, VOIDmode))
9750    {
9751      if (second_test != NULL || bypass_test != NULL)
9752	abort ();
9753      tmp = gen_reg_rtx (QImode);
9754      ix86_expand_setcc (code, tmp);
9755      code = NE;
9756      ix86_compare_op0 = tmp;
9757      ix86_compare_op1 = const0_rtx;
9758      compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
9759    }
9760  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9761    {
9762      tmp = gen_reg_rtx (GET_MODE (operands[0]));
9763      emit_move_insn (tmp, operands[3]);
9764      operands[3] = tmp;
9765    }
9766  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9767    {
9768      tmp = gen_reg_rtx (GET_MODE (operands[0]));
9769      emit_move_insn (tmp, operands[2]);
9770      operands[2] = tmp;
9771    }
9772
9773  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9774			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9775				compare_op,
9776				operands[2],
9777				operands[3])));
9778  if (bypass_test)
9779    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9780			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9781				  bypass_test,
9782				  operands[3],
9783				  operands[0])));
9784  if (second_test)
9785    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9786			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9787				  second_test,
9788				  operands[2],
9789				  operands[0])));
9790
9791  return 1;
9792}
9793
9794/* Split OPERAND into parts, storing them in PARTS.  Similar to split_di,
9795   but works for floating point parameters and nonoffsettable memories.
9796   For pushes, it returns just the stack offsets; the values will be saved
9797   in the right order.  At most three parts are generated.  */
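/* For instance, on a 32-bit target a DFmode register operand is returned
   as two SImode registers (low part first) and XFmode as three, while on
   a 64-bit target XFmode comes back as a DImode part plus an SImode part.  */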
9798
9799static int
9800ix86_split_to_parts (operand, parts, mode)
9801     rtx operand;
9802     rtx *parts;
9803     enum machine_mode mode;
9804{
9805  int size;
9806
9807  if (!TARGET_64BIT)
9808    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9809  else
9810    size = (GET_MODE_SIZE (mode) + 4) / 8;
9811
9812  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9813    abort ();
9814  if (size < 2 || size > 3)
9815    abort ();
9816
9817  /* Optimize constant pool references into immediates.  This is used by fp
9818     moves, which force all constants to memory to allow combining.  */
9819  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9820    {
9821      rtx tmp = maybe_get_pool_constant (operand);
9822      if (tmp)
9823	operand = tmp;
9824    }
9825
9826  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9827    {
9828      /* The only non-offsettable memories we handle are pushes.  */
9829      if (! push_operand (operand, VOIDmode))
9830	abort ();
9831
9832      operand = copy_rtx (operand);
9833      PUT_MODE (operand, Pmode);
9834      parts[0] = parts[1] = parts[2] = operand;
9835    }
9836  else if (!TARGET_64BIT)
9837    {
9838      if (mode == DImode)
9839	split_di (&operand, 1, &parts[0], &parts[1]);
9840      else
9841	{
9842	  if (REG_P (operand))
9843	    {
9844	      if (!reload_completed)
9845		abort ();
9846	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9847	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9848	      if (size == 3)
9849		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9850	    }
9851	  else if (offsettable_memref_p (operand))
9852	    {
9853	      operand = adjust_address (operand, SImode, 0);
9854	      parts[0] = operand;
9855	      parts[1] = adjust_address (operand, SImode, 4);
9856	      if (size == 3)
9857		parts[2] = adjust_address (operand, SImode, 8);
9858	    }
9859	  else if (GET_CODE (operand) == CONST_DOUBLE)
9860	    {
9861	      REAL_VALUE_TYPE r;
9862	      long l[4];
9863
9864	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9865	      switch (mode)
9866		{
9867		case XFmode:
9868		case TFmode:
9869		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9870		  parts[2] = gen_int_mode (l[2], SImode);
9871		  break;
9872		case DFmode:
9873		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9874		  break;
9875		default:
9876		  abort ();
9877		}
9878	      parts[1] = gen_int_mode (l[1], SImode);
9879	      parts[0] = gen_int_mode (l[0], SImode);
9880	    }
9881	  else
9882	    abort ();
9883	}
9884    }
9885  else
9886    {
9887      if (mode == TImode)
9888	split_ti (&operand, 1, &parts[0], &parts[1]);
9889      if (mode == XFmode || mode == TFmode)
9890	{
9891	  if (REG_P (operand))
9892	    {
9893	      if (!reload_completed)
9894		abort ();
9895	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9896	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9897	    }
9898	  else if (offsettable_memref_p (operand))
9899	    {
9900	      operand = adjust_address (operand, DImode, 0);
9901	      parts[0] = operand;
9902	      parts[1] = adjust_address (operand, SImode, 8);
9903	    }
9904	  else if (GET_CODE (operand) == CONST_DOUBLE)
9905	    {
9906	      REAL_VALUE_TYPE r;
9907	      long l[3];
9908
9909	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9910	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9911	      /* Do not shift by 32; that would provoke a warning on 32-bit hosts.  */
9912	      if (HOST_BITS_PER_WIDE_INT >= 64)
9913	        parts[0]
9914		  = gen_int_mode
9915		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9916		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9917		       DImode);
9918	      else
9919	        parts[0] = immed_double_const (l[0], l[1], DImode);
9920	      parts[1] = gen_int_mode (l[2], SImode);
9921	    }
9922	  else
9923	    abort ();
9924	}
9925    }
9926
9927  return size;
9928}
9929
9930/* Emit insns to perform a move or push of DI, DF, and XF values.
9931   All required insns are emitted here.  Operands 2-4 receive the
9932   destination parts in the correct order; operands 5-7 receive the
9933   corresponding source parts.  */
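/* For example, copying a DImode value from memory addressed by a register
   into the register pair that includes that same register must move the
   half that clobbers the address register last; the ordering code below
   arranges exactly that.  */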
9934
9935void
9936ix86_split_long_move (operands)
9937     rtx operands[];
9938{
9939  rtx part[2][3];
9940  int nparts;
9941  int push = 0;
9942  int collisions = 0;
9943  enum machine_mode mode = GET_MODE (operands[0]);
9944
9945  /* The DFmode expanders may ask us to move a double.
9946     For a 64-bit target this is a single move.  By hiding that fact
9947     here we simplify the i386.md splitters.  */
9948  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9949    {
9950      /* Optimize constant pool references into immediates.  This is used
9951	 by fp moves, which force all constants to memory to allow combining.  */
9952
9953      if (GET_CODE (operands[1]) == MEM
9954	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9955	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9956	operands[1] = get_pool_constant (XEXP (operands[1], 0));
9957      if (push_operand (operands[0], VOIDmode))
9958	{
9959	  operands[0] = copy_rtx (operands[0]);
9960	  PUT_MODE (operands[0], Pmode);
9961	}
9962      else
9963        operands[0] = gen_lowpart (DImode, operands[0]);
9964      operands[1] = gen_lowpart (DImode, operands[1]);
9965      emit_move_insn (operands[0], operands[1]);
9966      return;
9967    }
9968
9969  /* The only non-offsettable memory we handle is push.  */
9970  if (push_operand (operands[0], VOIDmode))
9971    push = 1;
9972  else if (GET_CODE (operands[0]) == MEM
9973	   && ! offsettable_memref_p (operands[0]))
9974    abort ();
9975
9976  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9977  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9978
9979  /* When emitting a push, take care with source operands on the stack.  */
9980  if (push && GET_CODE (operands[1]) == MEM
9981      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9982    {
9983      if (nparts == 3)
9984	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9985				     XEXP (part[1][2], 0));
9986      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9987				   XEXP (part[1][1], 0));
9988    }
9989
9990  /* We need to do the copy in the right order in case an address register
9991     of the source overlaps the destination.  */
9992  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9993    {
9994      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9995	collisions++;
9996      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9997	collisions++;
9998      if (nparts == 3
9999	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10000	collisions++;
10001
10002      /* A collision in the middle part can be handled by reordering.  */
10003      if (collisions == 1 && nparts == 3
10004	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10005	{
10006	  rtx tmp;
10007	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10008	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10009	}
10010
10011      /* If there are more collisions, we can't handle them by reordering.
10012	 Do an lea to the last part and use only one colliding move.  */
10013      else if (collisions > 1)
10014	{
10015	  rtx base;
10016
10017	  collisions = 1;
10018
10019	  base = part[0][nparts - 1];
10020
10021	  /* Handle the case when the last part isn't valid for lea.
10022	     This happens in 64-bit mode when storing the 12-byte XFmode.  */
10023	  if (GET_MODE (base) != Pmode)
10024	    base = gen_rtx_REG (Pmode, REGNO (base));
10025
10026	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10027	  part[1][0] = replace_equiv_address (part[1][0], base);
10028	  part[1][1] = replace_equiv_address (part[1][1],
10029				      plus_constant (base, UNITS_PER_WORD));
10030	  if (nparts == 3)
10031	    part[1][2] = replace_equiv_address (part[1][2],
10032				      plus_constant (base, 8));
10033	}
10034    }
10035
10036  if (push)
10037    {
10038      if (!TARGET_64BIT)
10039	{
10040	  if (nparts == 3)
10041	    {
10042	      /* We use only the first 12 bytes of the TFmode value, but for
10043		 pushing we must adjust the stack as if we were pushing a real
10044		 16-byte value.  */
10045	      if (mode == TFmode && !TARGET_64BIT)
10046		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10047				       GEN_INT (-4)));
10048	      emit_move_insn (part[0][2], part[1][2]);
10049	    }
10050	}
10051      else
10052	{
10053	  /* In 64-bit mode we don't have a 32-bit push available.  If the part
10054	     is a register, that is OK - we will just use the larger counterpart.
10055	     We also retype memory - this comes from an attempt to avoid the REX
10056	     prefix when moving the second half of a TFmode value.  */
10057	  if (GET_MODE (part[1][1]) == SImode)
10058	    {
10059	      if (GET_CODE (part[1][1]) == MEM)
10060		part[1][1] = adjust_address (part[1][1], DImode, 0);
10061	      else if (REG_P (part[1][1]))
10062		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10063	      else
10064		abort ();
10065	      if (GET_MODE (part[1][0]) == SImode)
10066		part[1][0] = part[1][1];
10067	    }
10068	}
10069      emit_move_insn (part[0][1], part[1][1]);
10070      emit_move_insn (part[0][0], part[1][0]);
10071      return;
10072    }
10073
10074  /* Choose the correct order so we don't overwrite the source before it is copied.  */
10075  if ((REG_P (part[0][0])
10076       && REG_P (part[1][1])
10077       && (REGNO (part[0][0]) == REGNO (part[1][1])
10078	   || (nparts == 3
10079	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
10080      || (collisions > 0
10081	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10082    {
10083      if (nparts == 3)
10084	{
10085	  operands[2] = part[0][2];
10086	  operands[3] = part[0][1];
10087	  operands[4] = part[0][0];
10088	  operands[5] = part[1][2];
10089	  operands[6] = part[1][1];
10090	  operands[7] = part[1][0];
10091	}
10092      else
10093	{
10094	  operands[2] = part[0][1];
10095	  operands[3] = part[0][0];
10096	  operands[5] = part[1][1];
10097	  operands[6] = part[1][0];
10098	}
10099    }
10100  else
10101    {
10102      if (nparts == 3)
10103	{
10104	  operands[2] = part[0][0];
10105	  operands[3] = part[0][1];
10106	  operands[4] = part[0][2];
10107	  operands[5] = part[1][0];
10108	  operands[6] = part[1][1];
10109	  operands[7] = part[1][2];
10110	}
10111      else
10112	{
10113	  operands[2] = part[0][0];
10114	  operands[3] = part[0][1];
10115	  operands[5] = part[1][0];
10116	  operands[6] = part[1][1];
10117	}
10118    }
10119  emit_move_insn (operands[2], operands[5]);
10120  emit_move_insn (operands[3], operands[6]);
10121  if (nparts == 3)
10122    emit_move_insn (operands[4], operands[7]);
10123
10124  return;
10125}
10126
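/* Split a DImode shift left by OPERANDS[2] into SImode operations on a
   32-bit target.  For a constant count below 32 this emits, roughly,

	shldl	$count, low, high
	sall	$count, low

   while counts of 32 or more move the low word into the high word and
   clear the low word.  SCRATCH, if available, serves the variable-count
   adjustment when new pseudos cannot be created.  */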
10127void
10128ix86_split_ashldi (operands, scratch)
10129     rtx *operands, scratch;
10130{
10131  rtx low[2], high[2];
10132  int count;
10133
10134  if (GET_CODE (operands[2]) == CONST_INT)
10135    {
10136      split_di (operands, 2, low, high);
10137      count = INTVAL (operands[2]) & 63;
10138
10139      if (count >= 32)
10140	{
10141	  emit_move_insn (high[0], low[1]);
10142	  emit_move_insn (low[0], const0_rtx);
10143
10144	  if (count > 32)
10145	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10146	}
10147      else
10148	{
10149	  if (!rtx_equal_p (operands[0], operands[1]))
10150	    emit_move_insn (operands[0], operands[1]);
10151	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10152	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10153	}
10154    }
10155  else
10156    {
10157      if (!rtx_equal_p (operands[0], operands[1]))
10158	emit_move_insn (operands[0], operands[1]);
10159
10160      split_di (operands, 1, low, high);
10161
10162      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10163      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10164
10165      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10166	{
10167	  if (! no_new_pseudos)
10168	    scratch = force_reg (SImode, const0_rtx);
10169	  else
10170	    emit_move_insn (scratch, const0_rtx);
10171
10172	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10173					  scratch));
10174	}
10175      else
10176	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10177    }
10178}
10179
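/* Likewise for a DImode arithmetic shift right: shrdl/sarl handle constant
   counts below 32; for larger counts the high word is copied down and the
   new high word is filled with copies of the sign bit via sarl $31.  */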
10180void
10181ix86_split_ashrdi (operands, scratch)
10182     rtx *operands, scratch;
10183{
10184  rtx low[2], high[2];
10185  int count;
10186
10187  if (GET_CODE (operands[2]) == CONST_INT)
10188    {
10189      split_di (operands, 2, low, high);
10190      count = INTVAL (operands[2]) & 63;
10191
10192      if (count >= 32)
10193	{
10194	  emit_move_insn (low[0], high[1]);
10195
10196	  if (! reload_completed)
10197	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10198	  else
10199	    {
10200	      emit_move_insn (high[0], low[0]);
10201	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10202	    }
10203
10204	  if (count > 32)
10205	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10206	}
10207      else
10208	{
10209	  if (!rtx_equal_p (operands[0], operands[1]))
10210	    emit_move_insn (operands[0], operands[1]);
10211	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10212	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10213	}
10214    }
10215  else
10216    {
10217      if (!rtx_equal_p (operands[0], operands[1]))
10218	emit_move_insn (operands[0], operands[1]);
10219
10220      split_di (operands, 1, low, high);
10221
10222      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10223      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10224
10225      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10226	{
10227	  if (! no_new_pseudos)
10228	    scratch = gen_reg_rtx (SImode);
10229	  emit_move_insn (scratch, high[0]);
10230	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10231	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10232					  scratch));
10233	}
10234      else
10235	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10236    }
10237}
10238
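/* Likewise for a DImode logical shift right; identical to the arithmetic
   case except that the vacated high word is simply zeroed.  */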
10239void
10240ix86_split_lshrdi (operands, scratch)
10241     rtx *operands, scratch;
10242{
10243  rtx low[2], high[2];
10244  int count;
10245
10246  if (GET_CODE (operands[2]) == CONST_INT)
10247    {
10248      split_di (operands, 2, low, high);
10249      count = INTVAL (operands[2]) & 63;
10250
10251      if (count >= 32)
10252	{
10253	  emit_move_insn (low[0], high[1]);
10254	  emit_move_insn (high[0], const0_rtx);
10255
10256	  if (count > 32)
10257	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10258	}
10259      else
10260	{
10261	  if (!rtx_equal_p (operands[0], operands[1]))
10262	    emit_move_insn (operands[0], operands[1]);
10263	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10264	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10265	}
10266    }
10267  else
10268    {
10269      if (!rtx_equal_p (operands[0], operands[1]))
10270	emit_move_insn (operands[0], operands[1]);
10271
10272      split_di (operands, 1, low, high);
10273
10274      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10275      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10276
10277      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
10278      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10279	{
10280	  if (! no_new_pseudos)
10281	    scratch = force_reg (SImode, const0_rtx);
10282	  else
10283	    emit_move_insn (scratch, const0_rtx);
10284
10285	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10286					  scratch));
10287	}
10288      else
10289	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10290    }
10291}
10292
10293/* Helper function for the string operations below.  Test whether the bits
10294   of VALUE are clear in VARIABLE; if so, jump to the returned label.  */
10295static rtx
10296ix86_expand_aligntest (variable, value)
10297     rtx variable;
10298     int value;
10299{
10300  rtx label = gen_label_rtx ();
10301  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10302  if (GET_MODE (variable) == DImode)
10303    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10304  else
10305    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10306  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10307			   1, label);
10308  return label;
10309}
10310
10311/* Decrement COUNTREG by VALUE.  */
10312static void
10313ix86_adjust_counter (countreg, value)
10314     rtx countreg;
10315     HOST_WIDE_INT value;
10316{
10317  if (GET_MODE (countreg) == DImode)
10318    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10319  else
10320    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10321}
10322
10323/* Zero extend EXP, which may be SImode, into a Pmode register.  */
10324rtx
10325ix86_zero_extend_to_Pmode (exp)
10326   rtx exp;
10327{
10328  rtx r;
10329  if (GET_MODE (exp) == VOIDmode)
10330    return force_reg (Pmode, exp);
10331  if (GET_MODE (exp) == Pmode)
10332    return copy_to_mode_reg (Pmode, exp);
10333  r = gen_reg_rtx (Pmode);
10334  emit_insn (gen_zero_extendsidi2 (r, exp));
10335  return r;
10336}
10337
10338/* Expand string move (memcpy) operation.  Use i386 string operations when
10339   profitable.  expand_clrstr contains similar code.  */
10340int
10341ix86_expand_movstr (dst, src, count_exp, align_exp)
10342     rtx dst, src, count_exp, align_exp;
10343{
10344  rtx srcreg, destreg, countreg;
10345  enum machine_mode counter_mode;
10346  HOST_WIDE_INT align = 0;
10347  unsigned HOST_WIDE_INT count = 0;
10348  rtx insns;
10349
10350  start_sequence ();
10351
10352  if (GET_CODE (align_exp) == CONST_INT)
10353    align = INTVAL (align_exp);
10354
10355  /* This simple hack avoids all of the alignment code and simplifies the code below.  */
10356  if (!TARGET_ALIGN_STRINGOPS)
10357    align = 64;
10358
10359  if (GET_CODE (count_exp) == CONST_INT)
10360    count = INTVAL (count_exp);
10361
10362  /* Figure out the proper mode for the counter.  For 32-bit targets it is
10363     always SImode; for 64-bit targets use SImode when possible, otherwise
10364     DImode.  COUNT holds the byte count when known at compile time.  */
10365  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10366      || x86_64_zero_extended_value (count_exp))
10367    counter_mode = SImode;
10368  else
10369    counter_mode = DImode;
10370
10371  if (counter_mode != SImode && counter_mode != DImode)
10372    abort ();
10373
10374  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10375  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10376
10377  emit_insn (gen_cld ());
10378
10379  /* When optimizing for size, emit a simple rep ; movsb instruction for
10380     counts not divisible by 4.  */
10381
10382  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10383    {
10384      countreg = ix86_zero_extend_to_Pmode (count_exp);
10385      if (TARGET_64BIT)
10386	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10387				        destreg, srcreg, countreg));
10388      else
10389	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10390				  destreg, srcreg, countreg));
10391    }
10392
10393  /* For constant aligned (or small unaligned) copies, use rep movsl
10394     followed by code copying the rest.  For PentiumPro, ensure 8-byte
10395     alignment to allow rep movsl acceleration.  */
10396
10397  else if (count != 0
10398	   && (align >= 8
10399	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10400	       || optimize_size || count < (unsigned int) 64))
10401    {
10402      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10403      if (count & ~(size - 1))
10404	{
10405	  countreg = copy_to_mode_reg (counter_mode,
10406				       GEN_INT ((count >> (size == 4 ? 2 : 3))
10407						& (TARGET_64BIT ? -1 : 0x3fffffff)));
10408	  countreg = ix86_zero_extend_to_Pmode (countreg);
10409	  if (size == 4)
10410	    {
10411	      if (TARGET_64BIT)
10412		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10413					        destreg, srcreg, countreg));
10414	      else
10415		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10416					  destreg, srcreg, countreg));
10417	    }
10418	  else
10419	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10420					    destreg, srcreg, countreg));
10421	}
10422      if (size == 8 && (count & 0x04))
10423	emit_insn (gen_strmovsi (destreg, srcreg));
10424      if (count & 0x02)
10425	emit_insn (gen_strmovhi (destreg, srcreg));
10426      if (count & 0x01)
10427	emit_insn (gen_strmovqi (destreg, srcreg));
10428    }
10429  /* The generic code based on the glibc implementation:
10430     - align destination to 4 bytes (8 byte alignment is used for
10431       PentiumPro allowing accelerated copying there)
10432     - copy the data using rep movsl
10433     - copy the rest.  */
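  /* Counts below the desired alignment skip the alignment prologue via
     the conditional jump emitted below and are finished by the tail code,
     which uses the low bits of the count register to copy the last few
     bytes; everything else is aligned first, bulk-copied with rep movsl
     (rep movsq on 64-bit), and finished by the same tail.  */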
10434  else
10435    {
10436      rtx countreg2;
10437      rtx label = NULL;
10438      int desired_alignment = (TARGET_PENTIUMPRO
10439			       && (count == 0 || count >= (unsigned int) 260)
10440			       ? 8 : UNITS_PER_WORD);
10441
10442      /* In case we don't know anything about the alignment, default to
10443         the library version, since it is usually equally fast and results
10444         in shorter code.  */
10445      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10446	{
10447	  end_sequence ();
10448	  return 0;
10449	}
10450
10451      if (TARGET_SINGLE_STRINGOP)
10452	emit_insn (gen_cld ());
10453
10454      countreg2 = gen_reg_rtx (Pmode);
10455      countreg = copy_to_mode_reg (counter_mode, count_exp);
10456
10457      /* We don't use loops to align the destination or to copy parts
10458         smaller than 4 bytes, because gcc is able to optimize such code
10459         better (when the destination or the count really is aligned, gcc
10460         is often able to predict the branches) and it is also friendlier
10461         to hardware branch prediction.
10462
10463         Using loops is beneficial in the generic case, because we can
10464         handle small counts using the loops.  Many CPUs (such as the
10465         Athlon) have large REP prefix setup costs.
10466
10467         This is quite costly.  Maybe we can revisit this decision later
10468         or add some customizability to this code.  */
10469
10470      if (count == 0 && align < desired_alignment)
10471	{
10472	  label = gen_label_rtx ();
10473	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10474				   LEU, 0, counter_mode, 1, label);
10475	}
10476      if (align <= 1)
10477	{
10478	  rtx label = ix86_expand_aligntest (destreg, 1);
10479	  emit_insn (gen_strmovqi (destreg, srcreg));
10480	  ix86_adjust_counter (countreg, 1);
10481	  emit_label (label);
10482	  LABEL_NUSES (label) = 1;
10483	}
10484      if (align <= 2)
10485	{
10486	  rtx label = ix86_expand_aligntest (destreg, 2);
10487	  emit_insn (gen_strmovhi (destreg, srcreg));
10488	  ix86_adjust_counter (countreg, 2);
10489	  emit_label (label);
10490	  LABEL_NUSES (label) = 1;
10491	}
10492      if (align <= 4 && desired_alignment > 4)
10493	{
10494	  rtx label = ix86_expand_aligntest (destreg, 4);
10495	  emit_insn (gen_strmovsi (destreg, srcreg));
10496	  ix86_adjust_counter (countreg, 4);
10497	  emit_label (label);
10498	  LABEL_NUSES (label) = 1;
10499	}
10500
10501      if (label && desired_alignment > 4 && !TARGET_64BIT)
10502	{
10503	  emit_label (label);
10504	  LABEL_NUSES (label) = 1;
10505	  label = NULL_RTX;
10506	}
10507      if (!TARGET_SINGLE_STRINGOP)
10508	emit_insn (gen_cld ());
10509      if (TARGET_64BIT)
10510	{
10511	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10512				  GEN_INT (3)));
10513	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10514					  destreg, srcreg, countreg2));
10515	}
10516      else
10517	{
10518	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10519	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10520				    destreg, srcreg, countreg2));
10521	}
10522
10523      if (label)
10524	{
10525	  emit_label (label);
10526	  LABEL_NUSES (label) = 1;
10527	}
10528      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10529	emit_insn (gen_strmovsi (destreg, srcreg));
10530      if ((align <= 4 || count == 0) && TARGET_64BIT)
10531	{
10532	  rtx label = ix86_expand_aligntest (countreg, 4);
10533	  emit_insn (gen_strmovsi (destreg, srcreg));
10534	  emit_label (label);
10535	  LABEL_NUSES (label) = 1;
10536	}
10537      if (align > 2 && count != 0 && (count & 2))
10538	emit_insn (gen_strmovhi (destreg, srcreg));
10539      if (align <= 2 || count == 0)
10540	{
10541	  rtx label = ix86_expand_aligntest (countreg, 2);
10542	  emit_insn (gen_strmovhi (destreg, srcreg));
10543	  emit_label (label);
10544	  LABEL_NUSES (label) = 1;
10545	}
10546      if (align > 1 && count != 0 && (count & 1))
10547	emit_insn (gen_strmovqi (destreg, srcreg));
10548      if (align <= 1 || count == 0)
10549	{
10550	  rtx label = ix86_expand_aligntest (countreg, 1);
10551	  emit_insn (gen_strmovqi (destreg, srcreg));
10552	  emit_label (label);
10553	  LABEL_NUSES (label) = 1;
10554	}
10555    }
10556
10557  insns = get_insns ();
10558  end_sequence ();
10559
10560  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10561  emit_insn (insns);
10562  return 1;
10563}
10564
10565/* Expand string clear operation (bzero).  Use i386 string operations when
10566   profitable.  expand_movstr contains similar code.  */
10567int
10568ix86_expand_clrstr (src, count_exp, align_exp)
10569     rtx src, count_exp, align_exp;
10570{
10571  rtx destreg, zeroreg, countreg;
10572  enum machine_mode counter_mode;
10573  HOST_WIDE_INT align = 0;
10574  unsigned HOST_WIDE_INT count = 0;
10575
10576  if (GET_CODE (align_exp) == CONST_INT)
10577    align = INTVAL (align_exp);
10578
10579  /* This simple hack avoids all of the alignment code and simplifies the code below.  */
10580  if (!TARGET_ALIGN_STRINGOPS)
10581    align = 32;
10582
10583  if (GET_CODE (count_exp) == CONST_INT)
10584    count = INTVAL (count_exp);
10585  /* Figure out the proper mode for the counter.  For 32-bit targets it is
10586     always SImode; for 64-bit targets use SImode when possible, otherwise
10587     DImode.  COUNT holds the byte count when known at compile time.  */
10588  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10589      || x86_64_zero_extended_value (count_exp))
10590    counter_mode = SImode;
10591  else
10592    counter_mode = DImode;
10593
10594  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10595
10596  emit_insn (gen_cld ());
10597
10598  /* When optimizing for size, emit a simple rep ; stosb instruction for
10599     counts not divisible by 4.  */
10600
10601  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10602    {
10603      countreg = ix86_zero_extend_to_Pmode (count_exp);
10604      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10605      if (TARGET_64BIT)
10606	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10607				         destreg, countreg));
10608      else
10609	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10610				   destreg, countreg));
10611    }
10612  else if (count != 0
10613	   && (align >= 8
10614	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10615	       || optimize_size || count < (unsigned int) 64))
10616    {
10617      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10618      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10619      if (count & ~(size - 1))
10620	{
10621	  countreg = copy_to_mode_reg (counter_mode,
10622				       GEN_INT ((count >> (size == 4 ? 2 : 3))
10623						& (TARGET_64BIT ? -1 : 0x3fffffff)));
10624	  countreg = ix86_zero_extend_to_Pmode (countreg);
10625	  if (size == 4)
10626	    {
10627	      if (TARGET_64BIT)
10628		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10629					         destreg, countreg));
10630	      else
10631		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10632					   destreg, countreg));
10633	    }
10634	  else
10635	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10636					     destreg, countreg));
10637	}
10638      if (size == 8 && (count & 0x04))
10639	emit_insn (gen_strsetsi (destreg,
10640				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10641      if (count & 0x02)
10642	emit_insn (gen_strsethi (destreg,
10643				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10644      if (count & 0x01)
10645	emit_insn (gen_strsetqi (destreg,
10646				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10647    }
10648  else
10649    {
10650      rtx countreg2;
10651      rtx label = NULL;
10652      /* Compute desired alignment of the string operation.  */
10653      int desired_alignment = (TARGET_PENTIUMPRO
10654			       && (count == 0 || count >= (unsigned int) 260)
10655			       ? 8 : UNITS_PER_WORD);
10656
10657      /* In case we don't know anything about the alignment, default to
10658         the library version, since it is usually equally fast and results
10659         in shorter code.  */
10660      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10661	return 0;
10662
10663      if (TARGET_SINGLE_STRINGOP)
10664	emit_insn (gen_cld ());
10665
10666      countreg2 = gen_reg_rtx (Pmode);
10667      countreg = copy_to_mode_reg (counter_mode, count_exp);
10668      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10669
10670      if (count == 0 && align < desired_alignment)
10671	{
10672	  label = gen_label_rtx ();
10673	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10674				   LEU, 0, counter_mode, 1, label);
10675	}
10676      if (align <= 1)
10677	{
10678	  rtx label = ix86_expand_aligntest (destreg, 1);
10679	  emit_insn (gen_strsetqi (destreg,
10680				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
10681	  ix86_adjust_counter (countreg, 1);
10682	  emit_label (label);
10683	  LABEL_NUSES (label) = 1;
10684	}
10685      if (align <= 2)
10686	{
10687	  rtx label = ix86_expand_aligntest (destreg, 2);
10688	  emit_insn (gen_strsethi (destreg,
10689				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
10690	  ix86_adjust_counter (countreg, 2);
10691	  emit_label (label);
10692	  LABEL_NUSES (label) = 1;
10693	}
10694      if (align <= 4 && desired_alignment > 4)
10695	{
10696	  rtx label = ix86_expand_aligntest (destreg, 4);
10697	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10698					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10699					     : zeroreg)));
10700	  ix86_adjust_counter (countreg, 4);
10701	  emit_label (label);
10702	  LABEL_NUSES (label) = 1;
10703	}
10704
10705      if (label && desired_alignment > 4 && !TARGET_64BIT)
10706	{
10707	  emit_label (label);
10708	  LABEL_NUSES (label) = 1;
10709	  label = NULL_RTX;
10710	}
10711
10712      if (!TARGET_SINGLE_STRINGOP)
10713	emit_insn (gen_cld ());
10714      if (TARGET_64BIT)
10715	{
10716	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10717				  GEN_INT (3)));
10718	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10719					   destreg, countreg2));
10720	}
10721      else
10722	{
10723	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10724	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10725				     destreg, countreg2));
10726	}
10727      if (label)
10728	{
10729	  emit_label (label);
10730	  LABEL_NUSES (label) = 1;
10731	}
10732
10733      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10734	emit_insn (gen_strsetsi (destreg,
10735				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10736      if (TARGET_64BIT && (align <= 4 || count == 0))
10737	{
10738	  rtx label = ix86_expand_aligntest (countreg, 4);
10739	  emit_insn (gen_strsetsi (destreg,
10740				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
10741	  emit_label (label);
10742	  LABEL_NUSES (label) = 1;
10743	}
10744      if (align > 2 && count != 0 && (count & 2))
10745	emit_insn (gen_strsethi (destreg,
10746				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10747      if (align <= 2 || count == 0)
10748	{
10749	  rtx label = ix86_expand_aligntest (countreg, 2);
10750	  emit_insn (gen_strsethi (destreg,
10751				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
10752	  emit_label (label);
10753	  LABEL_NUSES (label) = 1;
10754	}
10755      if (align > 1 && count != 0 && (count & 1))
10756	emit_insn (gen_strsetqi (destreg,
10757				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10758      if (align <= 1 || count == 0)
10759	{
10760	  rtx label = ix86_expand_aligntest (countreg, 1);
10761	  emit_insn (gen_strsetqi (destreg,
10762				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
10763	  emit_label (label);
10764	  LABEL_NUSES (label) = 1;
10765	}
10766    }
10767  return 1;
10768}
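
/* Editor's illustration (not part of the original source): a C sketch of
   the inline clear strategy used above when COUNT is known and the
   alignment is sufficient -- clear SImode words rep;stos style, then
   finish the remaining 1..3 bytes just as the strsethi/strsetqi tail
   stores do.  Guarded by #if 0 so it is never compiled.  */
#if 0
static void
clear_by_words (unsigned char *dst, unsigned long count)
{
  unsigned long words = count >> 2;	/* number of SImode stores */

  while (words--)			/* the "rep ; stosl" part */
    {
      *(unsigned int *) dst = 0;
      dst += 4;
    }
  if (count & 2)			/* tail: one HImode store */
    {
      *(unsigned short *) dst = 0;
      dst += 2;
    }
  if (count & 1)			/* tail: one QImode store */
    *dst = 0;
}
#endif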
10769/* Expand strlen.  */
10770int
10771ix86_expand_strlen (out, src, eoschar, align)
10772     rtx out, src, eoschar, align;
10773{
10774  rtx addr, scratch1, scratch2, scratch3, scratch4;
10775
10776  /* The generic case of the strlen expander is long.  Avoid expanding it
10777     unless TARGET_INLINE_ALL_STRINGOPS.  */
10778
10779  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10780      && !TARGET_INLINE_ALL_STRINGOPS
10781      && !optimize_size
10782      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10783    return 0;
10784
10785  addr = force_reg (Pmode, XEXP (src, 0));
10786  scratch1 = gen_reg_rtx (Pmode);
10787
10788  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10789      && !optimize_size)
10790    {
10791      /* It seems that some optimizers do not combine a call like
10792         foo (strlen (bar), strlen (bar));
10793         when the move and the subtraction are done here.  The length is
10794         still calculated just once when these instructions are emitted
10795         inside output_strlen_unroll().  But since &bar[strlen (bar)] is
10796         often used, and this uses one fewer register for the lifetime of
10797         output_strlen_unroll(), this is better.  */
10798
10799      emit_move_insn (out, addr);
10800
10801      ix86_expand_strlensi_unroll_1 (out, align);
10802
10803      /* strlensi_unroll_1 returns the address of the zero at the end of
10804         the string, like memchr(), so compute the length by subtracting
10805         the start address.  */
10806      if (TARGET_64BIT)
10807	emit_insn (gen_subdi3 (out, out, addr));
10808      else
10809	emit_insn (gen_subsi3 (out, out, addr));
10810    }
10811  else
10812    {
10813      scratch2 = gen_reg_rtx (Pmode);
10814      scratch3 = gen_reg_rtx (Pmode);
10815      scratch4 = force_reg (Pmode, constm1_rtx);
10816
10817      emit_move_insn (scratch3, addr);
10818      eoschar = force_reg (QImode, eoschar);
10819
10820      emit_insn (gen_cld ());
10821      if (TARGET_64BIT)
10822	{
10823	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10824					 align, scratch4, scratch3));
10825	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10826	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10827	}
10828      else
10829	{
10830	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10831				     align, scratch4, scratch3));
10832	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10833	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10834	}
10835    }
10836  return 1;
10837}
10838
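/* Editor's illustration (not part of the original source): a C sketch of
   the code the expander below generates -- check up to 3 bytes until the
   pointer is 4-byte aligned, scan one word per iteration with the
   zero-byte test, then locate the zero inside the final word (the real
   epilogue below is branch-free).  Guarded by #if 0 so it is never
   compiled.  */
#if 0
static unsigned char *
find_nul_by_words (unsigned char *p)
{
  unsigned int w, t;

  /* Prologue: byte checks until P is 4-byte aligned.  */
  for (; ((unsigned long) p & 3) != 0; p++)
    if (*p == 0)
      return p;

  /* Word loop: T is nonzero iff some byte of W is zero.  */
  do
    {
      w = *(unsigned int *) p;
      p += 4;
      t = (w - 0x01010101U) & ~w & 0x80808080U;
    }
  while (t == 0);

  /* Epilogue: find the zero byte within the word just loaded.  */
  p -= 4;
  while (*p != 0)
    p++;
  return p;
}
#endif
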
10839/* Expand the appropriate insns for doing strlen if not just doing
10840   repnz; scasb
10841
10842   out = result, initialized with the start address
10843   align_rtx = alignment of the address.
10844   scratch = scratch register, initialized with the start address when
10845	not aligned, otherwise undefined
10846
10847   This is just the body.  It needs the initializations mentioned above and
10848   some address computation at the end.  These things are done in i386.md.  */
10849
10850static void
10851ix86_expand_strlensi_unroll_1 (out, align_rtx)
10852     rtx out, align_rtx;
10853{
10854  int align;
10855  rtx tmp;
10856  rtx align_2_label = NULL_RTX;
10857  rtx align_3_label = NULL_RTX;
10858  rtx align_4_label = gen_label_rtx ();
10859  rtx end_0_label = gen_label_rtx ();
10860  rtx mem;
10861  rtx tmpreg = gen_reg_rtx (SImode);
10862  rtx scratch = gen_reg_rtx (SImode);
10863
10864  align = 0;
10865  if (GET_CODE (align_rtx) == CONST_INT)
10866    align = INTVAL (align_rtx);
10867
10868  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
10869
10870  /* Is there a known alignment and is it less than 4?  */
10871  if (align < 4)
10872    {
10873      rtx scratch1 = gen_reg_rtx (Pmode);
10874      emit_move_insn (scratch1, out);
10875      /* Is there a known alignment and is it not 2? */
10876      if (align != 2)
10877	{
10878	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10879	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10880
10881	  /* Leave just the 3 lower bits.  */
10882	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10883				    NULL_RTX, 0, OPTAB_WIDEN);
10884
10885	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10886				   Pmode, 1, align_4_label);
10887	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10888				   Pmode, 1, align_2_label);
10889	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10890				   Pmode, 1, align_3_label);
10891	}
10892      else
10893        {
10894	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
10895	     check whether it is aligned to a 4-byte boundary.  */
10896
10897	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10898				    NULL_RTX, 0, OPTAB_WIDEN);
10899
10900	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10901				   Pmode, 1, align_4_label);
10902        }
10903
10904      mem = gen_rtx_MEM (QImode, out);
10905
10906      /* Now compare the bytes.  */
10907
10908      /* Compare the first n unaligned bytes one byte at a time.  */
10909      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10910			       QImode, 1, end_0_label);
10911
10912      /* Increment the address.  */
10913      if (TARGET_64BIT)
10914	emit_insn (gen_adddi3 (out, out, const1_rtx));
10915      else
10916	emit_insn (gen_addsi3 (out, out, const1_rtx));
10917
10918      /* Not needed with an alignment of 2.  */
10919      if (align != 2)
10920	{
10921	  emit_label (align_2_label);
10922
10923	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10924				   end_0_label);
10925
10926	  if (TARGET_64BIT)
10927	    emit_insn (gen_adddi3 (out, out, const1_rtx));
10928	  else
10929	    emit_insn (gen_addsi3 (out, out, const1_rtx));
10930
10931	  emit_label (align_3_label);
10932	}
10933
10934      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10935			       end_0_label);
10936
10937      if (TARGET_64BIT)
10938	emit_insn (gen_adddi3 (out, out, const1_rtx));
10939      else
10940	emit_insn (gen_addsi3 (out, out, const1_rtx));
10941    }
10942
10943  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
10944     align this loop; doing so only enlarges the program without speeding
10945     it up.  */
10946  emit_label (align_4_label);
10947
10948  mem = gen_rtx_MEM (SImode, out);
10949  emit_move_insn (scratch, mem);
10950  if (TARGET_64BIT)
10951    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10952  else
10953    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10954
10955  /* This formula yields a nonzero result iff one of the bytes is zero.
10956     This saves three branches inside the loop and many cycles.  */
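  /* Editor's note: the insns below compute
         tmpreg = (scratch - 0x01010101) & ~scratch & 0x80808080.
     For example, scratch = 0x41004242 gives
         0x3fff4141 & 0xbeffbdbd & 0x80808080 = 0x00800000,
     and the surviving 0x80 bit sits in the byte that was zero; a word
     with no zero byte yields 0.  */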
10957
10958  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10959  emit_insn (gen_one_cmplsi2 (scratch, scratch));
10960  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10961  emit_insn (gen_andsi3 (tmpreg, tmpreg,
10962			 gen_int_mode (0x80808080, SImode)));
10963  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10964			   align_4_label);
10965
10966  if (TARGET_CMOVE)
10967    {
10968       rtx reg = gen_reg_rtx (SImode);
10969       rtx reg2 = gen_reg_rtx (Pmode);
10970       emit_move_insn (reg, tmpreg);
10971       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10972
10973       /* If zero is not in the first two bytes, move two bytes forward.  */
10974       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10975       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10976       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10977       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10978			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
10979						     reg,
10980						     tmpreg)));
10981       /* Emit lea manually to avoid clobbering of flags.  */
10982       emit_insn (gen_rtx_SET (SImode, reg2,
10983			       gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10984
10985       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10986       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10987       emit_insn (gen_rtx_SET (VOIDmode, out,
10988			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10989						     reg2,
10990						     out)));
10991
10992    }
10993  else
10994    {
10995       rtx end_2_label = gen_label_rtx ();
10996       /* Is zero in the first two bytes? */
10997
10998       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10999       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11000       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11001       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11002                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11003                            pc_rtx);
11004       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11005       JUMP_LABEL (tmp) = end_2_label;
11006
11007       /* Not in the first two.  Move two bytes forward.  */
11008       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11009       if (TARGET_64BIT)
11010	 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11011       else
11012	 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11013
11014       emit_label (end_2_label);
11015
11016    }
11017
11018  /* Avoid branch in fixing the byte.  */
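  /* Editor's note: at this point bit 7 of TMPREG is set iff the zero is
     the first byte of the remaining pair.  Adding the low byte to itself
     moves that bit into the carry flag, so the sbb below computes
     OUT - 3 - CF, which is exactly the address of the zero byte.  */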
11019  tmpreg = gen_lowpart (QImode, tmpreg);
11020  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11021  if (TARGET_64BIT)
11022    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
11023  else
11024    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
11025
11026  emit_label (end_0_label);
11027}
11028
11029void
11030ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
11031     rtx retval, fnaddr, callarg1, callarg2, pop;
11032{
11033  rtx use = NULL, call;
11034
11035  if (pop == const0_rtx)
11036    pop = NULL;
11037  if (TARGET_64BIT && pop)
11038    abort ();
11039
11040#if TARGET_MACHO
11041  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11042    fnaddr = machopic_indirect_call_target (fnaddr);
11043#else
11044  /* Static functions and indirect calls don't need the pic register.  */
11045  if (! TARGET_64BIT && flag_pic
11046      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11047      && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11048    use_reg (&use, pic_offset_table_rtx);
11049
11050  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11051    {
11052      rtx al = gen_rtx_REG (QImode, 0);
11053      emit_move_insn (al, callarg2);
11054      use_reg (&use, al);
11055    }
11056#endif /* TARGET_MACHO */
11057
11058  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11059    {
11060      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11061      fnaddr = gen_rtx_MEM (QImode, fnaddr);
11062    }
11063
11064  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11065  if (retval)
11066    call = gen_rtx_SET (VOIDmode, retval, call);
11067  if (pop)
11068    {
11069      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11070      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11071      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11072    }
11073
11074  call = emit_call_insn (call);
11075  if (use)
11076    CALL_INSN_FUNCTION_USAGE (call) = use;
11077}
11078
11079
11080/* Clear stack slot assignments remembered from previous functions.
11081   This is called from INIT_EXPANDERS once before RTL is emitted for each
11082   function.  */
11083
11084static struct machine_function *
11085ix86_init_machine_status ()
11086{
11087  return ggc_alloc_cleared (sizeof (struct machine_function));
11088}
11089
11090/* Return a MEM corresponding to a stack slot with mode MODE.
11091   Allocate a new slot if necessary.
11092
11093   The RTL for a function can have several slots available: N is
11094   which slot to use.  */
11095
11096rtx
11097assign_386_stack_local (mode, n)
11098     enum machine_mode mode;
11099     int n;
11100{
11101  if (n < 0 || n >= MAX_386_STACK_LOCALS)
11102    abort ();
11103
11104  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11105    ix86_stack_locals[(int) mode][n]
11106      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11107
11108  return ix86_stack_locals[(int) mode][n];
11109}
11110
11111/* Construct the SYMBOL_REF for the tls_get_addr function.  */
11112
11113static GTY(()) rtx ix86_tls_symbol;
11114rtx
11115ix86_tls_get_addr ()
11116{
11118  if (!ix86_tls_symbol)
11119    {
11120      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11121					    (TARGET_GNU_TLS && !TARGET_64BIT)
11122					    ? "___tls_get_addr"
11123					    : "__tls_get_addr");
11124    }
11125
11126  return ix86_tls_symbol;
11127}
11128
11129/* Calculate the length of the memory address in the instruction
11130   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
11131
11132static int
11133memory_address_length (addr)
11134     rtx addr;
11135{
11136  struct ix86_address parts;
11137  rtx base, index, disp;
11138  int len;
11139
11140  if (GET_CODE (addr) == PRE_DEC
11141      || GET_CODE (addr) == POST_INC
11142      || GET_CODE (addr) == PRE_MODIFY
11143      || GET_CODE (addr) == POST_MODIFY)
11144    return 0;
11145
11146  if (! ix86_decompose_address (addr, &parts))
11147    abort ();
11148
11149  base = parts.base;
11150  index = parts.index;
11151  disp = parts.disp;
11152  len = 0;
11153
11154  /* Register Indirect.  */
11155  if (base && !index && !disp)
11156    {
11157      /* Special cases: esp needs an SIB byte and ebp needs an explicit
             zero displacement, so each costs one extra byte.  */
11158      if (addr == stack_pointer_rtx
11159	  || addr == arg_pointer_rtx
11160	  || addr == frame_pointer_rtx
11161	  || addr == hard_frame_pointer_rtx)
11162	len = 1;
11163    }
11164
11165  /* Direct Addressing.  */
11166  else if (disp && !base && !index)
11167    len = 4;
11168
11169  else
11170    {
11171      /* Find the length of the displacement constant.  */
11172      if (disp)
11173	{
11174	  if (GET_CODE (disp) == CONST_INT
11175	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11176	      && base)
11177	    len = 1;
11178	  else
11179	    len = 4;
11180	}
11181
11182      /* An index requires an SIB byte, i.e. one extra byte.  */
11183      if (index)
11184	len += 1;
11185    }
11186
11187  return len;
11188}
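
/* Editor's examples for memory_address_length above (disp/SIB bytes only,
   as counted here): (%ebx) -> 0; (%esp) -> 1 (SIB); 16(%ebx) -> 1
   (disp8); 0x12345678(%ebx) -> 4 (disp32); (%ebx,%ecx,4) -> 1 (SIB);
   16(%ebx,%ecx,4) -> 2; a bare symbol or constant address -> 4.  */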
11189
11190/* Compute default value for "length_immediate" attribute.  When SHORTFORM
11191   is set, expect that the insn has an 8-bit immediate alternative.  */
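/* Editor's note: e.g. "addl $4, %eax" has an imm8 encoding (opcode 0x83),
   so with SHORTFORM an immediate fitting the 'K' constraint counts as one
   byte, while "addl $0x12345, %eax" needs the imm32 form (opcode 0x81)
   and counts as four.  */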
11192int
11193ix86_attr_length_immediate_default (insn, shortform)
11194     rtx insn;
11195     int shortform;
11196{
11197  int len = 0;
11198  int i;
11199  extract_insn_cached (insn);
11200  for (i = recog_data.n_operands - 1; i >= 0; --i)
11201    if (CONSTANT_P (recog_data.operand[i]))
11202      {
11203	if (len)
11204	  abort ();
11205	if (shortform
11206	    && GET_CODE (recog_data.operand[i]) == CONST_INT
11207	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11208	  len = 1;
11209	else
11210	  {
11211	    switch (get_attr_mode (insn))
11212	      {
11213		case MODE_QI:
11214		  len += 1;
11215		  break;
11216		case MODE_HI:
11217		  len += 2;
11218		  break;
11219		case MODE_SI:
11220		  len += 4;
11221		  break;
11222		/* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
11223		case MODE_DI:
11224		  len += 4;
11225		  break;
11226		default:
11227		  fatal_insn ("unknown insn mode", insn);
11228	      }
11229	  }
11230      }
11231  return len;
11232}
11233/* Compute default value for "length_address" attribute.  */
11234int
11235ix86_attr_length_address_default (insn)
11236     rtx insn;
11237{
11238  int i;
11239
11240  if (get_attr_type (insn) == TYPE_LEA)
11241    {
11242      rtx set = PATTERN (insn);
11243      if (GET_CODE (set) == SET)
11244	;
11245      else if (GET_CODE (set) == PARALLEL
11246	       && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11247	set = XVECEXP (set, 0, 0);
11248      else
11249	{
11250#ifdef ENABLE_CHECKING
11251	  abort ();
11252#endif
11253	  return 0;
11254	}
11255
11256      return memory_address_length (SET_SRC (set));
11257    }
11258
11259  extract_insn_cached (insn);
11260  for (i = recog_data.n_operands - 1; i >= 0; --i)
11261    if (GET_CODE (recog_data.operand[i]) == MEM)
11263      return memory_address_length (XEXP (recog_data.operand[i], 0));
11266  return 0;
11267}
11268
11269/* Return the maximum number of instructions a cpu can issue.  */
11270
11271static int
11272ix86_issue_rate ()
11273{
11274  switch (ix86_cpu)
11275    {
11276    case PROCESSOR_PENTIUM:
11277    case PROCESSOR_K6:
11278      return 2;
11279
11280    case PROCESSOR_PENTIUMPRO:
11281    case PROCESSOR_PENTIUM4:
11282    case PROCESSOR_ATHLON:
11283      return 3;
11284
11285    default:
11286      return 1;
11287    }
11288}
11289
11290/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
11291   set by DEP_INSN and nothing else set by DEP_INSN.  */
11292
11293static int
11294ix86_flags_dependant (insn, dep_insn, insn_type)
11295     rtx insn, dep_insn;
11296     enum attr_type insn_type;
11297{
11298  rtx set, set2;
11299
11300  /* Simplify the test for uninteresting insns.  */
11301  if (insn_type != TYPE_SETCC
11302      && insn_type != TYPE_ICMOV
11303      && insn_type != TYPE_FCMOV
11304      && insn_type != TYPE_IBR)
11305    return 0;
11306
11307  if ((set = single_set (dep_insn)) != 0)
11308    {
11309      set = SET_DEST (set);
11310      set2 = NULL_RTX;
11311    }
11312  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11313	   && XVECLEN (PATTERN (dep_insn), 0) == 2
11314	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11315	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11316    {
11317      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11318      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11319    }
11320  else
11321    return 0;
11322
11323  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11324    return 0;
11325
11326  /* This test is true if the dependent insn reads the flags but
11327     not any other potentially set register.  */
11328  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11329    return 0;
11330
11331  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11332    return 0;
11333
11334  return 1;
11335}
11336
11337/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11338   address with operands set by DEP_INSN.  */
11339
11340static int
11341ix86_agi_dependant (insn, dep_insn, insn_type)
11342     rtx insn, dep_insn;
11343     enum attr_type insn_type;
11344{
11345  rtx addr;
11346
11347  if (insn_type == TYPE_LEA
11348      && TARGET_PENTIUM)
11349    {
11350      addr = PATTERN (insn);
11351      if (GET_CODE (addr) == SET)
11352	;
11353      else if (GET_CODE (addr) == PARALLEL
11354	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11355	addr = XVECEXP (addr, 0, 0);
11356      else
11357	abort ();
11358      addr = SET_SRC (addr);
11359    }
11360  else
11361    {
11362      int i;
11363      extract_insn_cached (insn);
11364      for (i = recog_data.n_operands - 1; i >= 0; --i)
11365	if (GET_CODE (recog_data.operand[i]) == MEM)
11366	  {
11367	    addr = XEXP (recog_data.operand[i], 0);
11368	    goto found;
11369	  }
11370      return 0;
11371    found:;
11372    }
11373
11374  return modified_in_p (addr, dep_insn);
11375}
11376
11377static int
11378ix86_adjust_cost (insn, link, dep_insn, cost)
11379     rtx insn, link, dep_insn;
11380     int cost;
11381{
11382  enum attr_type insn_type, dep_insn_type;
11383  enum attr_memory memory, dep_memory;
11384  rtx set, set2;
11385  int dep_insn_code_number;
11386
11387  /* Anti- and output dependencies have zero cost on all CPUs.  */
11388  if (REG_NOTE_KIND (link) != 0)
11389    return 0;
11390
11391  dep_insn_code_number = recog_memoized (dep_insn);
11392
11393  /* If we can't recognize the insns, we can't really do anything.  */
11394  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11395    return cost;
11396
11397  insn_type = get_attr_type (insn);
11398  dep_insn_type = get_attr_type (dep_insn);
11399
11400  switch (ix86_cpu)
11401    {
11402    case PROCESSOR_PENTIUM:
11403      /* Address Generation Interlock adds a cycle of latency.  */
11404      if (ix86_agi_dependant (insn, dep_insn, insn_type))
11405	cost += 1;
11406
11407      /* ??? Compares pair with jump/setcc.  */
11408      if (ix86_flags_dependant (insn, dep_insn, insn_type))
11409	cost = 0;
11410
11411      /* Floating point stores require the value to be ready one cycle
         earlier.  */
11412      if (insn_type == TYPE_FMOV
11413	  && get_attr_memory (insn) == MEMORY_STORE
11414	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11415	cost += 1;
11416      break;
11417
11418    case PROCESSOR_PENTIUMPRO:
11419      memory = get_attr_memory (insn);
11420      dep_memory = get_attr_memory (dep_insn);
11421
11422      /* Since we can't represent delayed latencies of load+operation,
11423	 increase the cost here for non-imov insns.  */
11424      if (dep_insn_type != TYPE_IMOV
11425          && dep_insn_type != TYPE_FMOV
11426          && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11427	cost += 1;
11428
11429      /* INT->FP conversion is expensive.  */
11430      if (get_attr_fp_int_src (dep_insn))
11431	cost += 5;
11432
11433      /* There is one cycle extra latency between an FP op and a store.  */
11434      if (insn_type == TYPE_FMOV
11435	  && (set = single_set (dep_insn)) != NULL_RTX
11436	  && (set2 = single_set (insn)) != NULL_RTX
11437	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11438	  && GET_CODE (SET_DEST (set2)) == MEM)
11439	cost += 1;
11440
11441      /* Model the reorder buffer's ability to hide the latency of a load
11442	 by executing it in parallel with the previous instruction, when the
11443	 previous instruction is not needed to compute the address.  */
11444      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11445	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11446 	{
11447	  /* Claim that moves take one cycle, as the core can issue one load
11448	     at a time and the next load can start a cycle later.  */
11449	  if (dep_insn_type == TYPE_IMOV
11450	      || dep_insn_type == TYPE_FMOV)
11451	    cost = 1;
11452	  else if (cost > 1)
11453	    cost--;
11454	}
11455      break;
11456
11457    case PROCESSOR_K6:
11458      memory = get_attr_memory (insn);
11459      dep_memory = get_attr_memory (dep_insn);
11460      /* The esp dependency is resolved before the instruction is really
11461         finished.  */
11462      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11463	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11464	return 1;
11465
11466      /* Since we can't represent delayed latencies of load+operation,
11467	 increase the cost here for non-imov insns.  */
11468      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11469	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11470
11471      /* INT->FP conversion is expensive.  */
11472      if (get_attr_fp_int_src (dep_insn))
11473	cost += 5;
11474
11475      /* Model the reorder buffer's ability to hide the latency of a load
11476	 by executing it in parallel with the previous instruction, when the
11477	 previous instruction is not needed to compute the address.  */
11478      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11479	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11480 	{
11481	  /* Claim that moves take one cycle, as the core can issue one load
11482	     at a time and the next load can start a cycle later.  */
11483	  if (dep_insn_type == TYPE_IMOV
11484	      || dep_insn_type == TYPE_FMOV)
11485	    cost = 1;
11486	  else if (cost > 2)
11487	    cost -= 2;
11488	  else
11489	    cost = 1;
11490	}
11491      break;
11492
11493    case PROCESSOR_ATHLON:
11494      memory = get_attr_memory (insn);
11495      dep_memory = get_attr_memory (dep_insn);
11496
11497      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11498	{
11499	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11500	    cost += 2;
11501	  else
11502	    cost += 3;
11503        }
11504      /* Model the reorder buffer's ability to hide the latency of a load
11505	 by executing it in parallel with the previous instruction, when the
11506	 previous instruction is not needed to compute the address.  */
11507      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11508	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
11509 	{
11510	  /* Claim that moves take one cycle, as the core can issue one load
11511	     at a time and the next load can start a cycle later.  */
11512	  if (dep_insn_type == TYPE_IMOV
11513	      || dep_insn_type == TYPE_FMOV)
11514	    cost = 0;
11515	  else if (cost >= 3)
11516	    cost -= 3;
11517	  else
11518	    cost = 0;
11519	}
      break;
11520
11521    default:
11522      break;
11523    }
11524
11525  return cost;
11526}
11527
11528static union
11529{
11530  struct ppro_sched_data
11531  {
11532    rtx decode[3];
11533    int issued_this_cycle;
11534  } ppro;
11535} ix86_sched_data;
11536
11537static enum attr_ppro_uops
11538ix86_safe_ppro_uops (insn)
11539     rtx insn;
11540{
11541  if (recog_memoized (insn) >= 0)
11542    return get_attr_ppro_uops (insn);
11543  else
11544    return PPRO_UOPS_MANY;
11545}
11546
11547static void
11548ix86_dump_ppro_packet (dump)
11549     FILE *dump;
11550{
11551  if (ix86_sched_data.ppro.decode[0])
11552    {
11553      fprintf (dump, "PPRO packet: %d",
11554	       INSN_UID (ix86_sched_data.ppro.decode[0]));
11555      if (ix86_sched_data.ppro.decode[1])
11556	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11557      if (ix86_sched_data.ppro.decode[2])
11558	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11559      fputc ('\n', dump);
11560    }
11561}
11562
11563/* We're beginning a new block.  Initialize data structures as necessary.  */
11564
11565static void
11566ix86_sched_init (dump, sched_verbose, veclen)
11567     FILE *dump ATTRIBUTE_UNUSED;
11568     int sched_verbose ATTRIBUTE_UNUSED;
11569     int veclen ATTRIBUTE_UNUSED;
11570{
11571  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11572}
11573
11574/* Shift INSN to SLOT, and shift everything else down.  */
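/* Editor's note: e.g. with the ready entries {A,B,C,D}, INSNP pointing
   at B and SLOT at D, the result is {A,C,D,B} -- B rotates up to the
   slot and C and D each move down one place.  */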
11575
11576static void
11577ix86_reorder_insn (insnp, slot)
11578     rtx *insnp, *slot;
11579{
11580  if (insnp != slot)
11581    {
11582      rtx insn = *insnp;
11583      do
11584	insnp[0] = insnp[1];
11585      while (++insnp != slot);
11586      *insnp = insn;
11587    }
11588}
11589
11590static void
11591ix86_sched_reorder_ppro (ready, e_ready)
11592     rtx *ready;
11593     rtx *e_ready;
11594{
11595  rtx decode[3];
11596  enum attr_ppro_uops cur_uops;
11597  int issued_this_cycle;
11598  rtx *insnp;
11599  int i;
11600
11601  /* At this point .ppro.decode contains the state of the three
11602     decoders from last "cycle".  That is, those insns that were
11603     actually independent.  But here we're scheduling for the
11604     decoder, and we may find things that are decodable in the
11605     same cycle.  */
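  /* Editor's note: this mirrors the PPro's 4-1-1 decode template -- the
     first decoder handles insns of up to 4 uops (PPRO_UOPS_FEW/MANY),
     while the other two decoders only accept single-uop insns
     (PPRO_UOPS_ONE), which is why the two searches below differ.  */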
11606
11607  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11608  issued_this_cycle = 0;
11609
11610  insnp = e_ready;
11611  cur_uops = ix86_safe_ppro_uops (*insnp);
11612
11613  /* If the decoders are empty and we have a complex insn at the
11614     head of the priority queue, let it issue without complaint.  */
11615  if (decode[0] == NULL)
11616    {
11617      if (cur_uops == PPRO_UOPS_MANY)
11618	{
11619	  decode[0] = *insnp;
11620	  goto ppro_done;
11621	}
11622
11623      /* Otherwise, search for a 2-4 uop insn to issue.  */
11624      while (cur_uops != PPRO_UOPS_FEW)
11625	{
11626	  if (insnp == ready)
11627	    break;
11628	  cur_uops = ix86_safe_ppro_uops (*--insnp);
11629	}
11630
11631      /* If so, move it to the head of the line.  */
11632      if (cur_uops == PPRO_UOPS_FEW)
11633	ix86_reorder_insn (insnp, e_ready);
11634
11635      /* Issue the head of the queue.  */
11636      issued_this_cycle = 1;
11637      decode[0] = *e_ready--;
11638    }
11639
11640  /* Look for simple insns to fill in the other two slots.  */
11641  for (i = 1; i < 3; ++i)
11642    if (decode[i] == NULL)
11643      {
11644	if (ready > e_ready)
11645	  goto ppro_done;
11646
11647	insnp = e_ready;
11648	cur_uops = ix86_safe_ppro_uops (*insnp);
11649	while (cur_uops != PPRO_UOPS_ONE)
11650	  {
11651	    if (insnp == ready)
11652	      break;
11653	    cur_uops = ix86_safe_ppro_uops (*--insnp);
11654	  }
11655
11656	/* Found one.  Move it to the head of the queue and issue it.  */
11657	if (cur_uops == PPRO_UOPS_ONE)
11658	  {
11659	    ix86_reorder_insn (insnp, e_ready);
11660	    decode[i] = *e_ready--;
11661	    issued_this_cycle++;
11662	    continue;
11663	  }
11664
11665	/* ??? Didn't find one.  Ideally, here we would do a lazy split
11666	   of 2-uop insns, issue one and queue the other.  */
11667      }
11668
11669 ppro_done:
11670  if (issued_this_cycle == 0)
11671    issued_this_cycle = 1;
11672  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11673}
11674
11675/* We are about to begin issuing insns for this clock cycle.
11676   Override the default sort algorithm to better slot instructions.  */
11677static int
11678ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11679     FILE *dump ATTRIBUTE_UNUSED;
11680     int sched_verbose ATTRIBUTE_UNUSED;
11681     rtx *ready;
11682     int *n_readyp;
11683     int clock_var ATTRIBUTE_UNUSED;
11684{
11685  int n_ready = *n_readyp;
11686  rtx *e_ready = ready + n_ready - 1;
11687
11688  /* Make sure to go ahead and initialize key items in
11689     ix86_sched_data if we are not going to bother trying to
11690     reorder the ready queue.  */
11691  if (n_ready < 2)
11692    {
11693      ix86_sched_data.ppro.issued_this_cycle = 1;
11694      goto out;
11695    }
11696
11697  switch (ix86_cpu)
11698    {
11699    default:
11700      break;
11701
11702    case PROCESSOR_PENTIUMPRO:
11703      ix86_sched_reorder_ppro (ready, e_ready);
11704      break;
11705    }
11706
11707out:
11708  return ix86_issue_rate ();
11709}
11710
11711/* We are about to issue INSN.  Return the number of insns left on the
11712   ready queue that can be issued this cycle.  */
11713
11714static int
11715ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11716     FILE *dump;
11717     int sched_verbose;
11718     rtx insn;
11719     int can_issue_more;
11720{
11721  int i;
11722  switch (ix86_cpu)
11723    {
11724    default:
11725      return can_issue_more - 1;
11726
11727    case PROCESSOR_PENTIUMPRO:
11728      {
11729	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11730
11731	if (uops == PPRO_UOPS_MANY)
11732	  {
11733	    if (sched_verbose)
11734	      ix86_dump_ppro_packet (dump);
11735	    ix86_sched_data.ppro.decode[0] = insn;
11736	    ix86_sched_data.ppro.decode[1] = NULL;
11737	    ix86_sched_data.ppro.decode[2] = NULL;
11738	    if (sched_verbose)
11739	      ix86_dump_ppro_packet (dump);
11740	    ix86_sched_data.ppro.decode[0] = NULL;
11741	  }
11742	else if (uops == PPRO_UOPS_FEW)
11743	  {
11744	    if (sched_verbose)
11745	      ix86_dump_ppro_packet (dump);
11746	    ix86_sched_data.ppro.decode[0] = insn;
11747	    ix86_sched_data.ppro.decode[1] = NULL;
11748	    ix86_sched_data.ppro.decode[2] = NULL;
11749	  }
11750	else
11751	  {
11752	    for (i = 0; i < 3; ++i)
11753	      if (ix86_sched_data.ppro.decode[i] == NULL)
11754		{
11755		  ix86_sched_data.ppro.decode[i] = insn;
11756		  break;
11757		}
11758	    if (i == 3)
11759	      abort ();
11760	    if (i == 2)
11761	      {
11762	        if (sched_verbose)
11763	          ix86_dump_ppro_packet (dump);
11764		ix86_sched_data.ppro.decode[0] = NULL;
11765		ix86_sched_data.ppro.decode[1] = NULL;
11766		ix86_sched_data.ppro.decode[2] = NULL;
11767	      }
11768	  }
11769      }
11770      return --ix86_sched_data.ppro.issued_this_cycle;
11771    }
11772}
11773
11774static int
11775ia32_use_dfa_pipeline_interface ()
11776{
11777  if (ix86_cpu == PROCESSOR_PENTIUM)
11778    return 1;
11779  return 0;
11780}
11781
11782/* How many alternative schedules to try.  This should be as wide as the
11783   scheduling freedom in the DFA, but no wider.  Making this value too
11784   large results in extra work for the scheduler.  */
11785
11786static int
11787ia32_multipass_dfa_lookahead ()
11788{
11789  if (ix86_cpu == PROCESSOR_PENTIUM)
11790    return 2;
11791  else
11792   return 0;
11793}
11794
11795
11796/* Walk through INSNS and look for MEM references whose address is DSTREG or
11797   SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
11798   appropriate.  */
11799
11800void
11801ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11802     rtx insns;
11803     rtx dstref, srcref, dstreg, srcreg;
11804{
11805  rtx insn;
11806
11807  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11808    if (INSN_P (insn))
11809      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11810				 dstreg, srcreg);
11811}
11812
11813/* Subroutine of above to actually do the updating by recursively walking
11814   the rtx.  */
11815
11816static void
11817ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11818     rtx x;
11819     rtx dstref, srcref, dstreg, srcreg;
11820{
11821  enum rtx_code code = GET_CODE (x);
11822  const char *format_ptr = GET_RTX_FORMAT (code);
11823  int i, j;
11824
11825  if (code == MEM && XEXP (x, 0) == dstreg)
11826    MEM_COPY_ATTRIBUTES (x, dstref);
11827  else if (code == MEM && XEXP (x, 0) == srcreg)
11828    MEM_COPY_ATTRIBUTES (x, srcref);
11829
11830  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11831    {
11832      if (*format_ptr == 'e')
11833	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11834				   dstreg, srcreg);
11835      else if (*format_ptr == 'E')
11836	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11837	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11838				     dstreg, srcreg);
11839    }
11840}
11841
11842/* Compute the alignment given to a constant that is being placed in memory.
11843   EXP is the constant and ALIGN is the alignment that the object would
11844   ordinarily have.
11845   The value of this function is used instead of that alignment to align
11846   the object.  */
11847
11848int
11849ix86_constant_alignment (exp, align)
11850     tree exp;
11851     int align;
11852{
11853  if (TREE_CODE (exp) == REAL_CST)
11854    {
11855      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11856	return 64;
11857      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11858	return 128;
11859    }
11860  else if (TREE_CODE (exp) == STRING_CST && !TARGET_NO_ALIGN_LONG_STRINGS
11861	   && TREE_STRING_LENGTH (exp) >= 31 && align < 256)
11862    return 256;
11863
11864  return align;
11865}
11866
11867/* Compute the alignment for a static variable.
11868   TYPE is the data type, and ALIGN is the alignment that
11869   the object would ordinarily have.  The value of this function is used
11870   instead of that alignment to align the object.  */
11871
11872int
11873ix86_data_alignment (type, align)
11874     tree type;
11875     int align;
11876{
11877  if (AGGREGATE_TYPE_P (type)
11878       && TYPE_SIZE (type)
11879       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11880       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11881	   || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11882    return 256;
11883
11884  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11885     to a 16-byte boundary.  */
11886  if (TARGET_64BIT)
11887    {
11888      if (AGGREGATE_TYPE_P (type)
11889	   && TYPE_SIZE (type)
11890	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11891	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11892	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11893	return 128;
11894    }
11895
11896  if (TREE_CODE (type) == ARRAY_TYPE)
11897    {
11898      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11899	return 64;
11900      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11901	return 128;
11902    }
11903  else if (TREE_CODE (type) == COMPLEX_TYPE)
11904    {
11905
11906      if (TYPE_MODE (type) == DCmode && align < 64)
11907	return 64;
11908      if (TYPE_MODE (type) == XCmode && align < 128)
11909	return 128;
11910    }
11911  else if ((TREE_CODE (type) == RECORD_TYPE
11912	    || TREE_CODE (type) == UNION_TYPE
11913	    || TREE_CODE (type) == QUAL_UNION_TYPE)
11914	   && TYPE_FIELDS (type))
11915    {
11916      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11917	return 64;
11918      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11919	return 128;
11920    }
11921  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11922	   || TREE_CODE (type) == INTEGER_TYPE)
11923    {
11924      if (TYPE_MODE (type) == DFmode && align < 64)
11925	return 64;
11926      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11927	return 128;
11928    }
11929
11930  return align;
11931}
11932
11933/* Compute the alignment for a local variable.
11934   TYPE is the data type, and ALIGN is the alignment that
11935   the object would ordinarily have.  The value of this macro is used
11936   instead of that alignment to align the object.  */
11937
11938int
11939ix86_local_alignment (type, align)
11940     tree type;
11941     int align;
11942{
11943  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11944     to a 16-byte boundary.  */
11945  if (TARGET_64BIT)
11946    {
11947      if (AGGREGATE_TYPE_P (type)
11948	   && TYPE_SIZE (type)
11949	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11950	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11951	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11952	return 128;
11953    }
11954  if (TREE_CODE (type) == ARRAY_TYPE)
11955    {
11956      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11957	return 64;
11958      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11959	return 128;
11960    }
11961  else if (TREE_CODE (type) == COMPLEX_TYPE)
11962    {
11963      if (TYPE_MODE (type) == DCmode && align < 64)
11964	return 64;
11965      if (TYPE_MODE (type) == XCmode && align < 128)
11966	return 128;
11967    }
11968  else if ((TREE_CODE (type) == RECORD_TYPE
11969	    || TREE_CODE (type) == UNION_TYPE
11970	    || TREE_CODE (type) == QUAL_UNION_TYPE)
11971	   && TYPE_FIELDS (type))
11972    {
11973      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11974	return 64;
11975      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11976	return 128;
11977    }
11978  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11979	   || TREE_CODE (type) == INTEGER_TYPE)
11980    {
11981
11982      if (TYPE_MODE (type) == DFmode && align < 64)
11983	return 64;
11984      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11985	return 128;
11986    }
11987  return align;
11988}
11989
11990/* Emit RTL insns to initialize the variable parts of a trampoline.
11991   FNADDR is an RTX for the address of the function's pure code.
11992   CXT is an RTX for the static chain value for the function.  */
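/* Editor's note on the byte layout emitted below -- 32-bit (10 bytes):
       b9 <cxt:4>	movl  $CXT, %ecx
       e9 <disp:4>	jmp   FNADDR	(rel32 from tramp+10)
   64-bit: FNADDR is loaded into %r11 (41 bb imm32 when it zero-extends,
   otherwise 49 bb imm64), CXT into %r10 with 49 ba imm64, and the
   sequence ends with 49 ff e3 (jmp *%r11).  */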
11993void
11994x86_initialize_trampoline (tramp, fnaddr, cxt)
11995     rtx tramp, fnaddr, cxt;
11996{
11997  if (!TARGET_64BIT)
11998    {
11999      /* Compute offset from the end of the jmp to the target function.  */
12000      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12001			       plus_constant (tramp, 10),
12002			       NULL_RTX, 1, OPTAB_DIRECT);
12003      emit_move_insn (gen_rtx_MEM (QImode, tramp),
12004		      gen_int_mode (0xb9, QImode));
12005      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12006      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12007		      gen_int_mode (0xe9, QImode));
12008      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12009    }
12010  else
12011    {
12012      int offset = 0;
12013      /* Try to load the address using the shorter movl instead of movabs.
12014         We may want to support movq for kernel mode, but the kernel does not
12015         use trampolines at the moment.  */
12016      if (x86_64_zero_extended_value (fnaddr))
12017	{
12018	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
12019	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12020			  gen_int_mode (0xbb41, HImode));
12021	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12022			  gen_lowpart (SImode, fnaddr));
12023	  offset += 6;
12024	}
12025      else
12026	{
12027	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12028			  gen_int_mode (0xbb49, HImode));
12029	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12030			  fnaddr);
12031	  offset += 10;
12032	}
12033      /* Load static chain using movabs to r10.  */
12034      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12035		      gen_int_mode (0xba49, HImode));
12036      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12037		      cxt);
12038      offset += 10;
12039      /* Jump to r11.  */
12040      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12041		      gen_int_mode (0xff49, HImode));
12042      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12043		      gen_int_mode (0xe3, QImode));
12044      offset += 3;
12045      if (offset > TRAMPOLINE_SIZE)
12046	abort ();
12047    }
12048
12049#ifdef TRANSFER_FROM_TRAMPOLINE
12050  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12051		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12052#endif
12053}
12054
12055#define def_builtin(MASK, NAME, TYPE, CODE)			\
12056do {								\
12057  if ((MASK) & target_flags					\
12058      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
12059    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
12060		      NULL, NULL_TREE);				\
12061} while (0)
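
/* Editor's note: typical use (the type name below follows the
   v4sf_ftype_v4sf_v4sf pattern used by the builtin-init code and is
   illustrative only):
     def_builtin (MASK_SSE1, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
   The builtin is registered only when the MASK bits are set in
   target_flags, and 64-bit-only masks additionally require TARGET_64BIT.  */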
12062
12063struct builtin_description
12064{
12065  const unsigned int mask;
12066  const enum insn_code icode;
12067  const char *const name;
12068  const enum ix86_builtins code;
12069  const enum rtx_code comparison;
12070  const unsigned int flag;
12071};
12072
12073/* Used for builtins that are enabled both by -msse and -msse2.  */
12074#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12075#define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
12076#define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
12077
12078static const struct builtin_description bdesc_comi[] =
12079{
12080  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12081  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12082  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12083  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12084  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12085  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12086  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12087  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12088  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12089  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12090  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12091  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12092  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12093  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12094  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12095  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12096  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12097  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12098  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12099  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12100  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12101  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12102  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12103  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12104};
12105
12106static const struct builtin_description bdesc_2arg[] =
12107{
12108  /* SSE */
12109  { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12110  { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12111  { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12112  { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12113  { MASK_SSE1, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12114  { MASK_SSE1, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12115  { MASK_SSE1, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12116  { MASK_SSE1, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12117
12118  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12119  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12120  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12121  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12122  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12123  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12124  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12125  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12126  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12127  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12128  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12129  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12130  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12131  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12132  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12133  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12134  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12135  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12136  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12137  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12138
12139  { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12140  { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12141  { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12142  { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12143
12144  { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12145  { MASK_SSE1, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12146  { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12147  { MASK_SSE1, CODE_FOR_sse_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12148
12149  { MASK_SSE1, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12150  { MASK_SSE1, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12151  { MASK_SSE1, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12152  { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12153  { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12154
12155  /* MMX */
12156  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },

  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },

  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },

  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },

  /* Special.  */
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },

  { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },

  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },

  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },

  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },

  /* SSE2 */
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },

  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },

  /* SSE2 MMX */
  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },

  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
};
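
/* Illustrative note (added commentary, not original to this table): each
   row above binds a target mask, a named builtin and the insn pattern
   implementing it.  Reading one entry under that scheme,

     { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd",
       IX86_BUILTIN_CMPGTPD, LT, 1 },

   registers __builtin_ia32_cmpgtpd only when SSE2 is enabled, expands it
   through the maskcmpv2df3 pattern, and records the LT comparison with
   the swap flag set, since no greater-than form exists in hardware and
   the operands must be exchanged at expansion time (see
   ix86_expand_sse_compare below).  Rows whose name field is 0, such as
   the PSLLW group, are not registered from this table at all; they are
   def_builtin'd by hand in ix86_init_mmx_sse_builtins.  */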

static const struct builtin_description bdesc_1arg[] =
{
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },

  { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },

  { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },

  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
  { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },

  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },

  { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
};

/* Set up all the target-specific builtin functions.  Currently only
   the MMX/SSE family is handled.  */
void
ix86_init_builtins ()
{
  if (TARGET_MMX)
    ix86_init_mmx_sse_builtins ();
}
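
/* Usage sketch (illustrative only, not part of this file): once
   ix86_init_builtins has run for a -mmmx compilation, the table-driven
   builtins above are directly callable through GCC's vector_size
   extension, e.g.

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     add_words (v4hi a, v4hi b)
     {
       return __builtin_ia32_paddw (a, b);
     }

   where the call expands through CODE_FOR_addv4hi3 from bdesc_2arg.  */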

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */
static void
ix86_init_mmx_sse_builtins ()
{
  const struct builtin_description * d;
  size_t i;

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node = build_pointer_type (
                             build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node = build_pointer_type (
                             build_type_variant (float_type_node, 1, 0));
  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2SI_type_node, NULL_TREE);
  tree int_ftype_v4hi_int
    = build_function_type_list (integer_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int_int
    = build_function_type_list (V4HI_type_node, V4HI_type_node,
                                integer_type_node, integer_type_node,
                                NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, void_list_node);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
                                V8QI_type_node, V8QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node    = build_pointer_type (integer_type_node);
  tree pcint_type_node = build_pointer_type (
                             build_type_variant (integer_type_node, 1, 0));
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
                                build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree ti_ftype_void
    = build_function_type (intTI_type_node, void_list_node);
  tree v2di_ftype_void
    = build_function_type (V2DI_type_node, void_list_node);
  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
                                intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v2di_ftype_di
    = build_function_type_list (V2DI_type_node,
                                long_long_unsigned_type_node, NULL_TREE);
  tree di_ftype_v2di
    = build_function_type_list (long_long_unsigned_type_node,
                                V2DI_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v2df_ftype_v2df_pv2si
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v2df
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_double
    = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
  tree v2df_ftype_double_double
    = build_function_type_list (V2DF_type_node,
                                double_type_node, double_type_node, NULL_TREE);
  tree int_ftype_v8hi_int
    = build_function_type_list (integer_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node,
                                integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V16QI_type_node, NULL_TREE);
  tree v4si_ftype_pcint
    = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
  tree void_ftype_pcint_v4si
    = build_function_type_list (void_type_node,
                                pcint_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);

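  /* Illustrative aside (added commentary): each _ftype_ tree above is an
     ordinary FUNCTION_TYPE.  For instance v4sf_ftype_v4sf_v4sf, built by

       build_function_type_list (V4SF_type_node,
                                 V4SF_type_node, V4SF_type_node, NULL_TREE);

     describes the prototype "v4sf f (v4sf, v4sf)" and is the type handed
     to def_builtin for every two-operand V4SF builtin registered below.  */
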
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          abort ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
          || d->icode == CODE_FOR_maskncmpv4sf3
          || d->icode == CODE_FOR_vmmaskcmpv4sf3
          || d->icode == CODE_FOR_vmmaskncmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
          || d->icode == CODE_FOR_maskncmpv2df3
          || d->icode == CODE_FOR_vmmaskcmpv2df3
          || d->icode == CODE_FOR_vmmaskncmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
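
  /* Worked example of the loop above (added commentary): the pcmpeqb
     entry uses CODE_FOR_eqv8qi3, whose operand 1 has mode V8QImode, so
     the switch selects v8qi_ftype_v8qi_v8qi.  The maskcmpv2df3 entries
     would likewise get v2df_ftype_v2df_v2df from their operand mode, but
     the override replaces that with v2di_ftype_v2df_v2df because those
     patterns produce an all-ones/all-zeros integer mask rather than a
     floating-point value.  */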

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);

  /* SSE2 */
  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
}
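
/* Usage sketch (illustrative, not part of this file): the hand-registered
   SSE2 builtins above are likewise callable from C under -msse2, e.g.

     typedef double v2df __attribute__ ((vector_size (16)));

     v2df
     shuffle_low (v2df a, v2df b)
     {
       return __builtin_ia32_shufpd (a, b, 0);
     }

   which matches the v2df_ftype_v2df_v2df_int signature given to
   __builtin_ia32_shufpd; the immediate 0 selects element 0 of each
   source operand.  */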

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
                                : gen_rtx_SUBREG (V4SFmode, x, 0)));
  return x;
}
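
/* Illustration (added commentary): if earlier errors made expand_expr
   hand back const0_rtx for, say, the vector operand of
   __builtin_ia32_sqrtps, the routine above emits mmx_clrdi or
   sse_clrv4sf, as the mode demands, to materialize a zeroed register of
   the right mode, so the expanders below never see a bare const0_rtx in
   a vector operand slot.  */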

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
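
/* Expansion sketch (illustrative): for __builtin_ia32_andpd the table
   supplies icode == CODE_FOR_sse2_andv2df3, so a call

     __builtin_ia32_andpd (x, y)

   arrives here with tmode == mode0 == mode1 == V2DFmode.  Operands that
   fail the pattern's predicates are forced into registers, at most one
   memory operand is kept, and GEN_FCN (icode) (target, op0, op1) emits
   the single insn whose result register is handed back to the caller.  */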

/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist)
     enum insn_code icode;
     tree arglist;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
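
/* Sketch (illustrative): a store builtin such as __builtin_ia32_movntpd,
   registered above with void_ftype_pdouble_v2df, reaches this routine
   with arg0 the destination pointer and arg1 the vector value.  The
   pointer is forced into a Pmode register and wrapped in a MEM of the
   pattern's mode 0, the value is copied into a register, and the store
   insn is emitted; 0 is returned to match the builtin's void result.  */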

/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
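
/* Sketch (illustrative): DO_LOAD distinguishes the two kinds of callers.
   With DO_LOAD == 0 a value builtin like __builtin_ia32_sqrtps simply
   gets its vector argument copied into a register; with DO_LOAD == 1 a
   load builtin like __builtin_ia32_loadaps treats the argument as an
   address instead, so op0 is rewritten as a MEM in mode 0 and the move
   pattern itself performs the load.  */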

/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
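
/* Sketch (illustrative): sqrtss, rsqrtss and rcpss compute element 0 but
   copy the remaining elements from a second input, so for a call such as

     __builtin_ia32_rcpss (x)

   the single user-visible operand is duplicated into both pattern inputs
   (op1 = op0 above).  The emitted "rcpss x, x" leaves elements 1-3 of
   the result equal to those of X while element 0 holds the approximate
   reciprocal.  */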

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
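
/* Worked example (illustrative): __builtin_ia32_cmpgtpd is tabled with
   comparison LT and the swap flag set, so D->FLAG is true here, the
   operands are exchanged, and

     __builtin_ia32_cmpgtpd (a, b)

   is emitted as the hardware-available cmpltpd (b, a), which produces
   exactly the all-ones/all-zeros mask a direct greater-than compare
   would have produced.  */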
13239
13240/* Subroutine of ix86_expand_builtin to take care of comi insns.  */
13241
13242static rtx
13243ix86_expand_sse_comi (d, arglist, target)
13244     const struct builtin_description *d;
13245     tree arglist;
13246     rtx target;
13247{
13248  rtx pat;
13249  tree arg0 = TREE_VALUE (arglist);
13250  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13251  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13252  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13253  rtx op2;
13254  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13255  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13256  enum rtx_code comparison = d->comparison;
13257
13258  if (VECTOR_MODE_P (mode0))
13259    op0 = safe_vector_operand (op0, mode0);
13260  if (VECTOR_MODE_P (mode1))
13261    op1 = safe_vector_operand (op1, mode1);
13262
13263  /* Swap operands if we have a comparison that isn't available in
13264     hardware.  */
13265  if (d->flag)
13266    {
13267      rtx tmp = op1;
13268      op1 = op0;
13269      op0 = tmp;
13270    }
13271
13272  target = gen_reg_rtx (SImode);
13273  emit_move_insn (target, const0_rtx);
13274  target = gen_rtx_SUBREG (QImode, target, 0);
13275
13276  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13277    op0 = copy_to_mode_reg (mode0, op0);
13278  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13279    op1 = copy_to_mode_reg (mode1, op1);
13280
13281  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13282  pat = GEN_FCN (d->icode) (op0, op1);
13283  if (! pat)
13284    return 0;
13285  emit_insn (pat);
13286  emit_insn (gen_rtx_SET (VOIDmode,
13287			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13288			  gen_rtx_fmt_ee (comparison, QImode,
13289					  SET_DEST (pat),
13290					  const0_rtx)));
13291
13292  return SUBREG_REG (target);
13293}
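
/* Illustrative only: the comi expansion above already yields an int
   suitable for use in C conditions.  A sketch, assuming the usual
   xmmintrin.h vector typedef:

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     int
     first_lane_lt (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_comilt (a, b);
     }

   The flags written by the comiss/comisd pattern are converted to a
   0/1 value by the QImode STRICT_LOW_PART store.  */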
13294
13295/* Expand an expression EXP that calls a built-in function,
13296   with result going to TARGET if that's convenient
13297   (and in mode MODE if that's convenient).
13298   SUBTARGET may be used as the target for computing one of EXP's operands.
13299   IGNORE is nonzero if the value is to be ignored.  */
13300
13301rtx
13302ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13303     tree exp;
13304     rtx target;
13305     rtx subtarget ATTRIBUTE_UNUSED;
13306     enum machine_mode mode ATTRIBUTE_UNUSED;
13307     int ignore ATTRIBUTE_UNUSED;
13308{
13309  const struct builtin_description *d;
13310  size_t i;
13311  enum insn_code icode;
13312  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13313  tree arglist = TREE_OPERAND (exp, 1);
13314  tree arg0, arg1, arg2;
13315  rtx op0, op1, op2, pat;
13316  enum machine_mode tmode, mode0, mode1, mode2;
13317  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13318
13319  switch (fcode)
13320    {
13321    case IX86_BUILTIN_EMMS:
13322      emit_insn (gen_emms ());
13323      return 0;
13324
13325    case IX86_BUILTIN_SFENCE:
13326      emit_insn (gen_sfence ());
13327      return 0;
13328
13329    case IX86_BUILTIN_PEXTRW:
13330    case IX86_BUILTIN_PEXTRW128:
13331      icode = (fcode == IX86_BUILTIN_PEXTRW
13332	       ? CODE_FOR_mmx_pextrw
13333	       : CODE_FOR_sse2_pextrw);
13334      arg0 = TREE_VALUE (arglist);
13335      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13336      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13337      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13338      tmode = insn_data[icode].operand[0].mode;
13339      mode0 = insn_data[icode].operand[1].mode;
13340      mode1 = insn_data[icode].operand[2].mode;
13341
13342      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13343	op0 = copy_to_mode_reg (mode0, op0);
13344      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13345	{
13346	  /* @@@ better error message */
13347	  error ("selector must be an immediate");
13348	  return gen_reg_rtx (tmode);
13349	}
13350      if (target == 0
13351	  || GET_MODE (target) != tmode
13352	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13353	target = gen_reg_rtx (tmode);
13354      pat = GEN_FCN (icode) (target, op0, op1);
13355      if (! pat)
13356	return 0;
13357      emit_insn (pat);
13358      return target;
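
      /* Illustrative only: the selector operand must fold to a literal
         constant, e.g. (assuming mmintrin.h's __v4hi typedef)

           int lane2 (__v4hi v) { return __builtin_ia32_pextrw (v, 2); }

         whereas a variable selector reaches the error path above.  */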
13359
13360    case IX86_BUILTIN_PINSRW:
13361    case IX86_BUILTIN_PINSRW128:
13362      icode = (fcode == IX86_BUILTIN_PINSRW
13363	       ? CODE_FOR_mmx_pinsrw
13364	       : CODE_FOR_sse2_pinsrw);
13365      arg0 = TREE_VALUE (arglist);
13366      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13367      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13368      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13369      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13370      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13371      tmode = insn_data[icode].operand[0].mode;
13372      mode0 = insn_data[icode].operand[1].mode;
13373      mode1 = insn_data[icode].operand[2].mode;
13374      mode2 = insn_data[icode].operand[3].mode;
13375
13376      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13377	op0 = copy_to_mode_reg (mode0, op0);
13378      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13379	op1 = copy_to_mode_reg (mode1, op1);
13380      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13381	{
13382	  /* @@@ better error message */
13383	  error ("selector must be an immediate");
13384	  return const0_rtx;
13385	}
13386      if (target == 0
13387	  || GET_MODE (target) != tmode
13388	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13389	target = gen_reg_rtx (tmode);
13390      pat = GEN_FCN (icode) (target, op0, op1, op2);
13391      if (! pat)
13392	return 0;
13393      emit_insn (pat);
13394      return target;
13395
13396    case IX86_BUILTIN_MASKMOVQ:
13397    case IX86_BUILTIN_MASKMOVDQU:
13398      icode = (fcode == IX86_BUILTIN_MASKMOVQ
13399	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13400	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13401		  : CODE_FOR_sse2_maskmovdqu));
13402      /* Note the arg order is different from the operand order.  */
13403      arg1 = TREE_VALUE (arglist);
13404      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13405      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13406      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13407      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13408      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13409      mode0 = insn_data[icode].operand[0].mode;
13410      mode1 = insn_data[icode].operand[1].mode;
13411      mode2 = insn_data[icode].operand[2].mode;
13412
13413      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13414	op0 = copy_to_mode_reg (mode0, op0);
13415      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13416	op1 = copy_to_mode_reg (mode1, op1);
13417      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13418	op2 = copy_to_mode_reg (mode2, op2);
13419      pat = GEN_FCN (icode) (op0, op1, op2);
13420      if (! pat)
13421	return 0;
13422      emit_insn (pat);
13423      return 0;
13424
13425    case IX86_BUILTIN_SQRTSS:
13426      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13427    case IX86_BUILTIN_RSQRTSS:
13428      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13429    case IX86_BUILTIN_RCPSS:
13430      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13431
13432    case IX86_BUILTIN_LOADAPS:
13433      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13434
13435    case IX86_BUILTIN_LOADUPS:
13436      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13437
13438    case IX86_BUILTIN_STOREAPS:
13439      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13440
13441    case IX86_BUILTIN_STOREUPS:
13442      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13443
13444    case IX86_BUILTIN_LOADSS:
13445      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13446
13447    case IX86_BUILTIN_STORESS:
13448      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13449
13450    case IX86_BUILTIN_LOADHPS:
13451    case IX86_BUILTIN_LOADLPS:
13452    case IX86_BUILTIN_LOADHPD:
13453    case IX86_BUILTIN_LOADLPD:
13454      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13455	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13456	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13457	       : CODE_FOR_sse2_movlpd);
13458      arg0 = TREE_VALUE (arglist);
13459      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13460      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13461      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13462      tmode = insn_data[icode].operand[0].mode;
13463      mode0 = insn_data[icode].operand[1].mode;
13464      mode1 = insn_data[icode].operand[2].mode;
13465
13466      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13467	op0 = copy_to_mode_reg (mode0, op0);
13468      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13469      if (target == 0
13470	  || GET_MODE (target) != tmode
13471	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13472	target = gen_reg_rtx (tmode);
13473      pat = GEN_FCN (icode) (target, op0, op1);
13474      if (! pat)
13475	return 0;
13476      emit_insn (pat);
13477      return target;
13478
13479    case IX86_BUILTIN_STOREHPS:
13480    case IX86_BUILTIN_STORELPS:
13481    case IX86_BUILTIN_STOREHPD:
13482    case IX86_BUILTIN_STORELPD:
13483      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13484	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13485	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13486	       : CODE_FOR_sse2_movlpd);
13487      arg0 = TREE_VALUE (arglist);
13488      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13489      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13490      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13491      mode0 = insn_data[icode].operand[1].mode;
13492      mode1 = insn_data[icode].operand[2].mode;
13493
13494      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13495      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13496	op1 = copy_to_mode_reg (mode1, op1);
13497
13498      pat = GEN_FCN (icode) (op0, op0, op1);
13499      if (! pat)
13500	return 0;
13501      emit_insn (pat);
13502      return 0;
13503
13504    case IX86_BUILTIN_MOVNTPS:
13505      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13506    case IX86_BUILTIN_MOVNTQ:
13507      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13508
13509    case IX86_BUILTIN_LDMXCSR:
13510      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13511      target = assign_386_stack_local (SImode, 0);
13512      emit_move_insn (target, op0);
13513      emit_insn (gen_ldmxcsr (target));
13514      return 0;
13515
13516    case IX86_BUILTIN_STMXCSR:
13517      target = assign_386_stack_local (SImode, 0);
13518      emit_insn (gen_stmxcsr (target));
13519      return copy_to_mode_reg (SImode, target);
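
      /* Illustrative only: both builtins bounce through an SImode stack
         slot because ldmxcsr/stmxcsr only accept memory operands.  A
         sketch of the user-level pairing (0x8040, FTZ|DAZ, is just an
         example mask):

           unsigned int saved = __builtin_ia32_stmxcsr ();
           __builtin_ia32_ldmxcsr (saved | 0x8040);
           ...
           __builtin_ia32_ldmxcsr (saved);  */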
13520
13521    case IX86_BUILTIN_SHUFPS:
13522    case IX86_BUILTIN_SHUFPD:
13523      icode = (fcode == IX86_BUILTIN_SHUFPS
13524	       ? CODE_FOR_sse_shufps
13525	       : CODE_FOR_sse2_shufpd);
13526      arg0 = TREE_VALUE (arglist);
13527      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13528      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13529      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13530      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13531      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13532      tmode = insn_data[icode].operand[0].mode;
13533      mode0 = insn_data[icode].operand[1].mode;
13534      mode1 = insn_data[icode].operand[2].mode;
13535      mode2 = insn_data[icode].operand[3].mode;
13536
13537      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13538	op0 = copy_to_mode_reg (mode0, op0);
13539      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13540	op1 = copy_to_mode_reg (mode1, op1);
13541      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13542	{
13543	  /* @@@ better error message */
13544	  error ("mask must be an immediate");
13545	  return gen_reg_rtx (tmode);
13546	}
13547      if (target == 0
13548	  || GET_MODE (target) != tmode
13549	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13550	target = gen_reg_rtx (tmode);
13551      pat = GEN_FCN (icode) (target, op0, op1, op2);
13552      if (! pat)
13553	return 0;
13554      emit_insn (pat);
13555      return target;
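
      /* Illustrative only: the shuffle mask is encoded into the insn
         and so must be a compile-time constant.  Assuming the
         xmmintrin.h __v4sf typedef,

           __v4sf rev (__v4sf a) { return __builtin_ia32_shufps (a, a, 0x1B); }

         reverses the four lanes, since 0x1B (binary 00 01 10 11)
         selects elements 3, 2, 1 and 0.  */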
13556
13557    case IX86_BUILTIN_PSHUFW:
13558    case IX86_BUILTIN_PSHUFD:
13559    case IX86_BUILTIN_PSHUFHW:
13560    case IX86_BUILTIN_PSHUFLW:
13561      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13562	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13563	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13564	       : CODE_FOR_mmx_pshufw);
13565      arg0 = TREE_VALUE (arglist);
13566      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13567      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13568      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13569      tmode = insn_data[icode].operand[0].mode;
13570      mode1 = insn_data[icode].operand[1].mode;
13571      mode2 = insn_data[icode].operand[2].mode;
13572
13573      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13574	op0 = copy_to_mode_reg (mode1, op0);
13575      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13576	{
13577	  /* @@@ better error message */
13578	  error ("mask must be an immediate");
13579	  return const0_rtx;
13580	}
13581      if (target == 0
13582	  || GET_MODE (target) != tmode
13583	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13584	target = gen_reg_rtx (tmode);
13585      pat = GEN_FCN (icode) (target, op0, op1);
13586      if (! pat)
13587	return 0;
13588      emit_insn (pat);
13589      return target;
13590
13591    case IX86_BUILTIN_PSLLDQI128:
13592    case IX86_BUILTIN_PSRLDQI128:
13593      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13594	       : CODE_FOR_sse2_lshrti3);
13595      arg0 = TREE_VALUE (arglist);
13596      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13597      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13598      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13599      tmode = insn_data[icode].operand[0].mode;
13600      mode1 = insn_data[icode].operand[1].mode;
13601      mode2 = insn_data[icode].operand[2].mode;
13602
13603      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13604	{
13605	  op0 = copy_to_reg (op0);
13606	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13607	}
13608      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13609	{
13610	  error ("shift must be an immediate");
13611	  return const0_rtx;
13612	}
13613      target = gen_reg_rtx (V2DImode);
13614      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13615      if (! pat)
13616	return 0;
13617      emit_insn (pat);
13618      return target;
13619
13620    case IX86_BUILTIN_FEMMS:
13621      emit_insn (gen_femms ());
13622      return NULL_RTX;
13623
13624    case IX86_BUILTIN_PAVGUSB:
13625      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13626
13627    case IX86_BUILTIN_PF2ID:
13628      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13629
13630    case IX86_BUILTIN_PFACC:
13631      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13632
13633    case IX86_BUILTIN_PFADD:
13634     return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13635
13636    case IX86_BUILTIN_PFCMPEQ:
13637      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13638
13639    case IX86_BUILTIN_PFCMPGE:
13640      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13641
13642    case IX86_BUILTIN_PFCMPGT:
13643      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13644
13645    case IX86_BUILTIN_PFMAX:
13646      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13647
13648    case IX86_BUILTIN_PFMIN:
13649      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13650
13651    case IX86_BUILTIN_PFMUL:
13652      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13653
13654    case IX86_BUILTIN_PFRCP:
13655      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13656
13657    case IX86_BUILTIN_PFRCPIT1:
13658      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13659
13660    case IX86_BUILTIN_PFRCPIT2:
13661      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13662
13663    case IX86_BUILTIN_PFRSQIT1:
13664      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13665
13666    case IX86_BUILTIN_PFRSQRT:
13667      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13668
13669    case IX86_BUILTIN_PFSUB:
13670      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13671
13672    case IX86_BUILTIN_PFSUBR:
13673      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13674
13675    case IX86_BUILTIN_PI2FD:
13676      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13677
13678    case IX86_BUILTIN_PMULHRW:
13679      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13680
13681    case IX86_BUILTIN_PF2IW:
13682      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13683
13684    case IX86_BUILTIN_PFNACC:
13685      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13686
13687    case IX86_BUILTIN_PFPNACC:
13688      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13689
13690    case IX86_BUILTIN_PI2FW:
13691      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13692
13693    case IX86_BUILTIN_PSWAPDSI:
13694      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13695
13696    case IX86_BUILTIN_PSWAPDSF:
13697      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13698
13699    case IX86_BUILTIN_SSE_ZERO:
13700      target = gen_reg_rtx (V4SFmode);
13701      emit_insn (gen_sse_clrv4sf (target));
13702      return target;
13703
13704    case IX86_BUILTIN_MMX_ZERO:
13705      target = gen_reg_rtx (DImode);
13706      emit_insn (gen_mmx_clrdi (target));
13707      return target;
13708
13709    case IX86_BUILTIN_CLRTI:
13710      target = gen_reg_rtx (V2DImode);
13711      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13712      return target;
13713
13714
13715    case IX86_BUILTIN_SQRTSD:
13716      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13717    case IX86_BUILTIN_LOADAPD:
13718      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13719    case IX86_BUILTIN_LOADUPD:
13720      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13721
13722    case IX86_BUILTIN_STOREAPD:
13723      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13724    case IX86_BUILTIN_STOREUPD:
13725      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13726
13727    case IX86_BUILTIN_LOADSD:
13728      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13729
13730    case IX86_BUILTIN_STORESD:
13731      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13732
13733    case IX86_BUILTIN_SETPD1:
13734      target = assign_386_stack_local (DFmode, 0);
13735      arg0 = TREE_VALUE (arglist);
13736      emit_move_insn (adjust_address (target, DFmode, 0),
13737		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13738      op0 = gen_reg_rtx (V2DFmode);
13739      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13740      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13741      return op0;
13742
13743    case IX86_BUILTIN_SETPD:
13744      target = assign_386_stack_local (V2DFmode, 0);
13745      arg0 = TREE_VALUE (arglist);
13746      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13747      emit_move_insn (adjust_address (target, DFmode, 0),
13748		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13749      emit_move_insn (adjust_address (target, DFmode, 8),
13750		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13751      op0 = gen_reg_rtx (V2DFmode);
13752      emit_insn (gen_sse2_movapd (op0, target));
13753      return op0;
13754
13755    case IX86_BUILTIN_LOADRPD:
13756      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13757					 gen_reg_rtx (V2DFmode), 1);
13758      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13759      return target;
13760
13761    case IX86_BUILTIN_LOADPD1:
13762      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13763					 gen_reg_rtx (V2DFmode), 1);
13764      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13765      return target;
13766
13767    case IX86_BUILTIN_STOREPD1:
13768      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13769    case IX86_BUILTIN_STORERPD:
13770      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13771
13772    case IX86_BUILTIN_CLRPD:
13773      target = gen_reg_rtx (V2DFmode);
13774      emit_insn (gen_sse_clrv2df (target));
13775      return target;
13776
13777    case IX86_BUILTIN_MFENCE:
13778	emit_insn (gen_sse2_mfence ());
13779	return 0;
13780    case IX86_BUILTIN_LFENCE:
13781	emit_insn (gen_sse2_lfence ());
13782	return 0;
13783
13784    case IX86_BUILTIN_CLFLUSH:
13785	arg0 = TREE_VALUE (arglist);
13786	op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13787	icode = CODE_FOR_sse2_clflush;
13788	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13789	    op0 = copy_to_mode_reg (Pmode, op0);
13790
13791	emit_insn (gen_sse2_clflush (op0));
13792	return 0;
13793
13794    case IX86_BUILTIN_MOVNTPD:
13795      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13796    case IX86_BUILTIN_MOVNTDQ:
13797      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13798    case IX86_BUILTIN_MOVNTI:
13799      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13800
13801    case IX86_BUILTIN_LOADDQA:
13802      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13803    case IX86_BUILTIN_LOADDQU:
13804      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13805    case IX86_BUILTIN_LOADD:
13806      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13807
13808    case IX86_BUILTIN_STOREDQA:
13809      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13810    case IX86_BUILTIN_STOREDQU:
13811      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13812    case IX86_BUILTIN_STORED:
13813      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13814
13815    default:
13816      break;
13817    }
13818
13819  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13820    if (d->code == fcode)
13821      {
13822	/* Compares are treated specially.  */
13823	if (d->icode == CODE_FOR_maskcmpv4sf3
13824	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
13825	    || d->icode == CODE_FOR_maskncmpv4sf3
13826	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
13827	    || d->icode == CODE_FOR_maskcmpv2df3
13828	    || d->icode == CODE_FOR_vmmaskcmpv2df3
13829	    || d->icode == CODE_FOR_maskncmpv2df3
13830	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
13831	  return ix86_expand_sse_compare (d, arglist, target);
13832
13833	return ix86_expand_binop_builtin (d->icode, arglist, target);
13834      }
13835
13836  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13837    if (d->code == fcode)
13838      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13839
13840  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13841    if (d->code == fcode)
13842      return ix86_expand_sse_comi (d, arglist, target);
13843
13844  /* @@@ Should really do something sensible here.  */
13845  return 0;
13846}
13847
13848/* Store OPERAND to memory after reload is completed.  This means
13849   that we can't easily use assign_stack_local.  */
13850rtx
13851ix86_force_to_memory (mode, operand)
13852     enum machine_mode mode;
13853     rtx operand;
13854{
13855  rtx result;
13856  if (!reload_completed)
13857    abort ();
13858  if (TARGET_64BIT && TARGET_RED_ZONE)
13859    {
13860      result = gen_rtx_MEM (mode,
13861			    gen_rtx_PLUS (Pmode,
13862					  stack_pointer_rtx,
13863					  GEN_INT (-RED_ZONE_SIZE)));
13864      emit_move_insn (result, operand);
13865    }
13866  else if (TARGET_64BIT && !TARGET_RED_ZONE)
13867    {
13868      switch (mode)
13869	{
13870	case HImode:
13871	case SImode:
13872	  operand = gen_lowpart (DImode, operand);
13873	  /* FALLTHRU */
13874	case DImode:
13875	  emit_insn (
13876		      gen_rtx_SET (VOIDmode,
13877				   gen_rtx_MEM (DImode,
13878						gen_rtx_PRE_DEC (DImode,
13879							stack_pointer_rtx)),
13880				   operand));
13881	  break;
13882	default:
13883	  abort ();
13884	}
13885      result = gen_rtx_MEM (mode, stack_pointer_rtx);
13886    }
13887  else
13888    {
13889      switch (mode)
13890	{
13891	case DImode:
13892	  {
13893	    rtx operands[2];
13894	    split_di (&operand, 1, operands, operands + 1);
13895	    emit_insn (
13896			gen_rtx_SET (VOIDmode,
13897				     gen_rtx_MEM (SImode,
13898						  gen_rtx_PRE_DEC (Pmode,
13899							stack_pointer_rtx)),
13900				     operands[1]));
13901	    emit_insn (
13902			gen_rtx_SET (VOIDmode,
13903				     gen_rtx_MEM (SImode,
13904						  gen_rtx_PRE_DEC (Pmode,
13905							stack_pointer_rtx)),
13906				     operands[0]));
13907	  }
13908	  break;
13909	case HImode:
13910	  /* It is better to store HImodes as SImodes.  */
13911	  if (!TARGET_PARTIAL_REG_STALL)
13912	    operand = gen_lowpart (SImode, operand);
13913	  /* FALLTHRU */
13914	case SImode:
13915	  emit_insn (
13916		      gen_rtx_SET (VOIDmode,
13917				   gen_rtx_MEM (GET_MODE (operand),
13918						gen_rtx_PRE_DEC (SImode,
13919							stack_pointer_rtx)),
13920				   operand));
13921	  break;
13922	default:
13923	  abort ();
13924	}
13925      result = gen_rtx_MEM (mode, stack_pointer_rtx);
13926    }
13927  return result;
13928}
13929
13930/* Free the operand from memory, releasing the slot set up by
      ix86_force_to_memory.  */
13931void
13932ix86_free_from_memory (mode)
13933     enum machine_mode mode;
13934{
13935  if (!TARGET_64BIT || !TARGET_RED_ZONE)
13936    {
13937      int size;
13938
13939      if (mode == DImode || TARGET_64BIT)
13940	size = 8;
13941      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13942	size = 2;
13943      else
13944	size = 4;
13945      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
13946         to a pop or add instruction if registers are available.  */
13947      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13948			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13949					    GEN_INT (size))));
13950    }
13951}
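
/* Illustrative only: ix86_force_to_memory and ix86_free_from_memory
   are meant to be used as a bracketed pair after reload, e.g.

     rtx slot = ix86_force_to_memory (DImode, operand);
     ... emit insns that read SLOT ...
     ix86_free_from_memory (DImode);

   leaving the stack pointer balanced again; in the 64-bit red-zone
   case no stack adjustment happens at all.  */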
13952
13953/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13954   QImode must go into class Q_REGS.
13955   Narrow ALL_REGS to GENERAL_REGS.  This lets movsf and
13956   movdf do mem-to-mem moves through integer regs.  */
13957enum reg_class
13958ix86_preferred_reload_class (x, class)
13959     rtx x;
13960     enum reg_class class;
13961{
13962  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13963    return NO_REGS;
13964  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13965    {
13966      /* SSE can't load any constant directly yet.  */
13967      if (SSE_CLASS_P (class))
13968	return NO_REGS;
13969      /* Floats can load 0 and 1.  */
13970      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13971	{
13972	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
13973	  if (MAYBE_SSE_CLASS_P (class))
13974	    return (reg_class_subset_p (class, GENERAL_REGS)
13975		    ? GENERAL_REGS : FLOAT_REGS);
13976	  else
13977	    return class;
13978	}
13979      /* General regs can load everything.  */
13980      if (reg_class_subset_p (class, GENERAL_REGS))
13981	return GENERAL_REGS;
13982      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
13983      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13984	return NO_REGS;
13985    }
13986  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13987    return NO_REGS;
13988  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13989    return Q_REGS;
13990  return class;
13991}
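
/* Illustrative only, hand-traced through the logic above:

     (const_double:DF 0.0) in FLOAT_REGS -> FLOAT_REGS (fldz exists)
     (const_double:DF 2.5) in SSE_REGS   -> NO_REGS (use the constant pool)
     (reg:QI ...) in ALL_REGS            -> Q_REGS  */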
13992
13993/* If we are copying between general and FP registers, we need a memory
13994   location. The same is true for SSE and MMX registers.
13995
13996   The macro can't work reliably when one of the CLASSES is a class containing
13997   registers from multiple units (SSE, MMX, integer).  We avoid this by never
13998   combining those units in a single alternative in the machine description.
13999   Ensure that this constraint holds to avoid surprises.
14000
14001   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14002   enforce these sanity checks.  */
14003int
14004ix86_secondary_memory_needed (class1, class2, mode, strict)
14005     enum reg_class class1, class2;
14006     enum machine_mode mode;
14007     int strict;
14008{
14009  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14010      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14011      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14012      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14013      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14014      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14015    {
14016      if (strict)
14017	abort ();
14018      else
14019	return 1;
14020    }
14021  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14022	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14023	      && (mode) != SImode)
14024	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14025	      && (mode) != SImode));
14026}
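
/* Illustrative only, hand-traced through the checks above:

     GENERAL_REGS <-> FLOAT_REGS, DFmode -> 1 (x87 moves go via memory)
     SSE_REGS <-> GENERAL_REGS, SImode   -> 0 (movd moves directly)
     SSE_REGS <-> GENERAL_REGS, DImode   -> 1  */
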
14027/* Return the cost of moving data from a register in class CLASS1 to
14028   one in class CLASS2.
14029
14030   It is not required that the cost always equal 2 when CLASS1 is the same
14031   as CLASS2; on some machines it is expensive to move between registers if
14032   they are not general registers.  */
14033int
14034ix86_register_move_cost (mode, class1, class2)
14035     enum machine_mode mode;
14036     enum reg_class class1, class2;
14037{
14038  /* In case we require secondary memory, compute the cost of the store
14039     followed by the load.  To avoid bad register allocation choices, we
14040     need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
14041
14042  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14043    {
14044      int cost = 1;
14045
14046      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14047		   MEMORY_MOVE_COST (mode, class1, 1));
14048      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14049		   MEMORY_MOVE_COST (mode, class2, 1));
14050
14051      /* In the case of copying from a general purpose register we may emit
14052         multiple stores followed by a single load, causing a memory size
14053         mismatch stall.  Count this as an arbitrarily high cost of 20.  */
14054      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14055	cost += 20;
14056
14057      /* In the case of FP/MMX moves, the registers actually overlap, and we
14058	 have to switch modes in order to treat them differently.  */
14059      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14060          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14061	cost += 20;
14062
14063      return cost;
14064    }
14065
14066  /* Moves between SSE/MMX and integer unit are expensive.  */
14067  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14068      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14069    return ix86_cost->mmxsse_to_integer;
14070  if (MAYBE_FLOAT_CLASS_P (class1))
14071    return ix86_cost->fp_move;
14072  if (MAYBE_SSE_CLASS_P (class1))
14073    return ix86_cost->sse_move;
14074  if (MAYBE_MMX_CLASS_P (class1))
14075    return ix86_cost->mmx_move;
14076  return 2;
14077}
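
/* Illustrative only: in the secondary-memory case the value returned
   above is, in effect,

     1 + MAX (load, store cost of CLASS1)
       + MAX (load, store cost of CLASS2)
       + 20 if more stores than loads would be emitted
       + 20 if an FP/MMX mode switch is required

   which keeps it at least as high as the symmetric MEMORY_MOVE_COST.  */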
14078
14079/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
14080int
14081ix86_hard_regno_mode_ok (regno, mode)
14082     int regno;
14083     enum machine_mode mode;
14084{
14085  /* Flags, and only flags, can hold CCmode values.  */
14086  if (CC_REGNO_P (regno))
14087    return GET_MODE_CLASS (mode) == MODE_CC;
14088  if (GET_MODE_CLASS (mode) == MODE_CC
14089      || GET_MODE_CLASS (mode) == MODE_RANDOM
14090      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14091    return 0;
14092  if (FP_REGNO_P (regno))
14093    return VALID_FP_MODE_P (mode);
14094  if (SSE_REGNO_P (regno))
14095    return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14096  if (MMX_REGNO_P (regno))
14097    return (TARGET_MMX
14098	    ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14099  /* We handle both integers and floats in the general purpose registers.
14100     In the future we should be able to handle vector modes as well.  */
14101  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14102    return 0;
14103  /* Take care with QImode values - they can be in non-QI regs, but then
14104     they do cause partial register stalls.  */
14105  if (regno < 4 || mode != QImode || TARGET_64BIT)
14106    return 1;
14107  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14108}
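
/* Illustrative only, hand-traced: (%eax, QImode) is always OK since
   %al exists, while (%esi, QImode) on a 32-bit target is rejected
   before reload when TARGET_PARTIAL_REG_STALL is set.  */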
14109
14110/* Return the cost of moving data of mode M between a
14111   register and memory.  A value of 2 is the default; this cost is
14112   relative to those in `REGISTER_MOVE_COST'.
14113
14114   If moving between registers and memory is more expensive than
14115   between two registers, you should define this macro to express the
14116   relative cost.
14117
14118   Also model the increased cost of moving QImode registers in
14119   non-Q_REGS classes.
14120 */
14121int
14122ix86_memory_move_cost (mode, class, in)
14123     enum machine_mode mode;
14124     enum reg_class class;
14125     int in;
14126{
14127  if (FLOAT_CLASS_P (class))
14128    {
14129      int index;
14130      switch (mode)
14131	{
14132	  case SFmode:
14133	    index = 0;
14134	    break;
14135	  case DFmode:
14136	    index = 1;
14137	    break;
14138	  case XFmode:
14139	  case TFmode:
14140	    index = 2;
14141	    break;
14142	  default:
14143	    return 100;
14144	}
14145      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14146    }
14147  if (SSE_CLASS_P (class))
14148    {
14149      int index;
14150      switch (GET_MODE_SIZE (mode))
14151	{
14152	  case 4:
14153	    index = 0;
14154	    break;
14155	  case 8:
14156	    index = 1;
14157	    break;
14158	  case 16:
14159	    index = 2;
14160	    break;
14161	  default:
14162	    return 100;
14163	}
14164      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14165    }
14166  if (MMX_CLASS_P (class))
14167    {
14168      int index;
14169      switch (GET_MODE_SIZE (mode))
14170	{
14171	  case 4:
14172	    index = 0;
14173	    break;
14174	  case 8:
14175	    index = 1;
14176	    break;
14177	  default:
14178	    return 100;
14179	}
14180      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14181    }
14182  switch (GET_MODE_SIZE (mode))
14183    {
14184      case 1:
14185	if (in)
14186	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14187		  : ix86_cost->movzbl_load);
14188	else
14189	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14190		  : ix86_cost->int_store[0] + 4);
14191	break;
14192      case 2:
14193	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14194      default:
14195	/* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
14196	if (mode == TFmode)
14197	  mode = XFmode;
14198	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14199		* ((int) GET_MODE_SIZE (mode)
14200		   + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14201    }
14202}
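
/* Illustrative only, hand-traced: an SFmode load into FLOAT_REGS costs
   ix86_cost->fp_load[0], while a DImode load into GENERAL_REGS on a
   32-bit target is priced as two word moves, ix86_cost->int_load[2] * 2.  */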
14203
14204#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14205static void
14206ix86_svr3_asm_out_constructor (symbol, priority)
14207     rtx symbol;
14208     int priority ATTRIBUTE_UNUSED;
14209{
14210  init_section ();
14211  fputs ("\tpushl $", asm_out_file);
14212  assemble_name (asm_out_file, XSTR (symbol, 0));
14213  fputc ('\n', asm_out_file);
14214}
14215#endif
14216
14217#if TARGET_MACHO
14218
14219static int current_machopic_label_num;
14220
14221/* Given a symbol name and its associated stub, write out the
14222   definition of the stub.  */
14223
14224void
14225machopic_output_stub (file, symb, stub)
14226     FILE *file;
14227     const char *symb, *stub;
14228{
14229  unsigned int length;
14230  char *binder_name, *symbol_name, lazy_ptr_name[32];
14231  int label = ++current_machopic_label_num;
14232
14233  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
14234  symb = (*targetm.strip_name_encoding) (symb);
14235
14236  length = strlen (stub);
14237  binder_name = alloca (length + 32);
14238  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14239
14240  length = strlen (symb);
14241  symbol_name = alloca (length + 32);
14242  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14243
14244  sprintf (lazy_ptr_name, "L%d$lz", label);
14245
14246  if (MACHOPIC_PURE)
14247    machopic_picsymbol_stub_section ();
14248  else
14249    machopic_symbol_stub_section ();
14250
14251  fprintf (file, "%s:\n", stub);
14252  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14253
14254  if (MACHOPIC_PURE)
14255    {
14256      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14257      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14258      fprintf (file, "\tjmp %%edx\n");
14259    }
14260  else
14261    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14262
14263  fprintf (file, "%s:\n", binder_name);
14264
14265  if (MACHOPIC_PURE)
14266    {
14267      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14268      fprintf (file, "\tpushl %%eax\n");
14269    }
14270  else
14271    fprintf (file, "\tpushl $%s\n", lazy_ptr_name);
14272
14273  fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14274
14275  machopic_lazy_symbol_ptr_section ();
14276  fprintf (file, "%s:\n", lazy_ptr_name);
14277  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14278  fprintf (file, "\t.long %s\n", binder_name);
14279}
14280#endif /* TARGET_MACHO */
14281
14282/* Order the registers for the register allocator.  */
14283
14284void
14285x86_order_regs_for_local_alloc ()
14286{
14287   int pos = 0;
14288   int i;
14289
14290   /* First allocate the local general purpose registers.  */
14291   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14292     if (GENERAL_REGNO_P (i) && call_used_regs[i])
14293	reg_alloc_order [pos++] = i;
14294
14295   /* Global general purpose registers.  */
14296   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14297     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14298	reg_alloc_order [pos++] = i;
14299
14300   /* x87 registers come first in case we are doing FP math
14301      using them.  */
14302   if (!TARGET_SSE_MATH)
14303     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14304       reg_alloc_order [pos++] = i;
14305
14306   /* SSE registers.  */
14307   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14308     reg_alloc_order [pos++] = i;
14309   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14310     reg_alloc_order [pos++] = i;
14311
14312   /* x87 registers.  */
14313   if (TARGET_SSE_MATH)
14314     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14315       reg_alloc_order [pos++] = i;
14316
14317   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14318     reg_alloc_order [pos++] = i;
14319
14320   /* Initialize the rest of the array, as we do not allocate some
14321      registers at all.  */
14322   while (pos < FIRST_PSEUDO_REGISTER)
14323     reg_alloc_order [pos++] = 0;
14324}
14325
14326/* Returns an expression indicating where the this parameter is
14327   located on entry to the FUNCTION.  */
14328
14329static rtx
14330x86_this_parameter (function)
14331     tree function;
14332{
14333  tree type = TREE_TYPE (function);
14334
14335  if (TARGET_64BIT)
14336    {
14337      int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14338      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14339    }
14340
14341  if (ix86_fntype_regparm (type) > 0)
14342    {
14343      tree parm;
14344
14345      parm = TYPE_ARG_TYPES (type);
14346      /* Figure out whether or not the function has a variable number of
14347	 arguments.  */
14348      for (; parm; parm = TREE_CHAIN (parm))
14349	if (TREE_VALUE (parm) == void_type_node)
14350	  break;
14351      /* If not, the this parameter is in %eax.  */
14352      if (parm)
14353	return gen_rtx_REG (SImode, 0);
14354    }
14355
14356  if (aggregate_value_p (TREE_TYPE (type)))
14357    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14358  else
14359    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14360}
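
/* Illustrative only: for a 32-bit cdecl method THIS is the first stack
   argument, 4(%esp) on entry, or 8(%esp) when an aggregate return
   value slips a hidden pointer in front of it; with regparm and a
   fixed argument list it arrives in %eax; on 64-bit it is in %rdi, or
   %rsi behind an aggregate-return pointer.  */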
14361
14362/* Determine whether x86_output_mi_thunk can succeed.  */
14363
14364static bool
14365x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14366     tree thunk ATTRIBUTE_UNUSED;
14367     HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14368     HOST_WIDE_INT vcall_offset;
14369     tree function;
14370{
14371  /* 64-bit can handle anything.  */
14372  if (TARGET_64BIT)
14373    return true;
14374
14375  /* For 32-bit, everything's fine if we have one free register.  */
14376  if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14377    return true;
14378
14379  /* Need a free register for vcall_offset.  */
14380  if (vcall_offset)
14381    return false;
14382
14383  /* Need a free register for GOT references.  */
14384  if (flag_pic && !(*targetm.binds_local_p) (function))
14385    return false;
14386
14387  /* Otherwise ok.  */
14388  return true;
14389}
14390
14391/* Output the assembler code for a thunk function.  THUNK_DECL is the
14392   declaration for the thunk function itself, FUNCTION is the decl for
14393   the target function.  DELTA is an immediate constant offset to be
14394   added to THIS.  If VCALL_OFFSET is non-zero, the word at
14395   *(*this + vcall_offset) should be added to THIS.  */
14396
14397static void
14398x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14399     FILE *file ATTRIBUTE_UNUSED;
14400     tree thunk ATTRIBUTE_UNUSED;
14401     HOST_WIDE_INT delta;
14402     HOST_WIDE_INT vcall_offset;
14403     tree function;
14404{
14405  rtx xops[3];
14406  rtx this = x86_this_parameter (function);
14407  rtx this_reg, tmp;
14408
14409  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
14410     pull it in now and let DELTA benefit.  */
14411  if (REG_P (this))
14412    this_reg = this;
14413  else if (vcall_offset)
14414    {
14415      /* Put the this parameter into %eax.  */
14416      xops[0] = this;
14417      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14418      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14419    }
14420  else
14421    this_reg = NULL_RTX;
14422
14423  /* Adjust the this parameter by a fixed constant.  */
14424  if (delta)
14425    {
14426      xops[0] = GEN_INT (delta);
14427      xops[1] = this_reg ? this_reg : this;
14428      if (TARGET_64BIT)
14429	{
14430	  if (!x86_64_general_operand (xops[0], DImode))
14431	    {
14432	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14433	      xops[1] = tmp;
14434	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14435	      xops[0] = tmp;
14436	      xops[1] = this;
14437	    }
14438	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14439	}
14440      else
14441	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14442    }
14443
14444  /* Adjust the this parameter by a value stored in the vtable.  */
14445  if (vcall_offset)
14446    {
14447      if (TARGET_64BIT)
14448	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14449      else
14450	tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14451
14452      xops[0] = gen_rtx_MEM (Pmode, this_reg);
14453      xops[1] = tmp;
14454      if (TARGET_64BIT)
14455	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14456      else
14457	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14458
14459      /* Adjust the this parameter.  */
14460      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14461      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14462	{
14463	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14464	  xops[0] = GEN_INT (vcall_offset);
14465	  xops[1] = tmp2;
14466	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14467	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14468	}
14469      xops[1] = this_reg;
14470      if (TARGET_64BIT)
14471	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14472      else
14473	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14474    }
14475
14476  /* If necessary, drop THIS back to its stack slot.  */
14477  if (this_reg && this_reg != this)
14478    {
14479      xops[0] = this_reg;
14480      xops[1] = this;
14481      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14482    }
14483
14484  xops[0] = DECL_RTL (function);
14485  if (TARGET_64BIT)
14486    {
14487      if (!flag_pic || (*targetm.binds_local_p) (function))
14488	output_asm_insn ("jmp\t%P0", xops);
14489      else
14490	{
14491	  tmp = XEXP (xops[0], 0);
14492	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
14493	  tmp = gen_rtx_CONST (Pmode, tmp);
14494	  tmp = gen_rtx_MEM (QImode, tmp);
14495	  xops[0] = tmp;
14496	  output_asm_insn ("jmp\t%A0", xops);
14497	}
14498    }
14499  else
14500    {
14501      if (!flag_pic || (*targetm.binds_local_p) (function))
14502	output_asm_insn ("jmp\t%P0", xops);
14503      else
14504	{
14505	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14506	  output_set_got (tmp);
14507
14508	  xops[1] = tmp;
14509	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14510	  output_asm_insn ("jmp\t{*}%1", xops);
14511	}
14512    }
14513}
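
/* Illustrative only: a 32-bit, non-PIC thunk with DELTA == 4 and no
   VCALL_OFFSET comes out as just

     addl	$4, 4(%esp)
     jmp	function

   adjusting THIS in its stack slot and tail-jumping to FUNCTION.  */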
14514
14515int
14516x86_field_alignment (field, computed)
14517     tree field;
14518     int computed;
14519{
14520  enum machine_mode mode;
14521  tree type = TREE_TYPE (field);
14522
14523  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14524    return computed;
14525  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14526		    ? get_inner_array_type (type) : type);
14527  if (mode == DFmode || mode == DCmode
14528      || GET_MODE_CLASS (mode) == MODE_INT
14529      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14530    return MIN (32, computed);
14531  return computed;
14532}
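
/* Illustrative only: without -malign-double this caps DFmode fields at
   32-bit alignment, so in

     struct s { char c; double d; };

   the member d sits at offset 4 rather than 8, and sizeof (struct s)
   is 12 rather than 16.  */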
14533
14534/* Output assembler code to FILE to increment profiler label # LABELNO
14535   for profiling a function entry.  */
14536void
14537x86_function_profiler (file, labelno)
14538     FILE *file;
14539     int labelno;
14540{
14541  if (TARGET_64BIT)
14542    if (flag_pic)
14543      {
14544#ifndef NO_PROFILE_COUNTERS
14545	fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
14546#endif
14547	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14548      }
14549    else
14550      {
14551#ifndef NO_PROFILE_COUNTERS
14552	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14553#endif
14554	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14555      }
14556  else if (flag_pic)
14557    {
14558#ifndef NO_PROFILE_COUNTERS
14559      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14560	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14561#endif
14562      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14563    }
14564  else
14565    {
14566#ifndef NO_PROFILE_COUNTERS
14567      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14568	       PROFILE_COUNT_REGISTER);
14569#endif
14570      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14571    }
14572}
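
/* Illustrative only: assuming MCOUNT_NAME is "mcount" and
   PROFILE_COUNT_REGISTER is "edx", as on typical ELF targets, the
   32-bit non-PIC case prints

     movl	$.LP0,%edx
     call	mcount

   with the movl omitted when NO_PROFILE_COUNTERS is defined.  */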
14573
14574/* Implement machine specific optimizations.
14575   At the moment we implement a single transformation: the AMD Athlon works
14576   faster when a RET is not the destination of a conditional jump or directly
14577   preceded by another jump instruction.  We avoid the penalty by inserting a
14578   NOP just before such RET instructions.  */
14579void
14580x86_machine_dependent_reorg (first)
14581     rtx first ATTRIBUTE_UNUSED;
14582{
14583  edge e;
14584
14585  if (!TARGET_ATHLON || !optimize || optimize_size)
14586    return;
14587  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14588  {
14589    basic_block bb = e->src;
14590    rtx ret = bb->end;
14591    rtx prev;
14592    bool insert = false;
14593
14594    if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14595      continue;
14596    prev = prev_nonnote_insn (ret);
14597    if (prev && GET_CODE (prev) == CODE_LABEL)
14598      {
14599	edge e;
14600	for (e = bb->pred; e; e = e->pred_next)
14601	  if (EDGE_FREQUENCY (e) && e->src->index > 0
14602	      && !(e->flags & EDGE_FALLTHRU))
14603	    insert = 1;
14604      }
14605    if (!insert)
14606      {
14607	prev = prev_real_insn (ret);
14608	if (prev && GET_CODE (prev) == JUMP_INSN
14609	    && any_condjump_p (prev))
14610	  insert = 1;
14611      }
14612    if (insert)
14613      emit_insn_before (gen_nop (), ret);
14614  }
14615}
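
/* Illustrative only: given the epilogue

     jne	.L2
     ret

   the pass above inserts a NOP, yielding

     jne	.L2
     nop
     ret

   which avoids the Athlon penalty described above.  */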
14616
14617#include "gt-i386.h"
14618