/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 108173 2002-12-22 05:57:53Z kan $ */


#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  3,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  4,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  5,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  42,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  8,					/* variable shift costs */
  8,					/* constant shift costs */
  30,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  112,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

const struct processor_costs *ix86_cost = &pentium_cost;
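
/* A minimal sketch of how these tables are consumed, assuming the usual
   arrangement in i386.h where the RTX cost macros dereference the table
   selected above (the macro and field names below are i386.h's, stated
   here as an assumption of this comment, not a guarantee):

     COSTS_N_INSNS (ix86_cost->add)                      -- a PLUS
     COSTS_N_INSNS (ix86_cost->mult_init
		    + nbits * ix86_cost->mult_bit)       -- a MULT

   When optimize_size is set, size_cost is swapped in, so every entry
   then measures bytes rather than cycles.  */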

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
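
/* A sketch of the consuming side of these masks.  i386.h in this tree
   defines CPUMASK and per-feature TARGET_* macros along these lines
   (treat the exact names as an assumption of this comment):

     #define CPUMASK (1 << ix86_cpu)
     #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)

   so x86_use_leave above enables the "leave" instruction when tuning
   for the 386, K6 or Athlon.  */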

/* If the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30
/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))

static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
					        1 /*RDX*/, 2 /*RCX*/,
					        FIRST_REX_INT_REG /*R8 */,
					        FIRST_REX_INT_REG + 1 /*R9 */};
static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/,
					        5 /*RDI*/, 4 /*RSI*/};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
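/* Worked out for the 64-bit ABI (where REGPARM_MAX is 6 integer
   registers, UNITS_PER_WORD is 8 and SSE_REGPARM_MAX is 8 -- values
   taken from i386.h, so treat them as an assumption of this comment):
     6 * 8 + 8 * 16 = 176 bytes,
   which matches the register save area size in the x86-64 psABI.  */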

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;

static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
				       int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					 rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static void ix86_init_mmx_sse_builtins PARAMS ((void));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
						     tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					  rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));

#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class;
   gcc will just use an SF or DFmode move instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).
 */
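/* Classification example (per the x86-64 psABI, each eightbyte of an
   argument gets one class): a hypothetical

     struct { double d; int i; }

   classifies as { X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS }, so the
   double travels in an SSE register and the int in a general one.  */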
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
   static void ix86_osf_output_function_prologue PARAMS ((FILE *,
							  HOST_WIDE_INT));
#  undef TARGET_ASM_FUNCTION_PROLOGUE
#  define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder

struct gcc_target targetm = TARGET_INITIALIZER;

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				    | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
    };
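
  /* Reading the table: a hypothetical "-march=pentium3" selects
     PROCESSOR_PENTIUMPRO scheduling and, unless the corresponding
     MASK_*_SET bit says the user chose otherwise, turns on MASK_MMX
     and MASK_SSE in the loop below (PTA_PREFETCH_SSE additionally
     sets x86_prefetch_sse).  */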

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 1;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_cpu = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags & MASK_MMX_SET))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags & MASK_3DNOW_SET))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags & MASK_3DNOW_A_SET))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags & MASK_SSE_SET))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags & MASK_SSE2_SET))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	break;
      }
  /* Check for a bad -mcpu first so we never index past the end of
     the alias table.  */
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
  else if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
    x86_prefetch_sse = true;

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  free_machine_status = ix86_free_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
   if (TARGET_64BIT)
     ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}

void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line option
     specifying them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_asynchronous_unwind_tables = 2;
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { NULL,        0, 0, false, false, false, NULL }
};
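
/* Source-level sketch of the entries above (illustrative only; this
   declaration is hypothetical and not tied to anything in this file):

     int __attribute__ ((regparm (3))) dot (int *a, int *b, int n);

   asks for up to three integer arguments in EAX, EDX and ECX instead
   of on the stack; the handlers below validate such uses.  */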

/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.

   We override it here to allow for the new profiling code to go before
   the prologue and the old mcount code to go after the prologue (and
   after %ebx has been set up for ELF shared library support).  */

static void
ix86_osf_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  const char *prefix = "";
  const char *const lprefix = LPREFIX;
  int labelno = current_function_profile_label_no;

#ifdef OSF_OS

  if (TARGET_UNDERSCORES)
    prefix = "_";

  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic && !HALF_PIC_P ())
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else if (HALF_PIC_P ())
	{
	  rtx symref;

	  HALF_PIC_EXTERNAL ("_mcount_ptr");
	  symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
						     "_mcount_ptr"));

	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
		   XSTR (symref, 0));
	  fprintf (file, "\tcall *(%%eax)\n");
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#else  /* !OSF_OS */

  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic)
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }
#endif /* !OSF_OS */

  function_prologue (file, size);
}

#endif  /* OSF_OS || TARGET_OSF1ELF */

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched calling conventions (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
     of args is fixed, but if the number is variable then the caller
     must pop them all.  RTD can't be used for library calls now
     because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */
1477
1478int
1479ix86_return_pops_args (fundecl, funtype, size)
1480     tree fundecl;
1481     tree funtype;
1482     int size;
1483{
1484  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1485
1486    /* Cdecl functions override -mrtd, and never pop the stack.  */
1487  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1488
1489    /* Stdcall functions will pop the stack if not variable args.  */
1490    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1491      rtd = 1;
1492
1493    if (rtd
1494        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1495	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1496		== void_type_node)))
1497      return size;
1498  }
1499
1500  /* Lose any fake structure return argument if it is passed on the stack.  */
1501  if (aggregate_value_p (TREE_TYPE (funtype))
1502      && !TARGET_64BIT)
1503    {
1504      int nregs = ix86_regparm;
1505
1506      if (funtype)
1507	{
1508	  tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1509
1510	  if (attr)
1511	    nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1512	}
1513
1514      if (!nregs)
1515	return GET_MODE_SIZE (Pmode);
1516    }
1517
1518  return 0;
1519}
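
/* A sketch of the effect (hypothetical declaration, not from this file):
   for

     void __attribute__ ((stdcall)) f (int a, int b);

   ix86_return_pops_args returns 8, so the callee finishes with 'ret $8'
   and pops both arguments itself, whereas a plain cdecl function yields 0
   and leaves the cleanup to the caller.  */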
1520
1521/* Argument support functions.  */
1522
1523/* Return true when register may be used to pass function parameters.  */
1524bool
1525ix86_function_arg_regno_p (regno)
1526     int regno;
1527{
1528  int i;
1529  if (!TARGET_64BIT)
1530    return (regno < REGPARM_MAX
1531	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1532  if (SSE_REGNO_P (regno) && TARGET_SSE)
1533    return true;
1534  /* RAX is used as a hidden argument to va_arg functions.  */
1535  if (!regno)
1536    return true;
1537  for (i = 0; i < REGPARM_MAX; i++)
1538    if (regno == x86_64_int_parameter_registers[i])
1539      return true;
1540  return false;
1541}
1542
1543/* Initialize a variable CUM of type CUMULATIVE_ARGS
1544   for a call to a function whose data type is FNTYPE.
1545   For a library call, FNTYPE is 0.  */
1546
1547void
1548init_cumulative_args (cum, fntype, libname)
1549     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
1550     tree fntype;		/* tree ptr for function decl */
1551     rtx libname;		/* SYMBOL_REF of library name or 0 */
1552{
1553  static CUMULATIVE_ARGS zero_cum;
1554  tree param, next_param;
1555
1556  if (TARGET_DEBUG_ARG)
1557    {
1558      fprintf (stderr, "\ninit_cumulative_args (");
1559      if (fntype)
1560	fprintf (stderr, "fntype code = %s, ret code = %s",
1561		 tree_code_name[(int) TREE_CODE (fntype)],
1562		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1563      else
1564	fprintf (stderr, "no fntype");
1565
1566      if (libname)
1567	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1568    }
1569
1570  *cum = zero_cum;
1571
1572  /* Set up the number of registers to use for passing arguments.  */
1573  cum->nregs = ix86_regparm;
1574  cum->sse_nregs = SSE_REGPARM_MAX;
1575  if (fntype && !TARGET_64BIT)
1576    {
1577      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1578
1579      if (attr)
1580	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1581    }
1582  cum->maybe_vaarg = false;
1583
1584  /* Determine if this function has variable arguments.  This is
1585     indicated by the last argument being 'void_type_node' if there
1586     are no variable arguments.  If there are variable arguments, then
1587     we won't pass anything in registers.  */
1588
1589  if (cum->nregs)
1590    {
1591      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1592	   param != 0; param = next_param)
1593	{
1594	  next_param = TREE_CHAIN (param);
1595	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1596	    {
1597	      if (!TARGET_64BIT)
1598		cum->nregs = 0;
1599	      cum->maybe_vaarg = true;
1600	    }
1601	}
1602    }
1603  if ((!fntype && !libname)
1604      || (fntype && !TYPE_ARG_TYPES (fntype)))
1605    cum->maybe_vaarg = 1;
1606
1607  if (TARGET_DEBUG_ARG)
1608    fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1609
1610  return;
1611}
1612
1613/* x86-64 register passing implementation.  See the x86-64 PS ABI for
1614   details.  The goal of this code is to classify each eightbyte of an
1615   incoming argument by register class and assign registers accordingly.  */
1616
1617/* Return the union class of CLASS1 and CLASS2.
1618   See the x86-64 PS ABI for details.  */
1619
1620static enum x86_64_reg_class
1621merge_classes (class1, class2)
1622     enum x86_64_reg_class class1, class2;
1623{
1624  /* Rule #1: If both classes are equal, this is the resulting class.  */
1625  if (class1 == class2)
1626    return class1;
1627
1628  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1629     the other class.  */
1630  if (class1 == X86_64_NO_CLASS)
1631    return class2;
1632  if (class2 == X86_64_NO_CLASS)
1633    return class1;
1634
1635  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
1636  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1637    return X86_64_MEMORY_CLASS;
1638
1639  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
1640  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1641      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1642    return X86_64_INTEGERSI_CLASS;
1643  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1644      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1645    return X86_64_INTEGER_CLASS;
1646
1647  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
1648  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1649      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1650    return X86_64_MEMORY_CLASS;
1651
1652  /* Rule #6: Otherwise class SSE is used.  */
1653  return X86_64_SSE_CLASS;
1654}
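
/* For example (hypothetical type, not from this file), classifying

     union u { int i; float f; };

   merges X86_64_INTEGERSI_CLASS (from the int) with X86_64_SSESF_CLASS
   (from the float); by rule #4 above the result is X86_64_INTEGERSI_CLASS,
   so the union travels in a general purpose register.  */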
1655
1656/* Classify the argument of type TYPE and mode MODE.
1657   CLASSES will be filled by the register class used to pass each word
1658   of the operand.  The number of words is returned.  In case the parameter
1659   should be passed in memory, 0 is returned. As a special case for zero
1660   sized containers, classes[0] will be NO_CLASS and 1 is returned.
1661
1662   BIT_OFFSET is used internally for handling records and specifies the
1663   offset in bits modulo 256 to avoid overflow cases.
1664
1665   See the x86-64 PS ABI for details.
1666*/
1667
1668static int
1669classify_argument (mode, type, classes, bit_offset)
1670     enum machine_mode mode;
1671     tree type;
1672     enum x86_64_reg_class classes[MAX_CLASSES];
1673     int bit_offset;
1674{
1675  int bytes =
1676    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1677  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1678
1679  /* Variable sized structures are always passed on the stack.  */
1680  if (mode == BLKmode && type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
1681    return 0;
1682
1683  if (type && AGGREGATE_TYPE_P (type))
1684    {
1685      int i;
1686      tree field;
1687      enum x86_64_reg_class subclasses[MAX_CLASSES];
1688
1689      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
1690      if (bytes > 16)
1691	return 0;
1692
1693      for (i = 0; i < words; i++)
1694	classes[i] = X86_64_NO_CLASS;
1695
1696      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
1697	 signal the memory class, so handle this as a special case.  */
1698      if (!words)
1699	{
1700	  classes[0] = X86_64_NO_CLASS;
1701	  return 1;
1702	}
1703
1704      /* Classify each field of record and merge classes.  */
1705      if (TREE_CODE (type) == RECORD_TYPE)
1706	{
1707	  /* For C++ classes, first merge in the fields of the base classes.  */
1708	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1709	    {
1710	      tree bases = TYPE_BINFO_BASETYPES (type);
1711	      int n_bases = TREE_VEC_LENGTH (bases);
1712	      int i;
1713
1714	      for (i = 0; i < n_bases; ++i)
1715		{
1716		   tree binfo = TREE_VEC_ELT (bases, i);
1717		   int num;
1718		   int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1719		   tree type = BINFO_TYPE (binfo);
1720
1721		   num = classify_argument (TYPE_MODE (type),
1722					    type, subclasses,
1723					    (offset + bit_offset) % 256);
1724		   if (!num)
1725		     return 0;
1726		   for (i = 0; i < num; i++)
1727		     {
1728		       int pos = (offset + bit_offset) / 8 / 8;
1729		       classes[i + pos] =
1730			 merge_classes (subclasses[i], classes[i + pos]);
1731		     }
1732		}
1733	    }
1734	  /* And now merge the fields of the structure itself.  */
1735	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1736	    {
1737	      if (TREE_CODE (field) == FIELD_DECL)
1738		{
1739		  int num;
1740
1741		  /* Bitfields are always classified as integer.  Handle them
1742		     early, since later code would consider them to be
1743		     misaligned integers.  */
1744		  if (DECL_BIT_FIELD (field))
1745		    {
1746		      for (i = int_bit_position (field) / 8 / 8;
1747			   i < (int_bit_position (field)
1748			        + tree_low_cst (DECL_SIZE (field), 0)
1749			       	+ 63) / 8 / 8; i++)
1750			classes[i] =
1751			  merge_classes (X86_64_INTEGER_CLASS,
1752					 classes[i]);
1753		    }
1754		  else
1755		    {
1756		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1757					       TREE_TYPE (field), subclasses,
1758					       (int_bit_position (field)
1759						+ bit_offset) % 256);
1760		      if (!num)
1761			return 0;
1762		      for (i = 0; i < num; i++)
1763			{
1764			  int pos =
1765			    (int_bit_position (field) + bit_offset) / 8 / 8;
1766			  classes[i + pos] =
1767			    merge_classes (subclasses[i], classes[i + pos]);
1768			}
1769		    }
1770		}
1771	    }
1772	}
1773      /* Arrays are handled as small records.  */
1774      else if (TREE_CODE (type) == ARRAY_TYPE)
1775	{
1776	  int num;
1777	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1778				   TREE_TYPE (type), subclasses, bit_offset);
1779	  if (!num)
1780	    return 0;
1781
1782	  /* The partial classes are now full classes.  */
1783	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1784	    subclasses[0] = X86_64_SSE_CLASS;
1785	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1786	    subclasses[0] = X86_64_INTEGER_CLASS;
1787
1788	  for (i = 0; i < words; i++)
1789	    classes[i] = subclasses[i % num];
1790	}
1791      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
1792      else if (TREE_CODE (type) == UNION_TYPE
1793	       || TREE_CODE (type) == QUAL_UNION_TYPE)
1794	{
1795	  /* For C++ classes, first merge in the fields of the base classes.  */
1796	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1797	    {
1798	      tree bases = TYPE_BINFO_BASETYPES (type);
1799	      int n_bases = TREE_VEC_LENGTH (bases);
1800	      int i;
1801
1802	      for (i = 0; i < n_bases; ++i)
1803		{
1804		   tree binfo = TREE_VEC_ELT (bases, i);
1805		   int num;
1806		   int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1807		   tree type = BINFO_TYPE (binfo);
1808
1809		   num = classify_argument (TYPE_MODE (type),
1810					    type, subclasses,
1811					    (offset + bit_offset) % 256);
1812		   if (!num)
1813		     return 0;
1814		   for (i = 0; i < num; i++)
1815		     {
1816		       int pos = (offset + bit_offset) / 8 / 8;
1817		       classes[i + pos] =
1818			 merge_classes (subclasses[i], classes[i + pos]);
1819		     }
1820		}
1821	    }
1822	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1823	    {
1824	      if (TREE_CODE (field) == FIELD_DECL)
1825		{
1826		  int num;
1827		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1828					   TREE_TYPE (field), subclasses,
1829					   bit_offset);
1830		  if (!num)
1831		    return 0;
1832		  for (i = 0; i < num; i++)
1833		    classes[i] = merge_classes (subclasses[i], classes[i]);
1834		}
1835	    }
1836	}
1837      else
1838	abort ();
1839
1840      /* Final merger cleanup.  */
1841      for (i = 0; i < words; i++)
1842	{
1843	  /* If one class is MEMORY, everything should be passed in
1844	     memory.  */
1845	  if (classes[i] == X86_64_MEMORY_CLASS)
1846	    return 0;
1847
1848	  /* The X86_64_SSEUP_CLASS should always be preceded by
1849	     X86_64_SSE_CLASS.  */
1850	  if (classes[i] == X86_64_SSEUP_CLASS
1851	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1852	    classes[i] = X86_64_SSE_CLASS;
1853
1854	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
1855	  if (classes[i] == X86_64_X87UP_CLASS
1856	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1857	    classes[i] = X86_64_SSE_CLASS;
1858	}
1859      return words;
1860    }
1861
1862  /* Compute the alignment needed.  We align all types to their natural
1863     boundaries, with the exception of XFmode, which is aligned to 128 bits.  */
1864  if (mode != VOIDmode && mode != BLKmode)
1865    {
1866      int mode_alignment = GET_MODE_BITSIZE (mode);
1867
1868      if (mode == XFmode)
1869	mode_alignment = 128;
1870      else if (mode == XCmode)
1871	mode_alignment = 256;
1872      /* Misaligned fields are always returned in memory.  */
1873      if (bit_offset % mode_alignment)
1874	return 0;
1875    }
1876
1877  /* Classification of atomic types.  */
1878  switch (mode)
1879    {
1880    case DImode:
1881    case SImode:
1882    case HImode:
1883    case QImode:
1884    case CSImode:
1885    case CHImode:
1886    case CQImode:
1887      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1888	classes[0] = X86_64_INTEGERSI_CLASS;
1889      else
1890	classes[0] = X86_64_INTEGER_CLASS;
1891      return 1;
1892    case CDImode:
1893    case TImode:
1894      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1895      return 2;
1896    case CTImode:
1897      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1898      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1899      return 4;
1900    case SFmode:
1901      if (!(bit_offset % 64))
1902	classes[0] = X86_64_SSESF_CLASS;
1903      else
1904	classes[0] = X86_64_SSE_CLASS;
1905      return 1;
1906    case DFmode:
1907      classes[0] = X86_64_SSEDF_CLASS;
1908      return 1;
1909    case TFmode:
1910      classes[0] = X86_64_X87_CLASS;
1911      classes[1] = X86_64_X87UP_CLASS;
1912      return 2;
1913    case TCmode:
1914      classes[0] = X86_64_X87_CLASS;
1915      classes[1] = X86_64_X87UP_CLASS;
1916      classes[2] = X86_64_X87_CLASS;
1917      classes[3] = X86_64_X87UP_CLASS;
1918      return 4;
1919    case DCmode:
1920      classes[0] = X86_64_SSEDF_CLASS;
1921      classes[1] = X86_64_SSEDF_CLASS;
1922      return 2;
1923    case SCmode:
1924      classes[0] = X86_64_SSE_CLASS;
1925      return 1;
1926    case V4SFmode:
1927    case V4SImode:
1928      classes[0] = X86_64_SSE_CLASS;
1929      classes[1] = X86_64_SSEUP_CLASS;
1930      return 2;
1931    case V2SFmode:
1932    case V2SImode:
1933    case V4HImode:
1934    case V8QImode:
1935      classes[0] = X86_64_SSE_CLASS;
1936      return 1;
1937    case BLKmode:
1938    case VOIDmode:
1939      return 0;
1940    default:
1941      abort ();
1942    }
1943}
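
/* A worked example (hypothetical type, not from this file).  For

     struct s { double d; int a; int b; };

   which occupies 16 bytes (two eightbytes), classify_argument returns 2
   with classes[0] = X86_64_SSEDF_CLASS (the double fills the first
   eightbyte) and classes[1] = X86_64_INTEGER_CLASS (the two ints merge
   into the second), so the struct is passed in one SSE register and one
   general purpose register.  */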
1944
1945/* Examine the argument and set the number of registers required in each
1946   class.  Return 0 iff the parameter should be passed in memory.  */
1947static int
1948examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1949     enum machine_mode mode;
1950     tree type;
1951     int *int_nregs, *sse_nregs;
1952     int in_return;
1953{
1954  enum x86_64_reg_class class[MAX_CLASSES];
1955  int n = classify_argument (mode, type, class, 0);
1956
1957  *int_nregs = 0;
1958  *sse_nregs = 0;
1959  if (!n)
1960    return 0;
1961  for (n--; n >= 0; n--)
1962    switch (class[n])
1963      {
1964      case X86_64_INTEGER_CLASS:
1965      case X86_64_INTEGERSI_CLASS:
1966	(*int_nregs)++;
1967	break;
1968      case X86_64_SSE_CLASS:
1969      case X86_64_SSESF_CLASS:
1970      case X86_64_SSEDF_CLASS:
1971	(*sse_nregs)++;
1972	break;
1973      case X86_64_NO_CLASS:
1974      case X86_64_SSEUP_CLASS:
1975	break;
1976      case X86_64_X87_CLASS:
1977      case X86_64_X87UP_CLASS:
1978	if (!in_return)
1979	  return 0;
1980	break;
1981      case X86_64_MEMORY_CLASS:
1982	abort ();
1983      }
1984  return 1;
1985}
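
/* Continuing the struct example above (hypothetical type): for
   struct s { double d; int a; int b; } this sets *int_nregs = 1 and
   *sse_nregs = 1 and returns 1, while a struct containing a long double
   hits the X87 classes and, unless IN_RETURN is set, is forced into
   memory by the return of 0.  */
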
1986/* Construct a container for the argument as used by the GCC interface.  See
1987   FUNCTION_ARG for the detailed description.  */
1988static rtx
1989construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1990     enum machine_mode mode;
1991     tree type;
1992     int in_return;
1993     int nintregs, nsseregs;
1994     const int * intreg;
1995     int sse_regno;
1996{
1997  enum machine_mode tmpmode;
1998  int bytes =
1999    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2000  enum x86_64_reg_class class[MAX_CLASSES];
2001  int n;
2002  int i;
2003  int nexps = 0;
2004  int needed_sseregs, needed_intregs;
2005  rtx exp[MAX_CLASSES];
2006  rtx ret;
2007
2008  n = classify_argument (mode, type, class, 0);
2009  if (TARGET_DEBUG_ARG)
2010    {
2011      if (!n)
2012	fprintf (stderr, "Memory class\n");
2013      else
2014	{
2015	  fprintf (stderr, "Classes:");
2016	  for (i = 0; i < n; i++)
2017	    {
2018	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2019	    }
2020	   fprintf (stderr, "\n");
2021	}
2022    }
2023  if (!n)
2024    return NULL;
2025  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2026    return NULL;
2027  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2028    return NULL;
2029
2030  /* First construct the simple cases.  Avoid SCmode, since we want to use
2031     a single register to pass this type.  */
2032  if (n == 1 && mode != SCmode)
2033    switch (class[0])
2034      {
2035      case X86_64_INTEGER_CLASS:
2036      case X86_64_INTEGERSI_CLASS:
2037	return gen_rtx_REG (mode, intreg[0]);
2038      case X86_64_SSE_CLASS:
2039      case X86_64_SSESF_CLASS:
2040      case X86_64_SSEDF_CLASS:
2041	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2042      case X86_64_X87_CLASS:
2043	return gen_rtx_REG (mode, FIRST_STACK_REG);
2044      case X86_64_NO_CLASS:
2045	/* Zero sized array, struct or class.  */
2046	return NULL;
2047      default:
2048	abort ();
2049      }
2050  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2051    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2052  if (n == 2
2053      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2054    return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2055  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2056      && class[1] == X86_64_INTEGER_CLASS
2057      && (mode == CDImode || mode == TImode)
2058      && intreg[0] + 1 == intreg[1])
2059    return gen_rtx_REG (mode, intreg[0]);
2060  if (n == 4
2061      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2062      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2063    return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2064
2065  /* Otherwise figure out the entries of the PARALLEL.  */
2066  for (i = 0; i < n; i++)
2067    {
2068      switch (class[i])
2069        {
2070	  case X86_64_NO_CLASS:
2071	    break;
2072	  case X86_64_INTEGER_CLASS:
2073	  case X86_64_INTEGERSI_CLASS:
2074	    /* Merge TImodes on aligned occasions here too.  */
2075	    if (i * 8 + 8 > bytes)
2076	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2077	    else if (class[i] == X86_64_INTEGERSI_CLASS)
2078	      tmpmode = SImode;
2079	    else
2080	      tmpmode = DImode;
2081	    /* We've requested a size (e.g. 24 bits) no integer mode matches.  Use DImode.  */
2082	    if (tmpmode == BLKmode)
2083	      tmpmode = DImode;
2084	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2085					       gen_rtx_REG (tmpmode, *intreg),
2086					       GEN_INT (i*8));
2087	    intreg++;
2088	    break;
2089	  case X86_64_SSESF_CLASS:
2090	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2091					       gen_rtx_REG (SFmode,
2092							    SSE_REGNO (sse_regno)),
2093					       GEN_INT (i*8));
2094	    sse_regno++;
2095	    break;
2096	  case X86_64_SSEDF_CLASS:
2097	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2098					       gen_rtx_REG (DFmode,
2099							    SSE_REGNO (sse_regno)),
2100					       GEN_INT (i*8));
2101	    sse_regno++;
2102	    break;
2103	  case X86_64_SSE_CLASS:
2104	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2105	      tmpmode = TImode, i++;
2106	    else
2107	      tmpmode = DImode;
2108	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2109					       gen_rtx_REG (tmpmode,
2110							    SSE_REGNO (sse_regno)),
2111					       GEN_INT (i*8));
2112	    sse_regno++;
2113	    break;
2114	  default:
2115	    abort ();
2116	}
2117    }
2118  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2119  for (i = 0; i < nexps; i++)
2120    XVECEXP (ret, 0, i) = exp [i];
2121  return ret;
2122}
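
/* For the struct example used earlier, construct_container builds
   (assuming the first free registers are xmm0 and rdi; illustrative
   RTL only):

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI rdi) (const_int 8))])

   i.e. bytes 0-7 travel in an SSE register and bytes 8-15 in a general
   purpose register.  */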
2123
2124/* Update the data in CUM to advance over an argument
2125   of mode MODE and data type TYPE.
2126   (TYPE is null for libcalls where that information may not be available.)  */
2127
2128void
2129function_arg_advance (cum, mode, type, named)
2130     CUMULATIVE_ARGS *cum;	/* current arg information */
2131     enum machine_mode mode;	/* current arg mode */
2132     tree type;			/* type of the argument or 0 if lib support */
2133     int named;			/* whether or not the argument was named */
2134{
2135  int bytes =
2136    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2137  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2138
2139  if (TARGET_DEBUG_ARG)
2140    fprintf (stderr,
2141	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2142	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2143  if (TARGET_64BIT)
2144    {
2145      int int_nregs, sse_nregs;
2146      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2147	cum->words += words;
2148      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2149	{
2150	  cum->nregs -= int_nregs;
2151	  cum->sse_nregs -= sse_nregs;
2152	  cum->regno += int_nregs;
2153	  cum->sse_regno += sse_nregs;
2154	}
2155      else
2156	cum->words += words;
2157    }
2158  else
2159    {
2160      if (TARGET_SSE && mode == TImode)
2161	{
2162	  cum->sse_words += words;
2163	  cum->sse_nregs -= 1;
2164	  cum->sse_regno += 1;
2165	  if (cum->sse_nregs <= 0)
2166	    {
2167	      cum->sse_nregs = 0;
2168	      cum->sse_regno = 0;
2169	    }
2170	}
2171      else
2172	{
2173	  cum->words += words;
2174	  cum->nregs -= words;
2175	  cum->regno += words;
2176
2177	  if (cum->nregs <= 0)
2178	    {
2179	      cum->nregs = 0;
2180	      cum->regno = 0;
2181	    }
2182	}
2183    }
2184  return;
2185}
2186
2187/* Define where to put the arguments to a function.
2188   Value is zero to push the argument on the stack,
2189   or a hard register in which to store the argument.
2190
2191   MODE is the argument's machine mode.
2192   TYPE is the data type of the argument (as a tree).
2193    This is null for libcalls where that information may
2194    not be available.
2195   CUM is a variable of type CUMULATIVE_ARGS which gives info about
2196    the preceding args and about the function being called.
2197   NAMED is nonzero if this argument is a named parameter
2198    (otherwise it is an extra parameter matching an ellipsis).  */
2199
2200rtx
2201function_arg (cum, mode, type, named)
2202     CUMULATIVE_ARGS *cum;	/* current arg information */
2203     enum machine_mode mode;	/* current arg mode */
2204     tree type;			/* type of the argument or 0 if lib support */
2205     int named;			/* != 0 for normal args, == 0 for ... args */
2206{
2207  rtx ret   = NULL_RTX;
2208  int bytes =
2209    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2210  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2211
2212  /* Handle a hidden AL argument containing the number of SSE registers for
2213     varargs x86-64 functions.  For the i386 ABI just return constm1_rtx to
2214     avoid any AL settings.  */
2215  if (mode == VOIDmode)
2216    {
2217      if (TARGET_64BIT)
2218	return GEN_INT (cum->maybe_vaarg
2219			? (cum->sse_nregs < 0
2220			   ? SSE_REGPARM_MAX
2221			   : cum->sse_regno)
2222			: -1);
2223      else
2224	return constm1_rtx;
2225    }
2226  if (TARGET_64BIT)
2227    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2228			       &x86_64_int_parameter_registers [cum->regno],
2229			       cum->sse_regno);
2230  else
2231    switch (mode)
2232      {
2233	/* For now, pass fp/complex values on the stack.  */
2234      default:
2235	break;
2236
2237      case BLKmode:
2238      case DImode:
2239      case SImode:
2240      case HImode:
2241      case QImode:
2242	if (words <= cum->nregs)
2243	  ret = gen_rtx_REG (mode, cum->regno);
2244	break;
2245      case TImode:
2246	if (cum->sse_nregs)
2247	  ret = gen_rtx_REG (mode, cum->sse_regno);
2248	break;
2249      }
2250
2251  if (TARGET_DEBUG_ARG)
2252    {
2253      fprintf (stderr,
2254	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2255	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2256
2257      if (ret)
2258	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
2259      else
2260	fprintf (stderr, ", stack");
2261
2262      fprintf (stderr, " )\n");
2263    }
2264
2265  return ret;
2266}
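
/* A sketch of the 32-bit register passing path (hypothetical example,
   not from this file): with

     int __attribute__ ((regparm (3))) f (int a, int b, int c);

   init_cumulative_args sets cum->nregs = 3 and successive calls to
   function_arg hand out cum->regno 0, 1 and 2, i.e. %eax, %edx and %ecx,
   so all three integer arguments bypass the stack.  */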
2267
2268/* Gives the alignment boundary, in bits, of an argument with the specified mode
2269   and type.   */
2270
2271int
2272ix86_function_arg_boundary (mode, type)
2273     enum machine_mode mode;
2274     tree type;
2275{
2276  int align;
2277  if (!TARGET_64BIT)
2278    return PARM_BOUNDARY;
2279  if (type)
2280    align = TYPE_ALIGN (type);
2281  else
2282    align = GET_MODE_ALIGNMENT (mode);
2283  if (align < PARM_BOUNDARY)
2284    align = PARM_BOUNDARY;
2285  if (align > 128)
2286    align = 128;
2287  return align;
2288}
2289
2290/* Return true if N is a possible register number of function value.  */
2291bool
2292ix86_function_value_regno_p (regno)
2293     int regno;
2294{
2295  if (!TARGET_64BIT)
2296    {
2297      return ((regno) == 0
2298	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2299	      || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2300    }
2301  return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2302	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2303	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2304}
2305
2306/* Define how to find the value returned by a function.
2307   VALTYPE is the data type of the value (as a tree).
2308   If the precise function being called is known, FUNC is its FUNCTION_DECL;
2309   otherwise, FUNC is 0.  */
2310rtx
2311ix86_function_value (valtype)
2312     tree valtype;
2313{
2314  if (TARGET_64BIT)
2315    {
2316      rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2317				     REGPARM_MAX, SSE_REGPARM_MAX,
2318				     x86_64_int_return_registers, 0);
2319      /* For zero sized structures, construct_container returns NULL; we need
2320         to keep the rest of the compiler happy by returning a meaningful value.  */
2321      if (!ret)
2322	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2323      return ret;
2324    }
2325  else
2326    return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2327}
2328
2329/* Return nonzero iff TYPE is returned in memory.  */
2330int
2331ix86_return_in_memory (type)
2332     tree type;
2333{
2334  int needed_intregs, needed_sseregs;
2335  if (TARGET_64BIT)
2336    {
2337      return !examine_argument (TYPE_MODE (type), type, 1,
2338				&needed_intregs, &needed_sseregs);
2339    }
2340  else
2341    {
2342      if (TYPE_MODE (type) == BLKmode
2343	  || (VECTOR_MODE_P (TYPE_MODE (type))
2344	      && int_size_in_bytes (type) == 8)
2345	  || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2346	      && TYPE_MODE (type) != TFmode
2347	      && !VECTOR_MODE_P (TYPE_MODE (type))))
2348	return 1;
2349      return 0;
2350    }
2351}
2352
2353/* Define how to find the value returned by a library function
2354   assuming the value has mode MODE.  */
2355rtx
2356ix86_libcall_value (mode)
2357   enum machine_mode mode;
2358{
2359  if (TARGET_64BIT)
2360    {
2361      switch (mode)
2362	{
2363	  case SFmode:
2364	  case SCmode:
2365	  case DFmode:
2366	  case DCmode:
2367	    return gen_rtx_REG (mode, FIRST_SSE_REG);
2368	  case TFmode:
2369	  case TCmode:
2370	    return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2371	  default:
2372	    return gen_rtx_REG (mode, 0);
2373	}
2374    }
2375  else
2376   return gen_rtx_REG (mode, VALUE_REGNO (mode));
2377}
2378
2379/* Create the va_list data type.  */
2380
2381tree
2382ix86_build_va_list ()
2383{
2384  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2385
2386  /* For i386 we use a plain pointer to the argument area.  */
2387  if (!TARGET_64BIT)
2388    return build_pointer_type (char_type_node);
2389
2390  record = make_lang_type (RECORD_TYPE);
2391  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2392
2393  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2394		      unsigned_type_node);
2395  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2396		      unsigned_type_node);
2397  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2398		      ptr_type_node);
2399  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2400		      ptr_type_node);
2401
2402  DECL_FIELD_CONTEXT (f_gpr) = record;
2403  DECL_FIELD_CONTEXT (f_fpr) = record;
2404  DECL_FIELD_CONTEXT (f_ovf) = record;
2405  DECL_FIELD_CONTEXT (f_sav) = record;
2406
2407  TREE_CHAIN (record) = type_decl;
2408  TYPE_NAME (record) = type_decl;
2409  TYPE_FIELDS (record) = f_gpr;
2410  TREE_CHAIN (f_gpr) = f_fpr;
2411  TREE_CHAIN (f_fpr) = f_ovf;
2412  TREE_CHAIN (f_ovf) = f_sav;
2413
2414  layout_type (record);
2415
2416  /* The correct type is an array type of one element.  */
2417  return build_array_type (record, build_index_type (size_zero_node));
2418}
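
/* For illustration, the record built above corresponds to the C level
   declaration mandated by the x86-64 psABI:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];

   where the one-element array matches the build_array_type call above.  */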
2419
2420/* Perform any actions needed for a function that is receiving a
2421   variable number of arguments.
2422
2423   CUM is as above.
2424
2425   MODE and TYPE are the mode and type of the current parameter.
2426
2427   PRETEND_SIZE is a variable that should be set to the amount of stack
2428   that must be pushed by the prologue to pretend that our caller pushed
2429   it.
2430
2431   Normally, this macro will push all remaining incoming registers on the
2432   stack and set PRETEND_SIZE to the length of the registers pushed.  */
2433
2434void
2435ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2436     CUMULATIVE_ARGS *cum;
2437     enum machine_mode mode;
2438     tree type;
2439     int *pretend_size ATTRIBUTE_UNUSED;
2440     int no_rtl;
2441
2442{
2443  CUMULATIVE_ARGS next_cum;
2444  rtx save_area = NULL_RTX, mem;
2445  rtx label;
2446  rtx label_ref;
2447  rtx tmp_reg;
2448  rtx nsse_reg;
2449  int set;
2450  tree fntype;
2451  int stdarg_p;
2452  int i;
2453
2454  if (!TARGET_64BIT)
2455    return;
2456
2457  /* Indicate that stack space for the varargs save area must be allocated.  */
2458  ix86_save_varrargs_registers = 1;
2459
2460  fntype = TREE_TYPE (current_function_decl);
2461  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2462	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2463		  != void_type_node));
2464
2465  /* For varargs, we do not want to skip the dummy va_dcl argument.
2466     For stdargs, we do want to skip the last named argument.  */
2467  next_cum = *cum;
2468  if (stdarg_p)
2469    function_arg_advance (&next_cum, mode, type, 1);
2470
2471  if (!no_rtl)
2472    save_area = frame_pointer_rtx;
2473
2474  set = get_varargs_alias_set ();
2475
2476  for (i = next_cum.regno; i < ix86_regparm; i++)
2477    {
2478      mem = gen_rtx_MEM (Pmode,
2479			 plus_constant (save_area, i * UNITS_PER_WORD));
2480      set_mem_alias_set (mem, set);
2481      emit_move_insn (mem, gen_rtx_REG (Pmode,
2482					x86_64_int_parameter_registers[i]));
2483    }
2484
2485  if (next_cum.sse_nregs)
2486    {
2487      /* Now emit code to save SSE registers.  The AX parameter contains the
2488	 number of SSE parameter registers used to call this function.  We use
2489	 the sse_prologue_save insn template, which produces a computed jump
2490	 across the SSE saves.  We need some preparation work to get this working.  */
2491
2492      label = gen_label_rtx ();
2493      label_ref = gen_rtx_LABEL_REF (Pmode, label);
2494
2495      /* Compute the address to jump to:
2496         label - eax*4 + nnamed_sse_arguments*4  */
2497      tmp_reg = gen_reg_rtx (Pmode);
2498      nsse_reg = gen_reg_rtx (Pmode);
2499      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2500      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2501			      gen_rtx_MULT (Pmode, nsse_reg,
2502					    GEN_INT (4))));
2503      if (next_cum.sse_regno)
2504	emit_move_insn
2505	  (nsse_reg,
2506	   gen_rtx_CONST (DImode,
2507			  gen_rtx_PLUS (DImode,
2508					label_ref,
2509					GEN_INT (next_cum.sse_regno * 4))));
2510      else
2511	emit_move_insn (nsse_reg, label_ref);
2512      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2513
2514      /* Compute the address of the memory block we save into.  We always use
2515	 a pointer pointing 127 bytes after the first byte to store - this is
2516	 needed to keep the instruction size limited to 4 bytes.  */
2517      tmp_reg = gen_reg_rtx (Pmode);
2518      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2519			      plus_constant (save_area,
2520					     8 * REGPARM_MAX + 127)));
2521      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2522      set_mem_alias_set (mem, set);
2523      set_mem_align (mem, BITS_PER_WORD);
2524
2525      /* And finally do the dirty job!  */
2526      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2527					GEN_INT (next_cum.sse_regno), label));
2528    }
2529
2530}
2531
2532/* Implement va_start.  */
2533
2534void
2535ix86_va_start (stdarg_p, valist, nextarg)
2536     int stdarg_p;
2537     tree valist;
2538     rtx nextarg;
2539{
2540  HOST_WIDE_INT words, n_gpr, n_fpr;
2541  tree f_gpr, f_fpr, f_ovf, f_sav;
2542  tree gpr, fpr, ovf, sav, t;
2543
2544  /* Only the 64-bit target needs anything special.  */
2545  if (!TARGET_64BIT)
2546    {
2547      std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2548      return;
2549    }
2550
2551  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2552  f_fpr = TREE_CHAIN (f_gpr);
2553  f_ovf = TREE_CHAIN (f_fpr);
2554  f_sav = TREE_CHAIN (f_ovf);
2555
2556  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2557  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2558  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2559  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2560  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2561
2562  /* Count number of gp and fp argument registers used.  */
2563  words = current_function_args_info.words;
2564  n_gpr = current_function_args_info.regno;
2565  n_fpr = current_function_args_info.sse_regno;
2566
2567  if (TARGET_DEBUG_ARG)
2568    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2569	     (int) words, (int) n_gpr, (int) n_fpr);
2570
2571  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2572	     build_int_2 (n_gpr * 8, 0));
2573  TREE_SIDE_EFFECTS (t) = 1;
2574  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2575
2576  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2577	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2578  TREE_SIDE_EFFECTS (t) = 1;
2579  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2580
2581  /* Find the overflow area.  */
2582  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2583  if (words != 0)
2584    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2585	       build_int_2 (words * UNITS_PER_WORD, 0));
2586  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2587  TREE_SIDE_EFFECTS (t) = 1;
2588  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2589
2590  /* Find the register save area.
2591     The function prologue saves it right above the stack frame.  */
2592  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2593  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2594  TREE_SIDE_EFFECTS (t) = 1;
2595  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2596  cfun->preferred_stack_boundary = 128;
2597}
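
/* For example (hypothetical function, not from this file), in

     void f (int a, ...)

   one general purpose register is consumed by the named argument, so
   va_start sets gp_offset to 8 (n_gpr * 8) and fp_offset to 48
   (n_fpr * 16 + 8 * REGPARM_MAX, with n_fpr == 0 and REGPARM_MAX == 6),
   and points reg_save_area at the block spilled by the prologue.  */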
2598
2599/* Implement va_arg.  */
2600rtx
2601ix86_va_arg (valist, type)
2602     tree valist, type;
2603{
2604  static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2605  tree f_gpr, f_fpr, f_ovf, f_sav;
2606  tree gpr, fpr, ovf, sav, t;
2607  int size, rsize;
2608  rtx lab_false, lab_over = NULL_RTX;
2609  rtx addr_rtx, r;
2610  rtx container;
2611
2612  /* Only the 64-bit target needs anything special.  */
2613  if (!TARGET_64BIT)
2614    {
2615      return std_expand_builtin_va_arg (valist, type);
2616    }
2617
2618  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2619  f_fpr = TREE_CHAIN (f_gpr);
2620  f_ovf = TREE_CHAIN (f_fpr);
2621  f_sav = TREE_CHAIN (f_ovf);
2622
2623  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2624  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2625  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2626  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2627  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2628
2629  size = int_size_in_bytes (type);
2630  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2631
2632  container = construct_container (TYPE_MODE (type), type, 0,
2633				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2634  /* Pull the value out of the saved registers ...  */
2637
2638  addr_rtx = gen_reg_rtx (Pmode);
2639
2640  if (container)
2641    {
2642      rtx int_addr_rtx, sse_addr_rtx;
2643      int needed_intregs, needed_sseregs;
2644      int need_temp;
2645
2646      lab_over = gen_label_rtx ();
2647      lab_false = gen_label_rtx ();
2648
2649      examine_argument (TYPE_MODE (type), type, 0,
2650		        &needed_intregs, &needed_sseregs);
2651
2652
2653      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2654		   || TYPE_ALIGN (type) > 128);
2655
2656      /* In case we are passing a structure, verify that it is a consecutive
2657         block in the register save area.  If not, we need to do moves.  */
2658      if (!need_temp && !REG_P (container))
2659	{
2660	  /* Verify that all registers are strictly consecutive.  */
2661	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2662	    {
2663	      int i;
2664
2665	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2666		{
2667		  rtx slot = XVECEXP (container, 0, i);
2668		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2669		      || INTVAL (XEXP (slot, 1)) != i * 16)
2670		    need_temp = 1;
2671		}
2672	    }
2673	  else
2674	    {
2675	      int i;
2676
2677	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2678		{
2679		  rtx slot = XVECEXP (container, 0, i);
2680		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2681		      || INTVAL (XEXP (slot, 1)) != i * 8)
2682		    need_temp = 1;
2683		}
2684	    }
2685	}
2686      if (!need_temp)
2687	{
2688	  int_addr_rtx = addr_rtx;
2689	  sse_addr_rtx = addr_rtx;
2690	}
2691      else
2692	{
2693	  int_addr_rtx = gen_reg_rtx (Pmode);
2694	  sse_addr_rtx = gen_reg_rtx (Pmode);
2695	}
2696      /* First ensure that we fit completely in registers.  */
2697      if (needed_intregs)
2698	{
2699	  emit_cmp_and_jump_insns (expand_expr
2700				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2701				   GEN_INT ((REGPARM_MAX - needed_intregs +
2702					     1) * 8), GE, const1_rtx, SImode,
2703				   1, lab_false);
2704	}
2705      if (needed_sseregs)
2706	{
2707	  emit_cmp_and_jump_insns (expand_expr
2708				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2709				   GEN_INT ((SSE_REGPARM_MAX -
2710					     needed_sseregs + 1) * 16 +
2711					    REGPARM_MAX * 8), GE, const1_rtx,
2712				   SImode, 1, lab_false);
2713	}
2714
2715      /* Compute index to start of area used for integer regs.  */
2716      if (needed_intregs)
2717	{
2718	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2719	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2720	  if (r != int_addr_rtx)
2721	    emit_move_insn (int_addr_rtx, r);
2722	}
2723      if (needed_sseregs)
2724	{
2725	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2726	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2727	  if (r != sse_addr_rtx)
2728	    emit_move_insn (sse_addr_rtx, r);
2729	}
2730      if (need_temp)
2731	{
2732	  int i;
2733	  rtx mem;
2734
2735	  /* Never use the memory itself, as it has the wrong alias set.  */
2736	  addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2737	  mem = gen_rtx_MEM (BLKmode, addr_rtx);
2738	  set_mem_alias_set (mem, get_varargs_alias_set ());
2739	  set_mem_align (mem, BITS_PER_UNIT);
2740
2741	  for (i = 0; i < XVECLEN (container, 0); i++)
2742	    {
2743	      rtx slot = XVECEXP (container, 0, i);
2744	      rtx reg = XEXP (slot, 0);
2745	      enum machine_mode mode = GET_MODE (reg);
2746	      rtx src_addr;
2747	      rtx src_mem;
2748	      int src_offset;
2749	      rtx dest_mem;
2750
2751	      if (SSE_REGNO_P (REGNO (reg)))
2752		{
2753		  src_addr = sse_addr_rtx;
2754		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2755		}
2756	      else
2757		{
2758		  src_addr = int_addr_rtx;
2759		  src_offset = REGNO (reg) * 8;
2760		}
2761	      src_mem = gen_rtx_MEM (mode, src_addr);
2762	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
2763	      src_mem = adjust_address (src_mem, mode, src_offset);
2764	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2765	      emit_move_insn (dest_mem, src_mem);
2766	    }
2767	}
2768
2769      if (needed_intregs)
2770	{
2771	  t =
2772	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2773		   build_int_2 (needed_intregs * 8, 0));
2774	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2775	  TREE_SIDE_EFFECTS (t) = 1;
2776	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2777	}
2778      if (needed_sseregs)
2779	{
2780	  t =
2781	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2782		   build_int_2 (needed_sseregs * 16, 0));
2783	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2784	  TREE_SIDE_EFFECTS (t) = 1;
2785	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2786	}
2787
2788      emit_jump_insn (gen_jump (lab_over));
2789      emit_barrier ();
2790      emit_label (lab_false);
2791    }
2792
2793  /* ... otherwise out of the overflow area.  */
2794
2795  /* Care for on-stack alignment if needed.  */
2796  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2797    t = ovf;
2798  else
2799    {
2800      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2801      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2802      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2803    }
2804  t = save_expr (t);
2805
2806  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2807  if (r != addr_rtx)
2808    emit_move_insn (addr_rtx, r);
2809
2810  t =
2811    build (PLUS_EXPR, TREE_TYPE (t), t,
2812	   build_int_2 (rsize * UNITS_PER_WORD, 0));
2813  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2814  TREE_SIDE_EFFECTS (t) = 1;
2815  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2816
2817  if (container)
2818    emit_label (lab_over);
2819
2820  return addr_rtx;
2821}
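
/* The RTL emitted above behaves roughly like this C sketch for a plain
   int argument (illustrative only; 48 == 8 * REGPARM_MAX):

     if (ap->gp_offset >= 48)
       {
         addr = ap->overflow_arg_area;
         ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
       }
     else
       {
         addr = (char *) ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }

   returning ADDR as the address of the fetched argument.  */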
2822
2823/* Return nonzero if OP is a general operand representable on x86_64.  */
2824
2825int
2826x86_64_general_operand (op, mode)
2827     rtx op;
2828     enum machine_mode mode;
2829{
2830  if (!TARGET_64BIT)
2831    return general_operand (op, mode);
2832  if (nonimmediate_operand (op, mode))
2833    return 1;
2834  return x86_64_sign_extended_value (op);
2835}
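
/* Background for the sign/zero extension predicates here (illustrative,
   not from the original source): x86-64 instructions carry at most a
   32-bit immediate that the hardware sign extends to 64 bits, so e.g.

     addq    $-2147483648, %rax
     movabsq $0x123456789, %rax

   the first constant satisfies x86_64_sign_extended_value, while the
   second does not and is acceptable only to the movabs patterns.  */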
2836
2837/* Return nonzero if OP is a general operand representable on x86_64
2838   as either a sign extended or a zero extended constant.  */
2839
2840int
2841x86_64_szext_general_operand (op, mode)
2842     rtx op;
2843     enum machine_mode mode;
2844{
2845  if (!TARGET_64BIT)
2846    return general_operand (op, mode);
2847  if (nonimmediate_operand (op, mode))
2848    return 1;
2849  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2850}
2851
2852/* Return nonzero if OP is a nonmemory operand representable on x86_64.  */
2853
2854int
2855x86_64_nonmemory_operand (op, mode)
2856     rtx op;
2857     enum machine_mode mode;
2858{
2859  if (!TARGET_64BIT)
2860    return nonmemory_operand (op, mode);
2861  if (register_operand (op, mode))
2862    return 1;
2863  return x86_64_sign_extended_value (op);
2864}
2865
2866/* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns.  */
2867
2868int
2869x86_64_movabs_operand (op, mode)
2870     rtx op;
2871     enum machine_mode mode;
2872{
2873  if (!TARGET_64BIT || !flag_pic)
2874    return nonmemory_operand (op, mode);
2875  if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2876    return 1;
2877  if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2878    return 1;
2879  return 0;
2880}
2881
2882/* Return nonzero if OP is a nonmemory operand representable on x86_64 as a sign or zero extended constant.  */
2883
2884int
2885x86_64_szext_nonmemory_operand (op, mode)
2886     rtx op;
2887     enum machine_mode mode;
2888{
2889  if (!TARGET_64BIT)
2890    return nonmemory_operand (op, mode);
2891  if (register_operand (op, mode))
2892    return 1;
2893  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2894}
2895
2896/* Return nonzero if OP is an immediate operand representable on x86_64.  */
2897
2898int
2899x86_64_immediate_operand (op, mode)
2900     rtx op;
2901     enum machine_mode mode;
2902{
2903  if (!TARGET_64BIT)
2904    return immediate_operand (op, mode);
2905  return x86_64_sign_extended_value (op);
2906}
2907
2908/* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended value.  */
2909
2910int
2911x86_64_zext_immediate_operand (op, mode)
2912     rtx op;
2913     enum machine_mode mode ATTRIBUTE_UNUSED;
2914{
2915  return x86_64_zero_extended_value (op);
2916}
2917
2918/* Return nonzero if OP is (const_int 1), else return zero.  */
2919
2920int
2921const_int_1_operand (op, mode)
2922     rtx op;
2923     enum machine_mode mode ATTRIBUTE_UNUSED;
2924{
2925  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2926}
2927
2928/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2929   for shift & compare patterns, as shifting by 0 does not change flags),
2930   else return zero.  */
2931
2932int
2933const_int_1_31_operand (op, mode)
2934     rtx op;
2935     enum machine_mode mode ATTRIBUTE_UNUSED;
2936{
2937  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2938}
2939
2940/* Returns 1 if OP is either a symbol reference or a sum of a symbol
2941   reference and a constant.  */
2942
2943int
2944symbolic_operand (op, mode)
2945     register rtx op;
2946     enum machine_mode mode ATTRIBUTE_UNUSED;
2947{
2948  switch (GET_CODE (op))
2949    {
2950    case SYMBOL_REF:
2951    case LABEL_REF:
2952      return 1;
2953
2954    case CONST:
2955      op = XEXP (op, 0);
2956      if (GET_CODE (op) == SYMBOL_REF
2957	  || GET_CODE (op) == LABEL_REF
2958	  || (GET_CODE (op) == UNSPEC
2959	      && (XINT (op, 1) == 6
2960		  || XINT (op, 1) == 7
2961		  || XINT (op, 1) == 15)))
2962	return 1;
2963      if (GET_CODE (op) != PLUS
2964	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
2965	return 0;
2966
2967      op = XEXP (op, 0);
2968      if (GET_CODE (op) == SYMBOL_REF
2969	  || GET_CODE (op) == LABEL_REF)
2970	return 1;
2971      /* Only @GOTOFF gets offsets.  */
2972      if (GET_CODE (op) != UNSPEC
2973	  || XINT (op, 1) != 7)
2974	return 0;
2975
2976      op = XVECEXP (op, 0, 0);
2977      if (GET_CODE (op) == SYMBOL_REF
2978	  || GET_CODE (op) == LABEL_REF)
2979	return 1;
2980      return 0;
2981
2982    default:
2983      return 0;
2984    }
2985}
2986
2987/* Return true if the operand contains a @GOT or @GOTOFF reference.  */
2988
2989int
2990pic_symbolic_operand (op, mode)
2991     register rtx op;
2992     enum machine_mode mode ATTRIBUTE_UNUSED;
2993{
2994  if (GET_CODE (op) != CONST)
2995    return 0;
2996  op = XEXP (op, 0);
2997  if (TARGET_64BIT)
2998    {
2999      if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3000	return 1;
3001    }
3002  else
3003    {
3004      if (GET_CODE (op) == UNSPEC)
3005	return 1;
3006      if (GET_CODE (op) != PLUS
3007	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
3008	return 0;
3009      op = XEXP (op, 0);
3010      if (GET_CODE (op) == UNSPEC)
3011	return 1;
3012    }
3013  return 0;
3014}
3015
3016/* Return true if OP is a symbolic operand that resolves locally.  */
3017
3018static int
3019local_symbolic_operand (op, mode)
3020     rtx op;
3021     enum machine_mode mode ATTRIBUTE_UNUSED;
3022{
3023  if (GET_CODE (op) == LABEL_REF)
3024    return 1;
3025
3026  if (GET_CODE (op) == CONST
3027      && GET_CODE (XEXP (op, 0)) == PLUS
3028      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3029    op = XEXP (XEXP (op, 0), 0);
3030
3031  if (GET_CODE (op) != SYMBOL_REF)
3032    return 0;
3033
3034  /* These we've been told are local by varasm and encode_section_info
3035     respectively.  */
3036  if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3037    return 1;
3038
3039  /* There is, however, a not insubstantial body of code in the rest of
3040     the compiler that assumes it can just stick the results of
3041     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
3042  /* ??? This is a hack.  Should update the body of the compiler to
3043     always create a DECL and invoke ENCODE_SECTION_INFO.  */
3044  if (strncmp (XSTR (op, 0), internal_label_prefix,
3045	       internal_label_prefix_len) == 0)
3046    return 1;
3047
3048  return 0;
3049}
3050
3051/* Test for a valid operand for a call instruction.  Don't allow the
3052   arg pointer register or virtual regs since they may decay into
3053   reg + const, which the patterns can't handle.  */
3054
3055int
3056call_insn_operand (op, mode)
3057     rtx op;
3058     enum machine_mode mode ATTRIBUTE_UNUSED;
3059{
3060  /* Disallow indirect through a virtual register.  This leads to
3061     compiler aborts when trying to eliminate them.  */
3062  if (GET_CODE (op) == REG
3063      && (op == arg_pointer_rtx
3064	  || op == frame_pointer_rtx
3065	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3066	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3067    return 0;
3068
3069  /* Disallow `call 1234'.  Due to varying assembler lameness this
3070     gets either rejected or translated to `call .+1234'.  */
3071  if (GET_CODE (op) == CONST_INT)
3072    return 0;
3073
3074  /* Explicitly allow SYMBOL_REF even if pic.  */
3075  if (GET_CODE (op) == SYMBOL_REF)
3076    return 1;
3077
3078  /* Half-pic doesn't allow anything but registers and constants.
3079     We've just taken care of the latter.  */
3080  if (HALF_PIC_P ())
3081    return register_operand (op, Pmode);
3082
3083  /* Otherwise we can allow any general_operand in the address.  */
3084  return general_operand (op, Pmode);
3085}
3086
3087int
3088constant_call_address_operand (op, mode)
3089     rtx op;
3090     enum machine_mode mode ATTRIBUTE_UNUSED;
3091{
3092  if (GET_CODE (op) == CONST
3093      && GET_CODE (XEXP (op, 0)) == PLUS
3094      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3095    op = XEXP (XEXP (op, 0), 0);
3096  return GET_CODE (op) == SYMBOL_REF;
3097}
3098
3099/* Match exactly zero and one.  */
3100
3101int
3102const0_operand (op, mode)
3103     register rtx op;
3104     enum machine_mode mode;
3105{
3106  return op == CONST0_RTX (mode);
3107}
3108
3109int
3110const1_operand (op, mode)
3111     register rtx op;
3112     enum machine_mode mode ATTRIBUTE_UNUSED;
3113{
3114  return op == const1_rtx;
3115}
3116
3117/* Match 2, 4, or 8.  Used for leal multiplicands.  */
3118
3119int
3120const248_operand (op, mode)
3121     register rtx op;
3122     enum machine_mode mode ATTRIBUTE_UNUSED;
3123{
3124  return (GET_CODE (op) == CONST_INT
3125	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3126}
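
/* For illustration: these are the scale factors that the x86 address
   form base + index*scale + disp can encode, as in

     leal 4(%ebx,%eax,8), %ecx

   where the scale must be 2, 4 or 8 (a multiply by 1 never appears as a
   MULT rtx; the index register is simply used bare).  */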
3127
3128/* True if this is a constant appropriate for an increment or decrement.  */
3129
3130int
3131incdec_operand (op, mode)
3132     register rtx op;
3133     enum machine_mode mode ATTRIBUTE_UNUSED;
3134{
3135  /* On Pentium4, the inc and dec operations cause an extra dependency on
3136     the flags register, since the carry flag is not set.  */
3137  if (TARGET_PENTIUM4 && !optimize_size)
3138    return 0;
3139  return op == const1_rtx || op == constm1_rtx;
3140}
3141
3142/* Return nonzero if OP is acceptable as operand of DImode shift
3143   expander.  */
3144
3145int
3146shiftdi_operand (op, mode)
3147     rtx op;
3148     enum machine_mode mode ATTRIBUTE_UNUSED;
3149{
3150  if (TARGET_64BIT)
3151    return nonimmediate_operand (op, mode);
3152  else
3153    return register_operand (op, mode);
3154}
3155
3156/* Return false if this is the stack pointer, or any other fake
3157   register eliminable to the stack pointer.  Otherwise, this is
3158   a register operand.
3159
3160   This is used to prevent esp from being used as an index reg,
3161   which would only happen in pathological cases.  */
3162
3163int
3164reg_no_sp_operand (op, mode)
3165     register rtx op;
3166     enum machine_mode mode;
3167{
3168  rtx t = op;
3169  if (GET_CODE (t) == SUBREG)
3170    t = SUBREG_REG (t);
3171  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3172    return 0;
3173
3174  return register_operand (op, mode);
3175}
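
/* Illustrative note: the x86 SIB byte has no encoding for %esp as an
   index register, so an address such as

     leal (%ebx,%esp,2), %eax

   cannot be assembled; this predicate keeps such addresses from being
   formed in the first place.  */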
3176
3177int
3178mmx_reg_operand (op, mode)
3179     register rtx op;
3180     enum machine_mode mode ATTRIBUTE_UNUSED;
3181{
3182  return MMX_REG_P (op);
3183}
3184
3185/* Return false if this is any eliminable register.  Otherwise
3186   general_operand.  */
3187
3188int
3189general_no_elim_operand (op, mode)
3190     register rtx op;
3191     enum machine_mode mode;
3192{
3193  rtx t = op;
3194  if (GET_CODE (t) == SUBREG)
3195    t = SUBREG_REG (t);
3196  if (t == arg_pointer_rtx || t == frame_pointer_rtx
3197      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3198      || t == virtual_stack_dynamic_rtx)
3199    return 0;
3200  if (REG_P (t)
3201      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3202      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3203    return 0;
3204
3205  return general_operand (op, mode);
3206}
3207
3208/* Return false if this is any eliminable register.  Otherwise
3209   register_operand or const_int.  */
3210
3211int
3212nonmemory_no_elim_operand (op, mode)
3213     register rtx op;
3214     enum machine_mode mode;
3215{
3216  rtx t = op;
3217  if (GET_CODE (t) == SUBREG)
3218    t = SUBREG_REG (t);
3219  if (t == arg_pointer_rtx || t == frame_pointer_rtx
3220      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3221      || t == virtual_stack_dynamic_rtx)
3222    return 0;
3223
3224  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3225}
3226
3227/* Return true if op is a Q_REGS class register.  */
3228
3229int
3230q_regs_operand (op, mode)
3231     register rtx op;
3232     enum machine_mode mode;
3233{
3234  if (mode != VOIDmode && GET_MODE (op) != mode)
3235    return 0;
3236  if (GET_CODE (op) == SUBREG)
3237    op = SUBREG_REG (op);
3238  return ANY_QI_REG_P (op);
3239}
3240
3241/* Return true if op is a NON_Q_REGS class register.  */
3242
3243int
3244non_q_regs_operand (op, mode)
3245     register rtx op;
3246     enum machine_mode mode;
3247{
3248  if (mode != VOIDmode && GET_MODE (op) != mode)
3249    return 0;
3250  if (GET_CODE (op) == SUBREG)
3251    op = SUBREG_REG (op);
3252  return NON_QI_REG_P (op);
3253}
3254
3255/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3256   insns.  */
3257int
3258sse_comparison_operator (op, mode)
3259     rtx op;
3260     enum machine_mode mode ATTRIBUTE_UNUSED;
3261{
3262  enum rtx_code code = GET_CODE (op);
3263  switch (code)
3264    {
3265    /* Operations supported directly.  */
3266    case EQ:
3267    case LT:
3268    case LE:
3269    case UNORDERED:
3270    case NE:
3271    case UNGE:
3272    case UNGT:
3273    case ORDERED:
3274      return 1;
3275    /* These are equivalent to ones above in non-IEEE comparisons.  */
3276    case UNEQ:
3277    case UNLT:
3278    case UNLE:
3279    case LTGT:
3280    case GE:
3281    case GT:
3282      return !TARGET_IEEE_FP;
3283    default:
3284      return 0;
3285    }
3286}
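
/* For reference, the eight predicates that CMPSS/CMPPS encode directly
   are eq, lt, le, unord, neq, nlt, nle and ord, matching the first group
   above; the second group is only equivalent to one of those when
   -mno-ieee-fp lets us ignore NaN ordering.  */
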
3287/* Return 1 if OP is a valid comparison operator in a valid mode.  */
3288int
3289ix86_comparison_operator (op, mode)
3290     register rtx op;
3291     enum machine_mode mode;
3292{
3293  enum machine_mode inmode;
3294  enum rtx_code code = GET_CODE (op);
3295  if (mode != VOIDmode && GET_MODE (op) != mode)
3296    return 0;
3297  if (GET_RTX_CLASS (code) != '<')
3298    return 0;
3299  inmode = GET_MODE (XEXP (op, 0));
3300
3301  if (inmode == CCFPmode || inmode == CCFPUmode)
3302    {
3303      enum rtx_code second_code, bypass_code;
3304      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3305      return (bypass_code == NIL && second_code == NIL);
3306    }
3307  switch (code)
3308    {
3309    case EQ: case NE:
3310      return 1;
3311    case LT: case GE:
3312      if (inmode == CCmode || inmode == CCGCmode
3313	  || inmode == CCGOCmode || inmode == CCNOmode)
3314	return 1;
3315      return 0;
3316    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3317      if (inmode == CCmode)
3318	return 1;
3319      return 0;
3320    case GT: case LE:
3321      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3322	return 1;
3323      return 0;
3324    default:
3325      return 0;
3326    }
3327}
3328
3329/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */
3330
3331int
3332fcmov_comparison_operator (op, mode)
3333    register rtx op;
3334    enum machine_mode mode;
3335{
3336  enum machine_mode inmode;
3337  enum rtx_code code = GET_CODE (op);
3338  if (mode != VOIDmode && GET_MODE (op) != mode)
3339    return 0;
3340  if (GET_RTX_CLASS (code) != '<')
3341    return 0;
3342  inmode = GET_MODE (XEXP (op, 0));
3343  if (inmode == CCFPmode || inmode == CCFPUmode)
3344    {
3345      enum rtx_code second_code, bypass_code;
3346      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3347      if (bypass_code != NIL || second_code != NIL)
3348	return 0;
3349      code = ix86_fp_compare_code_to_integer (code);
3350    }
3351  /* The i387 supports only a limited set of condition codes.  */
3352  switch (code)
3353    {
3354    case LTU: case GTU: case LEU: case GEU:
3355      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3356	return 1;
3357      return 0;
3358    case ORDERED: case UNORDERED:
3359    case EQ: case NE:
3360      return 1;
3361    default:
3362      return 0;
3363    }
3364}
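
/* The switch above mirrors the hardware: fcmov exists only as
   fcmov{b,e,be,u} and their negations, i.e. it can test just CF, ZF and
   PF -- the flags an fcom result is mapped onto.  Signed conditions such
   as GT would need SF and OF, which fcmov cannot read.  */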
3365
3366/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */
3367
3368int
3369promotable_binary_operator (op, mode)
3370     register rtx op;
3371     enum machine_mode mode ATTRIBUTE_UNUSED;
3372{
3373  switch (GET_CODE (op))
3374    {
3375    case MULT:
3376      /* Modern CPUs have the same latency for HImode and SImode multiplies,
3377         but the 386 and 486 do HImode multiplies faster.  */
3378      return ix86_cpu > PROCESSOR_I486;
3379    case PLUS:
3380    case AND:
3381    case IOR:
3382    case XOR:
3383    case ASHIFT:
3384      return 1;
3385    default:
3386      return 0;
3387    }
3388}
3389
3390/* Nearly general operand, but accept any const_double, since we wish
3391   to be able to drop them into memory rather than have them get pulled
3392   into registers.  */
3393
3394int
3395cmp_fp_expander_operand (op, mode)
3396     register rtx op;
3397     enum machine_mode mode;
3398{
3399  if (mode != VOIDmode && mode != GET_MODE (op))
3400    return 0;
3401  if (GET_CODE (op) == CONST_DOUBLE)
3402    return 1;
3403  return general_operand (op, mode);
3404}
3405
3406/* Match an SI or HImode register (or DImode on 64-bit) for a zero_extract.  */
3407
3408int
3409ext_register_operand (op, mode)
3410     register rtx op;
3411     enum machine_mode mode ATTRIBUTE_UNUSED;
3412{
3413  int regno;
3414  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3415      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3416    return 0;
3417
3418  if (!register_operand (op, VOIDmode))
3419    return 0;
3420
3421  /* Be careful to accept only registers having upper parts.  */
3422  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3423  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3424}
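
/* Only %eax, %edx, %ecx and %ebx (hard regs 0 through 3) have an
   addressable high byte (%ah, %dh, %ch, %bh), which is what the
   zero_extract will become; pseudos above LAST_VIRTUAL_REGISTER are
   also fine, since reload will pick a suitable hard register.  */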
3425
3426/* Return 1 if this is a valid binary floating-point operation.
3427   OP is the expression matched, and MODE is its mode.  */
3428
3429int
3430binary_fp_operator (op, mode)
3431    register rtx op;
3432    enum machine_mode mode;
3433{
3434  if (mode != VOIDmode && mode != GET_MODE (op))
3435    return 0;
3436
3437  switch (GET_CODE (op))
3438    {
3439    case PLUS:
3440    case MINUS:
3441    case MULT:
3442    case DIV:
3443      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3444
3445    default:
3446      return 0;
3447    }
3448}
3449
3450int
3451mult_operator (op, mode)
3452    register rtx op;
3453    enum machine_mode mode ATTRIBUTE_UNUSED;
3454{
3455  return GET_CODE (op) == MULT;
3456}
3457
3458int
3459div_operator (op, mode)
3460    register rtx op;
3461    enum machine_mode mode ATTRIBUTE_UNUSED;
3462{
3463  return GET_CODE (op) == DIV;
3464}
3465
3466int
3467arith_or_logical_operator (op, mode)
3468      rtx op;
3469      enum machine_mode mode;
3470{
3471  return ((mode == VOIDmode || GET_MODE (op) == mode)
3472          && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3473              || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3474}
3475
3476/* Returns 1 if OP is memory operand with a displacement.  */
3477
3478int
3479memory_displacement_operand (op, mode)
3480     register rtx op;
3481     enum machine_mode mode;
3482{
3483  struct ix86_address parts;
3484
3485  if (! memory_operand (op, mode))
3486    return 0;
3487
3488  if (! ix86_decompose_address (XEXP (op, 0), &parts))
3489    abort ();
3490
3491  return parts.disp != NULL_RTX;
3492}
3493
3494/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3495   re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3496
3497   ??? It seems likely that this will only work because cmpsi is an
3498   expander, and no actual insns use this.  */
3499
3500int
3501cmpsi_operand (op, mode)
3502      rtx op;
3503      enum machine_mode mode;
3504{
3505  if (nonimmediate_operand (op, mode))
3506    return 1;
3507
3508  if (GET_CODE (op) == AND
3509      && GET_MODE (op) == SImode
3510      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3511      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3512      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3513      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3514      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3515      && GET_CODE (XEXP (op, 1)) == CONST_INT)
3516    return 1;
3517
3518  return 0;
3519}
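
/* Concretely, the extra form accepted above is
     (and:SI (zero_extract:SI (reg) (const_int 8) (const_int 8))
             (const_int N))
   i.e. a masked test of the %ah-style byte that the testqi_ext_ccno_0
   pattern operates on.  */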
3520
3521/* Returns 1 if OP is a memory operand that cannot be represented by the
3522   modRM array.  */
3523
3524int
3525long_memory_operand (op, mode)
3526     register rtx op;
3527     enum machine_mode mode;
3528{
3529  if (! memory_operand (op, mode))
3530    return 0;
3531
3532  return memory_address_length (op) != 0;
3533}
3534
3535/* Return nonzero if the rtx is known to be aligned.  */
3536
3537int
3538aligned_operand (op, mode)
3539     rtx op;
3540     enum machine_mode mode;
3541{
3542  struct ix86_address parts;
3543
3544  if (!general_operand (op, mode))
3545    return 0;
3546
3547  /* Registers and immediate operands are always "aligned".  */
3548  if (GET_CODE (op) != MEM)
3549    return 1;
3550
3551  /* Don't even try to do any aligned optimizations with volatiles.  */
3552  if (MEM_VOLATILE_P (op))
3553    return 0;
3554
3555  op = XEXP (op, 0);
3556
3557  /* Pushes and pops are only valid on the stack pointer.  */
3558  if (GET_CODE (op) == PRE_DEC
3559      || GET_CODE (op) == POST_INC)
3560    return 1;
3561
3562  /* Decode the address.  */
3563  if (! ix86_decompose_address (op, &parts))
3564    abort ();
3565
3566  /* Look for some component that isn't known to be aligned.  */
3567  if (parts.index)
3568    {
3569      if (parts.scale < 4
3570	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3571	return 0;
3572    }
3573  if (parts.base)
3574    {
3575      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3576	return 0;
3577    }
3578  if (parts.disp)
3579    {
3580      if (GET_CODE (parts.disp) != CONST_INT
3581	  || (INTVAL (parts.disp) & 3) != 0)
3582	return 0;
3583    }
3584
3585  /* Didn't find one -- this must be an aligned address.  */
3586  return 1;
3587}
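
/* Some examples under the rules above, assuming %ebp is known to be
   4-byte aligned: "4(%ebp)" is aligned; "6(%ebp)" is not (low bits of
   the displacement are set); "(%ebp,%eax,2)" is not either, since a
   scale smaller than 4 lets an unknown index disturb the low two bits.  */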
3588
3589/* Return true if the constant is something that can be loaded with
3590   a special instruction.  Only handle 0.0 and 1.0; others are less
3591   worthwhile.  */
3592
3593int
3594standard_80387_constant_p (x)
3595     rtx x;
3596{
3597  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3598    return -1;
3599  /* Note that the 80387 has other constants, such as pi, that we should
3600     support too.  On some machines these are much slower to load as a
3601     standard constant than to load from doubles in memory.  */
3602  if (x == CONST0_RTX (GET_MODE (x)))
3603    return 1;
3604  if (x == CONST1_RTX (GET_MODE (x)))
3605    return 2;
3606  return 0;
3607}
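
/* The two accepted values map onto dedicated i387 opcodes: "fldz"
   pushes +0.0 and "fld1" pushes +1.0.  The other built-in loads
   (fldpi, fldl2e, and friends) are the constants the note above
   alludes to.  */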
3608
3609/* Return 1 if X is an FP constant we can load into an SSE register
3610   without using memory.  */
3611int
3612standard_sse_constant_p (x)
3613     rtx x;
3614{
3615  if (GET_CODE (x) != CONST_DOUBLE)
3616    return -1;
3617  return (x == CONST0_RTX (GET_MODE (x)));
3618}
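
/* An all-zero SSE constant needs no memory because "xorps %xmm0, %xmm0"
   (or the pxor/xorpd forms) materializes 0.0 in any mode.  */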
3619
3620/* Returns 1 if OP contains a symbol reference.  */
3621
3622int
3623symbolic_reference_mentioned_p (op)
3624     rtx op;
3625{
3626  register const char *fmt;
3627  register int i;
3628
3629  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3630    return 1;
3631
3632  fmt = GET_RTX_FORMAT (GET_CODE (op));
3633  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3634    {
3635      if (fmt[i] == 'E')
3636	{
3637	  register int j;
3638
3639	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3640	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3641	      return 1;
3642	}
3643
3644      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3645	return 1;
3646    }
3647
3648  return 0;
3649}
3650
3651/* Return 1 if it is appropriate to emit `ret' instructions in the
3652   body of a function.  Do this only if the epilogue is simple, needing a
3653   couple of insns.  Prior to reloading, we can't tell how many registers
3654   must be saved, so return 0 then.  Return 0 if there is no frame
3655   marker to de-allocate.
3656
3657   If NON_SAVING_SETJMP is defined and true, then it is not possible
3658   for the epilogue to be simple, so return 0.  This is a special case
3659   since NON_SAVING_SETJMP will not cause regs_ever_live to change
3660   until final, but jump_optimize may need to know sooner if a
3661   `return' is OK.  */
3662
3663int
3664ix86_can_use_return_insn_p ()
3665{
3666  struct ix86_frame frame;
3667
3668#ifdef NON_SAVING_SETJMP
3669  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3670    return 0;
3671#endif
3672
3673  if (! reload_completed || frame_pointer_needed)
3674    return 0;
3675
3676  /* Don't allow more than 32k of pop, since that's all we can do
3677     with one instruction.  */
3678  if (current_function_pops_args
3679      && current_function_args_size >= 32768)
3680    return 0;
3681
3682  ix86_compute_frame_layout (&frame);
3683  return frame.to_allocate == 0 && frame.nregs == 0;
3684}
3685
3686/* Return 1 if VALUE can be stored in the sign extended immediate field.  */
3687int
3688x86_64_sign_extended_value (value)
3689     rtx value;
3690{
3691  switch (GET_CODE (value))
3692    {
3693      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3694         to be at least 32 and thus all acceptable constants are
3695	 represented as CONST_INT.  */
3696      case CONST_INT:
3697	if (HOST_BITS_PER_WIDE_INT == 32)
3698	  return 1;
3699	else
3700	  {
3701	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3702	    return trunc_int_for_mode (val, SImode) == val;
3703	  }
3704	break;
3705
3706      /* For certain code models, the symbolic references are known to fit.  */
3707      case SYMBOL_REF:
3708	return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3709
3710      /* For certain code models, the code is near as well.  */
3711      case LABEL_REF:
3712	return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3713
3714      /* We also may accept the offsetted memory references in certain special
3715         cases.  */
3716      case CONST:
3717	if (GET_CODE (XEXP (value, 0)) == UNSPEC
3718	    && XVECLEN (XEXP (value, 0), 0) == 1
3719	    && XINT (XEXP (value, 0), 1) ==  15)
3720	  return 1;
3721	else if (GET_CODE (XEXP (value, 0)) == PLUS)
3722	  {
3723	    rtx op1 = XEXP (XEXP (value, 0), 0);
3724	    rtx op2 = XEXP (XEXP (value, 0), 1);
3725	    HOST_WIDE_INT offset;
3726
3727	    if (ix86_cmodel == CM_LARGE)
3728	      return 0;
3729	    if (GET_CODE (op2) != CONST_INT)
3730	      return 0;
3731	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
3732	    switch (GET_CODE (op1))
3733	      {
3734		case SYMBOL_REF:
3735		  /* For CM_SMALL assume that the latest object is 1MB before
3736		     the end of the 31-bit boundary.  We may also accept pretty
3737		     large negative constants, knowing that all objects are
3738		     in the positive half of the address space.  */
3739		  if (ix86_cmodel == CM_SMALL
3740		      && offset < 1024*1024*1024
3741		      && trunc_int_for_mode (offset, SImode) == offset)
3742		    return 1;
3743		  /* For CM_KERNEL we know that all objects reside in the
3744		     negative half of the 32-bit address space.  We must not
3745		     accept negative offsets, since they may take the address
3746		     out of range, but we may accept pretty large positive ones.  */
3747		  if (ix86_cmodel == CM_KERNEL
3748		      && offset > 0
3749		      && trunc_int_for_mode (offset, SImode) == offset)
3750		    return 1;
3751		  break;
3752		case LABEL_REF:
3753		  /* These conditions are similar to SYMBOL_REF ones, just the
3754		     constraints for code models differ.  */
3755		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3756		      && offset < 1024*1024*1024
3757		      && trunc_int_for_mode (offset, SImode) == offset)
3758		    return 1;
3759		  if (ix86_cmodel == CM_KERNEL
3760		      && offset > 0
3761		      && trunc_int_for_mode (offset, SImode) == offset)
3762		    return 1;
3763		  break;
3764		default:
3765		  return 0;
3766	      }
3767	  }
3768	return 0;
3769      default:
3770	return 0;
3771    }
3772}
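
/* For the CONST_INT case above: 0x7fffffff and -1 both fit a
   sign-extended imm32, while 0x80000000 does not -- as a positive
   64-bit value it would need zero extension instead.  */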
3773
3774/* Return 1 if VALUE can be stored in the zero extended immediate field.  */
3775int
3776x86_64_zero_extended_value (value)
3777     rtx value;
3778{
3779  switch (GET_CODE (value))
3780    {
3781      case CONST_DOUBLE:
3782	if (HOST_BITS_PER_WIDE_INT == 32)
3783	  return  (GET_MODE (value) == VOIDmode
3784		   && !CONST_DOUBLE_HIGH (value));
3785	else
3786	  return 0;
3787      case CONST_INT:
3788	if (HOST_BITS_PER_WIDE_INT == 32)
3789	  return INTVAL (value) >= 0;
3790	else
3791	  return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3792	break;
3793
3794      /* For certain code models, the symbolic references are known to fit.  */
3795      case SYMBOL_REF:
3796	return ix86_cmodel == CM_SMALL;
3797
3798      /* For certain code models, the code is near as well.  */
3799      case LABEL_REF:
3800	return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3801
3802      /* We also may accept the offsetted memory references in certain special
3803         cases.  */
3804      case CONST:
3805	if (GET_CODE (XEXP (value, 0)) == PLUS)
3806	  {
3807	    rtx op1 = XEXP (XEXP (value, 0), 0);
3808	    rtx op2 = XEXP (XEXP (value, 0), 1);
3809
3810	    if (ix86_cmodel == CM_LARGE)
3811	      return 0;
3812	    switch (GET_CODE (op1))
3813	      {
3814		case SYMBOL_REF:
3815		    return 0;
3816		  /* For small code model we may accept pretty large positive
3817		     offsets, since one bit is available for free.  Negative
3818		     offsets are limited by the size of NULL pointer area
3819		     specified by the ABI.  */
3820		  if (ix86_cmodel == CM_SMALL
3821		      && GET_CODE (op2) == CONST_INT
3822		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3823		      && (trunc_int_for_mode (INTVAL (op2), SImode)
3824			  == INTVAL (op2)))
3825		    return 1;
3826	          /* ??? For the kernel, we may accept adjustment of
3827		     -0x10000000, since we know that it will just convert
3828		     negative address space to positive, but perhaps this
3829		     is not worthwhile.  */
3830		  break;
3831		case LABEL_REF:
3832		  /* These conditions are similar to SYMBOL_REF ones, just the
3833		     constraints for code models differ.  */
3834		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3835		      && GET_CODE (op2) == CONST_INT
3836		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3837		      && (trunc_int_for_mode (INTVAL (op2), SImode)
3838			  == INTVAL (op2)))
3839		    return 1;
3840		  break;
3841		default:
3842		  return 0;
3843	      }
3844	  }
3845	return 0;
3846      default:
3847	return 0;
3848    }
3849}
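
/* Conversely to the sign-extended test: 0x80000000 and 0xffffffff are
   accepted here, but no negative CONST_INT is, since a "movl" immediate
   is zero-extended to 64 bits.  */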
3850
3851/* Value should be nonzero if functions must have frame pointers.
3852   Zero means the frame pointer need not be set up (and parms may
3853   be accessed via the stack pointer) in functions that seem suitable.  */
3854
3855int
3856ix86_frame_pointer_required ()
3857{
3858  /* If we accessed previous frames, then the generated code expects
3859     to be able to access the saved ebp value in our frame.  */
3860  if (cfun->machine->accesses_prev_frame)
3861    return 1;
3862
3863  /* Several x86 OSes need a frame pointer for other reasons,
3864     usually pertaining to setjmp.  */
3865  if (SUBTARGET_FRAME_POINTER_REQUIRED)
3866    return 1;
3867
3868  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3869     the frame pointer by default.  Turn it back on now if we've not
3870     got a leaf function.  */
3871  if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3872    return 1;
3873
3874  return 0;
3875}
3876
3877/* Record that the current function accesses previous call frames.  */
3878
3879void
3880ix86_setup_frame_addresses ()
3881{
3882  cfun->machine->accesses_prev_frame = 1;
3883}
3884
3885static char pic_label_name[32];
3886
3887/* This function generates code for -fpic that loads %ebx with
3888   the return address of the caller and then returns.  */
3889
3890void
3891ix86_asm_file_end (file)
3892     FILE *file;
3893{
3894  rtx xops[2];
3895
3896  if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3897    return;
3898
3899  /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3900     to updating relocations to a section being discarded such that this
3901     doesn't work.  Ought to detect this at configure time.  */
3902#if 0
3903  /* The trick here is to create a linkonce section containing the
3904     pic label thunk, but to refer to it with an internal label.
3905     Because the label is internal, we don't have inter-dso name
3906     binding issues on hosts that don't support ".hidden".
3907
3908     In order to use these macros, however, we must create a fake
3909     function decl.  */
3910  if (targetm.have_named_sections)
3911    {
3912      tree decl = build_decl (FUNCTION_DECL,
3913			      get_identifier ("i686.get_pc_thunk"),
3914			      error_mark_node);
3915      DECL_ONE_ONLY (decl) = 1;
3916      UNIQUE_SECTION (decl, 0);
3917      named_section (decl, NULL);
3918    }
3919  else
3920#else
3921    text_section ();
3922#endif
3923
3924  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3925     internal (non-global) label that's being emitted, it didn't make
3926     sense to have .type information for local labels.   This caused
3927     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3928     me debug info for a label that you're declaring non-global?), so this
3929     was changed to call ASM_OUTPUT_LABEL() instead.  */
3930
3931  ASM_OUTPUT_LABEL (file, pic_label_name);
3932
3933  xops[0] = pic_offset_table_rtx;
3934  xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3935  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3936  output_asm_insn ("ret", xops);
3937}
3938
3939void
3940load_pic_register ()
3941{
3942  rtx gotsym, pclab;
3943
3944  if (TARGET_64BIT)
3945    abort ();
3946
3947  gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3948
3949  if (TARGET_DEEP_BRANCH_PREDICTION)
3950    {
3951      if (! pic_label_name[0])
3952	ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3953      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3954    }
3955  else
3956    {
3957      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3958    }
3959
3960  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3961
3962  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3963}
3964
3965/* Generate a "push" pattern for input ARG.  */
3966
3967static rtx
3968gen_push (arg)
3969     rtx arg;
3970{
3971  return gen_rtx_SET (VOIDmode,
3972		      gen_rtx_MEM (Pmode,
3973				   gen_rtx_PRE_DEC (Pmode,
3974						    stack_pointer_rtx)),
3975		      arg);
3976}
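
/* The RTL built above is the "pushl %reg" pattern:
     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI reg))
   with Pmode selecting DImode and %rsp in 64-bit mode.  */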
3977
3978/* Return 1 if we need to save REGNO.  */
3979static int
3980ix86_save_reg (regno, maybe_eh_return)
3981     int regno;
3982     int maybe_eh_return;
3983{
3984  if (regno == PIC_OFFSET_TABLE_REGNUM
3985      && (current_function_uses_pic_offset_table
3986	  || current_function_uses_const_pool
3987	  || current_function_calls_eh_return))
3988    return 1;
3989
3990  if (current_function_calls_eh_return && maybe_eh_return)
3991    {
3992      unsigned i;
3993      for (i = 0; ; i++)
3994	{
3995	  unsigned test = EH_RETURN_DATA_REGNO (i);
3996	  if (test == INVALID_REGNUM)
3997	    break;
3998	  if (test == (unsigned) regno)
3999	    return 1;
4000	}
4001    }
4002
4003  return (regs_ever_live[regno]
4004	  && !call_used_regs[regno]
4005	  && !fixed_regs[regno]
4006	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4007}
4008
4009/* Return number of registers to be saved on the stack.  */
4010
4011static int
4012ix86_nsaved_regs ()
4013{
4014  int nregs = 0;
4015  int regno;
4016
4017  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4018    if (ix86_save_reg (regno, true))
4019      nregs++;
4020  return nregs;
4021}
4022
4023/* Return the offset between two registers, one to be eliminated, and the other
4024   its replacement, at the start of a routine.  */
4025
4026HOST_WIDE_INT
4027ix86_initial_elimination_offset (from, to)
4028     int from;
4029     int to;
4030{
4031  struct ix86_frame frame;
4032  ix86_compute_frame_layout (&frame);
4033
4034  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4035    return frame.hard_frame_pointer_offset;
4036  else if (from == FRAME_POINTER_REGNUM
4037	   && to == HARD_FRAME_POINTER_REGNUM)
4038    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4039  else
4040    {
4041      if (to != STACK_POINTER_REGNUM)
4042	abort ();
4043      else if (from == ARG_POINTER_REGNUM)
4044	return frame.stack_pointer_offset;
4045      else if (from != FRAME_POINTER_REGNUM)
4046	abort ();
4047      else
4048	return frame.stack_pointer_offset - frame.frame_pointer_offset;
4049    }
4050}
4051
4052/* Fill structure ix86_frame about frame of currently computed function.  */
4053
4054static void
4055ix86_compute_frame_layout (frame)
4056     struct ix86_frame *frame;
4057{
4058  HOST_WIDE_INT total_size;
4059  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4060  int offset;
4061  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4062  HOST_WIDE_INT size = get_frame_size ();
4063
4064  frame->nregs = ix86_nsaved_regs ();
4065  total_size = size;
4066
4067  /* Skip the return address and the saved base pointer.  */
4068  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4069
4070  frame->hard_frame_pointer_offset = offset;
4071
4072  /* Do some sanity checking of stack_alignment_needed and
4073     preferred_alignment, since the i386 port is the only one using these
4074     features and they may break easily.  */
4075
4076  if (size && !stack_alignment_needed)
4077    abort ();
4078  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4079    abort ();
4080  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4081    abort ();
4082  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4083    abort ();
4084
4085  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4086    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4087
4088  /* Register save area */
4089  offset += frame->nregs * UNITS_PER_WORD;
4090
4091  /* Va-arg area */
4092  if (ix86_save_varrargs_registers)
4093    {
4094      offset += X86_64_VARARGS_SIZE;
4095      frame->va_arg_size = X86_64_VARARGS_SIZE;
4096    }
4097  else
4098    frame->va_arg_size = 0;
4099
4100  /* Align start of frame for local function.  */
4101  frame->padding1 = ((offset + stack_alignment_needed - 1)
4102		     & -stack_alignment_needed) - offset;
4103
4104  offset += frame->padding1;
4105
4106  /* Frame pointer points here.  */
4107  frame->frame_pointer_offset = offset;
4108
4109  offset += size;
4110
4111  /* Add outgoing arguments area.  Can be skipped if we eliminated
4112     all the function calls as dead code.  */
4113  if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4114    {
4115      offset += current_function_outgoing_args_size;
4116      frame->outgoing_arguments_size = current_function_outgoing_args_size;
4117    }
4118  else
4119    frame->outgoing_arguments_size = 0;
4120
4121  /* Align stack boundary.  Only needed if we're calling another function
4122     or using alloca.  */
4123  if (!current_function_is_leaf || current_function_calls_alloca)
4124    frame->padding2 = ((offset + preferred_alignment - 1)
4125		       & -preferred_alignment) - offset;
4126  else
4127    frame->padding2 = 0;
4128
4129  offset += frame->padding2;
4130
4131  /* We've reached end of stack frame.  */
4132  frame->stack_pointer_offset = offset;
4133
4134  /* Size the prologue needs to allocate.  */
4135  frame->to_allocate =
4136    (size + frame->padding1 + frame->padding2
4137     + frame->outgoing_arguments_size + frame->va_arg_size);
4138
4139  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4140      && current_function_is_leaf)
4141    {
4142      frame->red_zone_size = frame->to_allocate;
4143      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4144	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4145    }
4146  else
4147    frame->red_zone_size = 0;
4148  frame->to_allocate -= frame->red_zone_size;
4149  frame->stack_pointer_offset -= frame->red_zone_size;
4150#if 0
4151  fprintf (stderr, "nregs: %i\n", frame->nregs);
4152  fprintf (stderr, "size: %i\n", size);
4153  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4154  fprintf (stderr, "padding1: %i\n", frame->padding1);
4155  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4156  fprintf (stderr, "padding2: %i\n", frame->padding2);
4157  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4158  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4159  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4160  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4161	   frame->hard_frame_pointer_offset);
4162  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4163#endif
4164}
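
/* A sketch of the layout just computed, going down from the entry %esp:
     return address
     saved %ebp                 <- hard_frame_pointer_offset (if used)
     register save area         (nregs words)
     va-arg save area
     padding1                   <- frame_pointer_offset
     local variables
     outgoing arguments
     padding2                   <- stack_pointer_offset (less any red zone)  */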
4165
4166/* Emit code to save registers in the prologue.  */
4167
4168static void
4169ix86_emit_save_regs ()
4170{
4171  register int regno;
4172  rtx insn;
4173
4174  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4175    if (ix86_save_reg (regno, true))
4176      {
4177	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4178	RTX_FRAME_RELATED_P (insn) = 1;
4179      }
4180}
4181
4182/* Emit code to save registers using MOV insns.  The first register
4183   is saved at POINTER + OFFSET.  */
4184static void
4185ix86_emit_save_regs_using_mov (pointer, offset)
4186     rtx pointer;
4187     HOST_WIDE_INT offset;
4188{
4189  int regno;
4190  rtx insn;
4191
4192  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4193    if (ix86_save_reg (regno, true))
4194      {
4195	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4196					       Pmode, offset),
4197			       gen_rtx_REG (Pmode, regno));
4198	RTX_FRAME_RELATED_P (insn) = 1;
4199	offset += UNITS_PER_WORD;
4200      }
4201}
4202
4203/* Expand the prologue into a bunch of separate insns.  */
4204
4205void
4206ix86_expand_prologue ()
4207{
4208  rtx insn;
4209  int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
4210				  || current_function_uses_const_pool)
4211		      && !TARGET_64BIT);
4212  struct ix86_frame frame;
4213  int use_mov = 0;
4214  HOST_WIDE_INT allocate;
4215
4216  if (!optimize_size)
4217    {
4218      use_fast_prologue_epilogue
4219	 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4220      if (TARGET_PROLOGUE_USING_MOVE)
4221        use_mov = use_fast_prologue_epilogue;
4222    }
4223  ix86_compute_frame_layout (&frame);
4224
4225  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
4226     slower on all targets.  Also sdb doesn't like it.  */
4227
4228  if (frame_pointer_needed)
4229    {
4230      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4231      RTX_FRAME_RELATED_P (insn) = 1;
4232
4233      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4234      RTX_FRAME_RELATED_P (insn) = 1;
4235    }
4236
4237  allocate = frame.to_allocate;
4238  /* In case we are dealing with only a single register and an empty frame,
4239     push is equivalent to the mov+add sequence.  */
4240  if (allocate == 0 && frame.nregs <= 1)
4241    use_mov = 0;
4242
4243  if (!use_mov)
4244    ix86_emit_save_regs ();
4245  else
4246    allocate += frame.nregs * UNITS_PER_WORD;
4247
4248  if (allocate == 0)
4249    ;
4250  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4251    {
4252      insn = emit_insn (gen_pro_epilogue_adjust_stack
4253			(stack_pointer_rtx, stack_pointer_rtx,
4254			 GEN_INT (-allocate)));
4255      RTX_FRAME_RELATED_P (insn) = 1;
4256    }
4257  else
4258    {
4259      /* ??? Is this only valid for Win32?  */
4260
4261      rtx arg0, sym;
4262
4263      if (TARGET_64BIT)
4264	abort ();
4265
4266      arg0 = gen_rtx_REG (SImode, 0);
4267      emit_move_insn (arg0, GEN_INT (allocate));
4268
4269      sym = gen_rtx_MEM (FUNCTION_MODE,
4270			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4271      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4272
4273      CALL_INSN_FUNCTION_USAGE (insn)
4274	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4275			     CALL_INSN_FUNCTION_USAGE (insn));
4276    }
4277  if (use_mov)
4278    {
4279      if (!frame_pointer_needed || !frame.to_allocate)
4280        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4281      else
4282        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4283				       -frame.nregs * UNITS_PER_WORD);
4284    }
4285
4286#ifdef SUBTARGET_PROLOGUE
4287  SUBTARGET_PROLOGUE;
4288#endif
4289
4290  if (pic_reg_used)
4291    load_pic_register ();
4292
4293  /* If we are profiling, make sure no instructions are scheduled before
4294     the call to mcount.  However, if -fpic, the above call will have
4295     done that.  */
4296  if (current_function_profile && ! pic_reg_used)
4297    emit_insn (gen_blockage ());
4298}
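
/* For a typical small function the insns emitted above assemble to
     pushl %ebp
     movl  %esp, %ebp
     pushl %ebx          (one push per saved register)
     subl  $N, %esp      (the "allocate" adjustment)
   with the pushes replaced by moves into the allocated area when
   use_mov is selected.  */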
4299
4300/* Emit code to restore saved registers using MOV insns.  First register
4301   is restored from POINTER + OFFSET.  */
4302static void
4303ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4304     rtx pointer;
4305     int offset;
4306     int maybe_eh_return;
4307{
4308  int regno;
4309
4310  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4311    if (ix86_save_reg (regno, maybe_eh_return))
4312      {
4313	emit_move_insn (gen_rtx_REG (Pmode, regno),
4314			adjust_address (gen_rtx_MEM (Pmode, pointer),
4315					Pmode, offset));
4316	offset += UNITS_PER_WORD;
4317      }
4318}
4319
4320/* Restore function stack, frame, and registers.  */
4321
4322void
4323ix86_expand_epilogue (style)
4324     int style;
4325{
4326  int regno;
4327  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4328  struct ix86_frame frame;
4329  HOST_WIDE_INT offset;
4330
4331  ix86_compute_frame_layout (&frame);
4332
4333  /* Calculate start of saved registers relative to ebp.  Special care
4334     must be taken for the normal return case of a function using
4335     eh_return: the eax and edx registers are marked as saved, but not
4336     restored along this path.  */
4337  offset = frame.nregs;
4338  if (current_function_calls_eh_return && style != 2)
4339    offset -= 2;
4340  offset *= -UNITS_PER_WORD;
4341
4342  /* If we're only restoring one register and sp is not valid, then
4343     use a move instruction to restore the register, since it's
4344     less work than reloading sp and popping the register.
4345
4346     The default code results in a stack adjustment using an add/lea
4347     instruction, while this code results in a LEAVE instruction (or its
4348     discrete equivalent), so it is profitable in some other cases as well,
4349     especially when there are no registers to restore.  We also use this
4350     code when TARGET_USE_LEAVE is set and there is exactly one register to
4351     pop.  This heuristic may need some tuning in the future.  */
4352  if ((!sp_valid && frame.nregs <= 1)
4353      || (TARGET_EPILOGUE_USING_MOVE
4354	  && use_fast_prologue_epilogue
4355	  && (frame.nregs > 1 || frame.to_allocate))
4356      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4357      || (frame_pointer_needed && TARGET_USE_LEAVE
4358	  && use_fast_prologue_epilogue && frame.nregs == 1)
4359      || current_function_calls_eh_return)
4360    {
4361      /* Restore registers.  We can use ebp or esp to address the memory
4362	 locations.  If both are available, default to ebp, since offsets
4363	 are known to be small.  The only exception is esp pointing directly
4364	 to the end of the block of saved registers, where we may simplify
4365	 the addressing mode.  */
4366
4367      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4368	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4369					  frame.to_allocate, style == 2);
4370      else
4371	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4372					  offset, style == 2);
4373
4374      /* eh_return epilogues need %ecx added to the stack pointer.  */
4375      if (style == 2)
4376	{
4377	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4378
4379	  if (frame_pointer_needed)
4380	    {
4381	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4382	      tmp = plus_constant (tmp, UNITS_PER_WORD);
4383	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4384
4385	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4386	      emit_move_insn (hard_frame_pointer_rtx, tmp);
4387
4388	      emit_insn (gen_pro_epilogue_adjust_stack
4389			 (stack_pointer_rtx, sa, const0_rtx));
4390	    }
4391	  else
4392	    {
4393	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4394	      tmp = plus_constant (tmp, (frame.to_allocate
4395                                         + frame.nregs * UNITS_PER_WORD));
4396	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4397	    }
4398	}
4399      else if (!frame_pointer_needed)
4400	emit_insn (gen_pro_epilogue_adjust_stack
4401		   (stack_pointer_rtx, stack_pointer_rtx,
4402		    GEN_INT (frame.to_allocate
4403			     + frame.nregs * UNITS_PER_WORD)));
4404      /* If not an i386, mov & pop is faster than "leave".  */
4405      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4406	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4407      else
4408	{
4409	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4410						    hard_frame_pointer_rtx,
4411						    const0_rtx));
4412	  if (TARGET_64BIT)
4413	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4414	  else
4415	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4416	}
4417    }
4418  else
4419    {
4420      /* First step is to deallocate the stack frame so that we can
4421	 pop the registers.  */
4422      if (!sp_valid)
4423	{
4424	  if (!frame_pointer_needed)
4425	    abort ();
4426          emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4427						    hard_frame_pointer_rtx,
4428						    GEN_INT (offset)));
4429	}
4430      else if (frame.to_allocate)
4431	emit_insn (gen_pro_epilogue_adjust_stack
4432		   (stack_pointer_rtx, stack_pointer_rtx,
4433		    GEN_INT (frame.to_allocate)));
4434
4435      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4436	if (ix86_save_reg (regno, false))
4437	  {
4438	    if (TARGET_64BIT)
4439	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4440	    else
4441	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4442	  }
4443      if (frame_pointer_needed)
4444	{
4445	  /* Leave results in shorter dependency chains on CPUs that are
4446	     able to grok it fast.  */
4447	  if (TARGET_USE_LEAVE)
4448	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4449	  else if (TARGET_64BIT)
4450	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4451	  else
4452	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4453	}
4454    }
4455
4456  /* Sibcall epilogues don't want a return instruction.  */
4457  if (style == 0)
4458    return;
4459
4460  if (current_function_pops_args && current_function_args_size)
4461    {
4462      rtx popc = GEN_INT (current_function_pops_args);
4463
4464      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
4465	 return address, do an explicit add, and jump indirectly to the
4466	 caller.  */
4467
4468      if (current_function_pops_args >= 65536)
4469	{
4470	  rtx ecx = gen_rtx_REG (SImode, 2);
4471
4472	  /* There is no "pascal" calling convention in the 64-bit ABI.  */
4473	  if (TARGET_64BIT)
4474	    abort ();
4475
4476	  emit_insn (gen_popsi1 (ecx));
4477	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4478	  emit_jump_insn (gen_return_indirect_internal (ecx));
4479	}
4480      else
4481	emit_jump_insn (gen_return_pop_internal (popc));
4482    }
4483  else
4484    emit_jump_insn (gen_return_internal ());
4485}
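
/* The two epilogue shapes weighed above, for a frame pointer function
   with no other registers to restore:
     movl %ebp, %esp        vs.        leave
     popl %ebp
   "leave" is shorter but slower on some CPUs, hence TARGET_USE_LEAVE.  */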
4486
4487/* Extract the parts of an RTL expression that is a valid memory address
4488   for an instruction.  Return 0 if the structure of the address is
4489   grossly off.  Return -1 if the address contains ASHIFT, so it is not
4490   strictly valid, but is still used for computing the length of a lea
4491   instruction.  */
4492
4493static int
4494ix86_decompose_address (addr, out)
4495     register rtx addr;
4496     struct ix86_address *out;
4497{
4498  rtx base = NULL_RTX;
4499  rtx index = NULL_RTX;
4500  rtx disp = NULL_RTX;
4501  HOST_WIDE_INT scale = 1;
4502  rtx scale_rtx = NULL_RTX;
4503  int retval = 1;
4504
4505  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4506    base = addr;
4507  else if (GET_CODE (addr) == PLUS)
4508    {
4509      rtx op0 = XEXP (addr, 0);
4510      rtx op1 = XEXP (addr, 1);
4511      enum rtx_code code0 = GET_CODE (op0);
4512      enum rtx_code code1 = GET_CODE (op1);
4513
4514      if (code0 == REG || code0 == SUBREG)
4515	{
4516	  if (code1 == REG || code1 == SUBREG)
4517	    index = op0, base = op1;	/* index + base */
4518	  else
4519	    base = op0, disp = op1;	/* base + displacement */
4520	}
4521      else if (code0 == MULT)
4522	{
4523	  index = XEXP (op0, 0);
4524	  scale_rtx = XEXP (op0, 1);
4525	  if (code1 == REG || code1 == SUBREG)
4526	    base = op1;			/* index*scale + base */
4527	  else
4528	    disp = op1;			/* index*scale + disp */
4529	}
4530      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4531	{
4532	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
4533	  scale_rtx = XEXP (XEXP (op0, 0), 1);
4534	  base = XEXP (op0, 1);
4535	  disp = op1;
4536	}
4537      else if (code0 == PLUS)
4538	{
4539	  index = XEXP (op0, 0);	/* index + base + disp */
4540	  base = XEXP (op0, 1);
4541	  disp = op1;
4542	}
4543      else
4544	return 0;
4545    }
4546  else if (GET_CODE (addr) == MULT)
4547    {
4548      index = XEXP (addr, 0);		/* index*scale */
4549      scale_rtx = XEXP (addr, 1);
4550    }
4551  else if (GET_CODE (addr) == ASHIFT)
4552    {
4553      rtx tmp;
4554
4555      /* We're called for lea too, which implements ashift on occasion.  */
4556      index = XEXP (addr, 0);
4557      tmp = XEXP (addr, 1);
4558      if (GET_CODE (tmp) != CONST_INT)
4559	return 0;
4560      scale = INTVAL (tmp);
4561      if ((unsigned HOST_WIDE_INT) scale > 3)
4562	return 0;
4563      scale = 1 << scale;
4564      retval = -1;
4565    }
4566  else
4567    disp = addr;			/* displacement */
4568
4569  /* Extract the integral value of scale.  */
4570  if (scale_rtx)
4571    {
4572      if (GET_CODE (scale_rtx) != CONST_INT)
4573	return 0;
4574      scale = INTVAL (scale_rtx);
4575    }
4576
4577  /* Allow the arg pointer and stack pointer as index if there is no scaling.  */
4578  if (base && index && scale == 1
4579      && (index == arg_pointer_rtx || index == frame_pointer_rtx
4580          || index == stack_pointer_rtx))
4581    {
4582      rtx tmp = base;
4583      base = index;
4584      index = tmp;
4585    }
4586
4587  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
4588  if ((base == hard_frame_pointer_rtx
4589       || base == frame_pointer_rtx
4590       || base == arg_pointer_rtx) && !disp)
4591    disp = const0_rtx;
4592
4593  /* Special case: on the K6, [%esi] forces the instruction to be vector
4594     decoded.  Avoid this by transforming it to [%esi+0].  */
4595  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4596      && base && !index && !disp
4597      && REG_P (base)
4598      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4599    disp = const0_rtx;
4600
4601  /* Special case: encode reg+reg instead of reg*2.  */
4602  if (!base && index && scale && scale == 2)
4603    base = index, scale = 1;
4604
4605  /* Special case: scaling cannot be encoded without base or displacement.  */
4606  if (!base && !disp && index && scale != 1)
4607    disp = const0_rtx;
4608
4609  out->base = base;
4610  out->index = index;
4611  out->disp = disp;
4612  out->scale = scale;
4613
4614  return retval;
4615}
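
/* Example: the address in "movl 12(%ebx,%eax,4), %ecx" arrives as
     (plus (plus (mult (reg eax) (const_int 4)) (reg ebx)) (const_int 12))
   and decomposes to base = %ebx, index = %eax, scale = 4, disp = 12.  */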
4616
4617/* Return cost of the memory address x.
4618   For i386, it is better to use a complex address than let gcc copy
4619   the address into a reg and make a new pseudo.  But not if the address
4620   requires two regs - that would mean more pseudos with longer
4621   lifetimes.  */
4622int
4623ix86_address_cost (x)
4624     rtx x;
4625{
4626  struct ix86_address parts;
4627  int cost = 1;
4628
4629  if (!ix86_decompose_address (x, &parts))
4630    abort ();
4631
4632  /* More complex memory references are better.  */
4633  if (parts.disp && parts.disp != const0_rtx)
4634    cost--;
4635
4636  /* Attempt to minimize number of registers in the address.  */
4637  if ((parts.base
4638       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4639      || (parts.index
4640	  && (!REG_P (parts.index)
4641	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4642    cost++;
4643
4644  if (parts.base
4645      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4646      && parts.index
4647      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4648      && parts.base != parts.index)
4649    cost++;
4650
4651  /* The AMD K6 doesn't like addresses with the ModR/M byte set to
4652     00_xxx_100b, since its predecode logic can't detect the length of such
4653     instructions and they degenerate to vector decodes.  Increase the cost
4654     of such addresses here.  The penalty is at least 2 cycles.  It may be
4655     worthwhile to split such addresses or even refuse them entirely.
4656
4657     The following addressing modes are affected:
4658      [base+scale*index]
4659      [scale*index+disp]
4660      [base+index]
4661
4662     The first and last cases may be avoidable by explicitly coding the zero
4663     in the memory address, but I don't have an AMD K6 machine handy to check
4664     this theory.  */
4665
4666  if (TARGET_K6
4667      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4668	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4669	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4670    cost += 10;
4671
4672  return cost;
4673}
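
/* Worked example: "4(%ebp)" starts at cost 1 and loses one for its
   displacement, ending at 0; "(%eax,%ebx)" keeps cost 1, except on the
   K6 where the penalty above brings it to 11.  */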
4674
4675/* If X is a machine specific address (i.e. a symbol or label being
4676   referenced as a displacement from the GOT implemented using an
4677   UNSPEC), then return the base term.  Otherwise return X.  */
4678
4679rtx
4680ix86_find_base_term (x)
4681     rtx x;
4682{
4683  rtx term;
4684
4685  if (TARGET_64BIT)
4686    {
4687      if (GET_CODE (x) != CONST)
4688	return x;
4689      term = XEXP (x, 0);
4690      if (GET_CODE (term) == PLUS
4691	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
4692	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4693	term = XEXP (term, 0);
4694      if (GET_CODE (term) != UNSPEC
4695	  || XVECLEN (term, 0) != 1
4696	  || XINT (term, 1) !=  15)
4697	return x;
4698
4699      term = XVECEXP (term, 0, 0);
4700
4701      if (GET_CODE (term) != SYMBOL_REF
4702	  && GET_CODE (term) != LABEL_REF)
4703	return x;
4704
4705      return term;
4706    }
4707
4708  if (GET_CODE (x) != PLUS
4709      || XEXP (x, 0) != pic_offset_table_rtx
4710      || GET_CODE (XEXP (x, 1)) != CONST)
4711    return x;
4712
4713  term = XEXP (XEXP (x, 1), 0);
4714
4715  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4716    term = XEXP (term, 0);
4717
4718  if (GET_CODE (term) != UNSPEC
4719      || XVECLEN (term, 0) != 1
4720      || XINT (term, 1) !=  7)
4721    return x;
4722
4723  term = XVECEXP (term, 0, 0);
4724
4725  if (GET_CODE (term) != SYMBOL_REF
4726      && GET_CODE (term) != LABEL_REF)
4727    return x;
4728
4729  return term;
4730}
4731
4732/* Determine if a given CONST RTX is a valid memory displacement
4733   in PIC mode.  */
4734
4735int
4736legitimate_pic_address_disp_p (disp)
4737     register rtx disp;
4738{
4739  /* In 64bit mode we can allow direct addresses of symbols and labels
4740     when they are not dynamic symbols.  */
4741  if (TARGET_64BIT)
4742    {
4743      rtx x = disp;
4744      if (GET_CODE (disp) == CONST)
4745	x = XEXP (disp, 0);
4746      /* ??? Handle PIC code models */
4747      if (GET_CODE (x) == PLUS
4748	  && (GET_CODE (XEXP (x, 1)) == CONST_INT
4749	      && ix86_cmodel == CM_SMALL_PIC
4750	      && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4751	      && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4752	x = XEXP (x, 0);
4753      if (local_symbolic_operand (x, Pmode))
4754	return 1;
4755    }
4756  if (GET_CODE (disp) != CONST)
4757    return 0;
4758  disp = XEXP (disp, 0);
4759
4760  if (TARGET_64BIT)
4761    {
4762      /* It is unsafe to allow PLUS expressions; this limits the allowed
4763         distance of GOT tables.  We should not need these anyway.  */
4764      if (GET_CODE (disp) != UNSPEC
4765	  || XVECLEN (disp, 0) != 1
4766	  || XINT (disp, 1) != 15)
4767	return 0;
4768
4769      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4770	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4771	return 0;
4772      return 1;
4773    }
4774
4775  if (GET_CODE (disp) == PLUS)
4776    {
4777      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4778	return 0;
4779      disp = XEXP (disp, 0);
4780    }
4781
4782  if (GET_CODE (disp) != UNSPEC
4783      || XVECLEN (disp, 0) != 1)
4784    return 0;
4785
4786  /* Must be @GOT or @GOTOFF.  */
4787  switch (XINT (disp, 1))
4788    {
4789    case 6: /* @GOT */
4790      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4791
4792    case 7: /* @GOTOFF */
4793      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4794    }
4795
4796  return 0;
4797}
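
/* The unspec numbers tested above correspond to assembler syntax:
   6 is "sym@GOT" (a GOT slot), 7 is "sym@GOTOFF" (an offset from the
   GOT base), and 15 is the x86-64 GOT-relative (@GOTPCREL) form.  */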
4798
4799/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4800   memory address for an instruction.  The MODE argument is the machine mode
4801   for the MEM expression that wants to use this address.
4802
4803   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
4804   convert common non-canonical forms to canonical form so that they will
4805   be recognized.  */
4806
4807int
4808legitimate_address_p (mode, addr, strict)
4809     enum machine_mode mode;
4810     register rtx addr;
4811     int strict;
4812{
4813  struct ix86_address parts;
4814  rtx base, index, disp;
4815  HOST_WIDE_INT scale;
4816  const char *reason = NULL;
4817  rtx reason_rtx = NULL_RTX;
4818
4819  if (TARGET_DEBUG_ADDR)
4820    {
4821      fprintf (stderr,
4822	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4823	       GET_MODE_NAME (mode), strict);
4824      debug_rtx (addr);
4825    }
4826
4827  if (ix86_decompose_address (addr, &parts) <= 0)
4828    {
4829      reason = "decomposition failed";
4830      goto report_error;
4831    }
4832
4833  base = parts.base;
4834  index = parts.index;
4835  disp = parts.disp;
4836  scale = parts.scale;
4837
4838  /* Validate base register.
4839
4840     Don't allow SUBREGs here; they can lead to spill failures when the base
4841     is one word out of a two word structure, which is represented internally
4842     as a DImode int.  */
4843
4844  if (base)
4845    {
4846      reason_rtx = base;
4847
4848      if (GET_CODE (base) != REG)
4849	{
4850	  reason = "base is not a register";
4851	  goto report_error;
4852	}
4853
4854      if (GET_MODE (base) != Pmode)
4855	{
4856	  reason = "base is not in Pmode";
4857	  goto report_error;
4858	}
4859
4860      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
4861	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
4862	{
4863	  reason = "base is not valid";
4864	  goto report_error;
4865	}
4866    }
4867
4868  /* Validate index register.
4869
4870     Don't allow SUBREGs here; they can lead to spill failures when the index
4871     is one word out of a two word structure, which is represented internally
4872     as a DImode int.  */
4873
4874  if (index)
4875    {
4876      reason_rtx = index;
4877
4878      if (GET_CODE (index) != REG)
4879	{
4880	  reason = "index is not a register";
4881	  goto report_error;
4882	}
4883
4884      if (GET_MODE (index) != Pmode)
4885	{
4886	  reason = "index is not in Pmode";
4887	  goto report_error;
4888	}
4889
4890      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
4891	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
4892	{
4893	  reason = "index is not valid";
4894	  goto report_error;
4895	}
4896    }
4897
4898  /* Validate scale factor.  */
4899  if (scale != 1)
4900    {
4901      reason_rtx = GEN_INT (scale);
4902      if (!index)
4903	{
4904	  reason = "scale without index";
4905	  goto report_error;
4906	}
4907
4908      if (scale != 2 && scale != 4 && scale != 8)
4909	{
4910	  reason = "scale is not a valid multiplier";
4911	  goto report_error;
4912	}
4913    }
4914
4915  /* Validate displacement.  */
4916  if (disp)
4917    {
4918      reason_rtx = disp;
4919
4920      if (!CONSTANT_ADDRESS_P (disp))
4921	{
4922	  reason = "displacement is not constant";
4923	  goto report_error;
4924	}
4925
4926      if (TARGET_64BIT)
4927	{
4928	  if (!x86_64_sign_extended_value (disp))
4929	    {
4930	      reason = "displacement is out of range";
4931	      goto report_error;
4932	    }
4933	}
4934      else
4935	{
4936	  if (GET_CODE (disp) == CONST_DOUBLE)
4937	    {
4938	      reason = "displacement is a const_double";
4939	      goto report_error;
4940	    }
4941	}
4942
4943      if (flag_pic && SYMBOLIC_CONST (disp))
4944	{
4945	  if (TARGET_64BIT && (index || base))
4946	    {
4947	      reason = "non-constant pic memory reference";
4948	      goto report_error;
4949	    }
4950	  if (! legitimate_pic_address_disp_p (disp))
4951	    {
4952	      reason = "displacement is an invalid pic construct";
4953	      goto report_error;
4954	    }
4955
4956          /* This code used to verify that a symbolic pic displacement
4957	     includes the pic_offset_table_rtx register.
4958
4959	     While this is a good idea, unfortunately these constructs may
4960	     be created by the "adds using lea" optimization for incorrect
4961	     code like:
4962
4963	     int a;
4964	     int foo(int i)
4965	       {
4966	         return *(&a+i);
4967	       }
4968
4969	     This code is nonsensical, but results in addressing the
4970	     GOT table with a pic_offset_table_rtx base.  We can't
4971	     just refuse it easily, since it gets matched by the
4972	     "addsi3" pattern, which later gets split to a lea when the
4973	     output register differs from the input.  While this
4974	     could be handled by a separate addsi pattern for this case
4975	     that never results in a lea, disabling this test seems to be
4976	     the easier and correct fix for the crash.  */
4977	}
4978      else if (HALF_PIC_P ())
4979	{
4980	  if (! HALF_PIC_ADDRESS_P (disp)
4981	      || (base != NULL_RTX || index != NULL_RTX))
4982	    {
4983	      reason = "displacement is an invalid half-pic reference";
4984	      goto report_error;
4985	    }
4986	}
4987    }
4988
4989  /* Everything looks valid.  */
4990  if (TARGET_DEBUG_ADDR)
4991    fprintf (stderr, "Success.\n");
4992  return TRUE;
4993
4994report_error:
4995  if (TARGET_DEBUG_ADDR)
4996    {
4997      fprintf (stderr, "Error: %s\n", reason);
4998      debug_rtx (reason_rtx);
4999    }
5000  return FALSE;
5001}
5002
5003/* Return a unique alias set for the GOT.  */
5004
5005static HOST_WIDE_INT
5006ix86_GOT_alias_set ()
5007{
5008    static HOST_WIDE_INT set = -1;
5009    if (set == -1)
5010      set = new_alias_set ();
5011    return set;
5012}
5013
5014/* Return a legitimate reference for ORIG (an address) using the
5015   register REG.  If REG is 0, a new pseudo is generated.
5016
5017   There are two types of references that must be handled:
5018
5019   1. Global data references must load the address from the GOT, via
5020      the PIC reg.  An insn is emitted to do this load, and the reg is
5021      returned.
5022
5023   2. Static data references, constant pool addresses, and code labels
5024      compute the address as an offset from the GOT, whose base is in
5025      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
5026      differentiate them from global data objects.  The returned
5027      address is the PIC reg + an unspec constant.
5028
5029   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5030   reg also appears in the address.  */
5031
5032rtx
5033legitimize_pic_address (orig, reg)
5034     rtx orig;
5035     rtx reg;
5036{
5037  rtx addr = orig;
5038  rtx new = orig;
5039  rtx base;
5040
5041  if (local_symbolic_operand (addr, Pmode))
5042    {
5043      /* In 64bit mode we can address such objects directly.  */
5044      if (TARGET_64BIT)
5045	new = addr;
5046      else
5047	{
5048	  /* This symbol may be referenced via a displacement from the PIC
5049	     base address (@GOTOFF).  */
5050
5051	  current_function_uses_pic_offset_table = 1;
5052	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
5053	  new = gen_rtx_CONST (Pmode, new);
5054	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5055
5056	  if (reg != 0)
5057	    {
5058	      emit_move_insn (reg, new);
5059	      new = reg;
5060	    }
5061      	}
5062    }
5063  else if (GET_CODE (addr) == SYMBOL_REF)
5064    {
5065      if (TARGET_64BIT)
5066	{
5067	  current_function_uses_pic_offset_table = 1;
5068	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
5069	  new = gen_rtx_CONST (Pmode, new);
5070	  new = gen_rtx_MEM (Pmode, new);
5071	  RTX_UNCHANGING_P (new) = 1;
5072	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5073
5074	  if (reg == 0)
5075	    reg = gen_reg_rtx (Pmode);
5076	  /* Use gen_movsi directly, otherwise the address is loaded
5077	     into a register for CSE.  We don't want to CSE these addresses;
5078	     instead we CSE the addresses loaded from the GOT table, so skip this.  */
5079	  emit_insn (gen_movsi (reg, new));
5080	  new = reg;
5081	}
5082      else
5083	{
5084	  /* This symbol must be referenced via a load from the
5085	     Global Offset Table (@GOT).  */
5086
5087	  current_function_uses_pic_offset_table = 1;
5088	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
5089	  new = gen_rtx_CONST (Pmode, new);
5090	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5091	  new = gen_rtx_MEM (Pmode, new);
5092	  RTX_UNCHANGING_P (new) = 1;
5093	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5094
5095	  if (reg == 0)
5096	    reg = gen_reg_rtx (Pmode);
5097	  emit_move_insn (reg, new);
5098	  new = reg;
5099	}
5100    }
5101  else
5102    {
5103      if (GET_CODE (addr) == CONST)
5104	{
5105	  addr = XEXP (addr, 0);
5106
5107	  /* We must match stuff we generate before.  Assume the only
5108	     unspecs that can get here are ours.  Not that we could do
5109	     anything with them anyway...  */
5110	  if (GET_CODE (addr) == UNSPEC
5111	      || (GET_CODE (addr) == PLUS
5112		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5113	    return orig;
5114	  if (GET_CODE (addr) != PLUS)
5115	    abort ();
5116	}
5117      if (GET_CODE (addr) == PLUS)
5118	{
5119	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5120
5121	  /* Check first to see if this is a constant offset from a @GOTOFF
5122	     symbol reference.  */
5123	  if (local_symbolic_operand (op0, Pmode)
5124	      && GET_CODE (op1) == CONST_INT)
5125	    {
5126	      if (!TARGET_64BIT)
5127		{
5128		  current_function_uses_pic_offset_table = 1;
5129		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
5130		  new = gen_rtx_PLUS (Pmode, new, op1);
5131		  new = gen_rtx_CONST (Pmode, new);
5132		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5133
5134		  if (reg != 0)
5135		    {
5136		      emit_move_insn (reg, new);
5137		      new = reg;
5138		    }
5139		}
5140	      else
5141		{
5142		  /* ??? We need to limit offsets here.  */
5143		}
5144	    }
5145	  else
5146	    {
5147	      base = legitimize_pic_address (XEXP (addr, 0), reg);
5148	      new  = legitimize_pic_address (XEXP (addr, 1),
5149					     base == reg ? NULL_RTX : reg);
5150
5151	      if (GET_CODE (new) == CONST_INT)
5152		new = plus_constant (base, INTVAL (new));
5153	      else
5154		{
5155		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5156		    {
5157		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5158		      new = XEXP (new, 1);
5159		    }
5160		  new = gen_rtx_PLUS (Pmode, base, new);
5161		}
5162	    }
5163	}
5164    }
5165  return new;
5166}
5167
5168/* Try machine-dependent ways of modifying an illegitimate address
5169   to be legitimate.  If we find one, return the new, valid address.
5170   This macro is used in only one place: `memory_address' in explow.c.
5171
5172   OLDX is the address as it was before break_out_memory_refs was called.
5173   In some cases it is useful to look at this to decide what needs to be done.
5174
5175   MODE and WIN are passed so that this macro can use
5176   GO_IF_LEGITIMATE_ADDRESS.
5177
5178   It is always safe for this macro to do nothing.  It exists to recognize
5179   opportunities to optimize the output.
5180
5181   For the 80386, we handle X+REG by loading X into a register R and
5182   using R+REG.  R will go in a general reg and indexing will be used.
5183   However, if REG is a broken-out memory address or multiplication,
5184   nothing needs to be done because REG can certainly go in a general reg.
5185
5186   When -fpic is used, special handling is needed for symbolic references.
5187   See comments by legitimize_pic_address in i386.c for details.  */
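
/* A sketch of the X+REG case described above (illustration only, not
   from the original source): for an address like `symbol + %eax' in
   non-PIC code, the symbol is first loaded into a scratch register,

     movl	$symbol, %edx

   and the memory access then uses base+index addressing,

     movl	(%edx,%eax), %ecx

   which GO_IF_LEGITIMATE_ADDRESS accepts directly.  */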
5188
5189rtx
5190legitimize_address (x, oldx, mode)
5191     register rtx x;
5192     register rtx oldx ATTRIBUTE_UNUSED;
5193     enum machine_mode mode;
5194{
5195  int changed = 0;
5196  unsigned log;
5197
5198  if (TARGET_DEBUG_ADDR)
5199    {
5200      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5201	       GET_MODE_NAME (mode));
5202      debug_rtx (x);
5203    }
5204
5205  if (flag_pic && SYMBOLIC_CONST (x))
5206    return legitimize_pic_address (x, 0);
5207
5208  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5209  if (GET_CODE (x) == ASHIFT
5210      && GET_CODE (XEXP (x, 1)) == CONST_INT
5211      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5212    {
5213      changed = 1;
5214      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5215			GEN_INT (1 << log));
5216    }
5217
5218  if (GET_CODE (x) == PLUS)
5219    {
5220      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
5221
5222      if (GET_CODE (XEXP (x, 0)) == ASHIFT
5223	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5224	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5225	{
5226	  changed = 1;
5227	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
5228				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5229				      GEN_INT (1 << log));
5230	}
5231
5232      if (GET_CODE (XEXP (x, 1)) == ASHIFT
5233	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5234	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5235	{
5236	  changed = 1;
5237	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
5238				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5239				      GEN_INT (1 << log));
5240	}
5241
5242      /* Put multiply first if it isn't already.  */
5243      if (GET_CODE (XEXP (x, 1)) == MULT)
5244	{
5245	  rtx tmp = XEXP (x, 0);
5246	  XEXP (x, 0) = XEXP (x, 1);
5247	  XEXP (x, 1) = tmp;
5248	  changed = 1;
5249	}
5250
5251      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5252	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
5253	 created by virtual register instantiation, register elimination, and
5254	 similar optimizations.  */
5255      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5256	{
5257	  changed = 1;
5258	  x = gen_rtx_PLUS (Pmode,
5259			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
5260					  XEXP (XEXP (x, 1), 0)),
5261			    XEXP (XEXP (x, 1), 1));
5262	}
5263
5264      /* Canonicalize
5265	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5266	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
5267      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5268	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5269	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5270	       && CONSTANT_P (XEXP (x, 1)))
5271	{
5272	  rtx constant;
5273	  rtx other = NULL_RTX;
5274
5275	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5276	    {
5277	      constant = XEXP (x, 1);
5278	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5279	    }
5280	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5281	    {
5282	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5283	      other = XEXP (x, 1);
5284	    }
5285	  else
5286	    constant = 0;
5287
5288	  if (constant)
5289	    {
5290	      changed = 1;
5291	      x = gen_rtx_PLUS (Pmode,
5292				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5293					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
5294				plus_constant (other, INTVAL (constant)));
5295	    }
5296	}
5297
5298      if (changed && legitimate_address_p (mode, x, FALSE))
5299	return x;
5300
5301      if (GET_CODE (XEXP (x, 0)) == MULT)
5302	{
5303	  changed = 1;
5304	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5305	}
5306
5307      if (GET_CODE (XEXP (x, 1)) == MULT)
5308	{
5309	  changed = 1;
5310	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5311	}
5312
5313      if (changed
5314	  && GET_CODE (XEXP (x, 1)) == REG
5315	  && GET_CODE (XEXP (x, 0)) == REG)
5316	return x;
5317
5318      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5319	{
5320	  changed = 1;
5321	  x = legitimize_pic_address (x, 0);
5322	}
5323
5324      if (changed && legitimate_address_p (mode, x, FALSE))
5325	return x;
5326
5327      if (GET_CODE (XEXP (x, 0)) == REG)
5328	{
5329	  register rtx temp = gen_reg_rtx (Pmode);
5330	  register rtx val  = force_operand (XEXP (x, 1), temp);
5331	  if (val != temp)
5332	    emit_move_insn (temp, val);
5333
5334	  XEXP (x, 1) = temp;
5335	  return x;
5336	}
5337
5338      else if (GET_CODE (XEXP (x, 1)) == REG)
5339	{
5340	  register rtx temp = gen_reg_rtx (Pmode);
5341	  register rtx val  = force_operand (XEXP (x, 0), temp);
5342	  if (val != temp)
5343	    emit_move_insn (temp, val);
5344
5345	  XEXP (x, 0) = temp;
5346	  return x;
5347	}
5348    }
5349
5350  return x;
5351}
5352
5353/* Print an integer constant expression in assembler syntax.  Addition
5354   and subtraction are the only arithmetic that may appear in these
5355   expressions.  FILE is the stdio stream to write to, X is the rtx, and
5356   CODE is the operand print code from the output string.  */
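
/* For example (illustration only), given the UNSPEC wrappers generated
   by legitimize_pic_address above, this routine prints

     (unspec [sym] 7)				as	sym@GOTOFF
     (unspec [sym] 6)				as	sym@GOT
     (plus (unspec [sym] 7) (const_int 4))	as	4+sym@GOTOFF

   where the integer constant is printed first for the benefit of the
   assemblers mentioned in the PLUS case below.  */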
5357
5358static void
5359output_pic_addr_const (file, x, code)
5360     FILE *file;
5361     rtx x;
5362     int code;
5363{
5364  char buf[256];
5365
5366  switch (GET_CODE (x))
5367    {
5368    case PC:
5369      if (flag_pic)
5370	putc ('.', file);
5371      else
5372	abort ();
5373      break;
5374
5375    case SYMBOL_REF:
5376      assemble_name (file, XSTR (x, 0));
5377      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5378	fputs ("@PLT", file);
5379      break;
5380
5381    case LABEL_REF:
5382      x = XEXP (x, 0);
5383      /* FALLTHRU */
5384    case CODE_LABEL:
5385      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5386      assemble_name (asm_out_file, buf);
5387      break;
5388
5389    case CONST_INT:
5390      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5391      break;
5392
5393    case CONST:
5394      /* This used to output parentheses around the expression,
5395	 but that does not work on the 386 (either ATT or BSD assembler).  */
5396      output_pic_addr_const (file, XEXP (x, 0), code);
5397      break;
5398
5399    case CONST_DOUBLE:
5400      if (GET_MODE (x) == VOIDmode)
5401	{
5402	  /* We can use %d if the number is <32 bits and positive.  */
5403	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5404	    fprintf (file, "0x%lx%08lx",
5405		     (unsigned long) CONST_DOUBLE_HIGH (x),
5406		     (unsigned long) CONST_DOUBLE_LOW (x));
5407	  else
5408	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5409	}
5410      else
5411	/* We can't handle floating point constants;
5412	   PRINT_OPERAND must handle them.  */
5413	output_operand_lossage ("floating constant misused");
5414      break;
5415
5416    case PLUS:
5417      /* Some assemblers need integer constants to appear first.  */
5418      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5419	{
5420	  output_pic_addr_const (file, XEXP (x, 0), code);
5421	  putc ('+', file);
5422	  output_pic_addr_const (file, XEXP (x, 1), code);
5423	}
5424      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5425	{
5426	  output_pic_addr_const (file, XEXP (x, 1), code);
5427	  putc ('+', file);
5428	  output_pic_addr_const (file, XEXP (x, 0), code);
5429	}
5430      else
5431	abort ();
5432      break;
5433
5434    case MINUS:
5435      putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5436      output_pic_addr_const (file, XEXP (x, 0), code);
5437      putc ('-', file);
5438      output_pic_addr_const (file, XEXP (x, 1), code);
5439      putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5440      break;
5441
5442     case UNSPEC:
5443       if (XVECLEN (x, 0) != 1)
5444	abort ();
5445       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5446       switch (XINT (x, 1))
5447	{
5448	case 6:
5449	  fputs ("@GOT", file);
5450	  break;
5451	case 7:
5452	  fputs ("@GOTOFF", file);
5453	  break;
5454	case 8:
5455	  fputs ("@PLT", file);
5456	  break;
5457	case 15:
5458	  fputs ("@GOTPCREL(%RIP)", file);
5459	  break;
5460	default:
5461	  output_operand_lossage ("invalid UNSPEC as operand");
5462	  break;
5463	}
5464       break;
5465
5466    default:
5467      output_operand_lossage ("invalid expression as operand");
5468    }
5469}
5470
5471/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5472   We need to handle our special PIC relocations.  */
5473
5474void
5475i386_dwarf_output_addr_const (file, x)
5476     FILE *file;
5477     rtx x;
5478{
5479#ifdef ASM_QUAD
5480  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5481#else
5482  if (TARGET_64BIT)
5483    abort ();
5484  fprintf (file, "%s", ASM_LONG);
5485#endif
5486  if (flag_pic)
5487    output_pic_addr_const (file, x, '\0');
5488  else
5489    output_addr_const (file, x);
5490  fputc ('\n', file);
5491}
5492
5493/* In the name of slightly smaller debug output, and to cater to
5494   general assembler lossage, recognize PIC+GOTOFF and turn it back
5495   into a direct symbol reference.  */
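
/* E.g. (an illustration, not from the original source) the PIC form

     (plus (reg %ebx) (const (unspec [(symbol_ref "x")] 7)))

   produced for a @GOTOFF access is folded back to the plain

     (symbol_ref "x")

   so the debug information can refer to the symbol directly.  */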
5496
5497rtx
5498i386_simplify_dwarf_addr (orig_x)
5499     rtx orig_x;
5500{
5501  rtx x = orig_x, y;
5502
5503  if (GET_CODE (x) == MEM)
5504    x = XEXP (x, 0);
5505
5506  if (TARGET_64BIT)
5507    {
5508      if (GET_CODE (x) != CONST
5509	  || GET_CODE (XEXP (x, 0)) != UNSPEC
5510	  || XINT (XEXP (x, 0), 1) != 15
5511	  || GET_CODE (orig_x) != MEM)
5512	return orig_x;
5513      return XVECEXP (XEXP (x, 0), 0, 0);
5514    }
5515
5516  if (GET_CODE (x) != PLUS
5517      || GET_CODE (XEXP (x, 1)) != CONST)
5518    return orig_x;
5519
5520  if (GET_CODE (XEXP (x, 0)) == REG
5521      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5522    /* %ebx + GOT/GOTOFF */
5523    y = NULL;
5524  else if (GET_CODE (XEXP (x, 0)) == PLUS)
5525    {
5526      /* %ebx + %reg * scale + GOT/GOTOFF */
5527      y = XEXP (x, 0);
5528      if (GET_CODE (XEXP (y, 0)) == REG
5529	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5530	y = XEXP (y, 1);
5531      else if (GET_CODE (XEXP (y, 1)) == REG
5532	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5533	y = XEXP (y, 0);
5534      else
5535	return orig_x;
5536      if (GET_CODE (y) != REG
5537	  && GET_CODE (y) != MULT
5538	  && GET_CODE (y) != ASHIFT)
5539	return orig_x;
5540    }
5541  else
5542    return orig_x;
5543
5544  x = XEXP (XEXP (x, 1), 0);
5545  if (GET_CODE (x) == UNSPEC
5546      && ((XINT (x, 1) == 6 && GET_CODE (orig_x) == MEM)
5547	  || (XINT (x, 1) == 7 && GET_CODE (orig_x) != MEM)))
5548    {
5549      if (y)
5550	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5551      return XVECEXP (x, 0, 0);
5552    }
5553
5554  if (GET_CODE (x) == PLUS
5555      && GET_CODE (XEXP (x, 0)) == UNSPEC
5556      && GET_CODE (XEXP (x, 1)) == CONST_INT
5557      && ((XINT (XEXP (x, 0), 1) == 6 && GET_CODE (orig_x) == MEM)
5558	  || (XINT (XEXP (x, 0), 1) == 7 && GET_CODE (orig_x) != MEM)))
5559    {
5560      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5561      if (y)
5562	return gen_rtx_PLUS (Pmode, y, x);
5563      return x;
5564    }
5565
5566  return orig_x;
5567}
5568
5569static void
5570put_condition_code (code, mode, reverse, fp, file)
5571     enum rtx_code code;
5572     enum machine_mode mode;
5573     int reverse, fp;
5574     FILE *file;
5575{
5576  const char *suffix;
5577
5578  if (mode == CCFPmode || mode == CCFPUmode)
5579    {
5580      enum rtx_code second_code, bypass_code;
5581      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5582      if (bypass_code != NIL || second_code != NIL)
5583	abort ();
5584      code = ix86_fp_compare_code_to_integer (code);
5585      mode = CCmode;
5586    }
5587  if (reverse)
5588    code = reverse_condition (code);
5589
5590  switch (code)
5591    {
5592    case EQ:
5593      suffix = "e";
5594      break;
5595    case NE:
5596      suffix = "ne";
5597      break;
5598    case GT:
5599      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5600	abort ();
5601      suffix = "g";
5602      break;
5603    case GTU:
5604      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5605	 Those same assemblers have the same but opposite lossage on cmov.  */
5606      if (mode != CCmode)
5607	abort ();
5608      suffix = fp ? "nbe" : "a";
5609      break;
5610    case LT:
5611      if (mode == CCNOmode || mode == CCGOCmode)
5612	suffix = "s";
5613      else if (mode == CCmode || mode == CCGCmode)
5614	suffix = "l";
5615      else
5616	abort ();
5617      break;
5618    case LTU:
5619      if (mode != CCmode)
5620	abort ();
5621      suffix = "b";
5622      break;
5623    case GE:
5624      if (mode == CCNOmode || mode == CCGOCmode)
5625	suffix = "ns";
5626      else if (mode == CCmode || mode == CCGCmode)
5627	suffix = "ge";
5628      else
5629	abort ();
5630      break;
5631    case GEU:
5632      /* ??? As above.  */
5633      if (mode != CCmode)
5634	abort ();
5635      suffix = fp ? "nb" : "ae";
5636      break;
5637    case LE:
5638      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5639	abort ();
5640      suffix = "le";
5641      break;
5642    case LEU:
5643      if (mode != CCmode)
5644	abort ();
5645      suffix = "be";
5646      break;
5647    case UNORDERED:
5648      suffix = fp ? "u" : "p";
5649      break;
5650    case ORDERED:
5651      suffix = fp ? "nu" : "np";
5652      break;
5653    default:
5654      abort ();
5655    }
5656  fputs (suffix, file);
5657}
5658
5659void
5660print_reg (x, code, file)
5661     rtx x;
5662     int code;
5663     FILE *file;
5664{
5665  if (REGNO (x) == ARG_POINTER_REGNUM
5666      || REGNO (x) == FRAME_POINTER_REGNUM
5667      || REGNO (x) == FLAGS_REG
5668      || REGNO (x) == FPSR_REG)
5669    abort ();
5670
5671  if (ASSEMBLER_DIALECT == ASM_ATT  || USER_LABEL_PREFIX[0] == 0)
5672    putc ('%', file);
5673
5674  if (code == 'w' || MMX_REG_P (x))
5675    code = 2;
5676  else if (code == 'b')
5677    code = 1;
5678  else if (code == 'k')
5679    code = 4;
5680  else if (code == 'q')
5681    code = 8;
5682  else if (code == 'y')
5683    code = 3;
5684  else if (code == 'h')
5685    code = 0;
5686  else
5687    code = GET_MODE_SIZE (GET_MODE (x));
5688
5689  /* Irritatingly, AMD extended registers use a different naming convention
5690     from the normal registers.  */
5691  if (REX_INT_REG_P (x))
5692    {
5693      if (!TARGET_64BIT)
5694	abort ();
5695      switch (code)
5696	{
5697	  case 0:
5698	    error ("extended registers have no high halves");
5699	    break;
5700	  case 1:
5701	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5702	    break;
5703	  case 2:
5704	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5705	    break;
5706	  case 4:
5707	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5708	    break;
5709	  case 8:
5710	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5711	    break;
5712	  default:
5713	    error ("unsupported operand size for extended register");
5714	    break;
5715	}
5716      return;
5717    }
5718  switch (code)
5719    {
5720    case 3:
5721      if (STACK_TOP_P (x))
5722	{
5723	  fputs ("st(0)", file);
5724	  break;
5725	}
5726      /* FALLTHRU */
5727    case 8:
5728    case 4:
5729    case 12:
5730      if (! ANY_FP_REG_P (x))
5731	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5732      /* FALLTHRU */
5733    case 16:
5734    case 2:
5735      fputs (hi_reg_name[REGNO (x)], file);
5736      break;
5737    case 1:
5738      fputs (qi_reg_name[REGNO (x)], file);
5739      break;
5740    case 0:
5741      fputs (qi_high_reg_name[REGNO (x)], file);
5742      break;
5743    default:
5744      abort ();
5745    }
5746}
5747
5748/* Meaning of CODE:
5749   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5750   C -- print opcode suffix for set/cmov insn.
5751   c -- like C, but print reversed condition
5752   F,f -- likewise, but for floating-point.
5753   O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
5754        nothing
5755   R -- print the prefix for register names.
5756   z -- print the opcode suffix for the size of the current operand.
5757   * -- print a star (in certain assembler syntax)
5758   A -- print an absolute memory reference.
5759   w -- print the operand as if it's a "word" (HImode) even if it isn't.
5760   s -- print a shift double count, followed by the assembler's argument
5761	delimiter.
5762   b -- print the QImode name of the register for the indicated operand.
5763	%b0 would print %al if operands[0] is reg 0.
5764   w --  likewise, print the HImode name of the register.
5765   k --  likewise, print the SImode name of the register.
5766   q --  likewise, print the DImode name of the register.
5767   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5768   y -- print "st(0)" instead of "st" as a register.
5769   D -- print condition for SSE cmp instruction.
5770   P -- if PIC, print an @PLT suffix.
5771   X -- don't print any sort of PIC '@' suffix for a symbol.
5772 */
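
/* As an illustration (not in the original source): if operands[0] is
   (reg:SI 0), i.e. the a-register, then in an output template

     %b0 prints %al,   %h0 prints %ah,   %w0 prints %ax,
     %k0 prints %eax,  and %q0 (64-bit only) prints %rax.  */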
5773
5774void
5775print_operand (file, x, code)
5776     FILE *file;
5777     rtx x;
5778     int code;
5779{
5780  if (code)
5781    {
5782      switch (code)
5783	{
5784	case '*':
5785	  if (ASSEMBLER_DIALECT == ASM_ATT)
5786	    putc ('*', file);
5787	  return;
5788
5789	case 'A':
5790	  if (ASSEMBLER_DIALECT == ASM_ATT)
5791	    putc ('*', file);
5792	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
5793	    {
5794	      /* Intel syntax.  For absolute addresses, registers should not
5795		 be surrounded by brackets.  */
5796	      if (GET_CODE (x) != REG)
5797		{
5798		  putc ('[', file);
5799		  PRINT_OPERAND (file, x, 0);
5800		  putc (']', file);
5801		  return;
5802		}
5803	    }
5804	  else
5805	    abort ();
5806
5807	  PRINT_OPERAND (file, x, 0);
5808	  return;
5809
5810
5811	case 'L':
5812	  if (ASSEMBLER_DIALECT == ASM_ATT)
5813	    putc ('l', file);
5814	  return;
5815
5816	case 'W':
5817	  if (ASSEMBLER_DIALECT == ASM_ATT)
5818	    putc ('w', file);
5819	  return;
5820
5821	case 'B':
5822	  if (ASSEMBLER_DIALECT == ASM_ATT)
5823	    putc ('b', file);
5824	  return;
5825
5826	case 'Q':
5827	  if (ASSEMBLER_DIALECT == ASM_ATT)
5828	    putc ('l', file);
5829	  return;
5830
5831	case 'S':
5832	  if (ASSEMBLER_DIALECT == ASM_ATT)
5833	    putc ('s', file);
5834	  return;
5835
5836	case 'T':
5837	  if (ASSEMBLER_DIALECT == ASM_ATT)
5838	    putc ('t', file);
5839	  return;
5840
5841	case 'z':
5842	  /* 387 opcodes don't get size suffixes if the operands are
5843	     registers.  */
5844	  if (STACK_REG_P (x))
5845	    return;
5846
5847	  /* Likewise if using Intel opcodes.  */
5848	  if (ASSEMBLER_DIALECT == ASM_INTEL)
5849	    return;
5850
5851	  /* Derive the opcode suffix from the size of the operand.  */
5852	  switch (GET_MODE_SIZE (GET_MODE (x)))
5853	    {
5854	    case 2:
5855#ifdef HAVE_GAS_FILDS_FISTS
5856	      putc ('s', file);
5857#endif
5858	      return;
5859
5860	    case 4:
5861	      if (GET_MODE (x) == SFmode)
5862		{
5863		  putc ('s', file);
5864		  return;
5865		}
5866	      else
5867		putc ('l', file);
5868	      return;
5869
5870	    case 12:
5871	    case 16:
5872	      putc ('t', file);
5873	      return;
5874
5875	    case 8:
5876	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5877		{
5878#ifdef GAS_MNEMONICS
5879		  putc ('q', file);
5880#else
5881		  putc ('l', file);
5882		  putc ('l', file);
5883#endif
5884		}
5885	      else
5886	        putc ('l', file);
5887	      return;
5888
5889	    default:
5890	      abort ();
5891	    }
5892
5893	case 'b':
5894	case 'w':
5895	case 'k':
5896	case 'q':
5897	case 'h':
5898	case 'y':
5899	case 'X':
5900	case 'P':
5901	  break;
5902
5903	case 's':
5904	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5905	    {
5906	      PRINT_OPERAND (file, x, 0);
5907	      putc (',', file);
5908	    }
5909	  return;
5910
5911	case 'D':
5912	  /* A little bit of braindamage here.  The SSE compare instructions
5913	     use completely different names for the comparisons than the
5914	     fp conditional moves do.  */
5915	  switch (GET_CODE (x))
5916	    {
5917	    case EQ:
5918	    case UNEQ:
5919	      fputs ("eq", file);
5920	      break;
5921	    case LT:
5922	    case UNLT:
5923	      fputs ("lt", file);
5924	      break;
5925	    case LE:
5926	    case UNLE:
5927	      fputs ("le", file);
5928	      break;
5929	    case UNORDERED:
5930	      fputs ("unord", file);
5931	      break;
5932	    case NE:
5933	    case LTGT:
5934	      fputs ("neq", file);
5935	      break;
5936	    case UNGE:
5937	    case GE:
5938	      fputs ("nlt", file);
5939	      break;
5940	    case UNGT:
5941	    case GT:
5942	      fputs ("nle", file);
5943	      break;
5944	    case ORDERED:
5945	      fputs ("ord", file);
5946	      break;
5947	    default:
5948	      abort ();
5949	      break;
5950	    }
5951	  return;
5952	case 'O':
5953#ifdef CMOV_SUN_AS_SYNTAX
5954	  if (ASSEMBLER_DIALECT == ASM_ATT)
5955	    {
5956	      switch (GET_MODE (x))
5957		{
5958		case HImode: putc ('w', file); break;
5959		case SImode:
5960		case SFmode: putc ('l', file); break;
5961		case DImode:
5962		case DFmode: putc ('q', file); break;
5963		default: abort ();
5964		}
5965	      putc ('.', file);
5966	    }
5967#endif
5968	  return;
5969	case 'C':
5970	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5971	  return;
5972	case 'F':
5973#ifdef CMOV_SUN_AS_SYNTAX
5974	  if (ASSEMBLER_DIALECT == ASM_ATT)
5975	    putc ('.', file);
5976#endif
5977	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5978	  return;
5979
5980	  /* Like above, but reverse condition */
5981	case 'c':
5982	  /* Check to see if argument to %c is really a constant
5983	     and not a condition code which needs to be reversed.  */
5984	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5985	  {
5986	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5987	     return;
5988	  }
5989	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5990	  return;
5991	case 'f':
5992#ifdef CMOV_SUN_AS_SYNTAX
5993	  if (ASSEMBLER_DIALECT == ASM_ATT)
5994	    putc ('.', file);
5995#endif
5996	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5997	  return;
5998	case '+':
5999	  {
6000	    rtx x;
6001
6002	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6003	      return;
6004
6005	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6006	    if (x)
6007	      {
6008		int pred_val = INTVAL (XEXP (x, 0));
6009
6010		if (pred_val < REG_BR_PROB_BASE * 45 / 100
6011		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
6012		  {
6013		    int taken = pred_val > REG_BR_PROB_BASE / 2;
6014		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
6015
6016		    /* Emit hints only in case the default branch prediction
6017		       heuristics would fail.  */
6018		    if (taken != cputaken)
6019		      {
6020			/* We use 3e (DS) prefix for taken branches and
6021			   2e (CS) prefix for not taken branches.  */
6022			if (taken)
6023			  fputs ("ds ; ", file);
6024			else
6025			  fputs ("cs ; ", file);
6026		      }
6027		  }
6028	      }
6029	    return;
6030	  }
6031	default:
6032	    output_operand_lossage ("invalid operand code `%c'", code);
6033	}
6034    }
6035
6036  if (GET_CODE (x) == REG)
6037    {
6038      PRINT_REG (x, code, file);
6039    }
6040
6041  else if (GET_CODE (x) == MEM)
6042    {
6043      /* No `byte ptr' prefix for call instructions.  */
6044      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6045	{
6046	  const char * size;
6047	  switch (GET_MODE_SIZE (GET_MODE (x)))
6048	    {
6049	    case 1: size = "BYTE"; break;
6050	    case 2: size = "WORD"; break;
6051	    case 4: size = "DWORD"; break;
6052	    case 8: size = "QWORD"; break;
6053	    case 12: size = "XWORD"; break;
6054	    case 16: size = "XMMWORD"; break;
6055	    default:
6056	      abort ();
6057	    }
6058
6059	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
6060	  if (code == 'b')
6061	    size = "BYTE";
6062	  else if (code == 'w')
6063	    size = "WORD";
6064	  else if (code == 'k')
6065	    size = "DWORD";
6066
6067	  fputs (size, file);
6068	  fputs (" PTR ", file);
6069	}
6070
6071      x = XEXP (x, 0);
6072      if (flag_pic && CONSTANT_ADDRESS_P (x))
6073	output_pic_addr_const (file, x, code);
6074      /* Avoid (%rip) for call operands.  */
6075      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6076	       && GET_CODE (x) != CONST_INT)
6077	output_addr_const (file, x);
6078      else
6079	output_address (x);
6080    }
6081
6082  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6083    {
6084      REAL_VALUE_TYPE r;
6085      long l;
6086
6087      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6088      REAL_VALUE_TO_TARGET_SINGLE (r, l);
6089
6090      if (ASSEMBLER_DIALECT == ASM_ATT)
6091	putc ('$', file);
6092      fprintf (file, "0x%lx", l);
6093    }
6094
6095  /* These float cases don't actually occur as immediate operands.  */
6096  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6097    {
6098      REAL_VALUE_TYPE r;
6099      char dstr[30];
6100
6101      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6102      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6103      fprintf (file, "%s", dstr);
6104    }
6105
6106  else if (GET_CODE (x) == CONST_DOUBLE
6107	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6108    {
6109      REAL_VALUE_TYPE r;
6110      char dstr[30];
6111
6112      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6113      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6114      fprintf (file, "%s", dstr);
6115    }
6116  else
6117    {
6118      if (code != 'P')
6119	{
6120	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6121	    {
6122	      if (ASSEMBLER_DIALECT == ASM_ATT)
6123		putc ('$', file);
6124	    }
6125	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6126		   || GET_CODE (x) == LABEL_REF)
6127	    {
6128	      if (ASSEMBLER_DIALECT == ASM_ATT)
6129		putc ('$', file);
6130	      else
6131		fputs ("OFFSET FLAT:", file);
6132	    }
6133	}
6134      if (GET_CODE (x) == CONST_INT)
6135	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6136      else if (flag_pic)
6137	output_pic_addr_const (file, x, code);
6138      else
6139	output_addr_const (file, x);
6140    }
6141}
6142
6143/* Print a memory operand whose address is ADDR.  */
6144
6145void
6146print_operand_address (file, addr)
6147     FILE *file;
6148     register rtx addr;
6149{
6150  struct ix86_address parts;
6151  rtx base, index, disp;
6152  int scale;
6153
6154  if (! ix86_decompose_address (addr, &parts))
6155    {
6156      output_operand_lossage ("wrong address expression or operand constraint");
6157      return;
6158    }
6159
6160  base = parts.base;
6161  index = parts.index;
6162  disp = parts.disp;
6163  scale = parts.scale;
6164
6165  if (!base && !index)
6166    {
6167      /* A displacement-only address requires special attention.  */
6168
6169      if (GET_CODE (disp) == CONST_INT)
6170	{
6171	  if (ASSEMBLER_DIALECT == ASM_INTEL)
6172	    {
6173	      if (USER_LABEL_PREFIX[0] == 0)
6174		putc ('%', file);
6175	      fputs ("ds:", file);
6176	    }
6177	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6178	}
6179      else if (flag_pic)
6180	output_pic_addr_const (file, addr, 0);
6181      else
6182	output_addr_const (file, addr);
6183
6184      /* Use the one byte shorter RIP relative addressing for 64bit mode.  */
6185      if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6186	fputs ("(%rip)", file);
6187    }
6188  else
6189    {
6190      if (ASSEMBLER_DIALECT == ASM_ATT)
6191	{
6192	  if (disp)
6193	    {
6194	      if (flag_pic)
6195		output_pic_addr_const (file, disp, 0);
6196	      else if (GET_CODE (disp) == LABEL_REF)
6197		output_asm_label (disp);
6198	      else
6199		output_addr_const (file, disp);
6200	    }
6201
6202	  putc ('(', file);
6203	  if (base)
6204	    PRINT_REG (base, 0, file);
6205	  if (index)
6206	    {
6207	      putc (',', file);
6208	      PRINT_REG (index, 0, file);
6209	      if (scale != 1)
6210		fprintf (file, ",%d", scale);
6211	    }
6212	  putc (')', file);
6213	}
6214      else
6215	{
6216	  rtx offset = NULL_RTX;
6217
6218	  if (disp)
6219	    {
6220	      /* Pull out the offset of a symbol; print any symbol itself.  */
6221	      if (GET_CODE (disp) == CONST
6222		  && GET_CODE (XEXP (disp, 0)) == PLUS
6223		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6224		{
6225		  offset = XEXP (XEXP (disp, 0), 1);
6226		  disp = gen_rtx_CONST (VOIDmode,
6227					XEXP (XEXP (disp, 0), 0));
6228		}
6229
6230	      if (flag_pic)
6231		output_pic_addr_const (file, disp, 0);
6232	      else if (GET_CODE (disp) == LABEL_REF)
6233		output_asm_label (disp);
6234	      else if (GET_CODE (disp) == CONST_INT)
6235		offset = disp;
6236	      else
6237		output_addr_const (file, disp);
6238	    }
6239
6240	  putc ('[', file);
6241	  if (base)
6242	    {
6243	      PRINT_REG (base, 0, file);
6244	      if (offset)
6245		{
6246		  if (INTVAL (offset) >= 0)
6247		    putc ('+', file);
6248		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6249		}
6250	    }
6251	  else if (offset)
6252	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6253	  else
6254	    putc ('0', file);
6255
6256	  if (index)
6257	    {
6258	      putc ('+', file);
6259	      PRINT_REG (index, 0, file);
6260	      if (scale != 1)
6261		fprintf (file, "*%d", scale);
6262	    }
6263	  putc (']', file);
6264	}
6265    }
6266}
6267
6268/* Split one or more DImode RTL references into pairs of SImode
6269   references.  The RTL can be REG, offsettable MEM, integer constant, or
6270   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
6271   split and "num" is its length.  lo_half and hi_half are output arrays
6272   that parallel "operands".  */
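
/* For instance (illustration only): a DImode register is split into
   its two SImode subregs, while (mem:DI (reg)) becomes
   (mem:SI (reg)) and (mem:SI (plus (reg) (const_int 4))) -- the low
   word at offset 0 and the high word at offset 4, as the i386 is
   little endian.  */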
6273
6274void
6275split_di (operands, num, lo_half, hi_half)
6276     rtx operands[];
6277     int num;
6278     rtx lo_half[], hi_half[];
6279{
6280  while (num--)
6281    {
6282      rtx op = operands[num];
6283
6284      /* simplify_subreg refuses to split volatile memory addresses,
6285         but we still have to handle them.  */
6286      if (GET_CODE (op) == MEM)
6287	{
6288	  lo_half[num] = adjust_address (op, SImode, 0);
6289	  hi_half[num] = adjust_address (op, SImode, 4);
6290	}
6291      else
6292	{
6293	  lo_half[num] = simplify_gen_subreg (SImode, op,
6294					      GET_MODE (op) == VOIDmode
6295					      ? DImode : GET_MODE (op), 0);
6296	  hi_half[num] = simplify_gen_subreg (SImode, op,
6297					      GET_MODE (op) == VOIDmode
6298					      ? DImode : GET_MODE (op), 4);
6299	}
6300    }
6301}
6302/* Split one or more TImode RTL references into pairs of DImode
6303   references.  The RTL can be REG, offsettable MEM, integer constant, or
6304   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
6305   split and "num" is its length.  lo_half and hi_half are output arrays
6306   that parallel "operands".  */
6307
6308void
6309split_ti (operands, num, lo_half, hi_half)
6310     rtx operands[];
6311     int num;
6312     rtx lo_half[], hi_half[];
6313{
6314  while (num--)
6315    {
6316      rtx op = operands[num];
6317
6318      /* simplify_subreg refuses to split volatile memory addresses, but we
6319         still have to handle them.  */
6320      if (GET_CODE (op) == MEM)
6321	{
6322	  lo_half[num] = adjust_address (op, DImode, 0);
6323	  hi_half[num] = adjust_address (op, DImode, 8);
6324	}
6325      else
6326	{
6327	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6328	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6329	}
6330    }
6331}
6332
6333/* Output code to perform a 387 binary operation in INSN, one of PLUS,
6334   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
6335   is the expression of the binary operation.  The output may either be
6336   emitted here, or returned to the caller, like all output_* functions.
6337
6338   There is no guarantee that the operands are the same mode, as they
6339   might be within FLOAT or FLOAT_EXTEND expressions.  */
6340
6341#ifndef SYSV386_COMPAT
6342/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
6343   wants to fix the assemblers because that causes incompatibility
6344   with gcc.  No-one wants to fix gcc because that causes
6345   incompatibility with assemblers...  You can use the option of
6346   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
6347#define SYSV386_COMPAT 1
6348#endif
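
/* Concretely (an illustration, not part of the original source): when
   the destination is not %st(0), an SVR3.2-style assembler encodes the
   AT&T mnemonic `fsub %st, %st(1)' as the operation Intel spells
   `fsubr st(1), st', and vice versa.  The {AT&T|Intel} templates below
   therefore pick opposite r/non-r spellings for the two dialects when
   SYSV386_COMPAT is set.  */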
6349
6350const char *
6351output_387_binary_op (insn, operands)
6352     rtx insn;
6353     rtx *operands;
6354{
6355  static char buf[30];
6356  const char *p;
6357  const char *ssep;
6358  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6359
6360#ifdef ENABLE_CHECKING
6361  /* Even if we do not want to check the inputs, this documents input
6362     constraints, which helps in understanding the following code.  */
6363  if (STACK_REG_P (operands[0])
6364      && ((REG_P (operands[1])
6365	   && REGNO (operands[0]) == REGNO (operands[1])
6366	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6367	  || (REG_P (operands[2])
6368	      && REGNO (operands[0]) == REGNO (operands[2])
6369	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6370      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6371    ; /* ok */
6372  else if (!is_sse)
6373    abort ();
6374#endif
6375
6376  switch (GET_CODE (operands[3]))
6377    {
6378    case PLUS:
6379      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6380	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6381	p = "fiadd";
6382      else
6383	p = "fadd";
6384      ssep = "add";
6385      break;
6386
6387    case MINUS:
6388      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6389	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6390	p = "fisub";
6391      else
6392	p = "fsub";
6393      ssep = "sub";
6394      break;
6395
6396    case MULT:
6397      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6398	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6399	p = "fimul";
6400      else
6401	p = "fmul";
6402      ssep = "mul";
6403      break;
6404
6405    case DIV:
6406      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6407	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6408	p = "fidiv";
6409      else
6410	p = "fdiv";
6411      ssep = "div";
6412      break;
6413
6414    default:
6415      abort ();
6416    }
6417
6418  if (is_sse)
6419   {
6420      strcpy (buf, ssep);
6421      if (GET_MODE (operands[0]) == SFmode)
6422	strcat (buf, "ss\t{%2, %0|%0, %2}");
6423      else
6424	strcat (buf, "sd\t{%2, %0|%0, %2}");
6425      return buf;
6426   }
6427  strcpy (buf, p);
6428
6429  switch (GET_CODE (operands[3]))
6430    {
6431    case MULT:
6432    case PLUS:
6433      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6434	{
6435	  rtx temp = operands[2];
6436	  operands[2] = operands[1];
6437	  operands[1] = temp;
6438	}
6439
6440      /* We now know that operands[0] == operands[1].  */
6441
6442      if (GET_CODE (operands[2]) == MEM)
6443	{
6444	  p = "%z2\t%2";
6445	  break;
6446	}
6447
6448      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6449	{
6450	  if (STACK_TOP_P (operands[0]))
6451	    /* How is it that we are storing to a dead operand[2]?
6452	       Well, presumably operands[1] is dead too.  We can't
6453	       store the result to st(0) as st(0) gets popped on this
6454	       instruction.  Instead store to operands[2] (which I
6455	       think has to be st(1)).  st(1) will be popped later.
6456	       gcc <= 2.8.1 didn't have this check and generated
6457	       assembly code that the Unixware assembler rejected.  */
6458	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
6459	  else
6460	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
6461	  break;
6462	}
6463
6464      if (STACK_TOP_P (operands[0]))
6465	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
6466      else
6467	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
6468      break;
6469
6470    case MINUS:
6471    case DIV:
6472      if (GET_CODE (operands[1]) == MEM)
6473	{
6474	  p = "r%z1\t%1";
6475	  break;
6476	}
6477
6478      if (GET_CODE (operands[2]) == MEM)
6479	{
6480	  p = "%z2\t%2";
6481	  break;
6482	}
6483
6484      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6485	{
6486#if SYSV386_COMPAT
6487	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6488	     derived assemblers, confusingly reverse the direction of
6489	     the operation for fsub{r} and fdiv{r} when the
6490	     destination register is not st(0).  The Intel assembler
6491	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
6492	     figure out what the hardware really does.  */
6493	  if (STACK_TOP_P (operands[0]))
6494	    p = "{p\t%0, %2|rp\t%2, %0}";
6495	  else
6496	    p = "{rp\t%2, %0|p\t%0, %2}";
6497#else
6498	  if (STACK_TOP_P (operands[0]))
6499	    /* As above for fmul/fadd, we can't store to st(0).  */
6500	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
6501	  else
6502	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
6503#endif
6504	  break;
6505	}
6506
6507      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6508	{
6509#if SYSV386_COMPAT
6510	  if (STACK_TOP_P (operands[0]))
6511	    p = "{rp\t%0, %1|p\t%1, %0}";
6512	  else
6513	    p = "{p\t%1, %0|rp\t%0, %1}";
6514#else
6515	  if (STACK_TOP_P (operands[0]))
6516	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
6517	  else
6518	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
6519#endif
6520	  break;
6521	}
6522
6523      if (STACK_TOP_P (operands[0]))
6524	{
6525	  if (STACK_TOP_P (operands[1]))
6526	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
6527	  else
6528	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
6529	  break;
6530	}
6531      else if (STACK_TOP_P (operands[1]))
6532	{
6533#if SYSV386_COMPAT
6534	  p = "{\t%1, %0|r\t%0, %1}";
6535#else
6536	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
6537#endif
6538	}
6539      else
6540	{
6541#if SYSV386_COMPAT
6542	  p = "{r\t%2, %0|\t%0, %2}";
6543#else
6544	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
6545#endif
6546	}
6547      break;
6548
6549    default:
6550      abort ();
6551    }
6552
6553  strcat (buf, p);
6554  return buf;
6555}
6556
6557/* Output code to initialize the control word copies used by the
6558   trunc?f?i patterns.  NORMAL receives the current control word, while
6559   ROUND_DOWN receives a copy whose rounding control bits force truncation.  */
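/* Background for the 0xc00 constant below (not from the original
   source): bits 10-11 of the 387 control word select the rounding
   mode -- 00 round to nearest, 01 round down, 10 round up, 11 round
   toward zero.  ORing in 0xc00 therefore forces truncation, which is
   what C's float to integer conversion requires.  */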
6560void
6561emit_i387_cw_initialization (normal, round_down)
6562     rtx normal, round_down;
6563{
6564  rtx reg = gen_reg_rtx (HImode);
6565
6566  emit_insn (gen_x86_fnstcw_1 (normal));
6567  emit_move_insn (reg, normal);
6568  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6569      && !TARGET_64BIT)
6570    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6571  else
6572    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6573  emit_move_insn (round_down, reg);
6574}
6575
6576/* Output code for INSN to convert a float to a signed int.  OPERANDS
6577   are the insn operands.  The output may be [HSD]Imode and the input
6578   operand may be [SDX]Fmode.  */
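
/* For SImode output the emitted sequence looks like (illustrative):

     fldcw	%3	-- switch to the truncating control word
     fistpl	%0	-- store the truncated integer
     fldcw	%2	-- restore the original control word

   with an additional `fld %st(0)' first to duplicate the top of stack
   when DImode forces the popping form but the value is still live.  */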
6579
6580const char *
6581output_fix_trunc (insn, operands)
6582     rtx insn;
6583     rtx *operands;
6584{
6585  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6586  int dimode_p = GET_MODE (operands[0]) == DImode;
6587
6588  /* Jump through a hoop or two for DImode, since the hardware has no
6589     non-popping instruction.  We used to do this a different way, but
6590     that was somewhat fragile and broke with post-reload splitters.  */
6591  if (dimode_p && !stack_top_dies)
6592    output_asm_insn ("fld\t%y1", operands);
6593
6594  if (!STACK_TOP_P (operands[1]))
6595    abort ();
6596
6597  if (GET_CODE (operands[0]) != MEM)
6598    abort ();
6599
6600  output_asm_insn ("fldcw\t%3", operands);
6601  if (stack_top_dies || dimode_p)
6602    output_asm_insn ("fistp%z0\t%0", operands);
6603  else
6604    output_asm_insn ("fist%z0\t%0", operands);
6605  output_asm_insn ("fldcw\t%2", operands);
6606
6607  return "";
6608}
6609
6610/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
6611   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
6612   when fucom should be used.  */
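
/* For orientation (illustration only): eflags_p == 1 produces e.g.

     fcomi	%st(1), %st	-- sets the flags directly

   while eflags_p == 2 goes through the FPU status word,

     fcom	%st(1)
     fnstsw	%ax		-- copy the status word to %ax

   after which the caller tests the flags (typically via sahf).  */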
6613
6614const char *
6615output_fp_compare (insn, operands, eflags_p, unordered_p)
6616     rtx insn;
6617     rtx *operands;
6618     int eflags_p, unordered_p;
6619{
6620  int stack_top_dies;
6621  rtx cmp_op0 = operands[0];
6622  rtx cmp_op1 = operands[1];
6623  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6624
6625  if (eflags_p == 2)
6626    {
6627      cmp_op0 = cmp_op1;
6628      cmp_op1 = operands[2];
6629    }
6630  if (is_sse)
6631    {
6632      if (GET_MODE (operands[0]) == SFmode)
6633	if (unordered_p)
6634	  return "ucomiss\t{%1, %0|%0, %1}";
6635	else
6636	  return "comiss\t{%1, %0|%0, %1}";
6637      else
6638	if (unordered_p)
6639	  return "ucomisd\t{%1, %0|%0, %1}";
6640	else
6641	  return "comisd\t{%1, %0|%0, %1}";
6642    }
6643
6644  if (! STACK_TOP_P (cmp_op0))
6645    abort ();
6646
6647  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6648
6649  if (STACK_REG_P (cmp_op1)
6650      && stack_top_dies
6651      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6652      && REGNO (cmp_op1) != FIRST_STACK_REG)
6653    {
6654      /* If both the top of the 387 stack and the other operand die, and
6655	 the other operand is a stack register other than st(0), then this
6656	 must be a `fcompp' float compare.  */
6657
6658      if (eflags_p == 1)
6659	{
6660	  /* There is no double popping fcomi variant.  Fortunately,
6661	     eflags is immune from the fstp's cc clobbering.  */
6662	  if (unordered_p)
6663	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6664	  else
6665	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6666	  return "fstp\t%y0";
6667	}
6668      else
6669	{
6670	  if (eflags_p == 2)
6671	    {
6672	      if (unordered_p)
6673		return "fucompp\n\tfnstsw\t%0";
6674	      else
6675		return "fcompp\n\tfnstsw\t%0";
6676	    }
6677	  else
6678	    {
6679	      if (unordered_p)
6680		return "fucompp";
6681	      else
6682		return "fcompp";
6683	    }
6684	}
6685    }
6686  else
6687    {
6688      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
6689
6690      static const char * const alt[24] =
6691      {
6692	"fcom%z1\t%y1",
6693	"fcomp%z1\t%y1",
6694	"fucom%z1\t%y1",
6695	"fucomp%z1\t%y1",
6696
6697	"ficom%z1\t%y1",
6698	"ficomp%z1\t%y1",
6699	NULL,
6700	NULL,
6701
6702	"fcomi\t{%y1, %0|%0, %y1}",
6703	"fcomip\t{%y1, %0|%0, %y1}",
6704	"fucomi\t{%y1, %0|%0, %y1}",
6705	"fucomip\t{%y1, %0|%0, %y1}",
6706
6707	NULL,
6708	NULL,
6709	NULL,
6710	NULL,
6711
6712	"fcom%z2\t%y2\n\tfnstsw\t%0",
6713	"fcomp%z2\t%y2\n\tfnstsw\t%0",
6714	"fucom%z2\t%y2\n\tfnstsw\t%0",
6715	"fucomp%z2\t%y2\n\tfnstsw\t%0",
6716
6717	"ficom%z2\t%y2\n\tfnstsw\t%0",
6718	"ficomp%z2\t%y2\n\tfnstsw\t%0",
6719	NULL,
6720	NULL
6721      };
6722
6723      int mask;
6724      const char *ret;
6725
6726      mask  = eflags_p << 3;
6727      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6728      mask |= unordered_p << 1;
6729      mask |= stack_top_dies;
6730
6731      if (mask >= 24)
6732	abort ();
6733      ret = alt[mask];
6734      if (ret == NULL)
6735	abort ();
6736
6737      return ret;
6738    }
6739}
6740
6741void
6742ix86_output_addr_vec_elt (file, value)
6743     FILE *file;
6744     int value;
6745{
6746  const char *directive = ASM_LONG;
6747
6748  if (TARGET_64BIT)
6749    {
6750#ifdef ASM_QUAD
6751      directive = ASM_QUAD;
6752#else
6753      abort ();
6754#endif
6755    }
6756
6757  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6758}
6759
6760void
6761ix86_output_addr_diff_elt (file, value, rel)
6762     FILE *file;
6763     int value, rel;
6764{
6765  if (TARGET_64BIT)
6766    fprintf (file, "%s%s%d-.+(.-%s%d)\n",
6767	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
6768  else if (HAVE_AS_GOTOFF_IN_DATA)
6769    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6770  else
6771    asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6772		 ASM_LONG, LPREFIX, value);
6773}
6774
6775/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6776   for the target.  */
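
/* E.g. (illustration): clearing %eax becomes `xorl %eax, %eax' -- two
   bytes, but it clobbers the flags, hence the CLOBBER attached below --
   unless TARGET_USE_MOV0 asks for the flag-preserving
   `movl $0, %eax'.  */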
6777
6778void
6779ix86_expand_clear (dest)
6780     rtx dest;
6781{
6782  rtx tmp;
6783
6784  /* We play register width games, which are only valid after reload.  */
6785  if (!reload_completed)
6786    abort ();
6787
6788  /* Avoid HImode and its attendant prefix byte.  */
6789  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6790    dest = gen_rtx_REG (SImode, REGNO (dest));
6791
6792  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6793
6794  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
6795  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6796    {
6797      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6798      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6799    }
6800
6801  emit_insn (tmp);
6802}
6803
6804void
6805ix86_expand_move (mode, operands)
6806     enum machine_mode mode;
6807     rtx operands[];
6808{
6809  int strict = (reload_in_progress || reload_completed);
6810  rtx insn;
6811
6812  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6813    {
6814      /* Emit insns to move operands[1] into operands[0].  */
6815
6816      if (GET_CODE (operands[0]) == MEM)
6817	operands[1] = force_reg (Pmode, operands[1]);
6818      else
6819	{
6820	  rtx temp = operands[0];
6821	  if (GET_CODE (temp) != REG)
6822	    temp = gen_reg_rtx (Pmode);
6823	  temp = legitimize_pic_address (operands[1], temp);
6824	  if (temp == operands[0])
6825	    return;
6826	  operands[1] = temp;
6827	}
6828    }
6829  else
6830    {
6831      if (GET_CODE (operands[0]) == MEM
6832	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6833	      || !push_operand (operands[0], mode))
6834	  && GET_CODE (operands[1]) == MEM)
6835	operands[1] = force_reg (mode, operands[1]);
6836
6837      if (push_operand (operands[0], mode)
6838	  && ! general_no_elim_operand (operands[1], mode))
6839	operands[1] = copy_to_mode_reg (mode, operands[1]);
6840
6841      /* Force large constants in 64bit compilation into a register
6842	 to get them CSEed.  */
6843      if (TARGET_64BIT && mode == DImode
6844	  && immediate_operand (operands[1], mode)
6845	  && !x86_64_zero_extended_value (operands[1])
6846	  && !register_operand (operands[0], mode)
6847	  && optimize && !reload_completed && !reload_in_progress)
6848	operands[1] = copy_to_mode_reg (mode, operands[1]);
6849
6850      if (FLOAT_MODE_P (mode))
6851	{
6852	  /* If we are loading a floating point constant to a register,
6853	     force the value to memory now, since we'll get better code
6854	     out of the back end.  */
6855
6856	  if (strict)
6857	    ;
6858	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
6859		   && register_operand (operands[0], mode))
6860	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6861	}
6862    }
6863
6864  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6865
6866  emit_insn (insn);
6867}
6868
6869void
6870ix86_expand_vector_move (mode, operands)
6871     enum machine_mode mode;
6872     rtx operands[];
6873{
6874  /* Force constants other than zero into memory.  We do not know how
6875     the instructions used to build constants modify the upper 64 bits
6876     of the register; once we have that information we may be able
6877     to handle some of them more efficiently.  */
6878  if ((reload_in_progress | reload_completed) == 0
6879      && register_operand (operands[0], mode)
6880      && CONSTANT_P (operands[1]))
6881    {
6882      rtx addr = gen_reg_rtx (Pmode);
6883      emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
6884      operands[1] = gen_rtx_MEM (mode, addr);
6885    }
6886
6887  /* If neither operand is a register, force operand1 into one.  */
6888  if ((reload_in_progress | reload_completed) == 0
6889      && !register_operand (operands[0], mode)
6890      && !register_operand (operands[1], mode))
6891    {
6892      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
6893      emit_move_insn (operands[0], temp);
6894      return;
6895    }
6896
6897  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
6898}
6899
6900/* Attempt to expand a binary operator.  Make the expansion closer to the
6901   actual machine than just general_operand, which would allow 3 separate
6902   memory references (one output, two input) in a single insn.  */
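
/* For example (an illustration, not original text): for `a += b' with
   `a' in memory and `b' in a register, the matching memory operand is
   kept and a single `addl %ebx, a' results; for `a = b + c' with all
   three in memory, the sources are first forced into registers.  */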
6903
6904void
6905ix86_expand_binary_operator (code, mode, operands)
6906     enum rtx_code code;
6907     enum machine_mode mode;
6908     rtx operands[];
6909{
6910  int matching_memory;
6911  rtx src1, src2, dst, op, clob;
6912
6913  dst = operands[0];
6914  src1 = operands[1];
6915  src2 = operands[2];
6916
6917  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6918  if (GET_RTX_CLASS (code) == 'c'
6919      && (rtx_equal_p (dst, src2)
6920	  || immediate_operand (src1, mode)))
6921    {
6922      rtx temp = src1;
6923      src1 = src2;
6924      src2 = temp;
6925    }
6926
6927  /* If the destination is memory, and we do not have matching source
6928     operands, do things in registers.  */
6929  matching_memory = 0;
6930  if (GET_CODE (dst) == MEM)
6931    {
6932      if (rtx_equal_p (dst, src1))
6933	matching_memory = 1;
6934      else if (GET_RTX_CLASS (code) == 'c'
6935	       && rtx_equal_p (dst, src2))
6936	matching_memory = 2;
6937      else
6938	dst = gen_reg_rtx (mode);
6939    }
6940
6941  /* Both source operands cannot be in memory.  */
6942  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6943    {
6944      if (matching_memory != 2)
6945	src2 = force_reg (mode, src2);
6946      else
6947	src1 = force_reg (mode, src1);
6948    }
6949
6950  /* If the operation is not commutative, source 1 cannot be a constant
6951     or non-matching memory.  */
6952  if ((CONSTANT_P (src1)
6953       || (!matching_memory && GET_CODE (src1) == MEM))
6954      && GET_RTX_CLASS (code) != 'c')
6955    src1 = force_reg (mode, src1);
6956
6957  /* If optimizing, copy to regs to improve CSE */
6958  if (optimize && ! no_new_pseudos)
6959    {
6960      if (GET_CODE (dst) == MEM)
6961	dst = gen_reg_rtx (mode);
6962      if (GET_CODE (src1) == MEM)
6963	src1 = force_reg (mode, src1);
6964      if (GET_CODE (src2) == MEM)
6965	src2 = force_reg (mode, src2);
6966    }
6967
6968  /* Emit the instruction.  */
6969
6970  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6971  if (reload_in_progress)
6972    {
6973      /* Reload doesn't know about the flags register, and doesn't know that
6974         it doesn't want to clobber it.  We can only do this with PLUS.  */
6975      if (code != PLUS)
6976	abort ();
6977      emit_insn (op);
6978    }
6979  else
6980    {
6981      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6982      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6983    }
6984
6985  /* Fix up the destination if needed.  */
6986  if (dst != operands[0])
6987    emit_move_insn (operands[0], dst);
6988}
6989
6990/* Return TRUE or FALSE depending on whether the binary operator meets the
6991   appropriate constraints.  */
6992
6993int
6994ix86_binary_operator_ok (code, mode, operands)
6995     enum rtx_code code;
6996     enum machine_mode mode ATTRIBUTE_UNUSED;
6997     rtx operands[3];
6998{
6999  /* Both source operands cannot be in memory.  */
7000  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7001    return 0;
7002  /* If the operation is not commutable, source 1 cannot be a constant.  */
7003  /* If the operation is not commutative, source 1 cannot be a constant.  */
7004    return 0;
7005  /* If the destination is memory, we must have a matching source operand.  */
7006  if (GET_CODE (operands[0]) == MEM
7007      && ! (rtx_equal_p (operands[0], operands[1])
7008	    || (GET_RTX_CLASS (code) == 'c'
7009		&& rtx_equal_p (operands[0], operands[2]))))
7010    return 0;
7011  /* If the operation is not commutative and source 1 is memory, we must
7012     have a matching destination.  */
7013  if (GET_CODE (operands[1]) == MEM
7014      && GET_RTX_CLASS (code) != 'c'
7015      && ! rtx_equal_p (operands[0], operands[1]))
7016    return 0;
7017  return 1;
7018}
7019
7020/* Attempt to expand a unary operator.  Make the expansion closer to the
7021   actual machine than just general_operand, which would allow 2 separate
7022   memory references (one output, one input) in a single insn.  */
7023
7024void
7025ix86_expand_unary_operator (code, mode, operands)
7026     enum rtx_code code;
7027     enum machine_mode mode;
7028     rtx operands[];
7029{
7030  int matching_memory;
7031  rtx src, dst, op, clob;
7032
7033  dst = operands[0];
7034  src = operands[1];
7035
7036  /* If the destination is memory, and we do not have matching source
7037     operands, do things in registers.  */
7038  matching_memory = 0;
7039  if (GET_CODE (dst) == MEM)
7040    {
7041      if (rtx_equal_p (dst, src))
7042	matching_memory = 1;
7043      else
7044	dst = gen_reg_rtx (mode);
7045    }
7046
7047  /* When the source operand is memory, the destination must match.  */
7048  if (!matching_memory && GET_CODE (src) == MEM)
7049    src = force_reg (mode, src);
7050
7051  /* If optimizing, copy to regs to improve CSE.  */
7052  if (optimize && ! no_new_pseudos)
7053    {
7054      if (GET_CODE (dst) == MEM)
7055	dst = gen_reg_rtx (mode);
7056      if (GET_CODE (src) == MEM)
7057	src = force_reg (mode, src);
7058    }
7059
7060  /* Emit the instruction.  */
7061
7062  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7063  if (reload_in_progress || code == NOT)
7064    {
7065      /* Reload doesn't know about the flags register, and doesn't know that
7066         it doesn't want to clobber it.  */
7067      if (code != NOT)
7068        abort ();
7069      emit_insn (op);
7070    }
7071  else
7072    {
7073      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7074      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7075    }
7076
7077  /* Fix up the destination if needed.  */
7078  if (dst != operands[0])
7079    emit_move_insn (operands[0], dst);
7080}
7081
7082/* Return TRUE or FALSE depending on whether the unary operator meets the
7083   appropriate constraints.  */
7084
7085int
7086ix86_unary_operator_ok (code, mode, operands)
7087     enum rtx_code code ATTRIBUTE_UNUSED;
7088     enum machine_mode mode ATTRIBUTE_UNUSED;
7089     rtx operands[2] ATTRIBUTE_UNUSED;
7090{
7091  /* If one of the operands is memory, source and destination must match.  */
7092  if ((GET_CODE (operands[0]) == MEM
7093       || GET_CODE (operands[1]) == MEM)
7094      && ! rtx_equal_p (operands[0], operands[1]))
7095    return FALSE;
7096  return TRUE;
7097}
7098
7099/* Return TRUE or FALSE depending on whether the first SET in INSN
7100   has source and destination with matching CC modes, and whether the
7101   CC mode is at least as constrained as REQ_MODE.  */
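/* A sketch of the intent of the switch below: the flags modes run
   roughly from least to most specialized as

	CCmode, CCGCmode, CCGOCmode, CCZmode

   and a SET_DEST mode matches REQ_MODE only when it is at least as
   specialized, with CCNOmode handled specially (it also satisfies a
   CCmode request for compares against zero).  */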
7102
7103int
7104ix86_match_ccmode (insn, req_mode)
7105     rtx insn;
7106     enum machine_mode req_mode;
7107{
7108  rtx set;
7109  enum machine_mode set_mode;
7110
7111  set = PATTERN (insn);
7112  if (GET_CODE (set) == PARALLEL)
7113    set = XVECEXP (set, 0, 0);
7114  if (GET_CODE (set) != SET)
7115    abort ();
7116  if (GET_CODE (SET_SRC (set)) != COMPARE)
7117    abort ();
7118
7119  set_mode = GET_MODE (SET_DEST (set));
7120  switch (set_mode)
7121    {
7122    case CCNOmode:
7123      if (req_mode != CCNOmode
7124	  && (req_mode != CCmode
7125	      || XEXP (SET_SRC (set), 1) != const0_rtx))
7126	return 0;
7127      break;
7128    case CCmode:
7129      if (req_mode == CCGCmode)
7130	return 0;
7131      /* FALLTHRU */
7132    case CCGCmode:
7133      if (req_mode == CCGOCmode || req_mode == CCNOmode)
7134	return 0;
7135      /* FALLTHRU */
7136    case CCGOCmode:
7137      if (req_mode == CCZmode)
7138	return 0;
7139      /* FALLTHRU */
7140    case CCZmode:
7141      break;
7142
7143    default:
7144      abort ();
7145    }
7146
7147  return (GET_MODE (SET_SRC (set)) == set_mode);
7148}
7149
7150/* Generate insn patterns to do an integer compare of OPERANDS.  */
7151
7152static rtx
7153ix86_expand_int_compare (code, op0, op1)
7154     enum rtx_code code;
7155     rtx op0, op1;
7156{
7157  enum machine_mode cmpmode;
7158  rtx tmp, flags;
7159
7160  cmpmode = SELECT_CC_MODE (code, op0, op1);
7161  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7162
7163  /* This is very simple, but making the interface the same as in the
7164     FP case makes the rest of the code easier.  */
7165  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7166  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7167
7168  /* Return the test that should be put into the flags user, i.e.
7169     the bcc, scc, or cmov instruction.  */
7170  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7171}
7172
7173/* Figure out whether to use ordered or unordered fp comparisons.
7174   Return the appropriate mode to use.  */
7175
7176enum machine_mode
7177ix86_fp_compare_mode (code)
7178     enum rtx_code code ATTRIBUTE_UNUSED;
7179{
7180  /* ??? In order to make all comparisons reversible, we do all comparisons
7181     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
7182     all forms of trapping and nontrapping comparisons, we can make inequality
7183     comparisons trapping again, since that results in better code when using
7184     FCOM based compares.  */
7185  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7186}
7187
7188enum machine_mode
7189ix86_cc_mode (code, op0, op1)
7190     enum rtx_code code;
7191     rtx op0, op1;
7192{
7193  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7194    return ix86_fp_compare_mode (code);
7195  switch (code)
7196    {
7197      /* Only zero flag is needed.  */
7198    case EQ:			/* ZF=0 */
7199    case NE:			/* ZF!=0 */
7200      return CCZmode;
7201      /* Codes needing carry flag.  */
7202    case GEU:			/* CF=0 */
7203    case GTU:			/* CF=0 & ZF=0 */
7204    case LTU:			/* CF=1 */
7205    case LEU:			/* CF=1 | ZF=1 */
7206      return CCmode;
7207      /* Codes possibly doable only with sign flag when
7208         comparing against zero.  */
7209    case GE:			/* SF=OF   or   SF=0 */
7210    case LT:			/* SF<>OF  or   SF=1 */
7211      if (op1 == const0_rtx)
7212	return CCGOCmode;
7213      else
7214	/* For other cases the carry flag is not required.  */
7215	return CCGCmode;
7216      /* Codes doable only with the sign flag when comparing
7217         against zero, but for which we lack a jump instruction,
7218         so we need to use relational tests against overflow,
7219         which thus needs to be zero.  */
7220    case GT:			/* ZF=0 & SF=OF */
7221    case LE:			/* ZF=1 | SF<>OF */
7222      if (op1 == const0_rtx)
7223	return CCNOmode;
7224      else
7225	return CCGCmode;
7226      /* The strcmp pattern does (use flags), and combine may ask us for a
7227	 proper mode.  */
7228    case USE:
7229      return CCmode;
7230    default:
7231      abort ();
7232    }
7233}
7234
7235/* Return true if we should use an FCOMI instruction for this fp comparison.  */
7236
7237int
7238ix86_use_fcomi_compare (code)
7239     enum rtx_code code ATTRIBUTE_UNUSED;
7240{
7241  enum rtx_code swapped_code = swap_condition (code);
7242  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7243	  || (ix86_fp_comparison_cost (swapped_code)
7244	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
7245}
7246
7247/* Swap, force into registers, or otherwise massage the two operands
7248   of an fp comparison.  The operands are updated in place; the new
7249   comparison code is returned.  */
7250
7251static enum rtx_code
7252ix86_prepare_fp_compare_args (code, pop0, pop1)
7253     enum rtx_code code;
7254     rtx *pop0, *pop1;
7255{
7256  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7257  rtx op0 = *pop0, op1 = *pop1;
7258  enum machine_mode op_mode = GET_MODE (op0);
7259  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7260
7261  /* All of the unordered compare instructions work only on registers,
7262     as do the XFmode compare instructions and the fcomi compare
7263     instructions.  */
7264
7265  if (!is_sse
7266      && (fpcmp_mode == CCFPUmode
7267	  || op_mode == XFmode
7268	  || op_mode == TFmode
7269	  || ix86_use_fcomi_compare (code)))
7270    {
7271      op0 = force_reg (op_mode, op0);
7272      op1 = force_reg (op_mode, op1);
7273    }
7274  else
7275    {
7276      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
7277	 things around if they appear profitable, otherwise force op0
7278	 into a register.  */
7279
7280      if (standard_80387_constant_p (op0) == 0
7281	  || (GET_CODE (op0) == MEM
7282	      && ! (standard_80387_constant_p (op1) == 0
7283		    || GET_CODE (op1) == MEM)))
7284	{
7285	  rtx tmp;
7286	  tmp = op0, op0 = op1, op1 = tmp;
7287	  code = swap_condition (code);
7288	}
7289
7290      if (GET_CODE (op0) != REG)
7291	op0 = force_reg (op_mode, op0);
7292
7293      if (CONSTANT_P (op1))
7294	{
7295	  if (standard_80387_constant_p (op1))
7296	    op1 = force_reg (op_mode, op1);
7297	  else
7298	    op1 = validize_mem (force_const_mem (op_mode, op1));
7299	}
7300    }
7301
7302  /* Try to rearrange the comparison to make it cheaper.  */
7303  if (ix86_fp_comparison_cost (code)
7304      > ix86_fp_comparison_cost (swap_condition (code))
7305      && (GET_CODE (op1) == REG || !no_new_pseudos))
7306    {
7307      rtx tmp;
7308      tmp = op0, op0 = op1, op1 = tmp;
7309      code = swap_condition (code);
7310      if (GET_CODE (op0) != REG)
7311	op0 = force_reg (op_mode, op0);
7312    }
7313
7314  *pop0 = op0;
7315  *pop1 = op1;
7316  return code;
7317}
7318
7319/* Convert a comparison code we use to represent an FP comparison to the
7320   integer code that will result in a proper branch.  Return UNKNOWN if no
7321   such code is available.  */
7322static enum rtx_code
7323ix86_fp_compare_code_to_integer (code)
7324     enum rtx_code code;
7325{
7326  switch (code)
7327    {
7328    case GT:
7329      return GTU;
7330    case GE:
7331      return GEU;
7332    case ORDERED:
7333    case UNORDERED:
7334      return code;
7335      break;
7336    case UNEQ:
7337      return EQ;
7338      break;
7339    case UNLT:
7340      return LTU;
7341      break;
7342    case UNLE:
7343      return LEU;
7344      break;
7345    case LTGT:
7346      return NE;
7347      break;
7348    default:
7349      return UNKNOWN;
7350    }
7351}
7352
7353/* Split comparison code CODE into comparisons we can do using branch
7354   instructions.  BYPASS_CODE is the comparison code for the branch that
7355   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
7356   is not required, its value is set to NIL.
7357   We never require more than two branches.  */
7358static void
7359ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7360     enum rtx_code code, *bypass_code, *first_code, *second_code;
7361{
7362  *first_code = code;
7363  *bypass_code = NIL;
7364  *second_code = NIL;
7365
7366  /* The fcomi comparison sets flags as follows:
7367
7368     cmp    ZF PF CF
7369     >      0  0  0
7370     <      0  0  1
7371     =      1  0  0
7372     un     1  1  1 */
7373
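  /* Example (a sketch of how callers consume the split): for CODE == LT
     under TARGET_IEEE_FP we return *bypass_code = UNORDERED and
     *first_code = UNLT, so a branch expands roughly as

	jp	skip		; PF set: operands unordered, bypass
	jb	target		; CF=1, i.e. a < b for ordered operands
     skip:

     where the bypass jump goes around the test that a NaN would
     otherwise satisfy spuriously.  */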
7374  switch (code)
7375    {
7376    case GT:			/* GTU - CF=0 & ZF=0 */
7377    case GE:			/* GEU - CF=0 */
7378    case ORDERED:		/* PF=0 */
7379    case UNORDERED:		/* PF=1 */
7380    case UNEQ:			/* EQ - ZF=1 */
7381    case UNLT:			/* LTU - CF=1 */
7382    case UNLE:			/* LEU - CF=1 | ZF=1 */
7383    case LTGT:			/* EQ - ZF=0 */
7384      break;
7385    case LT:			/* LTU - CF=1 - fails on unordered */
7386      *first_code = UNLT;
7387      *bypass_code = UNORDERED;
7388      break;
7389    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
7390      *first_code = UNLE;
7391      *bypass_code = UNORDERED;
7392      break;
7393    case EQ:			/* EQ - ZF=1 - fails on unordered */
7394      *first_code = UNEQ;
7395      *bypass_code = UNORDERED;
7396      break;
7397    case NE:			/* NE - ZF=0 - fails on unordered */
7398      *first_code = LTGT;
7399      *second_code = UNORDERED;
7400      break;
7401    case UNGE:			/* GEU - CF=0 - fails on unordered */
7402      *first_code = GE;
7403      *second_code = UNORDERED;
7404      break;
7405    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
7406      *first_code = GT;
7407      *second_code = UNORDERED;
7408      break;
7409    default:
7410      abort ();
7411    }
7412  if (!TARGET_IEEE_FP)
7413    {
7414      *second_code = NIL;
7415      *bypass_code = NIL;
7416    }
7417}
7418
7419/* Return the cost of a comparison done with fcom + arithmetic ops on AX.
7420   All the following functions use the number of instructions as a cost
7421   metric.  In the future this should be tweaked to compute bytes for
7422   optimize_size and to account for instruction performance per CPU.  */
7423static int
7424ix86_fp_comparison_arithmetics_cost (code)
7425     enum rtx_code code;
7426{
7427  if (!TARGET_IEEE_FP)
7428    return 4;
7429  /* The cost of code output by ix86_expand_fp_compare.  */
7430  switch (code)
7431    {
7432    case UNLE:
7433    case UNLT:
7434    case LTGT:
7435    case GT:
7436    case GE:
7437    case UNORDERED:
7438    case ORDERED:
7439    case UNEQ:
7440      return 4;
7441      break;
7442    case LT:
7443    case NE:
7444    case EQ:
7445    case UNGE:
7446      return 5;
7447      break;
7448    case LE:
7449    case UNGT:
7450      return 6;
7451      break;
7452    default:
7453      abort ();
7454    }
7455}
7456
7457/* Return cost of comparison done using fcomi operation.
7458   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
7459static int
7460ix86_fp_comparison_fcomi_cost (code)
7461     enum rtx_code code;
7462{
7463  enum rtx_code bypass_code, first_code, second_code;
7464  /* Return an arbitrarily high cost when the instruction is not supported;
7465     this prevents gcc from using it.  */
7466  if (!TARGET_CMOVE)
7467    return 1024;
7468  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7469  return (bypass_code != NIL || second_code != NIL) + 2;
7470}
7471
7472/* Return cost of comparison done using sahf operation.
7473   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
7474static int
7475ix86_fp_comparison_sahf_cost (code)
7476     enum rtx_code code;
7477{
7478  enum rtx_code bypass_code, first_code, second_code;
7479  /* Return an arbitrarily high cost when the instruction is not preferred;
7480     this keeps gcc from using it.  */
7481  if (!TARGET_USE_SAHF && !optimize_size)
7482    return 1024;
7483  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7484  return (bypass_code != NIL || second_code != NIL) + 3;
7485}
7486
7487/* Return the minimum cost of the comparison over all available methods.
7488   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
7489static int
7490ix86_fp_comparison_cost (code)
7491     enum rtx_code code;
7492{
7493  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7494  int min;
7495
7496  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7497  sahf_cost = ix86_fp_comparison_sahf_cost (code);
7498
7499  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7500  if (min > sahf_cost)
7501    min = sahf_cost;
7502  if (min > fcomi_cost)
7503    min = fcomi_cost;
7504  return min;
7505}
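
/* Worked example (illustrative): for CODE == UNLT on a TARGET_CMOVE
   machine the split needs no extra branch, so the fcomi sequence costs
   2, sahf (when usable) costs 3, and the fnstsw arithmetic sequence
   costs 4; the minimum of 2 also makes ix86_use_fcomi_compare
   return true.  */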
7506
7507/* Generate insn patterns to do a floating point compare of OPERANDS.  */
7508
7509static rtx
7510ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7511     enum rtx_code code;
7512     rtx op0, op1, scratch;
7513     rtx *second_test;
7514     rtx *bypass_test;
7515{
7516  enum machine_mode fpcmp_mode, intcmp_mode;
7517  rtx tmp, tmp2;
7518  int cost = ix86_fp_comparison_cost (code);
7519  enum rtx_code bypass_code, first_code, second_code;
7520
7521  fpcmp_mode = ix86_fp_compare_mode (code);
7522  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7523
7524  if (second_test)
7525    *second_test = NULL_RTX;
7526  if (bypass_test)
7527    *bypass_test = NULL_RTX;
7528
7529  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7530
7531  /* Do fcomi/sahf based test when profitable.  */
7532  if ((bypass_code == NIL || bypass_test)
7533      && (second_code == NIL || second_test)
7534      && ix86_fp_comparison_arithmetics_cost (code) > cost)
7535    {
7536      if (TARGET_CMOVE)
7537	{
7538	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7539	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7540			     tmp);
7541	  emit_insn (tmp);
7542	}
7543      else
7544	{
7545	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7546	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7547	  if (!scratch)
7548	    scratch = gen_reg_rtx (HImode);
7549	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7550	  emit_insn (gen_x86_sahf_1 (scratch));
7551	}
7552
7553      /* The FP codes work out to act like unsigned.  */
7554      intcmp_mode = fpcmp_mode;
7555      code = first_code;
7556      if (bypass_code != NIL)
7557	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7558				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
7559				       const0_rtx);
7560      if (second_code != NIL)
7561	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7562				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
7563				       const0_rtx);
7564    }
7565  else
7566    {
7567      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
7568      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7569      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7570      if (!scratch)
7571	scratch = gen_reg_rtx (HImode);
7572      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7573
7574      /* In the unordered case, we have to check C2 for NaN's, which
7575	 doesn't happen to work out to anything nice combination-wise.
7576	 So do some bit twiddling on the value we've got in AH to come
7577	 up with an appropriate set of condition codes.  */
7578
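      /* After fnstsw the x87 condition bits land in AH as C0 = 0x01,
	 C2 = 0x04 and C3 = 0x40, so mask 0x45 covers all three; fcom
	 reports ">" as C3=C2=C0=0, "<" as C0=1, "=" as C3=1, and an
	 unordered result as all three set.  (Decoding note added for
	 clarity.)  */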
7579      intcmp_mode = CCNOmode;
7580      switch (code)
7581	{
7582	case GT:
7583	case UNGT:
7584	  if (code == GT || !TARGET_IEEE_FP)
7585	    {
7586	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7587	      code = EQ;
7588	    }
7589	  else
7590	    {
7591	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7592	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7593	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7594	      intcmp_mode = CCmode;
7595	      code = GEU;
7596	    }
7597	  break;
7598	case LT:
7599	case UNLT:
7600	  if (code == LT && TARGET_IEEE_FP)
7601	    {
7602	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7603	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7604	      intcmp_mode = CCmode;
7605	      code = EQ;
7606	    }
7607	  else
7608	    {
7609	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7610	      code = NE;
7611	    }
7612	  break;
7613	case GE:
7614	case UNGE:
7615	  if (code == GE || !TARGET_IEEE_FP)
7616	    {
7617	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7618	      code = EQ;
7619	    }
7620	  else
7621	    {
7622	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7623	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7624					     GEN_INT (0x01)));
7625	      code = NE;
7626	    }
7627	  break;
7628	case LE:
7629	case UNLE:
7630	  if (code == LE && TARGET_IEEE_FP)
7631	    {
7632	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7633	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7634	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7635	      intcmp_mode = CCmode;
7636	      code = LTU;
7637	    }
7638	  else
7639	    {
7640	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7641	      code = NE;
7642	    }
7643	  break;
7644	case EQ:
7645	case UNEQ:
7646	  if (code == EQ && TARGET_IEEE_FP)
7647	    {
7648	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7649	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7650	      intcmp_mode = CCmode;
7651	      code = EQ;
7652	    }
7653	  else
7654	    {
7655	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7656	      code = NE;
7657	      break;
7658	    }
7659	  break;
7660	case NE:
7661	case LTGT:
7662	  if (code == NE && TARGET_IEEE_FP)
7663	    {
7664	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7665	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7666					     GEN_INT (0x40)));
7667	      code = NE;
7668	    }
7669	  else
7670	    {
7671	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7672	      code = EQ;
7673	    }
7674	  break;
7675
7676	case UNORDERED:
7677	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7678	  code = NE;
7679	  break;
7680	case ORDERED:
7681	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7682	  code = EQ;
7683	  break;
7684
7685	default:
7686	  abort ();
7687	}
7688    }
7689
7690  /* Return the test that should be put into the flags user, i.e.
7691     the bcc, scc, or cmov instruction.  */
7692  return gen_rtx_fmt_ee (code, VOIDmode,
7693			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7694			 const0_rtx);
7695}
7696
7697rtx
7698ix86_expand_compare (code, second_test, bypass_test)
7699     enum rtx_code code;
7700     rtx *second_test, *bypass_test;
7701{
7702  rtx op0, op1, ret;
7703  op0 = ix86_compare_op0;
7704  op1 = ix86_compare_op1;
7705
7706  if (second_test)
7707    *second_test = NULL_RTX;
7708  if (bypass_test)
7709    *bypass_test = NULL_RTX;
7710
7711  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7712    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7713				  second_test, bypass_test);
7714  else
7715    ret = ix86_expand_int_compare (code, op0, op1);
7716
7717  return ret;
7718}
7719
7720/* Return true if the CODE will result in a nontrivial jump sequence.  */
7721bool
7722ix86_fp_jump_nontrivial_p (code)
7723    enum rtx_code code;
7724{
7725  enum rtx_code bypass_code, first_code, second_code;
7726  if (!TARGET_CMOVE)
7727    return true;
7728  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7729  return bypass_code != NIL || second_code != NIL;
7730}
7731
7732void
7733ix86_expand_branch (code, label)
7734     enum rtx_code code;
7735     rtx label;
7736{
7737  rtx tmp;
7738
7739  switch (GET_MODE (ix86_compare_op0))
7740    {
7741    case QImode:
7742    case HImode:
7743    case SImode:
7744      simple:
7745      tmp = ix86_expand_compare (code, NULL, NULL);
7746      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7747				  gen_rtx_LABEL_REF (VOIDmode, label),
7748				  pc_rtx);
7749      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7750      return;
7751
7752    case SFmode:
7753    case DFmode:
7754    case XFmode:
7755    case TFmode:
7756      {
7757	rtvec vec;
7758	int use_fcomi;
7759	enum rtx_code bypass_code, first_code, second_code;
7760
7761	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7762					     &ix86_compare_op1);
7763
7764	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7765
7766	/* Check whether we will use the natural sequence with one jump.  If
7767	   so, we can expand the jump early.  Otherwise delay expansion by
7768	   creating a compound insn so as not to confuse the optimizers.  */
7769	if (bypass_code == NIL && second_code == NIL
7770	    && TARGET_CMOVE)
7771	  {
7772	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7773				  gen_rtx_LABEL_REF (VOIDmode, label),
7774				  pc_rtx, NULL_RTX);
7775	  }
7776	else
7777	  {
7778	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
7779				  ix86_compare_op0, ix86_compare_op1);
7780	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7781					gen_rtx_LABEL_REF (VOIDmode, label),
7782					pc_rtx);
7783	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7784
7785	    use_fcomi = ix86_use_fcomi_compare (code);
7786	    vec = rtvec_alloc (3 + !use_fcomi);
7787	    RTVEC_ELT (vec, 0) = tmp;
7788	    RTVEC_ELT (vec, 1)
7789	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7790	    RTVEC_ELT (vec, 2)
7791	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7792	    if (! use_fcomi)
7793	      RTVEC_ELT (vec, 3)
7794		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7795
7796	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7797	  }
7798	return;
7799      }
7800
7801    case DImode:
7802      if (TARGET_64BIT)
7803	goto simple;
7804      /* Expand DImode branch into multiple compare+branch.  */
7805      {
7806	rtx lo[2], hi[2], label2;
7807	enum rtx_code code1, code2, code3;
7808
7809	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7810	  {
7811	    tmp = ix86_compare_op0;
7812	    ix86_compare_op0 = ix86_compare_op1;
7813	    ix86_compare_op1 = tmp;
7814	    code = swap_condition (code);
7815	  }
7816	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7817	split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7818
7819	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7820	   avoid two branches.  This costs one extra insn, so disable when
7821	   optimizing for size.  */
7822
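	/* E.g. a 64-bit "a == b" on ia32 becomes, as a sketch:

		xorl	hi(b), hi(a)
		xorl	lo(b), lo(a)
		orl	hi(a), lo(a)
		jz	label

	   one conditional branch instead of two.  */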
7823	if ((code == EQ || code == NE)
7824	    && (!optimize_size
7825	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
7826	  {
7827	    rtx xor0, xor1;
7828
7829	    xor1 = hi[0];
7830	    if (hi[1] != const0_rtx)
7831	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7832				   NULL_RTX, 0, OPTAB_WIDEN);
7833
7834	    xor0 = lo[0];
7835	    if (lo[1] != const0_rtx)
7836	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7837				   NULL_RTX, 0, OPTAB_WIDEN);
7838
7839	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7840				NULL_RTX, 0, OPTAB_WIDEN);
7841
7842	    ix86_compare_op0 = tmp;
7843	    ix86_compare_op1 = const0_rtx;
7844	    ix86_expand_branch (code, label);
7845	    return;
7846	  }
7847
7848	/* Otherwise, if we are doing less-than or greater-than-or-equal,
7849	   op1 is a constant and the low word is zero, then we can just
7850	   examine the high word.  */
7851
7852	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7853	  switch (code)
7854	    {
7855	    case LT: case LTU: case GE: case GEU:
7856	      ix86_compare_op0 = hi[0];
7857	      ix86_compare_op1 = hi[1];
7858	      ix86_expand_branch (code, label);
7859	      return;
7860	    default:
7861	      break;
7862	    }
7863
7864	/* Otherwise, we need two or three jumps.  */
7865
7866	label2 = gen_label_rtx ();
7867
7868	code1 = code;
7869	code2 = swap_condition (code);
7870	code3 = unsigned_condition (code);
7871
7872	switch (code)
7873	  {
7874	  case LT: case GT: case LTU: case GTU:
7875	    break;
7876
7877	  case LE:   code1 = LT;  code2 = GT;  break;
7878	  case GE:   code1 = GT;  code2 = LT;  break;
7879	  case LEU:  code1 = LTU; code2 = GTU; break;
7880	  case GEU:  code1 = GTU; code2 = LTU; break;
7881
7882	  case EQ:   code1 = NIL; code2 = NE;  break;
7883	  case NE:   code2 = NIL; break;
7884
7885	  default:
7886	    abort ();
7887	  }
7888
7889	/*
7890	 * a < b =>
7891	 *    if (hi(a) < hi(b)) goto true;
7892	 *    if (hi(a) > hi(b)) goto false;
7893	 *    if (lo(a) < lo(b)) goto true;
7894	 *  false:
7895	 */
7896
7897	ix86_compare_op0 = hi[0];
7898	ix86_compare_op1 = hi[1];
7899
7900	if (code1 != NIL)
7901	  ix86_expand_branch (code1, label);
7902	if (code2 != NIL)
7903	  ix86_expand_branch (code2, label2);
7904
7905	ix86_compare_op0 = lo[0];
7906	ix86_compare_op1 = lo[1];
7907	ix86_expand_branch (code3, label);
7908
7909	if (code2 != NIL)
7910	  emit_label (label2);
7911	return;
7912      }
7913
7914    default:
7915      abort ();
7916    }
7917}
7918
7919/* Split a branch based on a floating point condition.  */
7920void
7921ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7922     enum rtx_code code;
7923     rtx op1, op2, target1, target2, tmp;
7924{
7925  rtx second, bypass;
7926  rtx label = NULL_RTX;
7927  rtx condition;
7928  int bypass_probability = -1, second_probability = -1, probability = -1;
7929  rtx i;
7930
7931  if (target2 != pc_rtx)
7932    {
7933      rtx tmp = target2;
7934      code = reverse_condition_maybe_unordered (code);
7935      target2 = target1;
7936      target1 = tmp;
7937    }
7938
7939  condition = ix86_expand_fp_compare (code, op1, op2,
7940				      tmp, &second, &bypass);
7941
7942  if (split_branch_probability >= 0)
7943    {
7944      /* Distribute the probabilities across the jumps.
7945	 Assume that BYPASS and SECOND always test
7946	 for UNORDERED.  */
7947      probability = split_branch_probability;
7948
7949      /* A value of 1 is low enough that the probability need not
7950	 be updated.  Later we may run some experiments and see
7951	 if unordered values are more frequent in practice.  */
7952      if (bypass)
7953	bypass_probability = 1;
7954      if (second)
7955	second_probability = 1;
7956    }
7957  if (bypass != NULL_RTX)
7958    {
7959      label = gen_label_rtx ();
7960      i = emit_jump_insn (gen_rtx_SET
7961			  (VOIDmode, pc_rtx,
7962			   gen_rtx_IF_THEN_ELSE (VOIDmode,
7963						 bypass,
7964						 gen_rtx_LABEL_REF (VOIDmode,
7965								    label),
7966						 pc_rtx)));
7967      if (bypass_probability >= 0)
7968	REG_NOTES (i)
7969	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
7970			       GEN_INT (bypass_probability),
7971			       REG_NOTES (i));
7972    }
7973  i = emit_jump_insn (gen_rtx_SET
7974		      (VOIDmode, pc_rtx,
7975		       gen_rtx_IF_THEN_ELSE (VOIDmode,
7976					     condition, target1, target2)));
7977  if (probability >= 0)
7978    REG_NOTES (i)
7979      = gen_rtx_EXPR_LIST (REG_BR_PROB,
7980			   GEN_INT (probability),
7981			   REG_NOTES (i));
7982  if (second != NULL_RTX)
7983    {
7984      i = emit_jump_insn (gen_rtx_SET
7985			  (VOIDmode, pc_rtx,
7986			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7987						 target2)));
7988      if (second_probability >= 0)
7989	REG_NOTES (i)
7990	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
7991			       GEN_INT (second_probability),
7992			       REG_NOTES (i));
7993    }
7994  if (label != NULL_RTX)
7995    emit_label (label);
7996}
7997
7998int
7999ix86_expand_setcc (code, dest)
8000     enum rtx_code code;
8001     rtx dest;
8002{
8003  rtx ret, tmp, tmpreg;
8004  rtx second_test, bypass_test;
8005
8006  if (GET_MODE (ix86_compare_op0) == DImode
8007      && !TARGET_64BIT)
8008    return 0; /* FAIL */
8009
8010  if (GET_MODE (dest) != QImode)
8011    abort ();
8012
8013  ret = ix86_expand_compare (code, &second_test, &bypass_test);
8014  PUT_MODE (ret, QImode);
8015
8016  tmp = dest;
8017  tmpreg = dest;
8018
8019  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
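  /* If the FP comparison was split into extra tests, merge them here
     (explanatory note): a SECOND test is OR'd in, since either test
     succeeding makes the predicate true, while a BYPASS test is
     reversed and AND'd in, since the bypass firing invalidates the
     primary test.  E.g. an IEEE "!=" ends up as
     setcc (LTGT) | setcc (UNORDERED).  */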
8020  if (bypass_test || second_test)
8021    {
8022      rtx test = second_test;
8023      int bypass = 0;
8024      rtx tmp2 = gen_reg_rtx (QImode);
8025      if (bypass_test)
8026	{
8027	  if (second_test)
8028	    abort ();
8029	  test = bypass_test;
8030	  bypass = 1;
8031	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8032	}
8033      PUT_MODE (test, QImode);
8034      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8035
8036      if (bypass)
8037	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8038      else
8039	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8040    }
8041
8042  return 1; /* DONE */
8043}
8044
8045int
8046ix86_expand_int_movcc (operands)
8047     rtx operands[];
8048{
8049  enum rtx_code code = GET_CODE (operands[1]), compare_code;
8050  rtx compare_seq, compare_op;
8051  rtx second_test, bypass_test;
8052  enum machine_mode mode = GET_MODE (operands[0]);
8053
8054  /* When the compare code is not LTU or GEU, we can not use the sbbl case.
8055     When the comparison is done with an immediate, we can convert it to LTU
8056     or GEU by altering the integer.  */
8057
8058  if ((code == LEU || code == GTU)
8059      && GET_CODE (ix86_compare_op1) == CONST_INT
8060      && mode != HImode
8061      && INTVAL (ix86_compare_op1) != -1
8062      /* For x86-64, the immediate field in the instruction is 32-bit
8063	 signed, so we can't increment a DImode value above 0x7fffffff.  */
8064      && (!TARGET_64BIT
8065	  || GET_MODE (ix86_compare_op0) != DImode
8066	  || INTVAL (ix86_compare_op1) != 0x7fffffff)
8067      && GET_CODE (operands[2]) == CONST_INT
8068      && GET_CODE (operands[3]) == CONST_INT)
8069    {
8070      if (code == LEU)
8071	code = LTU;
8072      else
8073	code = GEU;
8074      ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8075				       GET_MODE (ix86_compare_op0));
8076    }
8077
8078  start_sequence ();
8079  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8080  compare_seq = gen_sequence ();
8081  end_sequence ();
8082
8083  compare_code = GET_CODE (compare_op);
8084
8085  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8086     HImode insns, we'd be swallowed in word prefix ops.  */
8087
8088  if (mode != HImode
8089      && (mode != DImode || TARGET_64BIT)
8090      && GET_CODE (operands[2]) == CONST_INT
8091      && GET_CODE (operands[3]) == CONST_INT)
8092    {
8093      rtx out = operands[0];
8094      HOST_WIDE_INT ct = INTVAL (operands[2]);
8095      HOST_WIDE_INT cf = INTVAL (operands[3]);
8096      HOST_WIDE_INT diff;
8097
8098      if ((compare_code == LTU || compare_code == GEU)
8099	  && !second_test && !bypass_test)
8100	{
8101
8102	  /* Detect overlap between destination and compare sources.  */
8103	  rtx tmp = out;
8104
8105	  /* To simplify the rest of the code, restrict to the GEU case.  */
8106	  if (compare_code == LTU)
8107	    {
8108	      int tmp = ct;
8109	      ct = cf;
8110	      cf = tmp;
8111	      compare_code = reverse_condition (compare_code);
8112	      code = reverse_condition (code);
8113	    }
8114	  diff = ct - cf;
8115
8116	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8117	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
8118	    tmp = gen_reg_rtx (mode);
8119
8120	  emit_insn (compare_seq);
8121	  if (mode == DImode)
8122	    emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8123	  else
8124	    emit_insn (gen_x86_movsicc_0_m1 (tmp));
8125
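	  /* At this point tmp is CF ? -1 : 0, i.e. all-ones exactly when
	     the (GEU) condition is false.  Worked example: with ct = 5
	     and cf = 4 (diff == 1), the "addl $5" case below yields
	     -1 + 5 = 4 or 0 + 5 = 5 directly.  (Note added for
	     clarity.)  */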
8126	  if (diff == 1)
8127	    {
8128	      /*
8129	       * cmpl op0,op1
8130	       * sbbl dest,dest
8131	       * [addl dest, ct]
8132	       *
8133	       * Size 5 - 8.
8134	       */
8135	      if (ct)
8136	       	tmp = expand_simple_binop (mode, PLUS,
8137					   tmp, GEN_INT (ct),
8138					   tmp, 1, OPTAB_DIRECT);
8139	    }
8140	  else if (cf == -1)
8141	    {
8142	      /*
8143	       * cmpl op0,op1
8144	       * sbbl dest,dest
8145	       * orl $ct, dest
8146	       *
8147	       * Size 8.
8148	       */
8149	      tmp = expand_simple_binop (mode, IOR,
8150					 tmp, GEN_INT (ct),
8151					 tmp, 1, OPTAB_DIRECT);
8152	    }
8153	  else if (diff == -1 && ct)
8154	    {
8155	      /*
8156	       * cmpl op0,op1
8157	       * sbbl dest,dest
8158	       * xorl $-1, dest
8159	       * [addl dest, cf]
8160	       *
8161	       * Size 8 - 11.
8162	       */
8163	      tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8164	      if (cf)
8165	       	tmp = expand_simple_binop (mode, PLUS,
8166					   tmp, GEN_INT (cf),
8167					   tmp, 1, OPTAB_DIRECT);
8168	    }
8169	  else
8170	    {
8171	      /*
8172	       * cmpl op0,op1
8173	       * sbbl dest,dest
8174	       * andl cf - ct, dest
8175	       * [addl dest, ct]
8176	       *
8177	       * Size 8 - 11.
8178	       */
8179	      tmp = expand_simple_binop (mode, AND,
8180					 tmp,
8181					 GEN_INT (trunc_int_for_mode
8182						  (cf - ct, mode)),
8183					 tmp, 1, OPTAB_DIRECT);
8184	      if (ct)
8185	       	tmp = expand_simple_binop (mode, PLUS,
8186					   tmp, GEN_INT (ct),
8187					   tmp, 1, OPTAB_DIRECT);
8188	    }
8189
8190	  if (tmp != out)
8191	    emit_move_insn (out, tmp);
8192
8193	  return 1; /* DONE */
8194	}
8195
8196      diff = ct - cf;
8197      if (diff < 0)
8198	{
8199	  HOST_WIDE_INT tmp;
8200	  tmp = ct, ct = cf, cf = tmp;
8201	  diff = -diff;
8202	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8203	    {
8204	      /* We may be reversing an unordered compare to a normal compare,
8205		 which is not valid in general (we may convert a non-trapping
8206		 condition to a trapping one); however, on i386 we currently
8207		 emit all comparisons unordered.  */
8208	      compare_code = reverse_condition_maybe_unordered (compare_code);
8209	      code = reverse_condition_maybe_unordered (code);
8210	    }
8211	  else
8212	    {
8213	      compare_code = reverse_condition (compare_code);
8214	      code = reverse_condition (code);
8215	    }
8216	}
8217      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8218	   || diff == 3 || diff == 5 || diff == 9)
8219	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8220	{
8221	  /*
8222	   * xorl dest,dest
8223	   * cmpl op1,op2
8224	   * setcc dest
8225	   * lea cf(dest*(ct-cf)),dest
8226	   *
8227	   * Size 14.
8228	   *
8229	   * This also catches the degenerate setcc-only case.
8230	   */
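	  /* E.g. diff == 5 and cf == 10 fold into the single instruction
		leal	10(%eax,%eax,4), %eax
	     after the setcc, since base-plus-scaled-index covers the
	     diffs listed above (an illustrative sketch).  */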
8231
8232	  rtx tmp;
8233	  int nops;
8234
8235	  out = emit_store_flag (out, code, ix86_compare_op0,
8236				 ix86_compare_op1, VOIDmode, 0, 1);
8237
8238	  nops = 0;
8239	  /* On x86_64 the lea instruction operates on Pmode, so we need to
8240	     get the arithmetic done in the proper mode to match.  */
8241	  if (diff == 1)
8242	    tmp = out;
8243	  else
8244	    {
8245	      rtx out1;
8246	      out1 = out;
8247	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8248	      nops++;
8249	      if (diff & 1)
8250		{
8251		  tmp = gen_rtx_PLUS (mode, tmp, out1);
8252		  nops++;
8253		}
8254	    }
8255	  if (cf != 0)
8256	    {
8257	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8258	      nops++;
8259	    }
8260	  if (tmp != out
8261	      && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8262	    {
8263	      if (nops == 1)
8264		{
8265		  rtx clob;
8266
8267		  clob = gen_rtx_REG (CCmode, FLAGS_REG);
8268		  clob = gen_rtx_CLOBBER (VOIDmode, clob);
8269
8270		  tmp = gen_rtx_SET (VOIDmode, out, tmp);
8271		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, copy_rtx (tmp), clob));
8272		  emit_insn (tmp);
8273		}
8274	      else
8275		emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8276	    }
8277	  if (out != operands[0])
8278	    emit_move_insn (operands[0], out);
8279
8280	  return 1; /* DONE */
8281	}
8282
8283      /*
8284       * General case:			Jumpful:
8285       *   xorl dest,dest		cmpl op1, op2
8286       *   cmpl op1, op2		movl ct, dest
8287       *   setcc dest			jcc 1f
8288       *   decl dest			movl cf, dest
8289       *   andl (cf-ct),dest		1:
8290       *   addl ct,dest
8291       *
8292       * Size 20.			Size 14.
8293       *
8294       * This is reasonably steep, but branch mispredict costs are
8295       * high on modern cpus, so consider failing only if optimizing
8296       * for space.
8297       *
8298       * %%% Parameterize branch_cost on the tuning architecture, then
8299       * use that.  The 80386 couldn't care less about mispredicts.
8300       */
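      /* Worked example (for illustration): with ct = 12 and cf = 7 we
	 have cf - ct = -5; a true condition gives (1-1)&-5 + 12 = 12
	 and a false one gives (0-1)&-5 + 12 = 7, with no branch.  */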
8301
8302      if (!optimize_size && !TARGET_CMOVE)
8303	{
8304	  if (ct == 0)
8305	    {
8306	      ct = cf;
8307	      cf = 0;
8308	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8309		{
8310		  /* We may be reversing an unordered compare to a normal
8311		     compare, which is not valid in general (we may convert a
8312		     non-trapping condition to a trapping one); however, on
8313		     i386 we currently emit all comparisons unordered.  */
8314		  compare_code = reverse_condition_maybe_unordered (compare_code);
8315		  code = reverse_condition_maybe_unordered (code);
8316		}
8317	      else
8318		{
8319		  compare_code = reverse_condition (compare_code);
8320		  code = reverse_condition (code);
8321		}
8322	    }
8323
8324	  out = emit_store_flag (out, code, ix86_compare_op0,
8325				 ix86_compare_op1, VOIDmode, 0, 1);
8326
8327	  out = expand_simple_binop (mode, PLUS,
8328				     out, constm1_rtx,
8329				     out, 1, OPTAB_DIRECT);
8330	  out = expand_simple_binop (mode, AND,
8331				     out,
8332				     GEN_INT (trunc_int_for_mode
8333					      (cf - ct, mode)),
8334				     out, 1, OPTAB_DIRECT);
8335	  out = expand_simple_binop (mode, PLUS,
8336				     out, GEN_INT (ct),
8337				     out, 1, OPTAB_DIRECT);
8338	  if (out != operands[0])
8339	    emit_move_insn (operands[0], out);
8340
8341	  return 1; /* DONE */
8342	}
8343    }
8344
8345  if (!TARGET_CMOVE)
8346    {
8347      /* Try a few more things with specific constants and a variable.  */
8348
8349      optab op;
8350      rtx var, orig_out, out, tmp;
8351
8352      if (optimize_size)
8353	return 0; /* FAIL */
8354
8355      /* If one of the two operands is an interesting constant (0 or -1),
8356	 load a 0/-1 mask via recursion and mask the variable in with AND/IOR.  */
8357
8358      if (GET_CODE (operands[2]) == CONST_INT)
8359	{
8360	  var = operands[3];
8361	  if (INTVAL (operands[2]) == 0)
8362	    operands[3] = constm1_rtx, op = and_optab;
8363	  else if (INTVAL (operands[2]) == -1)
8364	    operands[3] = const0_rtx, op = ior_optab;
8365	  else
8366	    return 0; /* FAIL */
8367	}
8368      else if (GET_CODE (operands[3]) == CONST_INT)
8369	{
8370	  var = operands[2];
8371	  if (INTVAL (operands[3]) == 0)
8372	    operands[2] = constm1_rtx, op = and_optab;
8373	  else if (INTVAL (operands[3]) == -1)
8374	    operands[2] = const0_rtx, op = ior_optab;
8375	  else
8376	    return 0; /* FAIL */
8377	}
8378      else
8379        return 0; /* FAIL */
8380
8381      orig_out = operands[0];
8382      tmp = gen_reg_rtx (mode);
8383      operands[0] = tmp;
8384
8385      /* Recurse to get the constant loaded.  */
8386      if (ix86_expand_int_movcc (operands) == 0)
8387        return 0; /* FAIL */
8388
8389      /* Mask in the interesting variable.  */
8390      out = expand_binop (mode, op, var, tmp, orig_out, 0,
8391			  OPTAB_WIDEN);
8392      if (out != orig_out)
8393	emit_move_insn (orig_out, out);
8394
8395      return 1; /* DONE */
8396    }
8397
8398  /*
8399   * For comparison with above,
8400   *
8401   * movl cf,dest
8402   * movl ct,tmp
8403   * cmpl op1,op2
8404   * cmovcc tmp,dest
8405   *
8406   * Size 15.
8407   */
8408
8409  if (! nonimmediate_operand (operands[2], mode))
8410    operands[2] = force_reg (mode, operands[2]);
8411  if (! nonimmediate_operand (operands[3], mode))
8412    operands[3] = force_reg (mode, operands[3]);
8413
8414  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8415    {
8416      rtx tmp = gen_reg_rtx (mode);
8417      emit_move_insn (tmp, operands[3]);
8418      operands[3] = tmp;
8419    }
8420  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8421    {
8422      rtx tmp = gen_reg_rtx (mode);
8423      emit_move_insn (tmp, operands[2]);
8424      operands[2] = tmp;
8425    }
8426  if (! register_operand (operands[2], VOIDmode)
8427      && ! register_operand (operands[3], VOIDmode))
8428    operands[2] = force_reg (mode, operands[2]);
8429
8430  emit_insn (compare_seq);
8431  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8432			  gen_rtx_IF_THEN_ELSE (mode,
8433						compare_op, operands[2],
8434						operands[3])));
8435  if (bypass_test)
8436    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8437			    gen_rtx_IF_THEN_ELSE (mode,
8438				  bypass_test,
8439				  operands[3],
8440				  operands[0])));
8441  if (second_test)
8442    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8443			    gen_rtx_IF_THEN_ELSE (mode,
8444				  second_test,
8445				  operands[2],
8446				  operands[0])));
8447
8448  return 1; /* DONE */
8449}
8450
8451int
8452ix86_expand_fp_movcc (operands)
8453     rtx operands[];
8454{
8455  enum rtx_code code;
8456  rtx tmp;
8457  rtx compare_op, second_test, bypass_test;
8458
8459  /* For SF/DFmode conditional moves based on comparisons
8460     in the same mode, we may want to use SSE min/max instructions.  */
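  /* E.g. "x = (a < b) ? a : b" with everything in SFmode can collapse
     into a single minss via the gen_minsf3 call below (an illustrative
     mapping).  */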
8461  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
8462       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
8463      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8464      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
8465      && (!TARGET_IEEE_FP
8466	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8467      /* We may be called from the post-reload splitter.  */
8468      && (!REG_P (operands[0])
8469	  || SSE_REG_P (operands[0])
8470	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8471    {
8472      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8473      code = GET_CODE (operands[1]);
8474
8475      /* See if we have a (cross) match between the comparison operands
8476         and the conditional move operands.  */
8477      if (rtx_equal_p (operands[2], op1))
8478	{
8479	  rtx tmp = op0;
8480	  op0 = op1;
8481	  op1 = tmp;
8482	  code = reverse_condition_maybe_unordered (code);
8483	}
8484      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8485	{
8486	  /* Check for min operation.  */
8487	  if (code == LT)
8488	    {
8489	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8490	       if (memory_operand (op0, VOIDmode))
8491		 op0 = force_reg (GET_MODE (operands[0]), op0);
8492	       if (GET_MODE (operands[0]) == SFmode)
8493		 emit_insn (gen_minsf3 (operands[0], op0, op1));
8494	       else
8495		 emit_insn (gen_mindf3 (operands[0], op0, op1));
8496	       return 1;
8497	    }
8498	  /* Check for max operation.  */
8499	  if (code == GT)
8500	    {
8501	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8502	       if (memory_operand (op0, VOIDmode))
8503		 op0 = force_reg (GET_MODE (operands[0]), op0);
8504	       if (GET_MODE (operands[0]) == SFmode)
8505		 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8506	       else
8507		 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8508	       return 1;
8509	    }
8510	}
8511      /* Arrange for the condition to be an sse_comparison_operator.  In
8512	 non-IEEE mode, try to canonicalize the destination operand to be
8513	 first in the comparison; this helps reload avoid extra
8514	 moves.  */
8515      if (!sse_comparison_operator (operands[1], VOIDmode)
8516	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8517	{
8518	  rtx tmp = ix86_compare_op0;
8519	  ix86_compare_op0 = ix86_compare_op1;
8520	  ix86_compare_op1 = tmp;
8521	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8522					VOIDmode, ix86_compare_op0,
8523					ix86_compare_op1);
8524	}
8525      /* Similarly, try to arrange for the result to be the first operand
8526	 of the conditional move.  We also don't support the NE comparison
8527	 on SSE, so try to avoid it.  */
8528      if ((rtx_equal_p (operands[0], operands[3])
8529	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8530	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8531	{
8532	  rtx tmp = operands[2];
8533	  operands[2] = operands[3];
8534	  operands[3] = tmp;
8535	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8536					  (GET_CODE (operands[1])),
8537					VOIDmode, ix86_compare_op0,
8538					ix86_compare_op1);
8539	}
8540      if (GET_MODE (operands[0]) == SFmode)
8541	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8542				    operands[2], operands[3],
8543				    ix86_compare_op0, ix86_compare_op1));
8544      else
8545	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8546				    operands[2], operands[3],
8547				    ix86_compare_op0, ix86_compare_op1));
8548      return 1;
8549    }
8550
8554  code = GET_CODE (operands[1]);
8555  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8556
8557  /* The floating point conditional move instructions don't directly
8558     support signed integer comparisons.  */
8559
8560  if (!fcmov_comparison_operator (compare_op, VOIDmode))
8561    {
8562      if (second_test != NULL || bypass_test != NULL)
8563	abort ();
8564      tmp = gen_reg_rtx (QImode);
8565      ix86_expand_setcc (code, tmp);
8566      code = NE;
8567      ix86_compare_op0 = tmp;
8568      ix86_compare_op1 = const0_rtx;
8569      compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
8570    }
8571  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8572    {
8573      tmp = gen_reg_rtx (GET_MODE (operands[0]));
8574      emit_move_insn (tmp, operands[3]);
8575      operands[3] = tmp;
8576    }
8577  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8578    {
8579      tmp = gen_reg_rtx (GET_MODE (operands[0]));
8580      emit_move_insn (tmp, operands[2]);
8581      operands[2] = tmp;
8582    }
8583
8584  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8585			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8586				compare_op,
8587				operands[2],
8588				operands[3])));
8589  if (bypass_test)
8590    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8591			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8592				  bypass_test,
8593				  operands[3],
8594				  operands[0])));
8595  if (second_test)
8596    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8597			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8598				  second_test,
8599				  operands[2],
8600				  operands[0])));
8601
8602  return 1;
8603}
8604
8605/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
8606   works for floating point parameters and non-offsettable memories.
8607   For pushes, it returns just stack offsets; the values will be saved
8608   in the right order.  At most three parts are generated.  */
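/* For example (an illustrative case): after reload a DFmode value held
   in the integer register pair starting at %eax yields parts[0] = %eax,
   parts[1] = %edx and size 2 on ia32, while XFmode/TFmode values yield
   three SImode parts.  */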
8609
8610static int
8611ix86_split_to_parts (operand, parts, mode)
8612     rtx operand;
8613     rtx *parts;
8614     enum machine_mode mode;
8615{
8616  int size;
8617
8618  if (!TARGET_64BIT)
8619    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8620  else
8621    size = (GET_MODE_SIZE (mode) + 4) / 8;
8622
8623  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8624    abort ();
8625  if (size < 2 || size > 3)
8626    abort ();
8627
8628  /* Optimize constant pool references to immediates.  This is used by fp
8629     moves that force all constants to memory to allow combining.  */
8630
8631  if (GET_CODE (operand) == MEM
8632      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8633      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8634    operand = get_pool_constant (XEXP (operand, 0));
8635
8636  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8637    {
8638      /* The only non-offsettable memories we handle are pushes.  */
8639      if (! push_operand (operand, VOIDmode))
8640	abort ();
8641
8642      operand = copy_rtx (operand);
8643      PUT_MODE (operand, Pmode);
8644      parts[0] = parts[1] = parts[2] = operand;
8645    }
8646  else if (!TARGET_64BIT)
8647    {
8648      if (mode == DImode)
8649	split_di (&operand, 1, &parts[0], &parts[1]);
8650      else
8651	{
8652	  if (REG_P (operand))
8653	    {
8654	      if (!reload_completed)
8655		abort ();
8656	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8657	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8658	      if (size == 3)
8659		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8660	    }
8661	  else if (offsettable_memref_p (operand))
8662	    {
8663	      operand = adjust_address (operand, SImode, 0);
8664	      parts[0] = operand;
8665	      parts[1] = adjust_address (operand, SImode, 4);
8666	      if (size == 3)
8667		parts[2] = adjust_address (operand, SImode, 8);
8668	    }
8669	  else if (GET_CODE (operand) == CONST_DOUBLE)
8670	    {
8671	      REAL_VALUE_TYPE r;
8672	      long l[4];
8673
8674	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8675	      switch (mode)
8676		{
8677		case XFmode:
8678		case TFmode:
8679		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8680		  parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8681		  break;
8682		case DFmode:
8683		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8684		  break;
8685		default:
8686		  abort ();
8687		}
8688	      parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
8689	      parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
8690	    }
8691	  else
8692	    abort ();
8693	}
8694    }
8695  else
8696    {
8697      if (mode == TImode)
8698	split_ti (&operand, 1, &parts[0], &parts[1]);
8699      if (mode == XFmode || mode == TFmode)
8700	{
8701	  if (REG_P (operand))
8702	    {
8703	      if (!reload_completed)
8704		abort ();
8705	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8706	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8707	    }
8708	  else if (offsettable_memref_p (operand))
8709	    {
8710	      operand = adjust_address (operand, DImode, 0);
8711	      parts[0] = operand;
8712	      parts[1] = adjust_address (operand, SImode, 8);
8713	    }
8714	  else if (GET_CODE (operand) == CONST_DOUBLE)
8715	    {
8716	      REAL_VALUE_TYPE r;
8717	      long l[3];
8718
8719	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8720	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8721	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
8722	      if (HOST_BITS_PER_WIDE_INT >= 64)
8723	        parts[0]
8724		  = GEN_INT (trunc_int_for_mode
8725		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8726		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
8727		       DImode));
8728	      else
8729	        parts[0] = immed_double_const (l[0], l[1], DImode);
8730	      parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8731	    }
8732	  else
8733	    abort ();
8734	}
8735    }
8736
8737  return size;
8738}
8739
8740/* Emit insns to perform a move or push of DI, DF, and XF values;
8741   all the required insns are emitted directly.  Operands 2-4 contain
8742   the input values in the correct order; operands 5-7 contain the
8743   output values.  */
8744
8745void
8746ix86_split_long_move (operands)
8747     rtx operands[];
8748{
8749  rtx part[2][3];
8750  int nparts;
8751  int push = 0;
8752  int collisions = 0;
8753  enum machine_mode mode = GET_MODE (operands[0]);
8754
8755  /* The DFmode expanders may ask us to move a double.
8756     For a 64-bit target this is a single move.  By hiding the fact
8757     here we simplify the i386.md splitters.  */
8758  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8759    {
8760      /* Optimize constant pool references to immediates.  This is used by
8761	 fp moves that force all constants to memory to allow combining.  */
8762
8763      if (GET_CODE (operands[1]) == MEM
8764	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8765	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8766	operands[1] = get_pool_constant (XEXP (operands[1], 0));
8767      if (push_operand (operands[0], VOIDmode))
8768	{
8769	  operands[0] = copy_rtx (operands[0]);
8770	  PUT_MODE (operands[0], Pmode);
8771	}
8772      else
8773        operands[0] = gen_lowpart (DImode, operands[0]);
8774      operands[1] = gen_lowpart (DImode, operands[1]);
8775      emit_move_insn (operands[0], operands[1]);
8776      return;
8777    }
8778
8779  /* The only non-offsettable memory we handle is push.  */
8780  if (push_operand (operands[0], VOIDmode))
8781    push = 1;
8782  else if (GET_CODE (operands[0]) == MEM
8783	   && ! offsettable_memref_p (operands[0]))
8784    abort ();
8785
8786  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8787  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8788
8789  /* When emitting a push, take care of source operands on the stack.  */
8790  if (push && GET_CODE (operands[1]) == MEM
8791      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8792    {
8793      if (nparts == 3)
8794	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8795				     XEXP (part[1][2], 0));
8796      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8797				   XEXP (part[1][1], 0));
8798    }
8799
8800  /* We need to do the copy in the right order in case an address register
8801     of the source overlaps the destination.  */
8802  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8803    {
8804      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8805	collisions++;
8806      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8807	collisions++;
8808      if (nparts == 3
8809	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8810	collisions++;
8811
8812      /* Collision in the middle part can be handled by reordering.  */
8813      if (collisions == 1 && nparts == 3
8814	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8815	{
8816	  rtx tmp;
8817	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8818	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8819	}
8820
8821      /* If there are more collisions, we can't handle them by reordering.
8822	 Do an lea to the last part and use only one colliding move.  */
8823      else if (collisions > 1)
8824	{
8825	  collisions = 1;
8826	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8827				  XEXP (part[1][0], 0)));
8828	  part[1][0] = change_address (part[1][0],
8829				       TARGET_64BIT ? DImode : SImode,
8830				       part[0][nparts - 1]);
8831	  part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8832	  if (nparts == 3)
8833	    part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8834	}
8835    }
8836
8837  if (push)
8838    {
8839      if (!TARGET_64BIT)
8840	{
8841	  if (nparts == 3)
8842	    {
8843	      /* We use only the first 12 bytes of a TFmode value, but for
8844		 pushing we must adjust the stack as if we were pushing a
8845		 real 16-byte value.  */
8846	      if (mode == TFmode && !TARGET_64BIT)
8847		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8848				       GEN_INT (-4)));
8849	      emit_move_insn (part[0][2], part[1][2]);
8850	    }
8851	}
8852      else
8853	{
8854	  /* In 64-bit mode we don't have a 32-bit push available.  If the
8855	     operand is a register, that is OK - we just use the larger
8856	     counterpart.  We also retype memory - this comes from an attempt
8857	     to avoid the REX prefix on moving the second half of a TFmode value.  */
8858	  if (GET_MODE (part[1][1]) == SImode)
8859	    {
8860	      if (GET_CODE (part[1][1]) == MEM)
8861		part[1][1] = adjust_address (part[1][1], DImode, 0);
8862	      else if (REG_P (part[1][1]))
8863		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8864	      else
8865		abort ();
8866	      if (GET_MODE (part[1][0]) == SImode)
8867		part[1][0] = part[1][1];
8868	    }
8869	}
8870      emit_move_insn (part[0][1], part[1][1]);
8871      emit_move_insn (part[0][0], part[1][0]);
8872      return;
8873    }
8874
8875  /* Choose the correct order so we do not overwrite the source before it is copied.  */
8876  if ((REG_P (part[0][0])
8877       && REG_P (part[1][1])
8878       && (REGNO (part[0][0]) == REGNO (part[1][1])
8879	   || (nparts == 3
8880	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
8881      || (collisions > 0
8882	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8883    {
8884      if (nparts == 3)
8885	{
8886	  operands[2] = part[0][2];
8887	  operands[3] = part[0][1];
8888	  operands[4] = part[0][0];
8889	  operands[5] = part[1][2];
8890	  operands[6] = part[1][1];
8891	  operands[7] = part[1][0];
8892	}
8893      else
8894	{
8895	  operands[2] = part[0][1];
8896	  operands[3] = part[0][0];
8897	  operands[5] = part[1][1];
8898	  operands[6] = part[1][0];
8899	}
8900    }
8901  else
8902    {
8903      if (nparts == 3)
8904	{
8905	  operands[2] = part[0][0];
8906	  operands[3] = part[0][1];
8907	  operands[4] = part[0][2];
8908	  operands[5] = part[1][0];
8909	  operands[6] = part[1][1];
8910	  operands[7] = part[1][2];
8911	}
8912      else
8913	{
8914	  operands[2] = part[0][0];
8915	  operands[3] = part[0][1];
8916	  operands[5] = part[1][0];
8917	  operands[6] = part[1][1];
8918	}
8919    }
8920  emit_move_insn (operands[2], operands[5]);
8921  emit_move_insn (operands[3], operands[6]);
8922  if (nparts == 3)
8923    emit_move_insn (operands[4], operands[7]);
8924
8925  return;
8926}
8927
8928void
8929ix86_split_ashldi (operands, scratch)
8930     rtx *operands, scratch;
8931{
8932  rtx low[2], high[2];
8933  int count;
8934
8935  if (GET_CODE (operands[2]) == CONST_INT)
8936    {
8937      split_di (operands, 2, low, high);
8938      count = INTVAL (operands[2]) & 63;
8939
8940      if (count >= 32)
8941	{
8942	  emit_move_insn (high[0], low[1]);
8943	  emit_move_insn (low[0], const0_rtx);
8944
8945	  if (count > 32)
8946	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8947	}
8948      else
8949	{
8950	  if (!rtx_equal_p (operands[0], operands[1]))
8951	    emit_move_insn (operands[0], operands[1]);
8952	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8953	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8954	}
8955    }
8956  else
8957    {
8958      if (!rtx_equal_p (operands[0], operands[1]))
8959	emit_move_insn (operands[0], operands[1]);
8960
8961      split_di (operands, 1, low, high);
8962
8963      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8964      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8965
8966      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8967	{
8968	  if (! no_new_pseudos)
8969	    scratch = force_reg (SImode, const0_rtx);
8970	  else
8971	    emit_move_insn (scratch, const0_rtx);
8972
8973	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8974					  scratch));
8975	}
8976      else
8977	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8978    }
8979}
8980
8981void
8982ix86_split_ashrdi (operands, scratch)
8983     rtx *operands, scratch;
8984{
8985  rtx low[2], high[2];
8986  int count;
8987
8988  if (GET_CODE (operands[2]) == CONST_INT)
8989    {
8990      split_di (operands, 2, low, high);
8991      count = INTVAL (operands[2]) & 63;
8992
8993      if (count >= 32)
8994	{
8995	  emit_move_insn (low[0], high[1]);
8996
8997	  if (! reload_completed)
8998	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8999	  else
9000	    {
9001	      emit_move_insn (high[0], low[0]);
9002	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9003	    }
9004
9005	  if (count > 32)
9006	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9007	}
9008      else
9009	{
9010	  if (!rtx_equal_p (operands[0], operands[1]))
9011	    emit_move_insn (operands[0], operands[1]);
9012	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9013	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9014	}
9015    }
9016  else
9017    {
9018      if (!rtx_equal_p (operands[0], operands[1]))
9019	emit_move_insn (operands[0], operands[1]);
9020
9021      split_di (operands, 1, low, high);
9022
9023      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9024      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9025
9026      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9027	{
9028	  if (! no_new_pseudos)
9029	    scratch = gen_reg_rtx (SImode);
9030	  emit_move_insn (scratch, high[0]);
9031	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9032	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9033					  scratch));
9034	}
9035      else
9036	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9037    }
9038}
9039
9040void
9041ix86_split_lshrdi (operands, scratch)
9042     rtx *operands, scratch;
9043{
9044  rtx low[2], high[2];
9045  int count;
9046
9047  if (GET_CODE (operands[2]) == CONST_INT)
9048    {
9049      split_di (operands, 2, low, high);
9050      count = INTVAL (operands[2]) & 63;
9051
9052      if (count >= 32)
9053	{
9054	  emit_move_insn (low[0], high[1]);
9055	  emit_move_insn (high[0], const0_rtx);
9056
9057	  if (count > 32)
9058	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9059	}
9060      else
9061	{
9062	  if (!rtx_equal_p (operands[0], operands[1]))
9063	    emit_move_insn (operands[0], operands[1]);
9064	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9065	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9066	}
9067    }
9068  else
9069    {
9070      if (!rtx_equal_p (operands[0], operands[1]))
9071	emit_move_insn (operands[0], operands[1]);
9072
9073      split_di (operands, 1, low, high);
9074
9075      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9076      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9077
9078      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
9079      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9080	{
9081	  if (! no_new_pseudos)
9082	    scratch = force_reg (SImode, const0_rtx);
9083	  else
9084	    emit_move_insn (scratch, const0_rtx);
9085
9086	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9087					  scratch));
9088	}
9089      else
9090	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9091    }
9092}
9093
9094/* Helper function for the string operations below.  Test VARIABLE for
9095   alignment to VALUE bytes.  If it is aligned, jump to the returned label.  */
9096static rtx
9097ix86_expand_aligntest (variable, value)
9098     rtx variable;
9099     int value;
9100{
9101  rtx label = gen_label_rtx ();
9102  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9103  if (GET_MODE (variable) == DImode)
9104    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9105  else
9106    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9107  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9108			   1, label);
9109  return label;
9110}
9111
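/* Editorial usage sketch (distilled from the expanders below): callers
   pair this helper with a fixup insn, e.g.

     rtx label = ix86_expand_aligntest (destreg, 1);
     emit_insn (gen_strmovqi (destreg, srcreg));
     ix86_adjust_counter (countreg, 1);
     emit_label (label);

   The EQ branch above jumps to LABEL when (destreg & 1) == 0, so the
   one-byte fixup runs only when the destination is odd.  */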
9112/* Decrement COUNTREG by VALUE.  */
9113static void
9114ix86_adjust_counter (countreg, value)
9115     rtx countreg;
9116     HOST_WIDE_INT value;
9117{
9118  if (GET_MODE (countreg) == DImode)
9119    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9120  else
9121    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9122}
9123
9124/* Zero-extend the possibly SImode EXP to a Pmode register.  */
9125rtx
9126ix86_zero_extend_to_Pmode (exp)
9127   rtx exp;
9128{
9129  rtx r;
9130  if (GET_MODE (exp) == VOIDmode)
9131    return force_reg (Pmode, exp);
9132  if (GET_MODE (exp) == Pmode)
9133    return copy_to_mode_reg (Pmode, exp);
9134  r = gen_reg_rtx (Pmode);
9135  emit_insn (gen_zero_extendsidi2 (r, exp));
9136  return r;
9137}
9138
9139/* Expand string move (memcpy) operation.  Use i386 string operations when
9140   profitable.  ix86_expand_clrstr contains similar code.  */
9141int
9142ix86_expand_movstr (dst, src, count_exp, align_exp)
9143     rtx dst, src, count_exp, align_exp;
9144{
9145  rtx srcreg, destreg, countreg;
9146  enum machine_mode counter_mode;
9147  HOST_WIDE_INT align = 0;
9148  unsigned HOST_WIDE_INT count = 0;
9149  rtx insns;
9150
9151  start_sequence ();
9152
9153  if (GET_CODE (align_exp) == CONST_INT)
9154    align = INTVAL (align_exp);
9155
9156  /* This simple hack avoids all inlining code and simplifies code below.  */
9157  if (!TARGET_ALIGN_STRINGOPS)
9158    align = 64;
9159
9160  if (GET_CODE (count_exp) == CONST_INT)
9161    count = INTVAL (count_exp);
9162
9163  /* Figure out the proper mode for the counter.  For 32-bit targets it is
9164     always SImode; for 64-bit targets use SImode when possible, otherwise
9165     DImode.  COUNT was set to the number of bytes when known at compile time.  */
9166  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9167      || x86_64_zero_extended_value (count_exp))
9168    counter_mode = SImode;
9169  else
9170    counter_mode = DImode;
9171
9172  if (counter_mode != SImode && counter_mode != DImode)
9173    abort ();
9174
9175  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9176  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9177
9178  emit_insn (gen_cld ());
9179
9180  /* When optimizing for size, emit a simple rep ; movsb instruction for
9181     counts not divisible by 4.  */
9182
9183  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9184    {
9185      countreg = ix86_zero_extend_to_Pmode (count_exp);
9186      if (TARGET_64BIT)
9187	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9188				        destreg, srcreg, countreg));
9189      else
9190	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9191				  destreg, srcreg, countreg));
9192    }
9193
9194  /* For constant aligned (or small unaligned) copies use rep movsl
9195     followed by code copying the rest.  For PentiumPro ensure 8-byte
9196     alignment to allow rep movsl acceleration.  */
9197
9198  else if (count != 0
9199	   && (align >= 8
9200	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9201	       || optimize_size || count < (unsigned int) 64))
9202    {
9203      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9204      if (count & ~(size - 1))
9205	{
9206	  countreg = copy_to_mode_reg (counter_mode,
9207				       GEN_INT ((count >> (size == 4 ? 2 : 3))
9208						& (TARGET_64BIT ? -1 : 0x3fffffff)));
9209	  countreg = ix86_zero_extend_to_Pmode (countreg);
9210	  if (size == 4)
9211	    {
9212	      if (TARGET_64BIT)
9213		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9214					        destreg, srcreg, countreg));
9215	      else
9216		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9217					  destreg, srcreg, countreg));
9218	    }
9219	  else
9220	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9221					    destreg, srcreg, countreg));
9222	}
9223      if (size == 8 && (count & 0x04))
9224	emit_insn (gen_strmovsi (destreg, srcreg));
9225      if (count & 0x02)
9226	emit_insn (gen_strmovhi (destreg, srcreg));
9227      if (count & 0x01)
9228	emit_insn (gen_strmovqi (destreg, srcreg));
9229    }
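  /* Editorial worked example (not in the original source): on a 32-bit
     target with count = 23 and align >= 4, SIZE is 4 above, so this
     branch emits

       rep movsl  with countreg = 23 >> 2 = 5    (20 bytes)
       strmovhi                                  ( 2 bytes, count & 2)
       strmovqi                                  ( 1 byte,  count & 1)

     and all 23 bytes are copied with no residual loop.  */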
9230  /* The generic code based on the glibc implementation:
9231     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9232     allowing accelerated copying there)
9233     - copy the data using rep movsl
9234     - copy the rest.  */
9235  else
9236    {
9237      rtx countreg2;
9238      rtx label = NULL;
9239      int desired_alignment = (TARGET_PENTIUMPRO
9240			       && (count == 0 || count >= (unsigned int) 260)
9241			       ? 8 : UNITS_PER_WORD);
9242
9243      /* In case we don't know anything about the alignment, default to
9244         the library version, since it is usually equally fast and results
9245         in shorter code.  */
9246      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9247	{
9248	  end_sequence ();
9249	  return 0;
9250	}
9251
9252      if (TARGET_SINGLE_STRINGOP)
9253	emit_insn (gen_cld ());
9254
9255      countreg2 = gen_reg_rtx (Pmode);
9256      countreg = copy_to_mode_reg (counter_mode, count_exp);
9257
9258      /* We don't use loops to align the destination or to copy parts smaller
9259         than 4 bytes, because gcc is able to optimize such code better (in
9260         case the destination or the count really is aligned, gcc is often
9261         able to predict the branches) and it is also friendlier to the
9262         hardware branch predictors.
9263
9264         Using loops is beneficial for the generic case, because we can
9265         handle small counts using them.  Many CPUs (such as Athlon)
9266         have large REP prefix setup costs.
9267
9268         This is quite costly.  Maybe we can revisit this decision later or
9269         add some customizability to this code.  */
9270
9271      if (count == 0 && align < desired_alignment)
9272	{
9273	  label = gen_label_rtx ();
9274	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
9275				   LEU, 0, counter_mode, 1, label);
9276	}
9277      if (align <= 1)
9278	{
9279	  rtx label = ix86_expand_aligntest (destreg, 1);
9280	  emit_insn (gen_strmovqi (destreg, srcreg));
9281	  ix86_adjust_counter (countreg, 1);
9282	  emit_label (label);
9283	  LABEL_NUSES (label) = 1;
9284	}
9285      if (align <= 2)
9286	{
9287	  rtx label = ix86_expand_aligntest (destreg, 2);
9288	  emit_insn (gen_strmovhi (destreg, srcreg));
9289	  ix86_adjust_counter (countreg, 2);
9290	  emit_label (label);
9291	  LABEL_NUSES (label) = 1;
9292	}
9293      if (align <= 4 && desired_alignment > 4)
9294	{
9295	  rtx label = ix86_expand_aligntest (destreg, 4);
9296	  emit_insn (gen_strmovsi (destreg, srcreg));
9297	  ix86_adjust_counter (countreg, 4);
9298	  emit_label (label);
9299	  LABEL_NUSES (label) = 1;
9300	}
9301
9302      if (label && desired_alignment > 4 && !TARGET_64BIT)
9303	{
9304	  emit_label (label);
9305	  LABEL_NUSES (label) = 1;
9306	  label = NULL_RTX;
9307	}
9308      if (!TARGET_SINGLE_STRINGOP)
9309	emit_insn (gen_cld ());
9310      if (TARGET_64BIT)
9311	{
9312	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9313				  GEN_INT (3)));
9314	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9315					  destreg, srcreg, countreg2));
9316	}
9317      else
9318	{
9319	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9320	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9321				    destreg, srcreg, countreg2));
9322	}
9323
9324      if (label)
9325	{
9326	  emit_label (label);
9327	  LABEL_NUSES (label) = 1;
9328	}
9329      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9330	emit_insn (gen_strmovsi (destreg, srcreg));
9331      if ((align <= 4 || count == 0) && TARGET_64BIT)
9332	{
9333	  rtx label = ix86_expand_aligntest (countreg, 4);
9334	  emit_insn (gen_strmovsi (destreg, srcreg));
9335	  emit_label (label);
9336	  LABEL_NUSES (label) = 1;
9337	}
9338      if (align > 2 && count != 0 && (count & 2))
9339	emit_insn (gen_strmovhi (destreg, srcreg));
9340      if (align <= 2 || count == 0)
9341	{
9342	  rtx label = ix86_expand_aligntest (countreg, 2);
9343	  emit_insn (gen_strmovhi (destreg, srcreg));
9344	  emit_label (label);
9345	  LABEL_NUSES (label) = 1;
9346	}
9347      if (align > 1 && count != 0 && (count & 1))
9348	emit_insn (gen_strmovqi (destreg, srcreg));
9349      if (align <= 1 || count == 0)
9350	{
9351	  rtx label = ix86_expand_aligntest (countreg, 1);
9352	  emit_insn (gen_strmovqi (destreg, srcreg));
9353	  emit_label (label);
9354	  LABEL_NUSES (label) = 1;
9355	}
9356    }
9357
9358  insns = get_insns ();
9359  end_sequence ();
9360
9361  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9362  emit_insns (insns);
9363  return 1;
9364}
9365
9366/* Expand string clear operation (bzero).  Use i386 string operations when
9367   profitable.  ix86_expand_movstr contains similar code.  */
9368int
9369ix86_expand_clrstr (src, count_exp, align_exp)
9370     rtx src, count_exp, align_exp;
9371{
9372  rtx destreg, zeroreg, countreg;
9373  enum machine_mode counter_mode;
9374  HOST_WIDE_INT align = 0;
9375  unsigned HOST_WIDE_INT count = 0;
9376
9377  if (GET_CODE (align_exp) == CONST_INT)
9378    align = INTVAL (align_exp);
9379
9380  /* This simple hack avoids all inlining code and simplifies code below.  */
9381  if (!TARGET_ALIGN_STRINGOPS)
9382    align = 32;
9383
9384  if (GET_CODE (count_exp) == CONST_INT)
9385    count = INTVAL (count_exp);
9386  /* Figure out the proper mode for the counter.  For 32-bit targets it is
9387     always SImode; for 64-bit targets use SImode when possible, otherwise
9388     DImode.  COUNT was set to the number of bytes when known at compile time.  */
9389  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9390      || x86_64_zero_extended_value (count_exp))
9391    counter_mode = SImode;
9392  else
9393    counter_mode = DImode;
9394
9395  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9396
9397  emit_insn (gen_cld ());
9398
9399  /* When optimizing for size, emit a simple rep ; stosb instruction for
9400     counts not divisible by 4.  */
9401
9402  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9403    {
9404      countreg = ix86_zero_extend_to_Pmode (count_exp);
9405      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9406      if (TARGET_64BIT)
9407	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9408				         destreg, countreg));
9409      else
9410	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9411				   destreg, countreg));
9412    }
9413  else if (count != 0
9414	   && (align >= 8
9415	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9416	       || optimize_size || count < (unsigned int) 64))
9417    {
9418      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9419      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9420      if (count & ~(size - 1))
9421	{
9422	  countreg = copy_to_mode_reg (counter_mode,
9423				       GEN_INT ((count >> (size == 4 ? 2 : 3))
9424						& (TARGET_64BIT ? -1 : 0x3fffffff)));
9425	  countreg = ix86_zero_extend_to_Pmode (countreg);
9426	  if (size == 4)
9427	    {
9428	      if (TARGET_64BIT)
9429		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9430					         destreg, countreg));
9431	      else
9432		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9433					   destreg, countreg));
9434	    }
9435	  else
9436	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9437					     destreg, countreg));
9438	}
9439      if (size == 8 && (count & 0x04))
9440	emit_insn (gen_strsetsi (destreg,
9441				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9442      if (count & 0x02)
9443	emit_insn (gen_strsethi (destreg,
9444				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9445      if (count & 0x01)
9446	emit_insn (gen_strsetqi (destreg,
9447				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9448    }
9449  else
9450    {
9451      rtx countreg2;
9452      rtx label = NULL;
9453      /* Compute desired alignment of the string operation.  */
9454      int desired_alignment = (TARGET_PENTIUMPRO
9455			       && (count == 0 || count >= (unsigned int) 260)
9456			       ? 8 : UNITS_PER_WORD);
9457
9458      /* In case we don't know anything about the alignment, default to
9459         the library version, since it is usually equally fast and results
9460         in shorter code.  */
9461      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9462	return 0;
9463
9464      if (TARGET_SINGLE_STRINGOP)
9465	emit_insn (gen_cld ());
9466
9467      countreg2 = gen_reg_rtx (Pmode);
9468      countreg = copy_to_mode_reg (counter_mode, count_exp);
9469      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9470
9471      if (count == 0 && align < desired_alignment)
9472	{
9473	  label = gen_label_rtx ();
9474	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
9475				   LEU, 0, counter_mode, 1, label);
9476	}
9477      if (align <= 1)
9478	{
9479	  rtx label = ix86_expand_aligntest (destreg, 1);
9480	  emit_insn (gen_strsetqi (destreg,
9481				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
9482	  ix86_adjust_counter (countreg, 1);
9483	  emit_label (label);
9484	  LABEL_NUSES (label) = 1;
9485	}
9486      if (align <= 2)
9487	{
9488	  rtx label = ix86_expand_aligntest (destreg, 2);
9489	  emit_insn (gen_strsethi (destreg,
9490				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
9491	  ix86_adjust_counter (countreg, 2);
9492	  emit_label (label);
9493	  LABEL_NUSES (label) = 1;
9494	}
9495      if (align <= 4 && desired_alignment > 4)
9496	{
9497	  rtx label = ix86_expand_aligntest (destreg, 4);
9498	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9499					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9500					     : zeroreg)));
9501	  ix86_adjust_counter (countreg, 4);
9502	  emit_label (label);
9503	  LABEL_NUSES (label) = 1;
9504	}
9505
9506      if (label && desired_alignment > 4 && !TARGET_64BIT)
9507	{
9508	  emit_label (label);
9509	  LABEL_NUSES (label) = 1;
9510	  label = NULL_RTX;
9511	}
9512
9513      if (!TARGET_SINGLE_STRINGOP)
9514	emit_insn (gen_cld ());
9515      if (TARGET_64BIT)
9516	{
9517	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9518				  GEN_INT (3)));
9519	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9520					   destreg, countreg2));
9521	}
9522      else
9523	{
9524	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9525	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9526				     destreg, countreg2));
9527	}
9528      if (label)
9529	{
9530	  emit_label (label);
9531	  LABEL_NUSES (label) = 1;
9532	}
9533
9534      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9535	emit_insn (gen_strsetsi (destreg,
9536				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9537      if (TARGET_64BIT && (align <= 4 || count == 0))
9538	{
9539	  rtx label = ix86_expand_aligntest (countreg, 4);
9540	  emit_insn (gen_strsetsi (destreg,
9541				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
9542	  emit_label (label);
9543	  LABEL_NUSES (label) = 1;
9544	}
9545      if (align > 2 && count != 0 && (count & 2))
9546	emit_insn (gen_strsethi (destreg,
9547				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9548      if (align <= 2 || count == 0)
9549	{
9550	  rtx label = ix86_expand_aligntest (countreg, 2);
9551	  emit_insn (gen_strsethi (destreg,
9552				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
9553	  emit_label (label);
9554	  LABEL_NUSES (label) = 1;
9555	}
9556      if (align > 1 && count != 0 && (count & 1))
9557	emit_insn (gen_strsetqi (destreg,
9558				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9559      if (align <= 1 || count == 0)
9560	{
9561	  rtx label = ix86_expand_aligntest (countreg, 1);
9562	  emit_insn (gen_strsetqi (destreg,
9563				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
9564	  emit_label (label);
9565	  LABEL_NUSES (label) = 1;
9566	}
9567    }
9568  return 1;
9569}
9570/* Expand strlen.  */
9571int
9572ix86_expand_strlen (out, src, eoschar, align)
9573     rtx out, src, eoschar, align;
9574{
9575  rtx addr, scratch1, scratch2, scratch3, scratch4;
9576
9577  /* The generic case of the strlen expander is long.  Avoid expanding
9578     it unless TARGET_INLINE_ALL_STRINGOPS.  */
9579
9580  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9581      && !TARGET_INLINE_ALL_STRINGOPS
9582      && !optimize_size
9583      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9584    return 0;
9585
9586  addr = force_reg (Pmode, XEXP (src, 0));
9587  scratch1 = gen_reg_rtx (Pmode);
9588
9589  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9590      && !optimize_size)
9591    {
9592      /* Well, it seems that some optimizer does not combine a call like
9593         foo(strlen(bar), strlen(bar));
9594         when the move and the subtraction are done here.  It does calculate
9595         the length just once when these instructions are done inside of
9596         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
9597         often used and I use one fewer register for the lifetime of
9598         output_strlen_unroll() this is better.  */
9599
9600      emit_move_insn (out, addr);
9601
9602      ix86_expand_strlensi_unroll_1 (out, align);
9603
9604      /* strlensi_unroll_1 returns the address of the zero at the end of
9605         the string, like memchr(), so compute the length by subtracting
9606         the start address.  */
9607      if (TARGET_64BIT)
9608	emit_insn (gen_subdi3 (out, out, addr));
9609      else
9610	emit_insn (gen_subsi3 (out, out, addr));
9611    }
9612  else
9613    {
9614      scratch2 = gen_reg_rtx (Pmode);
9615      scratch3 = gen_reg_rtx (Pmode);
9616      scratch4 = force_reg (Pmode, constm1_rtx);
9617
9618      emit_move_insn (scratch3, addr);
9619      eoschar = force_reg (QImode, eoschar);
9620
9621      emit_insn (gen_cld ());
9622      if (TARGET_64BIT)
9623	{
9624	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9625					 align, scratch4, scratch3));
9626	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9627	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9628	}
9629      else
9630	{
9631	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9632				     align, scratch4, scratch3));
9633	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9634	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9635	}
9636    }
9637  return 1;
9638}
9639
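/* Editorial note on the scasb arithmetic above (assuming the usual
   repnz scasb semantics): the count register starts at -1 (scratch4)
   and is decremented once per byte scanned, terminator included,
   leaving scratch1 = -(len + 2).  The one_cmpl and add constm1_rtx
   pair then computes out = ~scratch1 - 1 = (len + 1) - 1 = len.  */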
9640/* Expand the appropriate insns for doing strlen if not just doing
9641   repnz; scasb
9642
9643   out = result, initialized with the start address
9644   align_rtx = alignment of the address.
9645   scratch = scratch register, initialized with the start address when
9646	not aligned, otherwise undefined
9647
9648   This is just the body.  It needs the initializations mentioned above
9649   and some address computation at the end.  These things are done in i386.md.  */
9650
9651static void
9652ix86_expand_strlensi_unroll_1 (out, align_rtx)
9653     rtx out, align_rtx;
9654{
9655  int align;
9656  rtx tmp;
9657  rtx align_2_label = NULL_RTX;
9658  rtx align_3_label = NULL_RTX;
9659  rtx align_4_label = gen_label_rtx ();
9660  rtx end_0_label = gen_label_rtx ();
9661  rtx mem;
9662  rtx tmpreg = gen_reg_rtx (SImode);
9663  rtx scratch = gen_reg_rtx (SImode);
9664
9665  align = 0;
9666  if (GET_CODE (align_rtx) == CONST_INT)
9667    align = INTVAL (align_rtx);
9668
9669  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
9670
9671  /* Is there a known alignment and is it less than 4?  */
9672  if (align < 4)
9673    {
9674      rtx scratch1 = gen_reg_rtx (Pmode);
9675      emit_move_insn (scratch1, out);
9676      /* Is there a known alignment and is it not 2? */
9677      if (align != 2)
9678	{
9679	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9680	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9681
9682	  /* Keep just the two lower bits.  */
9683	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9684				    NULL_RTX, 0, OPTAB_WIDEN);
9685
9686	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9687				   Pmode, 1, align_4_label);
9688	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9689				   Pmode, 1, align_2_label);
9690	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9691				   Pmode, 1, align_3_label);
9692	}
9693      else
9694        {
9695	  /* Since the alignment is 2, we have to check 0 or 2 bytes;
9696	     check whether the pointer is 4-byte aligned.  */
9697
9698	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9699				    NULL_RTX, 0, OPTAB_WIDEN);
9700
9701	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9702				   Pmode, 1, align_4_label);
9703        }
9704
9705      mem = gen_rtx_MEM (QImode, out);
9706
9707      /* Now compare the bytes.  */
9708
9709      /* Compare the first few unaligned bytes on a byte-by-byte basis.  */
9710      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9711			       QImode, 1, end_0_label);
9712
9713      /* Increment the address.  */
9714      if (TARGET_64BIT)
9715	emit_insn (gen_adddi3 (out, out, const1_rtx));
9716      else
9717	emit_insn (gen_addsi3 (out, out, const1_rtx));
9718
9719      /* Not needed with an alignment of 2.  */
9720      if (align != 2)
9721	{
9722	  emit_label (align_2_label);
9723
9724	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9725				   end_0_label);
9726
9727	  if (TARGET_64BIT)
9728	    emit_insn (gen_adddi3 (out, out, const1_rtx));
9729	  else
9730	    emit_insn (gen_addsi3 (out, out, const1_rtx));
9731
9732	  emit_label (align_3_label);
9733	}
9734
9735      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9736			       end_0_label);
9737
9738      if (TARGET_64BIT)
9739	emit_insn (gen_adddi3 (out, out, const1_rtx));
9740      else
9741	emit_insn (gen_addsi3 (out, out, const1_rtx));
9742    }
9743
9744  /* Generate a loop to check 4 bytes at a time.  It is not a good idea
9745     to align this loop; that only makes programs bigger and does not
9746     speed them up.  */
9747  emit_label (align_4_label);
9748
9749  mem = gen_rtx_MEM (SImode, out);
9750  emit_move_insn (scratch, mem);
9751  if (TARGET_64BIT)
9752    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9753  else
9754    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9755
9756  /* This formula yields a nonzero result iff one of the bytes is zero.
9757     This saves three branches inside the loop and many cycles.  */
9758
9759  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9760  emit_insn (gen_one_cmplsi2 (scratch, scratch));
9761  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9762  emit_insn (gen_andsi3 (tmpreg, tmpreg,
9763			 GEN_INT (trunc_int_for_mode
9764				  (0x80808080, SImode))));
9765  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
9766			   align_4_label);
9767
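  /* Editorial worked example (not in the original source): for the word
     scratch = 0x6261005a, which contains one zero byte, the three insns
     above compute

       tmpreg  = 0x6261005a - 0x01010101  = 0x615fff59
       scratch = ~0x6261005a              = 0x9d9effa5
       tmpreg &= scratch                  = 0x011eff01
       tmpreg &= 0x80808080               = 0x00008000

     The 0x80 bit survives exactly in the positions of the zero bytes, so
     the loop above exits iff the word contains a NUL byte.  */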
9768  if (TARGET_CMOVE)
9769    {
9770       rtx reg = gen_reg_rtx (SImode);
9771       rtx reg2 = gen_reg_rtx (Pmode);
9772       emit_move_insn (reg, tmpreg);
9773       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9774
9775       /* If zero is not in the first two bytes, move two bytes forward.  */
9776       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9777       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9778       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9779       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9780			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
9781						     reg,
9782						     tmpreg)));
9783       /* Emit lea manually to avoid clobbering of flags.  */
9784       emit_insn (gen_rtx_SET (SImode, reg2,
9785			       gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9786
9787       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9788       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9789       emit_insn (gen_rtx_SET (VOIDmode, out,
9790			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9791						     reg2,
9792						     out)));
9793
9794    }
9795  else
9796    {
9797       rtx end_2_label = gen_label_rtx ();
9798       /* Is zero in the first two bytes? */
9799
9800       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9801       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9802       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9803       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9804                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9805                            pc_rtx);
9806       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9807       JUMP_LABEL (tmp) = end_2_label;
9808
9809       /* Not in the first two.  Move two bytes forward.  */
9810       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9811       if (TARGET_64BIT)
9812	 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9813       else
9814	 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9815
9816       emit_label (end_2_label);
9817
9818    }
9819
9820  /* Avoid branch in fixing the byte.  */
9821  tmpreg = gen_lowpart (QImode, tmpreg);
9822  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9823  if (TARGET_64BIT)
9824    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9825  else
9826    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9827
9828  emit_label (end_0_label);
9829}
9830
9831/* Clear stack slot assignments remembered from previous functions.
9832   This is called from INIT_EXPANDERS once before RTL is emitted for each
9833   function.  */
9834
9835static void
9836ix86_init_machine_status (p)
9837     struct function *p;
9838{
9839  p->machine = (struct machine_function *)
9840    xcalloc (1, sizeof (struct machine_function));
9841}
9842
9843/* Mark machine specific bits of P for GC.  */
9844static void
9845ix86_mark_machine_status (p)
9846     struct function *p;
9847{
9848  struct machine_function *machine = p->machine;
9849  enum machine_mode mode;
9850  int n;
9851
9852  if (! machine)
9853    return;
9854
9855  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9856       mode = (enum machine_mode) ((int) mode + 1))
9857    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9858      ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9859}
9860
9861static void
9862ix86_free_machine_status (p)
9863     struct function *p;
9864{
9865  free (p->machine);
9866  p->machine = NULL;
9867}
9868
9869/* Return a MEM corresponding to a stack slot with mode MODE.
9870   Allocate a new slot if necessary.
9871
9872   The RTL for a function can have several slots available: N is
9873   which slot to use.  */
9874
9875rtx
9876assign_386_stack_local (mode, n)
9877     enum machine_mode mode;
9878     int n;
9879{
9880  if (n < 0 || n >= MAX_386_STACK_LOCALS)
9881    abort ();
9882
9883  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9884    ix86_stack_locals[(int) mode][n]
9885      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9886
9887  return ix86_stack_locals[(int) mode][n];
9888}
9889
9890/* Calculate the length of the memory address in the instruction
9891   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
9892
9893static int
9894memory_address_length (addr)
9895     rtx addr;
9896{
9897  struct ix86_address parts;
9898  rtx base, index, disp;
9899  int len;
9900
9901  if (GET_CODE (addr) == PRE_DEC
9902      || GET_CODE (addr) == POST_INC
9903      || GET_CODE (addr) == PRE_MODIFY
9904      || GET_CODE (addr) == POST_MODIFY)
9905    return 0;
9906
9907  if (! ix86_decompose_address (addr, &parts))
9908    abort ();
9909
9910  base = parts.base;
9911  index = parts.index;
9912  disp = parts.disp;
9913  len = 0;
9914
9915  /* Register Indirect.  */
9916  if (base && !index && !disp)
9917    {
9918      /* Special cases: ebp and esp need the two-byte modrm form.  */
9919      if (addr == stack_pointer_rtx
9920	  || addr == arg_pointer_rtx
9921	  || addr == frame_pointer_rtx
9922	  || addr == hard_frame_pointer_rtx)
9923	len = 1;
9924    }
9925
9926  /* Direct Addressing.  */
9927  else if (disp && !base && !index)
9928    len = 4;
9929
9930  else
9931    {
9932      /* Find the length of the displacement constant.  */
9933      if (disp)
9934	{
9935	  if (GET_CODE (disp) == CONST_INT
9936	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9937	    len = 1;
9938	  else
9939	    len = 4;
9940	}
9941
9942      /* An index requires the two-byte modrm form.  */
9943      if (index)
9944	len += 1;
9945    }
9946
9947  return len;
9948}
9949
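/* Editorial example (not in the original source): for the address
   8(%ebx,%esi), ix86_decompose_address yields base = %ebx,
   index = %esi, disp = 8.  The 'K' constraint accepts 8 as a signed
   8-bit immediate, so len = 1 (disp8) + 1 (SIB byte) = 2 bytes beyond
   the modrm byte itself.  */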
9950/* Compute the default value for the "length_immediate" attribute.  When
9951   SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
9952int
9953ix86_attr_length_immediate_default (insn, shortform)
9954     rtx insn;
9955     int shortform;
9956{
9957  int len = 0;
9958  int i;
9959  extract_insn_cached (insn);
9960  for (i = recog_data.n_operands - 1; i >= 0; --i)
9961    if (CONSTANT_P (recog_data.operand[i]))
9962      {
9963	if (len)
9964	  abort ();
9965	if (shortform
9966	    && GET_CODE (recog_data.operand[i]) == CONST_INT
9967	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9968	  len = 1;
9969	else
9970	  {
9971	    switch (get_attr_mode (insn))
9972	      {
9973		case MODE_QI:
9974		  len += 1;
9975		  break;
9976		case MODE_HI:
9977		  len += 2;
9978		  break;
9979		case MODE_SI:
9980		  len += 4;
9981		  break;
9982		/* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
9983		case MODE_DI:
9984		  len += 4;
9985		  break;
9986		default:
9987		  fatal_insn ("unknown insn mode", insn);
9988	      }
9989	  }
9990      }
9991  return len;
9992}
9993/* Compute the default value for the "length_address" attribute.  */
9994int
9995ix86_attr_length_address_default (insn)
9996     rtx insn;
9997{
9998  int i;
9999  extract_insn_cached (insn);
10000  for (i = recog_data.n_operands - 1; i >= 0; --i)
10001    if (GET_CODE (recog_data.operand[i]) == MEM)
10002      {
10003	return memory_address_length (XEXP (recog_data.operand[i], 0));
10005      }
10006  return 0;
10007}
10008
10009/* Return the maximum number of instructions a cpu can issue.  */
10010
10011static int
10012ix86_issue_rate ()
10013{
10014  switch (ix86_cpu)
10015    {
10016    case PROCESSOR_PENTIUM:
10017    case PROCESSOR_K6:
10018      return 2;
10019
10020    case PROCESSOR_PENTIUMPRO:
10021    case PROCESSOR_PENTIUM4:
10022    case PROCESSOR_ATHLON:
10023      return 3;
10024
10025    default:
10026      return 1;
10027    }
10028}
10029
10030/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
10031   set by DEP_INSN and nothing else set by DEP_INSN.  */
10032
10033static int
10034ix86_flags_dependant (insn, dep_insn, insn_type)
10035     rtx insn, dep_insn;
10036     enum attr_type insn_type;
10037{
10038  rtx set, set2;
10039
10040  /* Simplify the test for uninteresting insns.  */
10041  if (insn_type != TYPE_SETCC
10042      && insn_type != TYPE_ICMOV
10043      && insn_type != TYPE_FCMOV
10044      && insn_type != TYPE_IBR)
10045    return 0;
10046
10047  if ((set = single_set (dep_insn)) != 0)
10048    {
10049      set = SET_DEST (set);
10050      set2 = NULL_RTX;
10051    }
10052  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10053	   && XVECLEN (PATTERN (dep_insn), 0) == 2
10054	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10055	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10056    {
10057      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10058      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10059    }
10060  else
10061    return 0;
10062
10063  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10064    return 0;
10065
10066  /* This test is true if the dependent insn reads the flags but
10067     not any other potentially set register.  */
10068  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10069    return 0;
10070
10071  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10072    return 0;
10073
10074  return 1;
10075}
10076
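/* Editorial example (not in the original source): for the Pentium pair

     cmpl %esi, %edi	<- DEP_INSN, whose single_set destination is
			   the flags register
     je .L5		<- INSN, a TYPE_IBR reading only those flags

   this function returns 1, and the PROCESSOR_PENTIUM case of
   ix86_adjust_cost below uses that to give the compare/jump pair an
   effective cost of 0.  */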
10077/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10078   address with operands set by DEP_INSN.  */
10079
10080static int
10081ix86_agi_dependant (insn, dep_insn, insn_type)
10082     rtx insn, dep_insn;
10083     enum attr_type insn_type;
10084{
10085  rtx addr;
10086
10087  if (insn_type == TYPE_LEA
10088      && TARGET_PENTIUM)
10089    {
10090      addr = PATTERN (insn);
10091      if (GET_CODE (addr) == SET)
10092	;
10093      else if (GET_CODE (addr) == PARALLEL
10094	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10095	addr = XVECEXP (addr, 0, 0);
10096      else
10097	abort ();
10098      addr = SET_SRC (addr);
10099    }
10100  else
10101    {
10102      int i;
10103      extract_insn_cached (insn);
10104      for (i = recog_data.n_operands - 1; i >= 0; --i)
10105	if (GET_CODE (recog_data.operand[i]) == MEM)
10106	  {
10107	    addr = XEXP (recog_data.operand[i], 0);
10108	    goto found;
10109	  }
10110      return 0;
10111    found:;
10112    }
10113
10114  return modified_in_p (addr, dep_insn);
10115}
10116
10117static int
10118ix86_adjust_cost (insn, link, dep_insn, cost)
10119     rtx insn, link, dep_insn;
10120     int cost;
10121{
10122  enum attr_type insn_type, dep_insn_type;
10123  enum attr_memory memory, dep_memory;
10124  rtx set, set2;
10125  int dep_insn_code_number;
10126
10127  /* Anti and output dependencies have zero cost on all CPUs.  */
10128  if (REG_NOTE_KIND (link) != 0)
10129    return 0;
10130
10131  dep_insn_code_number = recog_memoized (dep_insn);
10132
10133  /* If we can't recognize the insns, we can't really do anything.  */
10134  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10135    return cost;
10136
10137  insn_type = get_attr_type (insn);
10138  dep_insn_type = get_attr_type (dep_insn);
10139
10140  switch (ix86_cpu)
10141    {
10142    case PROCESSOR_PENTIUM:
10143      /* Address Generation Interlock adds a cycle of latency.  */
10144      if (ix86_agi_dependant (insn, dep_insn, insn_type))
10145	cost += 1;
10146
10147      /* ??? Compares pair with jump/setcc.  */
10148      if (ix86_flags_dependant (insn, dep_insn, insn_type))
10149	cost = 0;
10150
10151      /* Floating point stores require the value to be ready one cycle earlier.  */
10152      if (insn_type == TYPE_FMOV
10153	  && get_attr_memory (insn) == MEMORY_STORE
10154	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
10155	cost += 1;
10156      break;
10157
10158    case PROCESSOR_PENTIUMPRO:
10159      memory = get_attr_memory (insn);
10160      dep_memory = get_attr_memory (dep_insn);
10161
10162      /* Since we can't represent delayed latencies of load+operation,
10163	 increase the cost here for non-imov insns.  */
10164      if (dep_insn_type != TYPE_IMOV
10165          && dep_insn_type != TYPE_FMOV
10166          && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10167	cost += 1;
10168
10169      /* INT->FP conversion is expensive.  */
10170      if (get_attr_fp_int_src (dep_insn))
10171	cost += 5;
10172
10173      /* There is one cycle extra latency between an FP op and a store.  */
10174      if (insn_type == TYPE_FMOV
10175	  && (set = single_set (dep_insn)) != NULL_RTX
10176	  && (set2 = single_set (insn)) != NULL_RTX
10177	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10178	  && GET_CODE (SET_DEST (set2)) == MEM)
10179	cost += 1;
10180
10181      /* Show the ability of the reorder buffer to hide the latency of a load
10182	 by executing it in parallel with the previous instruction when the
10183	 previous instruction is not needed to compute the address.  */
10184      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10185	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
10186 	{
10187	  /* Claim moves take one cycle, as the core can issue one load
10188	     at a time and the next load can start a cycle later.  */
10189	  if (dep_insn_type == TYPE_IMOV
10190	      || dep_insn_type == TYPE_FMOV)
10191	    cost = 1;
10192	  else if (cost > 1)
10193	    cost--;
10194	}
10195      break;
10196
10197    case PROCESSOR_K6:
10198      memory = get_attr_memory (insn);
10199      dep_memory = get_attr_memory (dep_insn);
10200      /* The esp dependency is resolved before the instruction is really
10201         finished.  */
10202      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10203	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10204	return 1;
10205
10206      /* Since we can't represent delayed latencies of load+operation,
10207	 increase the cost here for non-imov insns.  */
10208      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10209	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10210
10211      /* INT->FP conversion is expensive.  */
10212      if (get_attr_fp_int_src (dep_insn))
10213	cost += 5;
10214
10215      /* Show the ability of the reorder buffer to hide the latency of a load
10216	 by executing it in parallel with the previous instruction when the
10217	 previous instruction is not needed to compute the address.  */
10218      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10219	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
10220 	{
10221	  /* Claim moves take one cycle, as the core can issue one load
10222	     at a time and the next load can start a cycle later.  */
10223	  if (dep_insn_type == TYPE_IMOV
10224	      || dep_insn_type == TYPE_FMOV)
10225	    cost = 1;
10226	  else if (cost > 2)
10227	    cost -= 2;
10228	  else
10229	    cost = 1;
10230	}
10231      break;
10232
10233    case PROCESSOR_ATHLON:
10234      memory = get_attr_memory (insn);
10235      dep_memory = get_attr_memory (dep_insn);
10236
10237      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10238	{
10239	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10240	    cost += 2;
10241	  else
10242	    cost += 3;
10243        }
10244      /* Show the ability of the reorder buffer to hide the latency of a load
10245	 by executing it in parallel with the previous instruction when the
10246	 previous instruction is not needed to compute the address.  */
10247      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10248	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
10249 	{
10250	  /* Claim moves take one cycle, as the core can issue one load
10251	     at a time and the next load can start a cycle later.  */
10252	  if (dep_insn_type == TYPE_IMOV
10253	      || dep_insn_type == TYPE_FMOV)
10254	    cost = 0;
10255	  else if (cost >= 3)
10256	    cost -= 3;
10257	  else
10258	    cost = 0;
10259	}
10260
10261    default:
10262      break;
10263    }
10264
10265  return cost;
10266}
10267
10268static union
10269{
10270  struct ppro_sched_data
10271  {
10272    rtx decode[3];
10273    int issued_this_cycle;
10274  } ppro;
10275} ix86_sched_data;
10276
10277static int
10278ix86_safe_length (insn)
10279     rtx insn;
10280{
10281  if (recog_memoized (insn) >= 0)
10282    return get_attr_length (insn);
10283  else
10284    return 128;
10285}
10286
10287static int
10288ix86_safe_length_prefix (insn)
10289     rtx insn;
10290{
10291  if (recog_memoized (insn) >= 0)
10292    return get_attr_length (insn);
10293  else
10294    return 0;
10295}
10296
10297static enum attr_memory
10298ix86_safe_memory (insn)
10299     rtx insn;
10300{
10301  if (recog_memoized (insn) >= 0)
10302    return get_attr_memory (insn);
10303  else
10304    return MEMORY_UNKNOWN;
10305}
10306
10307static enum attr_pent_pair
10308ix86_safe_pent_pair (insn)
10309     rtx insn;
10310{
10311  if (recog_memoized (insn) >= 0)
10312    return get_attr_pent_pair (insn);
10313  else
10314    return PENT_PAIR_NP;
10315}
10316
10317static enum attr_ppro_uops
10318ix86_safe_ppro_uops (insn)
10319     rtx insn;
10320{
10321  if (recog_memoized (insn) >= 0)
10322    return get_attr_ppro_uops (insn);
10323  else
10324    return PPRO_UOPS_MANY;
10325}
10326
10327static void
10328ix86_dump_ppro_packet (dump)
10329     FILE *dump;
10330{
10331  if (ix86_sched_data.ppro.decode[0])
10332    {
10333      fprintf (dump, "PPRO packet: %d",
10334	       INSN_UID (ix86_sched_data.ppro.decode[0]));
10335      if (ix86_sched_data.ppro.decode[1])
10336	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10337      if (ix86_sched_data.ppro.decode[2])
10338	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10339      fputc ('\n', dump);
10340    }
10341}
10342
10343/* We're beginning a new block.  Initialize data structures as necessary.  */
10344
10345static void
10346ix86_sched_init (dump, sched_verbose, veclen)
10347     FILE *dump ATTRIBUTE_UNUSED;
10348     int sched_verbose ATTRIBUTE_UNUSED;
10349     int veclen ATTRIBUTE_UNUSED;
10350{
10351  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10352}
10353
10354/* Shift INSN to SLOT, and shift everything else down.  */
10355
10356static void
10357ix86_reorder_insn (insnp, slot)
10358     rtx *insnp, *slot;
10359{
10360  if (insnp != slot)
10361    {
10362      rtx insn = *insnp;
10363      do
10364	insnp[0] = insnp[1];
10365      while (++insnp != slot);
10366      *insnp = insn;
10367    }
10368}
10369
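/* Editorial example (not in the original source): with ready[] =
   {r0, r1, r2, r3} and the call ix86_reorder_insn (&ready[1], &ready[3]),
   r1 is rotated toward the end, giving {r0, r2, r3, r1}.  The scheduler
   keeps the highest-priority insn last, so "moving an insn to the head
   of the queue" in the comments below means moving it toward e_ready.  */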
10370/* Find an instruction with the given pairability that minimizes the number
10371   of cycles lost because the CPU waits for both pipelines to finish before
10372   reading the next instructions.  Also make sure that the two instructions
10373   together do not exceed 7 bytes.  */
10374
10375static rtx *
10376ix86_pent_find_pair (e_ready, ready, type, first)
10377     rtx *e_ready;
10378     rtx *ready;
10379     enum attr_pent_pair type;
10380     rtx first;
10381{
10382  int mincycles, cycles;
10383  enum attr_pent_pair tmp;
10384  enum attr_memory memory;
10385  rtx *insnp, *bestinsnp = NULL;
10386
10387  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10388    return NULL;
10389
10390  memory = ix86_safe_memory (first);
10391  cycles = result_ready_cost (first);
10392  mincycles = INT_MAX;
10393
10394  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10395    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10396	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
10397      {
10398	enum attr_memory second_memory;
10399	int secondcycles, currentcycles;
10400
10401	second_memory = ix86_safe_memory (*insnp);
10402	secondcycles = result_ready_cost (*insnp);
10403	currentcycles = abs (cycles - secondcycles);
10404
10405	if (secondcycles >= 1 && cycles >= 1)
10406	  {
10407	    /* Two read/modify/write instructions together take two
10408	       cycles longer.  */
10409	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10410	      currentcycles += 2;
10411
10412	    /* A read/modify/write instruction followed by a read/modify
10413	       instruction takes one cycle longer.  */
10414	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10415	        && tmp != PENT_PAIR_UV
10416	        && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10417	      currentcycles += 1;
10418	  }
10419	if (currentcycles < mincycles)
10420	  bestinsnp = insnp, mincycles = currentcycles;
10421      }
10422
10423  return bestinsnp;
10424}
10425
10426/* Subroutines of ix86_sched_reorder.  */
10427
10428static void
10429ix86_sched_reorder_pentium (ready, e_ready)
10430     rtx *ready;
10431     rtx *e_ready;
10432{
10433  enum attr_pent_pair pair1, pair2;
10434  rtx *insnp;
10435
10436  /* This wouldn't be necessary if the Haifa scheduler knew that static
10437     insn ordering determines which pipe an insn is issued to.  So we
10438     have to make some minor rearrangements.  */
10439
10440  pair1 = ix86_safe_pent_pair (*e_ready);
10441
10442  /* If the first insn is non-pairable, let it be.  */
10443  if (pair1 == PENT_PAIR_NP)
10444    return;
10445
10446  pair2 = PENT_PAIR_NP;
10447  insnp = 0;
10448
10449  /* If the first insn is UV or PV pairable, search for a PU
10450     insn to go with.  */
10451  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
10452    {
10453      insnp = ix86_pent_find_pair (e_ready-1, ready,
10454				   PENT_PAIR_PU, *e_ready);
10455      if (insnp)
10456	pair2 = PENT_PAIR_PU;
10457    }
10458
10459  /* If the first insn is PU or UV pairable, search for a PV
10460     insn to go with.  */
10461  if (pair2 == PENT_PAIR_NP
10462      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10463    {
10464      insnp = ix86_pent_find_pair (e_ready-1, ready,
10465				   PENT_PAIR_PV, *e_ready);
10466      if (insnp)
10467	pair2 = PENT_PAIR_PV;
10468    }
10469
10470  /* If the first insn is pairable, search for a UV
10471     insn to go with.  */
10472  if (pair2 == PENT_PAIR_NP)
10473    {
10474      insnp = ix86_pent_find_pair (e_ready-1, ready,
10475				   PENT_PAIR_UV, *e_ready);
10476      if (insnp)
10477	pair2 = PENT_PAIR_UV;
10478    }
10479
10480  if (pair2 == PENT_PAIR_NP)
10481    return;
10482
10483  /* Found something!  Decide if we need to swap the order.  */
10484  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10485      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10486	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10487	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10488    ix86_reorder_insn (insnp, e_ready);
10489  else
10490    ix86_reorder_insn (insnp, e_ready - 1);
10491}
10492
10493static void
10494ix86_sched_reorder_ppro (ready, e_ready)
10495     rtx *ready;
10496     rtx *e_ready;
10497{
10498  rtx decode[3];
10499  enum attr_ppro_uops cur_uops;
10500  int issued_this_cycle;
10501  rtx *insnp;
10502  int i;
10503
10504  /* At this point .ppro.decode contains the state of the three
10505     decoders from last "cycle".  That is, those insns that were
10506     actually independent.  But here we're scheduling for the
10507     decoder, and we may find things that are decodable in the
10508     same cycle.  */
10509
10510  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10511  issued_this_cycle = 0;
10512
10513  insnp = e_ready;
10514  cur_uops = ix86_safe_ppro_uops (*insnp);
10515
10516  /* If the decoders are empty, and we have a complex insn at the
10517     head of the priority queue, let it issue without complaint.  */
10518  if (decode[0] == NULL)
10519    {
10520      if (cur_uops == PPRO_UOPS_MANY)
10521	{
10522	  decode[0] = *insnp;
10523	  goto ppro_done;
10524	}
10525
10526      /* Otherwise, search for a 2-4 uop insn to issue.  */
10527      while (cur_uops != PPRO_UOPS_FEW)
10528	{
10529	  if (insnp == ready)
10530	    break;
10531	  cur_uops = ix86_safe_ppro_uops (*--insnp);
10532	}
10533
10534      /* If so, move it to the head of the line.  */
10535      if (cur_uops == PPRO_UOPS_FEW)
10536	ix86_reorder_insn (insnp, e_ready);
10537
10538      /* Issue the head of the queue.  */
10539      issued_this_cycle = 1;
10540      decode[0] = *e_ready--;
10541    }
10542
10543  /* Look for simple insns to fill in the other two slots.  */
10544  for (i = 1; i < 3; ++i)
10545    if (decode[i] == NULL)
10546      {
10547	if (ready >= e_ready)
10548	  goto ppro_done;
10549
10550	insnp = e_ready;
10551	cur_uops = ix86_safe_ppro_uops (*insnp);
10552	while (cur_uops != PPRO_UOPS_ONE)
10553	  {
10554	    if (insnp == ready)
10555	      break;
10556	    cur_uops = ix86_safe_ppro_uops (*--insnp);
10557	  }
10558
10559	/* Found one.  Move it to the head of the queue and issue it.  */
10560	if (cur_uops == PPRO_UOPS_ONE)
10561	  {
10562	    ix86_reorder_insn (insnp, e_ready);
10563	    decode[i] = *e_ready--;
10564	    issued_this_cycle++;
10565	    continue;
10566	  }
10567
10568	/* ??? Didn't find one.  Ideally, here we would do a lazy split
10569	   of 2-uop insns, issue one and queue the other.  */
10570      }
10571
10572 ppro_done:
10573  if (issued_this_cycle == 0)
10574    issued_this_cycle = 1;
10575  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10576}
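
/* A worked example of the slotting above (assuming the classic PPro
   4-1-1 decode template: one complex decoder plus two simple ones):
   with a ready list containing a 3-uop insn and two 1-uop insns, the
   multi-uop insn is pulled to the head for decoder 0, and the two
   single-uop insns are then slotted into decoders 1 and 2, letting
   all three decode in the same "cycle".  */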
10577
10578/* We are about to begin issuing insns for this clock cycle.
10579   Override the default sort algorithm to better slot instructions.  */
10580static int
10581ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10582     FILE *dump ATTRIBUTE_UNUSED;
10583     int sched_verbose ATTRIBUTE_UNUSED;
10584     rtx *ready;
10585     int *n_readyp;
10586     int clock_var ATTRIBUTE_UNUSED;
10587{
10588  int n_ready = *n_readyp;
10589  rtx *e_ready = ready + n_ready - 1;
10590
10591  if (n_ready < 2)
10592    goto out;
10593
10594  switch (ix86_cpu)
10595    {
10596    default:
10597      break;
10598
10599    case PROCESSOR_PENTIUM:
10600      ix86_sched_reorder_pentium (ready, e_ready);
10601      break;
10602
10603    case PROCESSOR_PENTIUMPRO:
10604      ix86_sched_reorder_ppro (ready, e_ready);
10605      break;
10606    }
10607
10608out:
10609  return ix86_issue_rate ();
10610}
10611
10612/* We are about to issue INSN.  Return the number of insns left on the
10613   ready queue that can be issued this cycle.  */
10614
10615static int
10616ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10617     FILE *dump;
10618     int sched_verbose;
10619     rtx insn;
10620     int can_issue_more;
10621{
10622  int i;
10623  switch (ix86_cpu)
10624    {
10625    default:
10626      return can_issue_more - 1;
10627
10628    case PROCESSOR_PENTIUMPRO:
10629      {
10630	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10631
10632	if (uops == PPRO_UOPS_MANY)
10633	  {
10634	    if (sched_verbose)
10635	      ix86_dump_ppro_packet (dump);
10636	    ix86_sched_data.ppro.decode[0] = insn;
10637	    ix86_sched_data.ppro.decode[1] = NULL;
10638	    ix86_sched_data.ppro.decode[2] = NULL;
10639	    if (sched_verbose)
10640	      ix86_dump_ppro_packet (dump);
10641	    ix86_sched_data.ppro.decode[0] = NULL;
10642	  }
10643	else if (uops == PPRO_UOPS_FEW)
10644	  {
10645	    if (sched_verbose)
10646	      ix86_dump_ppro_packet (dump);
10647	    ix86_sched_data.ppro.decode[0] = insn;
10648	    ix86_sched_data.ppro.decode[1] = NULL;
10649	    ix86_sched_data.ppro.decode[2] = NULL;
10650	  }
10651	else
10652	  {
10653	    for (i = 0; i < 3; ++i)
10654	      if (ix86_sched_data.ppro.decode[i] == NULL)
10655		{
10656		  ix86_sched_data.ppro.decode[i] = insn;
10657		  break;
10658		}
10659	    if (i == 3)
10660	      abort ();
10661	    if (i == 2)
10662	      {
10663	        if (sched_verbose)
10664	          ix86_dump_ppro_packet (dump);
10665		ix86_sched_data.ppro.decode[0] = NULL;
10666		ix86_sched_data.ppro.decode[1] = NULL;
10667		ix86_sched_data.ppro.decode[2] = NULL;
10668	      }
10669	  }
10670      }
10671      return --ix86_sched_data.ppro.issued_this_cycle;
10672    }
10673}
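
/* Both scheduling callbacks above reach the scheduler through the
   target hook vector; their registrations sit with the other TARGET_*
   macro definitions earlier in this file, essentially:

     #undef TARGET_SCHED_REORDER
     #define TARGET_SCHED_REORDER ix86_sched_reorder
     #undef TARGET_SCHED_VARIABLE_ISSUE
     #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue  */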
10674
10675/* Walk through INSNS and look for MEM references whose address is DSTREG or
10676   SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
10677   appropriate.  */
10678
10679void
10680ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10681     rtx insns;
10682     rtx dstref, srcref, dstreg, srcreg;
10683{
10684  rtx insn;
10685
10686  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10687    if (INSN_P (insn))
10688      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10689				 dstreg, srcreg);
10690}
10691
10692/* Subroutine of above to actually do the updating by recursively walking
10693   the rtx.  */
10694
10695static void
10696ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10697     rtx x;
10698     rtx dstref, srcref, dstreg, srcreg;
10699{
10700  enum rtx_code code = GET_CODE (x);
10701  const char *format_ptr = GET_RTX_FORMAT (code);
10702  int i, j;
10703
10704  if (code == MEM && XEXP (x, 0) == dstreg)
10705    MEM_COPY_ATTRIBUTES (x, dstref);
10706  else if (code == MEM && XEXP (x, 0) == srcreg)
10707    MEM_COPY_ATTRIBUTES (x, srcref);
10708
10709  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10710    {
10711      if (*format_ptr == 'e')
10712	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10713				   dstreg, srcreg);
10714      else if (*format_ptr == 'E')
10715	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10716	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10717				     dstreg, srcreg);
10718    }
10719}
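
/* Sketch of the intended use (assumption: called by the string-move
   expanders): after a block copy is expanded into a loop whose loads
   and stores are (mem (reg SRCREG)) and (mem (reg DSTREG)), a call
   like

     ix86_set_move_mem_attrs (get_insns (), dst_mem, src_mem,
			      dst_reg, src_reg);

   stamps the original MEMs' attributes (alias set, alignment) onto
   each new MEM so later passes keep precise aliasing information.  */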
10720
10721/* Compute the alignment given to a constant that is being placed in memory.
10722   EXP is the constant and ALIGN is the alignment that the object would
10723   ordinarily have.
10724   The value of this function is used instead of that alignment to align
10725   the object.  */
10726
10727int
10728ix86_constant_alignment (exp, align)
10729     tree exp;
10730     int align;
10731{
10732  if (TREE_CODE (exp) == REAL_CST)
10733    {
10734      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10735	return 64;
10736      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10737	return 128;
10738    }
10739  else if (TREE_CODE (exp) == STRING_CST && !TARGET_NO_ALIGN_LONG_STRINGS
10740	   && TREE_STRING_LENGTH (exp) >= 31 && align < 256)
10741    return 256;
10742
10743  return align;
10744}
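
/* Concrete effect (illustrative): a constant pool entry for

     double pi = 3.14159265358979323846;

   would ordinarily get DFmode's 32-bit alignment on ia32; the hook
   above raises it to 64 bits so the FP load cannot straddle an
   alignment boundary.  Long string constants (TREE_STRING_LENGTH of
   31 or more) are likewise bumped to a 32-byte boundary unless the
   (FreeBSD-local) TARGET_NO_ALIGN_LONG_STRINGS flag is set.  */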
10745
10746/* Compute the alignment for a static variable.
10747   TYPE is the data type, and ALIGN is the alignment that
10748   the object would ordinarily have.  The value of this function is used
10749   instead of that alignment to align the object.  */
10750
10751int
10752ix86_data_alignment (type, align)
10753     tree type;
10754     int align;
10755{
10756  if (AGGREGATE_TYPE_P (type)
10757       && TYPE_SIZE (type)
10758       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10759       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10760	   || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10761    return 256;
10762
10763  /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
10764     to a 16-byte boundary.  */
10765  if (TARGET_64BIT)
10766    {
10767      if (AGGREGATE_TYPE_P (type)
10768	   && TYPE_SIZE (type)
10769	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10770	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10771	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10772	return 128;
10773    }
10774
10775  if (TREE_CODE (type) == ARRAY_TYPE)
10776    {
10777      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10778	return 64;
10779      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10780	return 128;
10781    }
10782  else if (TREE_CODE (type) == COMPLEX_TYPE)
10783    {
10785      if (TYPE_MODE (type) == DCmode && align < 64)
10786	return 64;
10787      if (TYPE_MODE (type) == XCmode && align < 128)
10788	return 128;
10789    }
10790  else if ((TREE_CODE (type) == RECORD_TYPE
10791	    || TREE_CODE (type) == UNION_TYPE
10792	    || TREE_CODE (type) == QUAL_UNION_TYPE)
10793	   && TYPE_FIELDS (type))
10794    {
10795      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10796	return 64;
10797      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10798	return 128;
10799    }
10800  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10801	   || TREE_CODE (type) == INTEGER_TYPE)
10802    {
10803      if (TYPE_MODE (type) == DFmode && align < 64)
10804	return 64;
10805      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10806	return 128;
10807    }
10808
10809  return align;
10810}
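
/* Example (illustrative): given

     static struct { char buf[40]; } s;

   the aggregate's TYPE_SIZE is 320 bits, which trips the first test
   above, so the object is emitted with 256-bit (32-byte) alignment
   even though its type only requires byte alignment.  */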
10811
10812/* Compute the alignment for a local variable.
10813   TYPE is the data type, and ALIGN is the alignment that
10814   the object would ordinarily have.  The value of this macro is used
10815   instead of that alignment to align the object.  */
10816
10817int
10818ix86_local_alignment (type, align)
10819     tree type;
10820     int align;
10821{
10822  /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
10823     to a 16-byte boundary.  */
10824  if (TARGET_64BIT)
10825    {
10826      if (AGGREGATE_TYPE_P (type)
10827	   && TYPE_SIZE (type)
10828	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10829	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10830	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10831	return 128;
10832    }
10833  if (TREE_CODE (type) == ARRAY_TYPE)
10834    {
10835      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10836	return 64;
10837      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10838	return 128;
10839    }
10840  else if (TREE_CODE (type) == COMPLEX_TYPE)
10841    {
10842      if (TYPE_MODE (type) == DCmode && align < 64)
10843	return 64;
10844      if (TYPE_MODE (type) == XCmode && align < 128)
10845	return 128;
10846    }
10847  else if ((TREE_CODE (type) == RECORD_TYPE
10848	    || TREE_CODE (type) == UNION_TYPE
10849	    || TREE_CODE (type) == QUAL_UNION_TYPE)
10850	   && TYPE_FIELDS (type))
10851    {
10852      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10853	return 64;
10854      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10855	return 128;
10856    }
10857  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10858	   || TREE_CODE (type) == INTEGER_TYPE)
10859    {
10861      if (TYPE_MODE (type) == DFmode && align < 64)
10862	return 64;
10863      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10864	return 128;
10865    }
10866  return align;
10867}
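
/* Example (illustrative): a local

     double d;

   has DFmode with a default 32-bit alignment on ia32; the REAL_TYPE
   arm above raises it to 64 bits, keeping spills and reloads of the
   value on a natural boundary.  */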
10868
10869/* Emit RTL insns to initialize the variable parts of a trampoline.
10870   FNADDR is an RTX for the address of the function's pure code.
10871   CXT is an RTX for the static chain value for the function.  */
10872void
10873x86_initialize_trampoline (tramp, fnaddr, cxt)
10874     rtx tramp, fnaddr, cxt;
10875{
10876  if (!TARGET_64BIT)
10877    {
10878      /* Compute offset from the end of the jmp to the target function.  */
10879      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10880			       plus_constant (tramp, 10),
10881			       NULL_RTX, 1, OPTAB_DIRECT);
10882      emit_move_insn (gen_rtx_MEM (QImode, tramp),
10883		      GEN_INT (trunc_int_for_mode (0xb9, QImode)));
10884      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10885      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10886		      GEN_INT (trunc_int_for_mode (0xe9, QImode)));
10887      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10888    }
10889  else
10890    {
10891      int offset = 0;
10892      /* Try to load the address using the shorter movl instead of movabs.
10893         We may want to support movq for kernel mode, but the kernel does
10894         not use trampolines at the moment.  */
10895      if (x86_64_zero_extended_value (fnaddr))
10896	{
10897	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
10898	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10899			  GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
10900	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10901			  gen_lowpart (SImode, fnaddr));
10902	  offset += 6;
10903	}
10904      else
10905	{
10906	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10907			  GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
10908	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10909			  fnaddr);
10910	  offset += 10;
10911	}
10912      /* Load static chain using movabs to r10.  */
10913      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10914		      GEN_INT (trunc_int_for_mode (0xba49, HImode)));
10915      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10916		      cxt);
10917      offset += 10;
10918      /* Jump to r11.  */
10919      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10920		      GEN_INT (trunc_int_for_mode (0xff49, HImode)));
10921      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10922		      GEN_INT (trunc_int_for_mode (0xe3, QImode)));
10923      offset += 3;
10924      if (offset > TRAMPOLINE_SIZE)
10925	abort ();
10926    }
10927}
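
/* For reference, the byte sequences assembled above decode as follows
   (a sketch; <n> marks an n-byte immediate).  32-bit:

     b9 <cxt:4>		movl   $cxt, %ecx
     e9 <disp:4>	jmp    fnaddr		; disp relative to tramp+10

   64-bit, short form when fnaddr zero-extends, long form otherwise:

     41 bb <fn:4>	movl   $fnaddr, %r11d
     49 bb <fn:8>	movabs $fnaddr, %r11
     49 ba <cxt:8>	movabs $cxt, %r10
     49 ff e3		jmp    *%r11  */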
10928
10929#define def_builtin(MASK, NAME, TYPE, CODE)				\
10930do {									\
10931  if ((MASK) & target_flags)						\
10932    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL);	\
10933} while (0)
10934
10935struct builtin_description
10936{
10937  const unsigned int mask;
10938  const enum insn_code icode;
10939  const char *const name;
10940  const enum ix86_builtins code;
10941  const enum rtx_code comparison;
10942  const unsigned int flag;
10943};
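
/* Reading an entry (example taken from bdesc_comi below):

     { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt",
       IX86_BUILTIN_COMIGTSS, LT, 1 }

   means: defined only when -msse is in effect, expanded through the
   sse_comi pattern, compared with LT, and FLAG set to request an
   operand swap; GT is not directly encodable, so a > b is emitted
   as b < a.  */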
10944
10945static const struct builtin_description bdesc_comi[] =
10946{
10947  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10948  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10949  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10950  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10951  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10952  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10953  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10954  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10955  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10956  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10957  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10958  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
10959};
10960
10961static const struct builtin_description bdesc_2arg[] =
10962{
10963  /* SSE */
10964  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10965  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10966  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10967  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10968  { MASK_SSE, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10969  { MASK_SSE, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10970  { MASK_SSE, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10971  { MASK_SSE, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10972
10973  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10974  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10975  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10976  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10977  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10978  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10979  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10980  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10981  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10982  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10983  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10984  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10985  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10986  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10987  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10988  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10989  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10990  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10991  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10992  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10993
10994  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10995  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10996  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10997  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10998
10999  { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11000  { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11001  { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11002  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11003  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11004
11005  /* MMX */
11006  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11007  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11008  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11009  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11010  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11011  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11012
11013  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11014  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11015  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11016  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11017  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11018  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11019  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11020  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11021
11022  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11023  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11024  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11025
11026  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11027  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11028  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11029  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11030
11031  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11032  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11033
11034  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11035  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11036  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11037  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11038  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11039  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11040
11041  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11042  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11043  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11044  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11045
11046  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11047  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11048  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11049  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11050  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11051  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11052
11053  /* Special.  */
11054  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11055  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11056  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11057
11058  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11059  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11060
11061  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11062  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11063  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11064  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11065  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11066  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11067
11068  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11069  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11070  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11071  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11072  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11073  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11074
11075  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11076  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11077  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11078  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11079
11080  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11081  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
11082
11083};
11084
11085static const struct builtin_description bdesc_1arg[] =
11086{
11087  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11088  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11089
11090  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11091  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11092  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11093
11094  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11095  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11096  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11097  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
11098
11099};
11100
11101void
11102ix86_init_builtins ()
11103{
11104  if (TARGET_MMX)
11105    ix86_init_mmx_sse_builtins ();
11106}
11107
11108/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
11109   is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
11110   builtins are defined.  */
11111static void
11112ix86_init_mmx_sse_builtins ()
11113{
11114  const struct builtin_description * d;
11115  size_t i;
11116  tree endlink = void_list_node;
11117
11118  tree pchar_type_node = build_pointer_type (char_type_node);
11119  tree pfloat_type_node = build_pointer_type (float_type_node);
11120  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11121  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11122
11123  /* Comparisons.  */
11124  tree int_ftype_v4sf_v4sf
11125    = build_function_type (integer_type_node,
11126			   tree_cons (NULL_TREE, V4SF_type_node,
11127				      tree_cons (NULL_TREE,
11128						 V4SF_type_node,
11129						 endlink)));
11130  tree v4si_ftype_v4sf_v4sf
11131    = build_function_type (V4SI_type_node,
11132			   tree_cons (NULL_TREE, V4SF_type_node,
11133				      tree_cons (NULL_TREE,
11134						 V4SF_type_node,
11135						 endlink)));
11136  /* MMX/SSE/integer conversions.  */
11137  tree int_ftype_v4sf
11138    = build_function_type (integer_type_node,
11139			   tree_cons (NULL_TREE, V4SF_type_node,
11140				      endlink));
11141  tree int_ftype_v8qi
11142    = build_function_type (integer_type_node,
11143			   tree_cons (NULL_TREE, V8QI_type_node,
11144				      endlink));
11145  tree v4sf_ftype_v4sf_int
11146    = build_function_type (V4SF_type_node,
11147			   tree_cons (NULL_TREE, V4SF_type_node,
11148				      tree_cons (NULL_TREE, integer_type_node,
11149						 endlink)));
11150  tree v4sf_ftype_v4sf_v2si
11151    = build_function_type (V4SF_type_node,
11152			   tree_cons (NULL_TREE, V4SF_type_node,
11153				      tree_cons (NULL_TREE, V2SI_type_node,
11154						 endlink)));
11155  tree int_ftype_v4hi_int
11156    = build_function_type (integer_type_node,
11157			   tree_cons (NULL_TREE, V4HI_type_node,
11158				      tree_cons (NULL_TREE, integer_type_node,
11159						 endlink)));
11160  tree v4hi_ftype_v4hi_int_int
11161    = build_function_type (V4HI_type_node,
11162			   tree_cons (NULL_TREE, V4HI_type_node,
11163				      tree_cons (NULL_TREE, integer_type_node,
11164						 tree_cons (NULL_TREE,
11165							    integer_type_node,
11166							    endlink))));
11167  /* Miscellaneous.  */
11168  tree v8qi_ftype_v4hi_v4hi
11169    = build_function_type (V8QI_type_node,
11170			   tree_cons (NULL_TREE, V4HI_type_node,
11171				      tree_cons (NULL_TREE, V4HI_type_node,
11172						 endlink)));
11173  tree v4hi_ftype_v2si_v2si
11174    = build_function_type (V4HI_type_node,
11175			   tree_cons (NULL_TREE, V2SI_type_node,
11176				      tree_cons (NULL_TREE, V2SI_type_node,
11177						 endlink)));
11178  tree v4sf_ftype_v4sf_v4sf_int
11179    = build_function_type (V4SF_type_node,
11180			   tree_cons (NULL_TREE, V4SF_type_node,
11181				      tree_cons (NULL_TREE, V4SF_type_node,
11182						 tree_cons (NULL_TREE,
11183							    integer_type_node,
11184							    endlink))));
11185  tree v4hi_ftype_v8qi_v8qi
11186    = build_function_type (V4HI_type_node,
11187			   tree_cons (NULL_TREE, V8QI_type_node,
11188				      tree_cons (NULL_TREE, V8QI_type_node,
11189						 endlink)));
11190  tree v2si_ftype_v4hi_v4hi
11191    = build_function_type (V2SI_type_node,
11192			   tree_cons (NULL_TREE, V4HI_type_node,
11193				      tree_cons (NULL_TREE, V4HI_type_node,
11194						 endlink)));
11195  tree v4hi_ftype_v4hi_int
11196    = build_function_type (V4HI_type_node,
11197			   tree_cons (NULL_TREE, V4HI_type_node,
11198				      tree_cons (NULL_TREE, integer_type_node,
11199						 endlink)));
11200  tree v4hi_ftype_v4hi_di
11201    = build_function_type (V4HI_type_node,
11202			   tree_cons (NULL_TREE, V4HI_type_node,
11203				      tree_cons (NULL_TREE,
11204						 long_long_integer_type_node,
11205						 endlink)));
11206  tree v2si_ftype_v2si_di
11207    = build_function_type (V2SI_type_node,
11208			   tree_cons (NULL_TREE, V2SI_type_node,
11209				      tree_cons (NULL_TREE,
11210						 long_long_integer_type_node,
11211						 endlink)));
11212  tree void_ftype_void
11213    = build_function_type (void_type_node, endlink);
11214  tree void_ftype_unsigned
11215    = build_function_type (void_type_node,
11216			   tree_cons (NULL_TREE, unsigned_type_node,
11217				      endlink));
11218  tree unsigned_ftype_void
11219    = build_function_type (unsigned_type_node, endlink);
11220  tree di_ftype_void
11221    = build_function_type (long_long_unsigned_type_node, endlink);
11222  tree v4sf_ftype_void
11223    = build_function_type (V4SF_type_node, endlink);
11224  tree v2si_ftype_v4sf
11225    = build_function_type (V2SI_type_node,
11226			   tree_cons (NULL_TREE, V4SF_type_node,
11227				      endlink));
11228  /* Loads/stores.  */
11229  tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11230				  tree_cons (NULL_TREE, V8QI_type_node,
11231					     tree_cons (NULL_TREE,
11232							pchar_type_node,
11233							endlink)));
11234  tree void_ftype_v8qi_v8qi_pchar
11235    = build_function_type (void_type_node, maskmovq_args);
11236  tree v4sf_ftype_pfloat
11237    = build_function_type (V4SF_type_node,
11238			   tree_cons (NULL_TREE, pfloat_type_node,
11239				      endlink));
11240  /* @@@ the type is bogus */
11241  tree v4sf_ftype_v4sf_pv2si
11242    = build_function_type (V4SF_type_node,
11243			   tree_cons (NULL_TREE, V4SF_type_node,
11244				      tree_cons (NULL_TREE, pv2si_type_node,
11245						 endlink)));
11246  tree void_ftype_pv2si_v4sf
11247    = build_function_type (void_type_node,
11248			   tree_cons (NULL_TREE, pv2si_type_node,
11249				      tree_cons (NULL_TREE, V4SF_type_node,
11250						 endlink)));
11251  tree void_ftype_pfloat_v4sf
11252    = build_function_type (void_type_node,
11253			   tree_cons (NULL_TREE, pfloat_type_node,
11254				      tree_cons (NULL_TREE, V4SF_type_node,
11255						 endlink)));
11256  tree void_ftype_pdi_di
11257    = build_function_type (void_type_node,
11258			   tree_cons (NULL_TREE, pdi_type_node,
11259				      tree_cons (NULL_TREE,
11260						 long_long_unsigned_type_node,
11261						 endlink)));
11262  /* Normal vector unops.  */
11263  tree v4sf_ftype_v4sf
11264    = build_function_type (V4SF_type_node,
11265			   tree_cons (NULL_TREE, V4SF_type_node,
11266				      endlink));
11267
11268  /* Normal vector binops.  */
11269  tree v4sf_ftype_v4sf_v4sf
11270    = build_function_type (V4SF_type_node,
11271			   tree_cons (NULL_TREE, V4SF_type_node,
11272				      tree_cons (NULL_TREE, V4SF_type_node,
11273						 endlink)));
11274  tree v8qi_ftype_v8qi_v8qi
11275    = build_function_type (V8QI_type_node,
11276			   tree_cons (NULL_TREE, V8QI_type_node,
11277				      tree_cons (NULL_TREE, V8QI_type_node,
11278						 endlink)));
11279  tree v4hi_ftype_v4hi_v4hi
11280    = build_function_type (V4HI_type_node,
11281			   tree_cons (NULL_TREE, V4HI_type_node,
11282				      tree_cons (NULL_TREE, V4HI_type_node,
11283						 endlink)));
11284  tree v2si_ftype_v2si_v2si
11285    = build_function_type (V2SI_type_node,
11286			   tree_cons (NULL_TREE, V2SI_type_node,
11287				      tree_cons (NULL_TREE, V2SI_type_node,
11288						 endlink)));
11289  tree di_ftype_di_di
11290    = build_function_type (long_long_unsigned_type_node,
11291			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
11292				      tree_cons (NULL_TREE,
11293						 long_long_unsigned_type_node,
11294						 endlink)));
11295
11296  tree v2si_ftype_v2sf
11297    = build_function_type (V2SI_type_node,
11298                           tree_cons (NULL_TREE, V2SF_type_node,
11299                                      endlink));
11300  tree v2sf_ftype_v2si
11301    = build_function_type (V2SF_type_node,
11302                           tree_cons (NULL_TREE, V2SI_type_node,
11303                                      endlink));
11304  tree v2si_ftype_v2si
11305    = build_function_type (V2SI_type_node,
11306                           tree_cons (NULL_TREE, V2SI_type_node,
11307                                      endlink));
11308  tree v2sf_ftype_v2sf
11309    = build_function_type (V2SF_type_node,
11310                           tree_cons (NULL_TREE, V2SF_type_node,
11311                                      endlink));
11312  tree v2sf_ftype_v2sf_v2sf
11313    = build_function_type (V2SF_type_node,
11314                           tree_cons (NULL_TREE, V2SF_type_node,
11315                                      tree_cons (NULL_TREE,
11316                                                 V2SF_type_node,
11317                                                 endlink)));
11318  tree v2si_ftype_v2sf_v2sf
11319    = build_function_type (V2SI_type_node,
11320                           tree_cons (NULL_TREE, V2SF_type_node,
11321                                      tree_cons (NULL_TREE,
11322                                                 V2SF_type_node,
11323                                                 endlink)));
11324
11325  /* Add all builtins that are more or less simple operations on two
11326     operands.  */
11327  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11328    {
11329      /* Use one of the operands; the target can have a different mode for
11330	 mask-generating compares.  */
11331      enum machine_mode mode;
11332      tree type;
11333
11334      if (d->name == 0)
11335	continue;
11336      mode = insn_data[d->icode].operand[1].mode;
11337
11338      switch (mode)
11339	{
11340	case V4SFmode:
11341	  type = v4sf_ftype_v4sf_v4sf;
11342	  break;
11343	case V8QImode:
11344	  type = v8qi_ftype_v8qi_v8qi;
11345	  break;
11346	case V4HImode:
11347	  type = v4hi_ftype_v4hi_v4hi;
11348	  break;
11349	case V2SImode:
11350	  type = v2si_ftype_v2si_v2si;
11351	  break;
11352	case DImode:
11353	  type = di_ftype_di_di;
11354	  break;
11355
11356	default:
11357	  abort ();
11358	}
11359
11360      /* Override for comparisons.  */
11361      if (d->icode == CODE_FOR_maskcmpv4sf3
11362	  || d->icode == CODE_FOR_maskncmpv4sf3
11363	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
11364	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11365	type = v4si_ftype_v4sf_v4sf;
11366
11367      def_builtin (d->mask, d->name, type, d->code);
11368    }
11369
11370  /* Add the remaining MMX insns with somewhat more complicated types.  */
11371  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11372  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11373  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11374  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11375  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11376  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11377  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11378
11379  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11380  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11381  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11382
11383  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11384  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11385
11386  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11387  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11388
11389  /* comi/ucomi insns.  */
11390  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11391    def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11392
11393  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11394  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11395  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11396
11397  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11398  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11399  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11400  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11401  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11402  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11403
11404  def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
11405  def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
11406  def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
11407  def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
11408
11409  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11410  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11411
11412  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11413
11414  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11415  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11416  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11417  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11418  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11419  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11420
11421  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11422  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11423  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11424  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11425
11426  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11427  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11428  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11429  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11430
11431  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11432
11433  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11434
11435  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11436  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11437  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11438  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11439  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11440  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11441
11442  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11443
11444  /* Original 3DNow!  */
11445  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11446  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11447  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11448  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11449  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11450  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11451  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11452  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11453  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11454  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11455  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11456  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11457  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11458  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11459  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11460  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11461  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11462  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11463  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11464  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11465
11466  /* 3DNow! extension as used in the Athlon CPU.  */
11467  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11468  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11469  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11470  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11471  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11472  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11473
11474  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
11475}
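
/* User-level view of the registrations above (illustrative; assumes
   -msse and the mode attribute for vector types):

     typedef float v4sf __attribute__ ((mode (V4SF)));

     v4sf
     scale (v4sf x, v4sf y)
     {
       return __builtin_ia32_mulps (x, y);
     }

   which expands through bdesc_2arg and CODE_FOR_mulv4sf3 into a
   single mulps.  */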
11476
11477/* Errors in the source file can cause expand_expr to return const0_rtx
11478   where we expect a vector.  To avoid crashing, use one of the vector
11479   clear instructions.  */
11480static rtx
11481safe_vector_operand (x, mode)
11482     rtx x;
11483     enum machine_mode mode;
11484{
11485  if (x != const0_rtx)
11486    return x;
11487  x = gen_reg_rtx (mode);
11488
11489  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11490    emit_insn (gen_mmx_clrdi (mode == DImode ? x
11491			      : gen_rtx_SUBREG (DImode, x, 0)));
11492  else
11493    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
11494				: gen_rtx_SUBREG (V4SFmode, x, 0)));
11495  return x;
11496}
11497
11498/* Subroutine of ix86_expand_builtin to take care of binop insns.  */
11499
11500static rtx
11501ix86_expand_binop_builtin (icode, arglist, target)
11502     enum insn_code icode;
11503     tree arglist;
11504     rtx target;
11505{
11506  rtx pat;
11507  tree arg0 = TREE_VALUE (arglist);
11508  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11509  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11510  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11511  enum machine_mode tmode = insn_data[icode].operand[0].mode;
11512  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11513  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11514
11515  if (VECTOR_MODE_P (mode0))
11516    op0 = safe_vector_operand (op0, mode0);
11517  if (VECTOR_MODE_P (mode1))
11518    op1 = safe_vector_operand (op1, mode1);
11519
11520  if (! target
11521      || GET_MODE (target) != tmode
11522      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11523    target = gen_reg_rtx (tmode);
11524
11525  /* In case the insn wants input operands in modes different from
11526     the result, abort.  */
11527  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11528    abort ();
11529
11530  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11531    op0 = copy_to_mode_reg (mode0, op0);
11532  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11533    op1 = copy_to_mode_reg (mode1, op1);
11534
11535  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11536     yet one of the two must not be a memory.  This is normally enforced
11537     by expanders, but we didn't bother to create one here.  */
11538  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11539    op0 = copy_to_mode_reg (mode0, op0);
11540
11541  pat = GEN_FCN (icode) (target, op0, op1);
11542  if (! pat)
11543    return 0;
11544  emit_insn (pat);
11545  return target;
11546}
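
/* Expansion walk-through (illustrative): for __builtin_ia32_addps the
   table supplies ICODE = CODE_FOR_addv4sf3, so once the operands have
   been forced into registers,

     pat = GEN_FCN (CODE_FOR_addv4sf3) (target, op0, op1);

   is simply gen_addv4sf3 (target, op0, op1), emitting the V4SF add
   pattern from i386.md.  */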
11547
11548/* In type_for_mode we restrict the ability to create TImode types
11549   to hosts with a 64-bit HOST_WIDE_INT.  So we've defined the SSE logicals
11550   to have a V4SFmode signature.  Convert them in-place to TImode.  */
11551
11552static rtx
11553ix86_expand_timode_binop_builtin (icode, arglist, target)
11554     enum insn_code icode;
11555     tree arglist;
11556     rtx target;
11557{
11558  rtx pat;
11559  tree arg0 = TREE_VALUE (arglist);
11560  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11561  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11562  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11563
11564  op0 = gen_lowpart (TImode, op0);
11565  op1 = gen_lowpart (TImode, op1);
11566  target = gen_reg_rtx (TImode);
11567
11568  if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
11569    op0 = copy_to_mode_reg (TImode, op0);
11570  if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
11571    op1 = copy_to_mode_reg (TImode, op1);
11572
11573  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11574     yet one of the two must not be a memory.  This is normally enforced
11575     by expanders, but we didn't bother to create one here.  */
11576  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11577    op0 = copy_to_mode_reg (TImode, op0);
11578
11579  pat = GEN_FCN (icode) (target, op0, op1);
11580  if (! pat)
11581    return 0;
11582  emit_insn (pat);
11583
11584  return gen_lowpart (V4SFmode, target);
11585}
11586
11587/* Subroutine of ix86_expand_builtin to take care of stores.  */
11588
11589static rtx
11590ix86_expand_store_builtin (icode, arglist)
11591     enum insn_code icode;
11592     tree arglist;
11593{
11594  rtx pat;
11595  tree arg0 = TREE_VALUE (arglist);
11596  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11597  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11598  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11599  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11600  enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11601
11602  if (VECTOR_MODE_P (mode1))
11603    op1 = safe_vector_operand (op1, mode1);
11604
11605  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11606
11607  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11608    op1 = copy_to_mode_reg (mode1, op1);
11609
11610  pat = GEN_FCN (icode) (op0, op1);
11611  if (pat)
11612    emit_insn (pat);
11613  return 0;
11614}
11615
11616/* Subroutine of ix86_expand_builtin to take care of unop insns.  */
11617
11618static rtx
11619ix86_expand_unop_builtin (icode, arglist, target, do_load)
11620     enum insn_code icode;
11621     tree arglist;
11622     rtx target;
11623     int do_load;
11624{
11625  rtx pat;
11626  tree arg0 = TREE_VALUE (arglist);
11627  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11628  enum machine_mode tmode = insn_data[icode].operand[0].mode;
11629  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11630
11631  if (! target
11632      || GET_MODE (target) != tmode
11633      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11634    target = gen_reg_rtx (tmode);
11635  if (do_load)
11636    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11637  else
11638    {
11639      if (VECTOR_MODE_P (mode0))
11640	op0 = safe_vector_operand (op0, mode0);
11641
11642      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11643	op0 = copy_to_mode_reg (mode0, op0);
11644    }
11645
11646  pat = GEN_FCN (icode) (target, op0);
11647  if (! pat)
11648    return 0;
11649  emit_insn (pat);
11650  return target;
11651}
11652
11653/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11654   sqrtss, rsqrtss, rcpss.  */
11655
11656static rtx
11657ix86_expand_unop1_builtin (icode, arglist, target)
11658     enum insn_code icode;
11659     tree arglist;
11660     rtx target;
11661{
11662  rtx pat;
11663  tree arg0 = TREE_VALUE (arglist);
11664  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11665  enum machine_mode tmode = insn_data[icode].operand[0].mode;
11666  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11667
11668  if (! target
11669      || GET_MODE (target) != tmode
11670      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11671    target = gen_reg_rtx (tmode);
11672
11673  if (VECTOR_MODE_P (mode0))
11674    op0 = safe_vector_operand (op0, mode0);
11675
11676  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11677    op0 = copy_to_mode_reg (mode0, op0);
11678
11679  op1 = op0;
11680  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
11681    op1 = copy_to_mode_reg (mode0, op1);
11682
11683  pat = GEN_FCN (icode) (target, op0, op1);
11684  if (! pat)
11685    return 0;
11686  emit_insn (pat);
11687  return target;
11688}
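
/* The operand is duplicated because the vm* patterns used here merge
   a scalar result into a copy of an input vector: sqrtss and friends
   replace only element 0, and the remaining elements are taken from
   the second operand.  */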
11689
11690/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
11691
11692static rtx
11693ix86_expand_sse_compare (d, arglist, target)
11694     const struct builtin_description *d;
11695     tree arglist;
11696     rtx target;
11697{
11698  rtx pat;
11699  tree arg0 = TREE_VALUE (arglist);
11700  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11701  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11702  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11703  rtx op2;
11704  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11705  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11706  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11707  enum rtx_code comparison = d->comparison;
11708
11709  if (VECTOR_MODE_P (mode0))
11710    op0 = safe_vector_operand (op0, mode0);
11711  if (VECTOR_MODE_P (mode1))
11712    op1 = safe_vector_operand (op1, mode1);
11713
11714  /* Swap operands if we have a comparison that isn't available in
11715     hardware.  */
11716  if (d->flag)
11717    {
11718      rtx tmp = gen_reg_rtx (mode1);
11719      emit_move_insn (tmp, op1);
11720      op1 = op0;
11721      op0 = tmp;
11722    }
11723
11724  if (! target
11725      || GET_MODE (target) != tmode
11726      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
11727    target = gen_reg_rtx (tmode);
11728
11729  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11730    op0 = copy_to_mode_reg (mode0, op0);
11731  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11732    op1 = copy_to_mode_reg (mode1, op1);
11733
11734  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11735  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11736  if (! pat)
11737    return 0;
11738  emit_insn (pat);
11739  return target;
11740}
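
/* Example of the swap above (from bdesc_2arg): CMPGTPS is listed with
   comparison LT and flag 1, so __builtin_ia32_cmpgtps (a, b) becomes
   the encodable cmpltps with A and B exchanged; the resulting mask is
   identical, since a > b iff b < a.  */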
11741
11742/* Subroutine of ix86_expand_builtin to take care of comi insns.  */
11743
11744static rtx
11745ix86_expand_sse_comi (d, arglist, target)
11746     const struct builtin_description *d;
11747     tree arglist;
11748     rtx target;
11749{
11750  rtx pat;
11751  tree arg0 = TREE_VALUE (arglist);
11752  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11753  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11754  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11755  rtx op2;
11756  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11757  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11758  enum rtx_code comparison = d->comparison;
11759
11760  if (VECTOR_MODE_P (mode0))
11761    op0 = safe_vector_operand (op0, mode0);
11762  if (VECTOR_MODE_P (mode1))
11763    op1 = safe_vector_operand (op1, mode1);
11764
11765  /* Swap operands if we have a comparison that isn't available in
11766     hardware.  */
11767  if (d->flag)
11768    {
11769      rtx tmp = op1;
11770      op1 = op0;
11771      op0 = tmp;
11772    }
11773
11774  target = gen_reg_rtx (SImode);
11775  emit_move_insn (target, const0_rtx);
11776  target = gen_rtx_SUBREG (QImode, target, 0);
11777
11778  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11779    op0 = copy_to_mode_reg (mode0, op0);
11780  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11781    op1 = copy_to_mode_reg (mode1, op1);
11782
11783  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11784  pat = GEN_FCN (d->icode) (op0, op1, op2);
11785  if (! pat)
11786    return 0;
11787  emit_insn (pat);
11788  emit_insn (gen_rtx_SET (VOIDmode,
11789			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
11790			  gen_rtx_fmt_ee (comparison, QImode,
11791					  gen_rtx_REG (CCmode, FLAGS_REG),
11792					  const0_rtx)));
11793
11794  return SUBREG_REG (target);
11795}
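
/* Shape of the code emitted above (a sketch):

     comiss %xmm1, %xmm0	; or ucomiss, per d->icode
     set<cc> %al		; via the STRICT_LOW_PART set

   on top of a zeroed SImode register, so the builtin yields a clean
   0/1 int.  */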
11796
11797/* Expand an expression EXP that calls a built-in function,
11798   with result going to TARGET if that's convenient
11799   (and in mode MODE if that's convenient).
11800   SUBTARGET may be used as the target for computing one of EXP's operands.
11801   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
      icode = CODE_FOR_mmx_pextrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
      icode = CODE_FOR_mmx_pinsrw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
      icode = TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq;
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_ANDPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
					       arglist, target);
    case IX86_BUILTIN_ANDNPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
					       arglist, target);
    case IX86_BUILTIN_ORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
					       arglist, target);
    case IX86_BUILTIN_XORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
					       arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

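    /* movhps and movlps merge a 64-bit memory operand into the high or
       low half of an SSE register, so the patterns below take both the
       old register contents and the memory operand.  */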
    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
      icode = (fcode == IX86_BUILTIN_LOADHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
      icode = (fcode == IX86_BUILTIN_STOREHPS
	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

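    /* ldmxcsr and stmxcsr accept only a memory operand, so the value
       is staged through a stack slot in both directions.  */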
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
      icode = CODE_FOR_sse_shufps;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
      icode = CODE_FOR_mmx_pshufw;
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    default:
      break;
    }

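  /* Everything else is handled through the descriptor tables: two-operand
     builtins, one-operand builtins, and the COMI comparisons.  */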
  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

/* Store OPERAND to memory after reload has completed.  This means
   that we can't easily use assign_stack_local.  */
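
/* The slot lives directly on the stack (in the red zone, or below a
   push), so each call is expected to be paired with a later
   ix86_free_from_memory of the same mode.  */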
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (GET_MODE (operand),
						gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}

/* Free the stack slot allocated by ix86_force_to_memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
	 converted to a pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and
   movdf to do mem-to-mem moves through integer regs.  */
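
/* For example, the SFmode constant 1.0 may be reloaded into an x87
   class, since it is loadable with fld1, while an arbitrary FP constant
   destined for an SSE class is refused (NO_REGS below) and therefore
   stays in the constant pool.  */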
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do
   not enforce these sanity checks.  */
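
/* For instance, a DImode move between FLOAT_REGS and GENERAL_REGS must go
   through memory, whereas an SImode move between SSE_REGS and
   GENERAL_REGS does not (it can be done with movd).  */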
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	      && (mode) != SImode)
	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
	      && (mode) != SImode));
}

/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */
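
/* As a sketch of the cases below: a move between an MMX or SSE class and
   an integer class is charged ix86_cost->mmxsse_to_integer, while a pair
   of classes needing secondary memory is charged a store plus a load,
   with extra penalties for multi-register stores and FP/MMX overlap.  */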
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute the cost of the store
     followed by a load.  In order to avoid bad register allocation choices,
     we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* When copying from a general purpose register we may emit multiple
         stores followed by a single load, causing a memory size mismatch
         stall.  Count this as an arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* The flags register can hold only CCmode values, and only the flags
     register can hold them.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integers and floats in the general purpose registers.
     In the future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care with QImode values: they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode values in
   non-Q_REGS classes.  */
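
/* For example, a QImode store from a non-Q class is charged
   int_store[0] + 4 below, since registers such as %esi and %edi have
   no direct byte-store form and the value must first pass through a
   byte-addressable register.  */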
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
	{
	  case SFmode:
	    index = 0;
	    break;
	  case DFmode:
	    index = 1;
	    break;
	  case XFmode:
	  case TFmode:
	    index = 2;
	    break;
	  default:
	    return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	  case 4:
	    index = 0;
	    break;
	  case 8:
	    index = 1;
	    break;
	  case 16:
	    index = 2;
	    break;
	  default:
	    return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	  case 4:
	    index = 0;
	    break;
	  case 8:
	    index = 1;
	    break;
	  default:
	    return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
      case 1:
	if (in)
	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		  : ix86_cost->movzbl_load);
	else
	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		  : ix86_cost->int_store[0] + 4);
	break;
      case 2:
	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
      default:
	/* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
	if (mode == TFmode)
	  mode = XFmode;
	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
		* (int) GET_MODE_SIZE (mode) / 4);
    }
}

#ifdef DO_GLOBAL_CTORS_BODY
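/* Emit a constructor reference for SVR3-style startup code: the symbol's
   address is pushed in the .init section, where the DO_GLOBAL_CTORS_BODY
   sequence later pops and calls it.  */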
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

/* Order the registers for the register allocator.  */
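/* Call-clobbered registers are listed first, so that values which are
   not live across calls do not needlessly tie up call-saved registers.  */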

void
x86_order_regs_for_local_alloc ()
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of the array, as we do not allocate some
      registers at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}

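/* Output the assembly for a thunk: add DELTA to the incoming `this'
   pointer and then tail-jump to FUNCTION.  */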
void
x86_output_mi_thunk (file, delta, function)
     FILE *file;
     int delta;
     tree function;
{
  tree parm;
  rtx xops[3];

  if (ix86_regparm > 0)
    parm = TYPE_ARG_TYPES (TREE_TYPE (function));
  else
    parm = NULL_TREE;
  for (; parm; parm = TREE_CHAIN (parm))
    if (TREE_VALUE (parm) == void_type_node)
      break;

  xops[0] = GEN_INT (delta);
  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
      xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
      output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
      if (flag_pic)
	{
	  fprintf (file, "\tjmp *");
	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	  fprintf (file, "@GOTPCREL(%%rip)\n");
	}
      else
	{
	  fprintf (file, "\tjmp ");
	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	  fprintf (file, "\n");
	}
    }
  else
    {
      if (parm)
	xops[1] = gen_rtx_REG (SImode, 0);
      else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
	xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
      else
	xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
      output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);

      if (flag_pic)
	{
	  xops[0] = pic_offset_table_rtx;
	  xops[1] = gen_label_rtx ();
	  xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

	  if (ix86_regparm > 2)
	    abort ();
	  output_asm_insn ("push{l}\t%0", xops);
	  output_asm_insn ("call\t%P1", xops);
	  ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
	  output_asm_insn ("pop{l}\t%0", xops);
	  output_asm_insn
	    ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
	  xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
	  output_asm_insn
	    ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
	  asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
	  asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
	}
      else
	{
	  fprintf (file, "\tjmp ");
	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	  fprintf (file, "\n");
	}
    }
}

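/* Return the alignment to use for FIELD, given the alignment COMPUTED by
   the front end.  On 32-bit targets without -malign-double, alignment of
   double and integer fields is capped at 32 bits, matching the
   traditional i386 alignment rules.  */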
int
x86_field_alignment (field, computed)
     tree field;
     int computed;
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
		    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
