/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 97827 2002-06-04 18:06:12Z obrien $ */


#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

#warning NEED TO REVISIT "PIC_REG_USED" AND -mprofiler-epilogue SUPPORT
#if 0
#define PIC_REG_USED 					\
  (flag_pic && (current_function_uses_pic_offset_table	\
		|| current_function_uses_const_pool	\
		|| profile_flag || profile_block_flag))
#endif

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  3,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};
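/* A note on consumption: override_options (below) points the global
   `ix86_cost' at exactly one of these tables -- `size_cost' when
   optimizing for size, otherwise the table for the CPU selected by
   -mcpu= -- so the rest of the compiler reads costs only through
   `ix86_cost'.  A sketch of that selection logic, mirroring the code
   further down in this file:

       if (optimize_size)
	 ix86_cost = &size_cost;
       else
	 ix86_cost = processor_target_table[ix86_cpu].cost;  */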
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  4,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  5,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  42,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 20},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 16},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  8,					/* variable shift costs */
  8,					/* constant shift costs */
  30,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  112,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
};

const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
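
/* Each of the tuning bitmasks above is consumed by anding it with a
   one-hot mask for the current CPU (or architecture), as in the
   `x86_arch_always_fancy_math_387 & (1 << ix86_arch)' and
   `x86_3dnow_a & (1 << ix86_arch)' tests in override_options below.
   The TARGET_* convenience macros in i386.h presumably wrap the same
   idiom, along the lines of

       #define TARGET_USE_LEAVE (x86_use_leave & (1 << ix86_cpu))

   though the exact macro spelling lives in i386.h, not here.  */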

/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30
/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))

static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
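/* For instance, with the layout above REGNO_REG_CLASS (0) is AREG and
   REGNO_REG_CLASS (1) is DREG; note the ax, dx, cx, bx ordering, which
   follows gcc's register numbering rather than the hardware encoding.  */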

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
					        1 /*RDX*/, 2 /*RCX*/,
					        FIRST_REX_INT_REG /*R8 */,
					        FIRST_REX_INT_REG + 1 /*R9 */};
static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDX*/, 5, 4};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
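/* Assuming the 64-bit values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8
   from i386.h, this works out to 6 * 8 + 8 * 16 = 176 bytes: one word
   per integer argument register plus 16 bytes per SSE register.  */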

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

/* Which cpu we are scheduling for.  */
enum processor_type ix86_cpu;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;

static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
				       int, int, FILE *));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					 rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_free_machine_status PARAMS ((struct function *));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static void ix86_init_mmx_sse_builtins PARAMS ((void));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
						     tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					  rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static int ix86_save_reg PARAMS ((int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));

#ifdef DO_GLOBAL_CTORS_BODY
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class,
   except that gcc will use an SF or DFmode move instead of DImode to
   avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
   static void ix86_osf_output_function_prologue PARAMS ((FILE *,
							  HOST_WIDE_INT));
#  undef TARGET_ASM_FUNCTION_PROLOGUE
#  define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder

struct gcc_target targetm = TARGET_INITIALIZER;

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
    };
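  /* Reading one row of the table above: the K6 entry requests 32-byte
     alignment for loops and jumps (skipping at most 7 padding bytes),
     32-byte function alignment, and a branch cost of 1.  */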

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				    | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
    };
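  /* For example, `-march=pentium3' selects PROCESSOR_PENTIUMPRO for
     ix86_arch (and, absent an explicit -mcpu=, for ix86_cpu too), and
     its PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE flags make the loop below
     turn on MASK_MMX and MASK_SSE unless the user overrode them.  */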

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_cpu = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags & MASK_MMX_SET))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags & MASK_3DNOW_SET))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags & MASK_3DNOW_A_SET))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags & MASK_SSE_SET))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags & MASK_SSE2_SET))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	break;
      }
  /* Guard against indexing one past the end of the table when the
     -mcpu= value did not match any entry.  */
  if (i < pta_size && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
    x86_prefetch_sse = true;
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;
  free_machine_status = ix86_free_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
   if (TARGET_64BIT)
     ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}

void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  if (TARGET_64BIT && optimize >= 1)
    flag_omit_frame_pointer = 1;
  if (TARGET_64BIT)
    {
      flag_pcc_struct_return = 0;
      flag_asynchronous_unwind_tables = 1;
    }
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { NULL,        0, 0, false, false, false, NULL }
};
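/* Illustrative uses of the attributes above:

       int __attribute__ ((stdcall)) f (int a, int b);
       int __attribute__ ((regparm (3))) g (int a, int b, int c);

   `f' pops its own arguments on return, and `g' receives a, b and c in
   registers.  The stdcall/cdecl attributes are ignored (with a warning)
   on 64-bit targets by the handler that follows.  */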

/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle a "regparm" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning ("`%s' attribute requires an integer constant argument",
		   IDENTIFIER_POINTER (name));
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning ("argument to `%s' attribute larger than %d",
		   IDENTIFIER_POINTER (name), REGPARM_MAX);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}

#if defined (OSF_OS) || defined (TARGET_OSF1ELF)

/* Generate the assembly code for function entry.  FILE is a stdio
   stream to output the code to.  SIZE is an int: how many units of
   temporary storage to allocate.

   Refer to the array `regs_ever_live' to determine which registers to
   save; `regs_ever_live[I]' is nonzero if register number I is ever
   used in the function.  This function is responsible for knowing
   which registers should not be saved even if used.

   We override it here to allow for the new profiling code to go before
   the prologue and the old mcount code to go after the prologue (and
   after %ebx has been set up for ELF shared library support).  */

static void
ix86_osf_output_function_prologue (file, size)
     FILE *file;
     HOST_WIDE_INT size;
{
  const char *prefix = "";
  const char *const lprefix = LPREFIX;
  int labelno = current_function_profile_label_no;

#ifdef OSF_OS

  if (TARGET_UNDERSCORES)
    prefix = "_";

  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic && !HALF_PIC_P ())
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else if (HALF_PIC_P ())
	{
	  rtx symref;

	  HALF_PIC_EXTERNAL ("_mcount_ptr");
	  symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
						     "_mcount_ptr"));

	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
		   XSTR (symref, 0));
	  fprintf (file, "\tcall *(%%eax)\n");
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }

#else  /* !OSF_OS */

  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
    {
      if (!flag_pic)
	{
	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
	}

      else
	{
	  static int call_no = 0;

	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
		   lprefix, call_no++);
	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
		   lprefix, labelno);
	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
		   prefix);
	  fprintf (file, "\tcall *(%%eax)\n");
	}
    }
#endif /* !OSF_OS */

  function_prologue (file, size);
}

#endif  /* OSF_OS || TARGET_OSF1ELF */

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
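
/* Consequently, a function type carrying the non-default convention
   attribute (e.g. `stdcall' when -mrtd is off) compares as incompatible
   (0) with a plain function type, while attributes this function does
   not inspect leave the types compatible (1).  */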
1447
1448/* Value is the number of bytes of arguments automatically
1449   popped when returning from a subroutine call.
1450   FUNDECL is the declaration node of the function (as a tree),
1451   FUNTYPE is the data type of the function (as a tree),
1452   or for a library call it is an identifier node for the subroutine name.
1453   SIZE is the number of bytes of arguments passed on the stack.
1454
1455   On the 80386, the RTD insn may be used to pop them if the number
1456     of args is fixed, but if the number is variable then the caller
1457     must pop them all.  RTD can't be used for library calls now
1458     because the library is compiled with the Unix compiler.
1459   Use of RTD is a selectable option, since it is incompatible with
1460   standard Unix calling sequences.  If the option is not selected,
1461   the caller must always pop the args.
1462
1463   The attribute stdcall is equivalent to RTD on a per-module basis.  */
1464
1465int
1466ix86_return_pops_args (fundecl, funtype, size)
1467     tree fundecl;
1468     tree funtype;
1469     int size;
1470{
1471  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1472
1473  /* Cdecl functions override -mrtd, and never pop the stack.  */
1474  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1475
1476    /* Stdcall functions will pop the stack if not variable args.  */
1477    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1478      rtd = 1;
1479
1480    if (rtd
1481        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1482	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1483		== void_type_node)))
1484      return size;
1485  }
1486
1487  /* Lose any fake structure return argument if it is passed on the stack.  */
1488  if (aggregate_value_p (TREE_TYPE (funtype))
1489      && !TARGET_64BIT)
1490    {
1491      int nregs = ix86_regparm;
1492
1493      if (funtype)
1494	{
1495	  tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1496
1497	  if (attr)
1498	    nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1499	}
1500
1501      if (!nregs)
1502	return GET_MODE_SIZE (Pmode);
1503    }
1504
1505  return 0;
1506}
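
/* Illustrative example (not from the original sources): given

     int __attribute__ ((stdcall)) f (int a, int b);

   the callee returns with `ret $8', so ix86_return_pops_args yields
   SIZE == 8, while a cdecl or varargs function yields 0 and the caller
   pops the arguments itself.  */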
1507
1508/* Argument support functions.  */
1509
1510/* Return true when the register may be used to pass function parameters.  */
1511bool
1512ix86_function_arg_regno_p (regno)
1513     int regno;
1514{
1515  int i;
1516  if (!TARGET_64BIT)
1517    return (regno < REGPARM_MAX
1518	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1519  if (SSE_REGNO_P (regno) && TARGET_SSE)
1520    return true;
1521  /* RAX is used as a hidden argument to va_arg functions.  */
1522  if (!regno)
1523    return true;
1524  for (i = 0; i < REGPARM_MAX; i++)
1525    if (regno == x86_64_int_parameter_registers[i])
1526      return true;
1527  return false;
1528}
1529
1530/* Initialize a variable CUM of type CUMULATIVE_ARGS
1531   for a call to a function whose data type is FNTYPE.
1532   For a library call, FNTYPE is 0.  */
1533
1534void
1535init_cumulative_args (cum, fntype, libname)
1536     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
1537     tree fntype;		/* tree ptr for function decl */
1538     rtx libname;		/* SYMBOL_REF of library name or 0 */
1539{
1540  static CUMULATIVE_ARGS zero_cum;
1541  tree param, next_param;
1542
1543  if (TARGET_DEBUG_ARG)
1544    {
1545      fprintf (stderr, "\ninit_cumulative_args (");
1546      if (fntype)
1547	fprintf (stderr, "fntype code = %s, ret code = %s",
1548		 tree_code_name[(int) TREE_CODE (fntype)],
1549		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1550      else
1551	fprintf (stderr, "no fntype");
1552
1553      if (libname)
1554	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1555    }
1556
1557  *cum = zero_cum;
1558
1559  /* Set up the number of registers to use for passing arguments.  */
1560  cum->nregs = ix86_regparm;
1561  cum->sse_nregs = SSE_REGPARM_MAX;
1562  if (fntype && !TARGET_64BIT)
1563    {
1564      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1565
1566      if (attr)
1567	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1568    }
1569  cum->maybe_vaarg = false;
1570
1571  /* Determine if this function has variable arguments.  This is
1572     indicated by the last argument being 'void_type_node' if there
1573     are no variable arguments.  If there are variable arguments, then
1574     we won't pass anything in registers.  */
1575
1576  if (cum->nregs)
1577    {
1578      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1579	   param != 0; param = next_param)
1580	{
1581	  next_param = TREE_CHAIN (param);
1582	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1583	    {
1584	      if (!TARGET_64BIT)
1585		cum->nregs = 0;
1586	      cum->maybe_vaarg = true;
1587	    }
1588	}
1589    }
1590  if ((!fntype && !libname)
1591      || (fntype && !TYPE_ARG_TYPES (fntype)))
1592    cum->maybe_vaarg = 1;
1593
1594  if (TARGET_DEBUG_ARG)
1595    fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1596
1597  return;
1598}
1599
1600/* x86-64 register passing implementation.  See the x86-64 PS ABI for details.
1601   The goal of this code is to classify each eightbyte (8-byte chunk) of the
1602   incoming argument by register class and assign registers accordingly.  */
1603
1604/* Return the union class of CLASS1 and CLASS2.
1605   See the x86-64 PS ABI for details.  */
1606
1607static enum x86_64_reg_class
1608merge_classes (class1, class2)
1609     enum x86_64_reg_class class1, class2;
1610{
1611  /* Rule #1: If both classes are equal, this is the resulting class.  */
1612  if (class1 == class2)
1613    return class1;
1614
1615  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1616     the other class.  */
1617  if (class1 == X86_64_NO_CLASS)
1618    return class2;
1619  if (class2 == X86_64_NO_CLASS)
1620    return class1;
1621
1622  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
1623  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1624    return X86_64_MEMORY_CLASS;
1625
1626  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
1627  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1628      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1629    return X86_64_INTEGERSI_CLASS;
1630  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1631      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1632    return X86_64_INTEGER_CLASS;
1633
1634  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
1635  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1636      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1637    return X86_64_MEMORY_CLASS;
1638
1639  /* Rule #6: Otherwise class SSE is used.  */
1640  return X86_64_SSE_CLASS;
1641}
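
/* A worked example of the rules above (illustrative only): merging
   X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS (rule #4); merging X86_64_SSE_CLASS with
   X86_64_X87_CLASS yields X86_64_MEMORY_CLASS (rule #5); and merging
   X86_64_SSESF_CLASS with X86_64_SSEDF_CLASS falls through to
   X86_64_SSE_CLASS (rule #6).  */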
1642
1643/* Classify the argument of type TYPE and mode MODE.
1644   CLASSES will be filled by the register class used to pass each word
1645   of the operand.  The number of words is returned.  In case the parameter
1646   should be passed in memory, 0 is returned.  As a special case for
1647   zero-sized containers, classes[0] will be NO_CLASS and 1 is returned.
1648
1649   BIT_OFFSET is used internally for handling records; it specifies the
1650   offset in bits modulo 256 to avoid overflow cases.
1651
1652   See the x86-64 PS ABI for details.
1653*/
1654
1655static int
1656classify_argument (mode, type, classes, bit_offset)
1657     enum machine_mode mode;
1658     tree type;
1659     enum x86_64_reg_class classes[MAX_CLASSES];
1660     int bit_offset;
1661{
1662  int bytes =
1663    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1664  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1665
1666  if (type && AGGREGATE_TYPE_P (type))
1667    {
1668      int i;
1669      tree field;
1670      enum x86_64_reg_class subclasses[MAX_CLASSES];
1671
1672      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
1673      if (bytes > 16)
1674	return 0;
1675
1676      for (i = 0; i < words; i++)
1677	classes[i] = X86_64_NO_CLASS;
1678
1679      /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
1680	 signal the memory class, so handle this as a special case.  */
1681      if (!words)
1682	{
1683	  classes[0] = X86_64_NO_CLASS;
1684	  return 1;
1685	}
1686
1687      /* Classify each field of the record and merge the classes.  */
1688      if (TREE_CODE (type) == RECORD_TYPE)
1689	{
1690	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1691	    {
1692	      if (TREE_CODE (field) == FIELD_DECL)
1693		{
1694		  int num;
1695
1696		  /* Bitfields are always classified as integer.  Handle them
1697		     early, since later code would consider them to be
1698		     misaligned integers.  */
1699		  if (DECL_BIT_FIELD (field))
1700		    {
1701		      for (i = int_bit_position (field) / 8 / 8;
1702			   i < (int_bit_position (field)
1703			        + tree_low_cst (DECL_SIZE (field), 0)
1704			       	+ 63) / 8 / 8; i++)
1705			classes[i] =
1706			  merge_classes (X86_64_INTEGER_CLASS,
1707					 classes[i]);
1708		    }
1709		  else
1710		    {
1711		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1712					       TREE_TYPE (field), subclasses,
1713					       (int_bit_position (field)
1714						+ bit_offset) % 256);
1715		      if (!num)
1716			return 0;
1717		      for (i = 0; i < num; i++)
1718			{
1719			  int pos =
1720			    (int_bit_position (field) + bit_offset) / 8 / 8;
1721			  classes[i + pos] =
1722			    merge_classes (subclasses[i], classes[i + pos]);
1723			}
1724		    }
1725		}
1726	    }
1727	}
1728      /* Arrays are handled as small records.  */
1729      else if (TREE_CODE (type) == ARRAY_TYPE)
1730	{
1731	  int num;
1732	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1733				   TREE_TYPE (type), subclasses, bit_offset);
1734	  if (!num)
1735	    return 0;
1736
1737	  /* The partial classes are now full classes.  */
1738	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1739	    subclasses[0] = X86_64_SSE_CLASS;
1740	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1741	    subclasses[0] = X86_64_INTEGER_CLASS;
1742
1743	  for (i = 0; i < words; i++)
1744	    classes[i] = subclasses[i % num];
1745	}
1746      /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
1747      else if (TREE_CODE (type) == UNION_TYPE
1748	       || TREE_CODE (type) == QUAL_UNION_TYPE)
1749	{
1750	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1751	    {
1752	      if (TREE_CODE (field) == FIELD_DECL)
1753		{
1754		  int num;
1755		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1756					   TREE_TYPE (field), subclasses,
1757					   bit_offset);
1758		  if (!num)
1759		    return 0;
1760		  for (i = 0; i < num; i++)
1761		    classes[i] = merge_classes (subclasses[i], classes[i]);
1762		}
1763	    }
1764	}
1765      else
1766	abort ();
1767
1768      /* Final merger cleanup.  */
1769      for (i = 0; i < words; i++)
1770	{
1771	  /* If one class is MEMORY, everything should be passed in
1772	     memory.  */
1773	  if (classes[i] == X86_64_MEMORY_CLASS)
1774	    return 0;
1775
1776	  /* The X86_64_SSEUP_CLASS should always be preceded by
1777	     X86_64_SSE_CLASS.  */
1778	  if (classes[i] == X86_64_SSEUP_CLASS
1779	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1780	    classes[i] = X86_64_SSE_CLASS;
1781
1782	  /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
1783	  if (classes[i] == X86_64_X87UP_CLASS
1784	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1785	    classes[i] = X86_64_SSE_CLASS;
1786	}
1787      return words;
1788    }
1789
1790  /* Compute the alignment needed.  We align all types to their natural
1791     boundaries; XFmode and XCmode are aligned to 128 and 256 bits here.  */
1792  if (mode != VOIDmode && mode != BLKmode)
1793    {
1794      int mode_alignment = GET_MODE_BITSIZE (mode);
1795
1796      if (mode == XFmode)
1797	mode_alignment = 128;
1798      else if (mode == XCmode)
1799	mode_alignment = 256;
1800      /* Misaligned fields are always returned in memory.  */
1801      if (bit_offset % mode_alignment)
1802	return 0;
1803    }
1804
1805  /* Classification of atomic types.  */
1806  switch (mode)
1807    {
1808    case DImode:
1809    case SImode:
1810    case HImode:
1811    case QImode:
1812    case CSImode:
1813    case CHImode:
1814    case CQImode:
1815      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1816	classes[0] = X86_64_INTEGERSI_CLASS;
1817      else
1818	classes[0] = X86_64_INTEGER_CLASS;
1819      return 1;
1820    case CDImode:
1821    case TImode:
1822      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1823      return 2;
1824    case CTImode:
1825      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1826      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1827      return 4;
1828    case SFmode:
1829      if (!(bit_offset % 64))
1830	classes[0] = X86_64_SSESF_CLASS;
1831      else
1832	classes[0] = X86_64_SSE_CLASS;
1833      return 1;
1834    case DFmode:
1835      classes[0] = X86_64_SSEDF_CLASS;
1836      return 1;
1837    case TFmode:
1838      classes[0] = X86_64_X87_CLASS;
1839      classes[1] = X86_64_X87UP_CLASS;
1840      return 2;
1841    case TCmode:
1842      classes[0] = X86_64_X87_CLASS;
1843      classes[1] = X86_64_X87UP_CLASS;
1844      classes[2] = X86_64_X87_CLASS;
1845      classes[3] = X86_64_X87UP_CLASS;
1846      return 4;
1847    case DCmode:
1848      classes[0] = X86_64_SSEDF_CLASS;
1849      classes[1] = X86_64_SSEDF_CLASS;
1850      return 2;
1851    case SCmode:
1852      classes[0] = X86_64_SSE_CLASS;
1853      return 1;
1854    case V4SFmode:
1855    case V4SImode:
1856      classes[0] = X86_64_SSE_CLASS;
1857      classes[1] = X86_64_SSEUP_CLASS;
1858      return 2;
1859    case V2SFmode:
1860    case V2SImode:
1861    case V4HImode:
1862    case V8QImode:
1863      classes[0] = X86_64_SSE_CLASS;
1864      return 1;
1865    case BLKmode:
1866    case VOIDmode:
1867      return 0;
1868    default:
1869      abort ();
1870    }
1871}
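
/* A worked example (illustrative, not part of the implementation): for

     struct s { double d; int i; };

   the struct occupies two eightbytes; classify_argument returns 2 with
   classes[0] = X86_64_SSEDF_CLASS (the double) and
   classes[1] = X86_64_INTEGER_CLASS (the int), so the value travels in
   one SSE and one integer register.  Any aggregate larger than 16 bytes
   is classified as memory and 0 is returned.  */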
1872
1873/* Examine the argument and return the number of registers required in each
1874   class.  Return 0 iff the parameter should be passed in memory.  */
1875static int
1876examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1877     enum machine_mode mode;
1878     tree type;
1879     int *int_nregs, *sse_nregs;
1880     int in_return;
1881{
1882  enum x86_64_reg_class class[MAX_CLASSES];
1883  int n = classify_argument (mode, type, class, 0);
1884
1885  *int_nregs = 0;
1886  *sse_nregs = 0;
1887  if (!n)
1888    return 0;
1889  for (n--; n >= 0; n--)
1890    switch (class[n])
1891      {
1892      case X86_64_INTEGER_CLASS:
1893      case X86_64_INTEGERSI_CLASS:
1894	(*int_nregs)++;
1895	break;
1896      case X86_64_SSE_CLASS:
1897      case X86_64_SSESF_CLASS:
1898      case X86_64_SSEDF_CLASS:
1899	(*sse_nregs)++;
1900	break;
1901      case X86_64_NO_CLASS:
1902      case X86_64_SSEUP_CLASS:
1903	break;
1904      case X86_64_X87_CLASS:
1905      case X86_64_X87UP_CLASS:
1906	if (!in_return)
1907	  return 0;
1908	break;
1909      case X86_64_MEMORY_CLASS:
1910	abort ();
1911      }
1912  return 1;
1913}

1914/* Construct a container for the argument used by the GCC interface.  See
1915   FUNCTION_ARG for the detailed description.  */
1916static rtx
1917construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1918     enum machine_mode mode;
1919     tree type;
1920     int in_return;
1921     int nintregs, nsseregs;
1922     const int * intreg;
1923     int sse_regno;
1924{
1925  enum machine_mode tmpmode;
1926  int bytes =
1927    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1928  enum x86_64_reg_class class[MAX_CLASSES];
1929  int n;
1930  int i;
1931  int nexps = 0;
1932  int needed_sseregs, needed_intregs;
1933  rtx exp[MAX_CLASSES];
1934  rtx ret;
1935
1936  n = classify_argument (mode, type, class, 0);
1937  if (TARGET_DEBUG_ARG)
1938    {
1939      if (!n)
1940	fprintf (stderr, "Memory class\n");
1941      else
1942	{
1943	  fprintf (stderr, "Classes:");
1944	  for (i = 0; i < n; i++)
1945	    {
1946	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1947	    }
1948	   fprintf (stderr, "\n");
1949	}
1950    }
1951  if (!n)
1952    return NULL;
1953  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1954    return NULL;
1955  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1956    return NULL;
1957
1958  /* First construct simple cases.  Avoid SCmode, since we want to use
1959     a single register to pass this type.  */
1960  if (n == 1 && mode != SCmode)
1961    switch (class[0])
1962      {
1963      case X86_64_INTEGER_CLASS:
1964      case X86_64_INTEGERSI_CLASS:
1965	return gen_rtx_REG (mode, intreg[0]);
1966      case X86_64_SSE_CLASS:
1967      case X86_64_SSESF_CLASS:
1968      case X86_64_SSEDF_CLASS:
1969	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1970      case X86_64_X87_CLASS:
1971	return gen_rtx_REG (mode, FIRST_STACK_REG);
1972      case X86_64_NO_CLASS:
1973	/* Zero-sized array, struct or class.  */
1974	return NULL;
1975      default:
1976	abort ();
1977      }
1978  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1979    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1980  if (n == 2
1981      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1982    return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1983  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1984      && class[1] == X86_64_INTEGER_CLASS
1985      && (mode == CDImode || mode == TImode)
1986      && intreg[0] + 1 == intreg[1])
1987    return gen_rtx_REG (mode, intreg[0]);
1988  if (n == 4
1989      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1990      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1991    return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1992
1993  /* Otherwise figure out the entries of the PARALLEL.  */
1994  for (i = 0; i < n; i++)
1995    {
1996      switch (class[i])
1997        {
1998	  case X86_64_NO_CLASS:
1999	    break;
2000	  case X86_64_INTEGER_CLASS:
2001	  case X86_64_INTEGERSI_CLASS:
2002	    /* Merge TImodes on aligned occasions here too.  */
2003	    if (i * 8 + 8 > bytes)
2004	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2005	    else if (class[i] == X86_64_INTEGERSI_CLASS)
2006	      tmpmode = SImode;
2007	    else
2008	      tmpmode = DImode;
2009	    /* We've requested a size (such as 24 bits) that we have no mode for.  Use DImode.  */
2010	    if (tmpmode == BLKmode)
2011	      tmpmode = DImode;
2012	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2013					       gen_rtx_REG (tmpmode, *intreg),
2014					       GEN_INT (i*8));
2015	    intreg++;
2016	    break;
2017	  case X86_64_SSESF_CLASS:
2018	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2019					       gen_rtx_REG (SFmode,
2020							    SSE_REGNO (sse_regno)),
2021					       GEN_INT (i*8));
2022	    sse_regno++;
2023	    break;
2024	  case X86_64_SSEDF_CLASS:
2025	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2026					       gen_rtx_REG (DFmode,
2027							    SSE_REGNO (sse_regno)),
2028					       GEN_INT (i*8));
2029	    sse_regno++;
2030	    break;
2031	  case X86_64_SSE_CLASS:
2032	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2033	      tmpmode = TImode, i++;
2034	    else
2035	      tmpmode = DImode;
2036	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2037					       gen_rtx_REG (tmpmode,
2038							    SSE_REGNO (sse_regno)),
2039					       GEN_INT (i*8));
2040	    sse_regno++;
2041	    break;
2042	  default:
2043	    abort ();
2044	}
2045    }
2046  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2047  for (i = 0; i < nexps; i++)
2048    XVECEXP (ret, 0, i) = exp [i];
2049  return ret;
2050}
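
/* Continuing the struct { double d; int i; } example (a sketch, assuming
   it is the first argument of the call): for that type
   construct_container builds a PARALLEL of the form

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   describing a value whose first eightbyte lives in an SSE register and
   whose second lives in an integer register.  */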
2051
2052/* Update the data in CUM to advance over an argument
2053   of mode MODE and data type TYPE.
2054   (TYPE is null for libcalls where that information may not be available.)  */
2055
2056void
2057function_arg_advance (cum, mode, type, named)
2058     CUMULATIVE_ARGS *cum;	/* current arg information */
2059     enum machine_mode mode;	/* current arg mode */
2060     tree type;			/* type of the argument or 0 if lib support */
2061     int named;			/* whether or not the argument was named */
2062{
2063  int bytes =
2064    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2065  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2066
2067  if (TARGET_DEBUG_ARG)
2068    fprintf (stderr,
2069	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2070	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2071  if (TARGET_64BIT)
2072    {
2073      int int_nregs, sse_nregs;
2074      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2075	cum->words += words;
2076      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2077	{
2078	  cum->nregs -= int_nregs;
2079	  cum->sse_nregs -= sse_nregs;
2080	  cum->regno += int_nregs;
2081	  cum->sse_regno += sse_nregs;
2082	}
2083      else
2084	cum->words += words;
2085    }
2086  else
2087    {
2088      if (TARGET_SSE && mode == TImode)
2089	{
2090	  cum->sse_words += words;
2091	  cum->sse_nregs -= 1;
2092	  cum->sse_regno += 1;
2093	  if (cum->sse_nregs <= 0)
2094	    {
2095	      cum->sse_nregs = 0;
2096	      cum->sse_regno = 0;
2097	    }
2098	}
2099      else
2100	{
2101	  cum->words += words;
2102	  cum->nregs -= words;
2103	  cum->regno += words;
2104
2105	  if (cum->nregs <= 0)
2106	    {
2107	      cum->nregs = 0;
2108	      cum->regno = 0;
2109	    }
2110	}
2111    }
2112  return;
2113}
2114
2115/* Define where to put the arguments to a function.
2116   Value is zero to push the argument on the stack,
2117   or a hard register in which to store the argument.
2118
2119   MODE is the argument's machine mode.
2120   TYPE is the data type of the argument (as a tree).
2121    This is null for libcalls where that information may
2122    not be available.
2123   CUM is a variable of type CUMULATIVE_ARGS which gives info about
2124    the preceding args and about the function being called.
2125   NAMED is nonzero if this argument is a named parameter
2126    (otherwise it is an extra parameter matching an ellipsis).  */
2127
2128rtx
2129function_arg (cum, mode, type, named)
2130     CUMULATIVE_ARGS *cum;	/* current arg information */
2131     enum machine_mode mode;	/* current arg mode */
2132     tree type;			/* type of the argument or 0 if lib support */
2133     int named;			/* != 0 for normal args, == 0 for ... args */
2134{
2135  rtx ret   = NULL_RTX;
2136  int bytes =
2137    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2138  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2139
2140  /* Handle a hidden AL argument containing the number of SSE registers used
2141     by varargs x86-64 functions.  For the i386 ABI just return constm1_rtx to
2142     avoid any AL settings.  */
2143  if (mode == VOIDmode)
2144    {
2145      if (TARGET_64BIT)
2146	return GEN_INT (cum->maybe_vaarg
2147			? (cum->sse_nregs < 0
2148			   ? SSE_REGPARM_MAX
2149			   : cum->sse_regno)
2150			: -1);
2151      else
2152	return constm1_rtx;
2153    }
2154  if (TARGET_64BIT)
2155    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2156			       &x86_64_int_parameter_registers [cum->regno],
2157			       cum->sse_regno);
2158  else
2159    switch (mode)
2160      {
2161	/* For now, pass fp/complex values on the stack.  */
2162      default:
2163	break;
2164
2165      case BLKmode:
2166      case DImode:
2167      case SImode:
2168      case HImode:
2169      case QImode:
2170	if (words <= cum->nregs)
2171	  ret = gen_rtx_REG (mode, cum->regno);
2172	break;
2173      case TImode:
2174	if (cum->sse_nregs)
2175	  ret = gen_rtx_REG (mode, cum->sse_regno);
2176	break;
2177      }
2178
2179  if (TARGET_DEBUG_ARG)
2180    {
2181      fprintf (stderr,
2182	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2183	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2184
2185      if (ret)
2186	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
2187      else
2188	fprintf (stderr, ", stack");
2189
2190      fprintf (stderr, " )\n");
2191    }
2192
2193  return ret;
2194}
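
/* Note on the VOIDmode case above (x86-64 only): the returned value is
   loaded into AL before a call to a varargs function, e.g. for
   printf ("%f", 1.0) the caller sets AL to 1, the number of SSE
   registers used for the call.  */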
2195
2196/* Return the alignment boundary, in bits, of an argument with the specified
2197   mode and type.  */
2198
2199int
2200ix86_function_arg_boundary (mode, type)
2201     enum machine_mode mode;
2202     tree type;
2203{
2204  int align;
2205  if (!TARGET_64BIT)
2206    return PARM_BOUNDARY;
2207  if (type)
2208    align = TYPE_ALIGN (type);
2209  else
2210    align = GET_MODE_ALIGNMENT (mode);
2211  if (align < PARM_BOUNDARY)
2212    align = PARM_BOUNDARY;
2213  if (align > 128)
2214    align = 128;
2215  return align;
2216}
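
/* For example, on x86-64 a 32-byte-aligned type is still passed with
   only 128-bit alignment (the cap above), while a char is promoted to
   at least PARM_BOUNDARY (the word size).  */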
2217
2218/* Return true if REGNO is a possible register number for a function value.  */
2219bool
2220ix86_function_value_regno_p (regno)
2221     int regno;
2222{
2223  if (!TARGET_64BIT)
2224    {
2225      return ((regno) == 0
2226	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2227	      || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2228    }
2229  return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2230	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2231	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2232}
2233
2234/* Define how to find the value returned by a function.
2235   VALTYPE is the data type of the value (as a tree).
2236   If the precise function being called is known, FUNC is its FUNCTION_DECL;
2237   otherwise, FUNC is 0.  */
2238rtx
2239ix86_function_value (valtype)
2240     tree valtype;
2241{
2242  if (TARGET_64BIT)
2243    {
2244      rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2245				     REGPARM_MAX, SSE_REGPARM_MAX,
2246				     x86_64_int_return_registers, 0);
2247      /* For zero-sized structures, construct_container returns NULL, but we
2248         need to keep the rest of the compiler happy by returning a meaningful value.  */
2249      if (!ret)
2250	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2251      return ret;
2252    }
2253  else
2254    return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2255}
2256
2257/* Return nonzero iff type is returned in memory.  */
2258int
2259ix86_return_in_memory (type)
2260     tree type;
2261{
2262  int needed_intregs, needed_sseregs;
2263  if (TARGET_64BIT)
2264    {
2265      return !examine_argument (TYPE_MODE (type), type, 1,
2266				&needed_intregs, &needed_sseregs);
2267    }
2268  else
2269    {
2270      if (TYPE_MODE (type) == BLKmode
2271	  || (VECTOR_MODE_P (TYPE_MODE (type))
2272	      && int_size_in_bytes (type) == 8)
2273	  || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2274	      && TYPE_MODE (type) != TFmode
2275	      && !VECTOR_MODE_P (TYPE_MODE (type))))
2276	return 1;
2277      return 0;
2278    }
2279}
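
/* For example, on x86-64 a 32-byte structure is classified as memory
   (classify_argument rejects aggregates larger than 16 bytes) and is
   therefore returned via a hidden pointer, while the 16-byte
   struct { double d; int i; } above comes back in registers.  */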
2280
2281/* Define how to find the value returned by a library function
2282   assuming the value has mode MODE.  */
2283rtx
2284ix86_libcall_value (mode)
2285   enum machine_mode mode;
2286{
2287  if (TARGET_64BIT)
2288    {
2289      switch (mode)
2290	{
2291	  case SFmode:
2292	  case SCmode:
2293	  case DFmode:
2294	  case DCmode:
2295	    return gen_rtx_REG (mode, FIRST_SSE_REG);
2296	  case TFmode:
2297	  case TCmode:
2298	    return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2299	  default:
2300	    return gen_rtx_REG (mode, 0);
2301	}
2302    }
2303  else
2304   return gen_rtx_REG (mode, VALUE_REGNO (mode));
2305}
2306
2307/* Create the va_list data type.  */
2308
2309tree
2310ix86_build_va_list ()
2311{
2312  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2313
2314  /* For i386 we use a plain pointer to the argument area.  */
2315  if (!TARGET_64BIT)
2316    return build_pointer_type (char_type_node);
2317
2318  record = make_lang_type (RECORD_TYPE);
2319  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2320
2321  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2322		      unsigned_type_node);
2323  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2324		      unsigned_type_node);
2325  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2326		      ptr_type_node);
2327  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2328		      ptr_type_node);
2329
2330  DECL_FIELD_CONTEXT (f_gpr) = record;
2331  DECL_FIELD_CONTEXT (f_fpr) = record;
2332  DECL_FIELD_CONTEXT (f_ovf) = record;
2333  DECL_FIELD_CONTEXT (f_sav) = record;
2334
2335  TREE_CHAIN (record) = type_decl;
2336  TYPE_NAME (record) = type_decl;
2337  TYPE_FIELDS (record) = f_gpr;
2338  TREE_CHAIN (f_gpr) = f_fpr;
2339  TREE_CHAIN (f_fpr) = f_ovf;
2340  TREE_CHAIN (f_ovf) = f_sav;
2341
2342  layout_type (record);
2343
2344  /* The correct type is an array type of one element.  */
2345  return build_array_type (record, build_index_type (size_zero_node));
2346}
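
/* The record built above corresponds to the C-level declaration
   (illustrative; see the x86-64 PS ABI):

       typedef struct {
	 unsigned int gp_offset;
	 unsigned int fp_offset;
	 void *overflow_arg_area;
	 void *reg_save_area;
       } __va_list_tag, va_list[1];

   which is why the function returns an array type of one element.  */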
2347
2348/* Perform any actions needed for a function that is receiving a
2349   variable number of arguments.
2350
2351   CUM is as above.
2352
2353   MODE and TYPE are the mode and type of the current parameter.
2354
2355   PRETEND_SIZE is a variable that should be set to the amount of stack
2356   that must be pushed by the prolog to pretend that our caller pushed
2357   it.
2358
2359   Normally, this macro will push all remaining incoming registers on the
2360   stack and set PRETEND_SIZE to the length of the registers pushed.  */
2361
2362void
2363ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2364     CUMULATIVE_ARGS *cum;
2365     enum machine_mode mode;
2366     tree type;
2367     int *pretend_size ATTRIBUTE_UNUSED;
2368     int no_rtl;
2370{
2371  CUMULATIVE_ARGS next_cum;
2372  rtx save_area = NULL_RTX, mem;
2373  rtx label;
2374  rtx label_ref;
2375  rtx tmp_reg;
2376  rtx nsse_reg;
2377  int set;
2378  tree fntype;
2379  int stdarg_p;
2380  int i;
2381
2382  if (!TARGET_64BIT)
2383    return;
2384
2385  /* Indicate that stack space should be allocated for the varargs save area.  */
2386  ix86_save_varrargs_registers = 1;
2387
2388  fntype = TREE_TYPE (current_function_decl);
2389  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2390	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2391		  != void_type_node));
2392
2393  /* For varargs, we do not want to skip the dummy va_dcl argument.
2394     For stdargs, we do want to skip the last named argument.  */
2395  next_cum = *cum;
2396  if (stdarg_p)
2397    function_arg_advance (&next_cum, mode, type, 1);
2398
2399  if (!no_rtl)
2400    save_area = frame_pointer_rtx;
2401
2402  set = get_varargs_alias_set ();
2403
2404  for (i = next_cum.regno; i < ix86_regparm; i++)
2405    {
2406      mem = gen_rtx_MEM (Pmode,
2407			 plus_constant (save_area, i * UNITS_PER_WORD));
2408      set_mem_alias_set (mem, set);
2409      emit_move_insn (mem, gen_rtx_REG (Pmode,
2410					x86_64_int_parameter_registers[i]));
2411    }
2412
2413  if (next_cum.sse_nregs)
2414    {
2415      /* Now emit code to save the SSE registers.  The AX parameter contains
2416	 the number of SSE parameter registers used to call this function.  We
2417	 use the sse_prologue_save insn template, which produces a computed
2418	 jump across the SSE saves.  We need some preparation work to get this working.  */
2419
2420      label = gen_label_rtx ();
2421      label_ref = gen_rtx_LABEL_REF (Pmode, label);
2422
2423      /* Compute the address to jump to:
2424         label - eax*4 + nnamed_sse_arguments*4  */
2425      tmp_reg = gen_reg_rtx (Pmode);
2426      nsse_reg = gen_reg_rtx (Pmode);
2427      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2428      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2429			      gen_rtx_MULT (Pmode, nsse_reg,
2430					    GEN_INT (4))));
2431      if (next_cum.sse_regno)
2432	emit_move_insn
2433	  (nsse_reg,
2434	   gen_rtx_CONST (DImode,
2435			  gen_rtx_PLUS (DImode,
2436					label_ref,
2437					GEN_INT (next_cum.sse_regno * 4))));
2438      else
2439	emit_move_insn (nsse_reg, label_ref);
2440      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2441
2442      /* Compute the address of the memory block we save into.  We always use
2443	 a pointer pointing 127 bytes after the first byte to store; this is
2444	 needed to keep the instruction size limited to 4 bytes.  */
2445      tmp_reg = gen_reg_rtx (Pmode);
2446      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2447			      plus_constant (save_area,
2448					     8 * REGPARM_MAX + 127)));
2449      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2450      set_mem_alias_set (mem, set);
2451      set_mem_align (mem, BITS_PER_WORD);
2452
2453      /* And finally do the dirty job!  */
2454      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2455					GEN_INT (next_cum.sse_regno), label));
2456    }
2457
2458}
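
/* Illustrative layout of the register save area built above (assuming
   REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8 as on x86-64):

     save_area +   0 ... +  47   six 8-byte slots: rdi rsi rdx rcx r8 r9
     save_area +  48 ... + 175   eight 16-byte slots: xmm0 ... xmm7

   ix86_va_start later seeds gp_offset and fp_offset to index into this
   block.  */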
2459
2460/* Implement va_start.  */
2461
2462void
2463ix86_va_start (stdarg_p, valist, nextarg)
2464     int stdarg_p;
2465     tree valist;
2466     rtx nextarg;
2467{
2468  HOST_WIDE_INT words, n_gpr, n_fpr;
2469  tree f_gpr, f_fpr, f_ovf, f_sav;
2470  tree gpr, fpr, ovf, sav, t;
2471
2472  /* Only the 64-bit target needs something special.  */
2473  if (!TARGET_64BIT)
2474    {
2475      std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2476      return;
2477    }
2478
2479  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2480  f_fpr = TREE_CHAIN (f_gpr);
2481  f_ovf = TREE_CHAIN (f_fpr);
2482  f_sav = TREE_CHAIN (f_ovf);
2483
2484  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2485  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2486  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2487  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2488  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2489
2490  /* Count number of gp and fp argument registers used.  */
2491  words = current_function_args_info.words;
2492  n_gpr = current_function_args_info.regno;
2493  n_fpr = current_function_args_info.sse_regno;
2494
2495  if (TARGET_DEBUG_ARG)
2496    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2497	     (int) words, (int) n_gpr, (int) n_fpr);
2498
2499  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2500	     build_int_2 (n_gpr * 8, 0));
2501  TREE_SIDE_EFFECTS (t) = 1;
2502  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2503
2504  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2505	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2506  TREE_SIDE_EFFECTS (t) = 1;
2507  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2508
2509  /* Find the overflow area.  */
2510  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2511  if (words != 0)
2512    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2513	       build_int_2 (words * UNITS_PER_WORD, 0));
2514  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2515  TREE_SIDE_EFFECTS (t) = 1;
2516  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2517
2518  /* Find the register save area.
2519     The prologue of the function saves it right above the stack frame.  */
2520  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2521  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2522  TREE_SIDE_EFFECTS (t) = 1;
2523  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2524  cfun->preferred_stack_boundary = 128;
2525}
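
/* Illustrative effect (assuming REGPARM_MAX == 6, so the integer save
   area is 48 bytes): after the prologue of

     void f (int a, double b, ...)

   va_start sets gp_offset = 1*8 = 8 and fp_offset = 48 + 1*16 = 64,
   pointing past the named arguments within the register save area.  */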
2526
2527/* Implement va_arg.  */
2528rtx
2529ix86_va_arg (valist, type)
2530     tree valist, type;
2531{
2532  static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2533  tree f_gpr, f_fpr, f_ovf, f_sav;
2534  tree gpr, fpr, ovf, sav, t;
2535  int size, rsize;
2536  rtx lab_false, lab_over = NULL_RTX;
2537  rtx addr_rtx, r;
2538  rtx container;
2539
2540  /* Only the 64-bit target needs something special.  */
2541  if (!TARGET_64BIT)
2542    {
2543      return std_expand_builtin_va_arg (valist, type);
2544    }
2545
2546  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2547  f_fpr = TREE_CHAIN (f_gpr);
2548  f_ovf = TREE_CHAIN (f_fpr);
2549  f_sav = TREE_CHAIN (f_ovf);
2550
2551  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2552  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2553  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2554  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2555  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2556
2557  size = int_size_in_bytes (type);
2558  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2559
2560  container = construct_container (TYPE_MODE (type), type, 0,
2561				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2562  /* Pull the value out of the saved registers...  */
2565
2566  addr_rtx = gen_reg_rtx (Pmode);
2567
2568  if (container)
2569    {
2570      rtx int_addr_rtx, sse_addr_rtx;
2571      int needed_intregs, needed_sseregs;
2572      int need_temp;
2573
2574      lab_over = gen_label_rtx ();
2575      lab_false = gen_label_rtx ();
2576
2577      examine_argument (TYPE_MODE (type), type, 0,
2578		        &needed_intregs, &needed_sseregs);
2579
2580
2581      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2582		   || TYPE_ALIGN (type) > 128);
2583
2584      /* In case we are passing a structure, verify that it is a consecutive
2585	 block on the register save area.  If not, we need to do moves.  */
2586      if (!need_temp && !REG_P (container))
2587	{
2588	  /* Verify that all registers are strictly consecutive.  */
2589	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2590	    {
2591	      int i;
2592
2593	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2594		{
2595		  rtx slot = XVECEXP (container, 0, i);
2596		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2597		      || INTVAL (XEXP (slot, 1)) != i * 16)
2598		    need_temp = 1;
2599		}
2600	    }
2601	  else
2602	    {
2603	      int i;
2604
2605	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2606		{
2607		  rtx slot = XVECEXP (container, 0, i);
2608		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2609		      || INTVAL (XEXP (slot, 1)) != i * 8)
2610		    need_temp = 1;
2611		}
2612	    }
2613	}
2614      if (!need_temp)
2615	{
2616	  int_addr_rtx = addr_rtx;
2617	  sse_addr_rtx = addr_rtx;
2618	}
2619      else
2620	{
2621	  int_addr_rtx = gen_reg_rtx (Pmode);
2622	  sse_addr_rtx = gen_reg_rtx (Pmode);
2623	}
2624      /* First ensure that we fit completely in registers.  */
2625      if (needed_intregs)
2626	{
2627	  emit_cmp_and_jump_insns (expand_expr
2628				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2629				   GEN_INT ((REGPARM_MAX - needed_intregs +
2630					     1) * 8), GE, const1_rtx, SImode,
2631				   1, lab_false);
2632	}
2633      if (needed_sseregs)
2634	{
2635	  emit_cmp_and_jump_insns (expand_expr
2636				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2637				   GEN_INT ((SSE_REGPARM_MAX -
2638					     needed_sseregs + 1) * 16 +
2639					    REGPARM_MAX * 8), GE, const1_rtx,
2640				   SImode, 1, lab_false);
2641	}
2642
2643      /* Compute index to start of area used for integer regs.  */
2644      if (needed_intregs)
2645	{
2646	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2647	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2648	  if (r != int_addr_rtx)
2649	    emit_move_insn (int_addr_rtx, r);
2650	}
2651      if (needed_sseregs)
2652	{
2653	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2654	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2655	  if (r != sse_addr_rtx)
2656	    emit_move_insn (sse_addr_rtx, r);
2657	}
2658      if (need_temp)
2659	{
2660	  int i;
2661	  rtx mem;
2662
2663	  /* Never use the memory itself, as it has the alias set.  */
2664	  addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2665	  mem = gen_rtx_MEM (BLKmode, addr_rtx);
2666	  set_mem_alias_set (mem, get_varargs_alias_set ());
2667	  set_mem_align (mem, BITS_PER_UNIT);
2668
2669	  for (i = 0; i < XVECLEN (container, 0); i++)
2670	    {
2671	      rtx slot = XVECEXP (container, 0, i);
2672	      rtx reg = XEXP (slot, 0);
2673	      enum machine_mode mode = GET_MODE (reg);
2674	      rtx src_addr;
2675	      rtx src_mem;
2676	      int src_offset;
2677	      rtx dest_mem;
2678
2679	      if (SSE_REGNO_P (REGNO (reg)))
2680		{
2681		  src_addr = sse_addr_rtx;
2682		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2683		}
2684	      else
2685		{
2686		  src_addr = int_addr_rtx;
2687		  src_offset = REGNO (reg) * 8;
2688		}
2689	      src_mem = gen_rtx_MEM (mode, src_addr);
2690	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
2691	      src_mem = adjust_address (src_mem, mode, src_offset);
2692	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2693	      emit_move_insn (dest_mem, src_mem);
2694	    }
2695	}
2696
2697      if (needed_intregs)
2698	{
2699	  t =
2700	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2701		   build_int_2 (needed_intregs * 8, 0));
2702	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2703	  TREE_SIDE_EFFECTS (t) = 1;
2704	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2705	}
2706      if (needed_sseregs)
2707	{
2708	  t =
2709	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2710		   build_int_2 (needed_sseregs * 16, 0));
2711	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2712	  TREE_SIDE_EFFECTS (t) = 1;
2713	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2714	}
2715
2716      emit_jump_insn (gen_jump (lab_over));
2717      emit_barrier ();
2718      emit_label (lab_false);
2719    }
2720
2721  /* ... otherwise out of the overflow area.  */
2722
2723  /* Take care of on-stack alignment if needed.  */
2724  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2725    t = ovf;
2726  else
2727    {
2728      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2729      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2730      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2731    }
2732  t = save_expr (t);
2733
2734  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2735  if (r != addr_rtx)
2736    emit_move_insn (addr_rtx, r);
2737
2738  t =
2739    build (PLUS_EXPR, TREE_TYPE (t), t,
2740	   build_int_2 (rsize * UNITS_PER_WORD, 0));
2741  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2742  TREE_SIDE_EFFECTS (t) = 1;
2743  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2744
2745  if (container)
2746    emit_label (lab_over);
2747
2748  return addr_rtx;
2749}
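
/* A sketch of the expansion above for va_arg (ap, double), assuming
   REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8 (pseudo-C, not literal
   output):

     if (ap->fp_offset >= 6*8 + 8*16)      /+ register area exhausted +/
       goto overflow;
     addr = ap->reg_save_area + ap->fp_offset;
     ap->fp_offset += 16;
     goto done;
   overflow:
     addr = ap->overflow_arg_area;         /+ already 8-byte aligned +/
     ap->overflow_arg_area += 8;
   done:
     ... fetch the double from addr ...

   (`/+ +/' stands in for nested comment markers).  */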
2750
2751/* Return nonzero if OP is a general operand representable on x86_64.  */
2752
2753int
2754x86_64_general_operand (op, mode)
2755     rtx op;
2756     enum machine_mode mode;
2757{
2758  if (!TARGET_64BIT)
2759    return general_operand (op, mode);
2760  if (nonimmediate_operand (op, mode))
2761    return 1;
2762  return x86_64_sign_extended_value (op);
2763}
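
/* For example (illustrative): constants such as -1 or 0x7fffffff are
   representable as sign-extended 32-bit immediates and are accepted
   here, while 0x100000000 is not and must first be loaded into a
   register, e.g. with movabs.  */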
2764
2765/* Return nonzero if OP is a general operand representable on x86_64
2766   as either a sign-extended or zero-extended constant.  */
2767
2768int
2769x86_64_szext_general_operand (op, mode)
2770     rtx op;
2771     enum machine_mode mode;
2772{
2773  if (!TARGET_64BIT)
2774    return general_operand (op, mode);
2775  if (nonimmediate_operand (op, mode))
2776    return 1;
2777  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2778}
2779
2780/* Return nonzero if OP is a nonmemory operand representable on x86_64.  */
2781
2782int
2783x86_64_nonmemory_operand (op, mode)
2784     rtx op;
2785     enum machine_mode mode;
2786{
2787  if (!TARGET_64BIT)
2788    return nonmemory_operand (op, mode);
2789  if (register_operand (op, mode))
2790    return 1;
2791  return x86_64_sign_extended_value (op);
2792}
2793
2794/* Return nonzero if OP is a nonmemory operand acceptable by movabs patterns.  */
2795
2796int
2797x86_64_movabs_operand (op, mode)
2798     rtx op;
2799     enum machine_mode mode;
2800{
2801  if (!TARGET_64BIT || !flag_pic)
2802    return nonmemory_operand (op, mode);
2803  if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2804    return 1;
2805  if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2806    return 1;
2807  return 0;
2808}
2809
2810/* Return nonzero if OP is a nonmemory operand representable on x86_64 as
   either a sign-extended or zero-extended constant.  */
2811
2812int
2813x86_64_szext_nonmemory_operand (op, mode)
2814     rtx op;
2815     enum machine_mode mode;
2816{
2817  if (!TARGET_64BIT)
2818    return nonmemory_operand (op, mode);
2819  if (register_operand (op, mode))
2820    return 1;
2821  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2822}
2823
2824/* Return nonzero if OP is an immediate operand representable on x86_64.  */
2825
2826int
2827x86_64_immediate_operand (op, mode)
2828     rtx op;
2829     enum machine_mode mode;
2830{
2831  if (!TARGET_64BIT)
2832    return immediate_operand (op, mode);
2833  return x86_64_sign_extended_value (op);
2834}
2835
2836/* Return nonzero if OP is an immediate operand representable on x86_64 as
   a zero-extended constant.  */
2837
2838int
2839x86_64_zext_immediate_operand (op, mode)
2840     rtx op;
2841     enum machine_mode mode ATTRIBUTE_UNUSED;
2842{
2843  return x86_64_zero_extended_value (op);
2844}
2845
2846/* Return nonzero if OP is (const_int 1), else return zero.  */
2847
2848int
2849const_int_1_operand (op, mode)
2850     rtx op;
2851     enum machine_mode mode ATTRIBUTE_UNUSED;
2852{
2853  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2854}
2855
2856/* Returns 1 if OP is either a symbol reference or a sum of a symbol
2857   reference and a constant.  */
2858
2859int
2860symbolic_operand (op, mode)
2861     register rtx op;
2862     enum machine_mode mode ATTRIBUTE_UNUSED;
2863{
2864  switch (GET_CODE (op))
2865    {
2866    case SYMBOL_REF:
2867    case LABEL_REF:
2868      return 1;
2869
2870    case CONST:
2871      op = XEXP (op, 0);
2872      if (GET_CODE (op) == SYMBOL_REF
2873	  || GET_CODE (op) == LABEL_REF
2874	  || (GET_CODE (op) == UNSPEC
2875	      && (XINT (op, 1) == 6
2876		  || XINT (op, 1) == 7
2877		  || XINT (op, 1) == 15)))
2878	return 1;
2879      if (GET_CODE (op) != PLUS
2880	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
2881	return 0;
2882
2883      op = XEXP (op, 0);
2884      if (GET_CODE (op) == SYMBOL_REF
2885	  || GET_CODE (op) == LABEL_REF)
2886	return 1;
2887      /* Only @GOTOFF gets offsets.  */
2888      if (GET_CODE (op) != UNSPEC
2889	  || XINT (op, 1) != 7)
2890	return 0;
2891
2892      op = XVECEXP (op, 0, 0);
2893      if (GET_CODE (op) == SYMBOL_REF
2894	  || GET_CODE (op) == LABEL_REF)
2895	return 1;
2896      return 0;
2897
2898    default:
2899      return 0;
2900    }
2901}
2902
2903/* Return true if the operand contains a @GOT or @GOTOFF reference.  */
2904
2905int
2906pic_symbolic_operand (op, mode)
2907     register rtx op;
2908     enum machine_mode mode ATTRIBUTE_UNUSED;
2909{
2910  if (GET_CODE (op) != CONST)
2911    return 0;
2912  op = XEXP (op, 0);
2913  if (TARGET_64BIT)
2914    {
2915      if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2916	return 1;
2917    }
2918  else
2919    {
2920      if (GET_CODE (op) == UNSPEC)
2921	return 1;
2922      if (GET_CODE (op) != PLUS
2923	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
2924	return 0;
2925      op = XEXP (op, 0);
2926      if (GET_CODE (op) == UNSPEC)
2927	return 1;
2928    }
2929  return 0;
2930}
2931
2932/* Return true if OP is a symbolic operand that resolves locally.  */
2933
2934static int
2935local_symbolic_operand (op, mode)
2936     rtx op;
2937     enum machine_mode mode ATTRIBUTE_UNUSED;
2938{
2939  if (GET_CODE (op) == LABEL_REF)
2940    return 1;
2941
2942  if (GET_CODE (op) == CONST
2943      && GET_CODE (XEXP (op, 0)) == PLUS
2944      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2945    op = XEXP (XEXP (op, 0), 0);
2946
2947  if (GET_CODE (op) != SYMBOL_REF)
2948    return 0;
2949
2950  /* These we've been told are local by varasm and encode_section_info
2951     respectively.  */
2952  if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2953    return 1;
2954
2955  /* There is, however, a not insubstantial body of code in the rest of
2956     the compiler that assumes it can just stick the results of
2957     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
2958  /* ??? This is a hack.  Should update the body of the compiler to
2959     always create a DECL and invoke ENCODE_SECTION_INFO.  */
2960  if (strncmp (XSTR (op, 0), internal_label_prefix,
2961	       internal_label_prefix_len) == 0)
2962    return 1;
2963
2964  return 0;
2965}
2966
2967/* Test for a valid operand for a call instruction.  Don't allow the
2968   arg pointer register or virtual regs since they may decay into
2969   reg + const, which the patterns can't handle.  */
2970
2971int
2972call_insn_operand (op, mode)
2973     rtx op;
2974     enum machine_mode mode ATTRIBUTE_UNUSED;
2975{
2976  /* Disallow indirect through a virtual register.  This leads to
2977     compiler aborts when trying to eliminate them.  */
2978  if (GET_CODE (op) == REG
2979      && (op == arg_pointer_rtx
2980	  || op == frame_pointer_rtx
2981	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2982	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2983    return 0;
2984
2985  /* Disallow `call 1234'.  Due to varying assembler lameness this
2986     gets either rejected or translated to `call .+1234'.  */
2987  if (GET_CODE (op) == CONST_INT)
2988    return 0;
2989
2990  /* Explicitly allow SYMBOL_REF even if pic.  */
2991  if (GET_CODE (op) == SYMBOL_REF)
2992    return 1;
2993
2994  /* Half-pic doesn't allow anything but registers and constants.
2995     We've just taken care of the latter.  */
2996  if (HALF_PIC_P ())
2997    return register_operand (op, Pmode);
2998
2999  /* Otherwise we can allow any general_operand in the address.  */
3000  return general_operand (op, Pmode);
3001}
3002
3003int
3004constant_call_address_operand (op, mode)
3005     rtx op;
3006     enum machine_mode mode ATTRIBUTE_UNUSED;
3007{
3008  if (GET_CODE (op) == CONST
3009      && GET_CODE (XEXP (op, 0)) == PLUS
3010      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3011    op = XEXP (XEXP (op, 0), 0);
3012  return GET_CODE (op) == SYMBOL_REF;
3013}
3014
3015/* Match exactly zero and one.  */
3016
3017int
3018const0_operand (op, mode)
3019     register rtx op;
3020     enum machine_mode mode;
3021{
3022  return op == CONST0_RTX (mode);
3023}
3024
3025int
3026const1_operand (op, mode)
3027     register rtx op;
3028     enum machine_mode mode ATTRIBUTE_UNUSED;
3029{
3030  return op == const1_rtx;
3031}
3032
3033/* Match 2, 4, or 8.  Used for leal multiplicands.  */
3034
3035int
3036const248_operand (op, mode)
3037     register rtx op;
3038     enum machine_mode mode ATTRIBUTE_UNUSED;
3039{
3040  return (GET_CODE (op) == CONST_INT
3041	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3042}
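
/* For example, a scale of 4 lets a shift-and-add be combined into a
   single lea, as in `leal 0(%eax,%ebx,4), %ecx', which computes
   ecx = eax + ebx*4.  */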
3043
3044/* True if this is a constant appropriate for an increment or decrement.  */
3045
3046int
3047incdec_operand (op, mode)
3048     register rtx op;
3049     enum machine_mode mode ATTRIBUTE_UNUSED;
3050{
3051  /* On Pentium4, the inc and dec operations cause an extra dependency on the
3052     flags register, since the carry flag is not set.  */
3053  if (TARGET_PENTIUM4 && !optimize_size)
3054    return 0;
3055  return op == const1_rtx || op == constm1_rtx;
3056}
3057
3058/* Return nonzero if OP is acceptable as an operand of the DImode shift
3059   expander.  */
3060
3061int
3062shiftdi_operand (op, mode)
3063     rtx op;
3064     enum machine_mode mode ATTRIBUTE_UNUSED;
3065{
3066  if (TARGET_64BIT)
3067    return nonimmediate_operand (op, mode);
3068  else
3069    return register_operand (op, mode);
3070}
3071
3072/* Return false if this is the stack pointer, or any other fake
3073   register eliminable to the stack pointer.  Otherwise, this is
3074   a register operand.
3075
3076   This is used to prevent esp from being used as an index reg,
3077   which would only happen in pathological cases.  */
3078
3079int
3080reg_no_sp_operand (op, mode)
3081     register rtx op;
3082     enum machine_mode mode;
3083{
3084  rtx t = op;
3085  if (GET_CODE (t) == SUBREG)
3086    t = SUBREG_REG (t);
3087  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3088    return 0;
3089
3090  return register_operand (op, mode);
3091}
3092
3093int
3094mmx_reg_operand (op, mode)
3095     register rtx op;
3096     enum machine_mode mode ATTRIBUTE_UNUSED;
3097{
3098  return MMX_REG_P (op);
3099}
3100
3101/* Return false if this is any eliminable register.  Otherwise
3102   general_operand.  */
3103
3104int
3105general_no_elim_operand (op, mode)
3106     register rtx op;
3107     enum machine_mode mode;
3108{
3109  rtx t = op;
3110  if (GET_CODE (t) == SUBREG)
3111    t = SUBREG_REG (t);
3112  if (t == arg_pointer_rtx || t == frame_pointer_rtx
3113      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3114      || t == virtual_stack_dynamic_rtx)
3115    return 0;
3116  if (REG_P (t)
3117      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3118      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3119    return 0;
3120
3121  return general_operand (op, mode);
3122}
3123
3124/* Return false if this is any eliminable register.  Otherwise
3125   register_operand or const_int.  */
3126
3127int
3128nonmemory_no_elim_operand (op, mode)
3129     register rtx op;
3130     enum machine_mode mode;
3131{
3132  rtx t = op;
3133  if (GET_CODE (t) == SUBREG)
3134    t = SUBREG_REG (t);
3135  if (t == arg_pointer_rtx || t == frame_pointer_rtx
3136      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3137      || t == virtual_stack_dynamic_rtx)
3138    return 0;
3139
3140  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3141}
3142
3143/* Return true if op is a Q_REGS class register.  */
3144
3145int
3146q_regs_operand (op, mode)
3147     register rtx op;
3148     enum machine_mode mode;
3149{
3150  if (mode != VOIDmode && GET_MODE (op) != mode)
3151    return 0;
3152  if (GET_CODE (op) == SUBREG)
3153    op = SUBREG_REG (op);
3154  return QI_REG_P (op);
3155}
3156
3157/* Return true if op is a NON_Q_REGS class register.  */
3158
3159int
3160non_q_regs_operand (op, mode)
3161     register rtx op;
3162     enum machine_mode mode;
3163{
3164  if (mode != VOIDmode && GET_MODE (op) != mode)
3165    return 0;
3166  if (GET_CODE (op) == SUBREG)
3167    op = SUBREG_REG (op);
3168  return NON_QI_REG_P (op);
3169}
3170
3171/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3172   insns.  */
3173int
3174sse_comparison_operator (op, mode)
3175     rtx op;
3176     enum machine_mode mode ATTRIBUTE_UNUSED;
3177{
3178  enum rtx_code code = GET_CODE (op);
3179  switch (code)
3180    {
3181    /* Operations supported directly.  */
3182    case EQ:
3183    case LT:
3184    case LE:
3185    case UNORDERED:
3186    case NE:
3187    case UNGE:
3188    case UNGT:
3189    case ORDERED:
3190      return 1;
3191    /* These are equivalent to ones above in non-IEEE comparisons.  */
3192    case UNEQ:
3193    case UNLT:
3194    case UNLE:
3195    case LTGT:
3196    case GE:
3197    case GT:
3198      return !TARGET_IEEE_FP;
3199    default:
3200      return 0;
3201    }
3202}
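
/* For reference (a summary, not from the original sources): cmpss and
   cmpps encode exactly eight predicates (eq, lt, le, unord, neq, nlt,
   nle, ord), which is why only the comparisons above, or their
   non-IEEE equivalents, are accepted directly.  */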
3203/* Return 1 if OP is a valid comparison operator in a valid mode.  */
3204int
3205ix86_comparison_operator (op, mode)
3206     register rtx op;
3207     enum machine_mode mode;
3208{
3209  enum machine_mode inmode;
3210  enum rtx_code code = GET_CODE (op);
3211  if (mode != VOIDmode && GET_MODE (op) != mode)
3212    return 0;
3213  if (GET_RTX_CLASS (code) != '<')
3214    return 0;
3215  inmode = GET_MODE (XEXP (op, 0));
3216
3217  if (inmode == CCFPmode || inmode == CCFPUmode)
3218    {
3219      enum rtx_code second_code, bypass_code;
3220      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3221      return (bypass_code == NIL && second_code == NIL);
3222    }
3223  switch (code)
3224    {
3225    case EQ: case NE:
3226      return 1;
3227    case LT: case GE:
3228      if (inmode == CCmode || inmode == CCGCmode
3229	  || inmode == CCGOCmode || inmode == CCNOmode)
3230	return 1;
3231      return 0;
3232    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3233      if (inmode == CCmode)
3234	return 1;
3235      return 0;
3236    case GT: case LE:
3237      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3238	return 1;
3239      return 0;
3240    default:
3241      return 0;
3242    }
3243}
3244
3245/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */
3246
3247int
3248fcmov_comparison_operator (op, mode)
3249    register rtx op;
3250    enum machine_mode mode;
3251{
3252  enum machine_mode inmode;
3253  enum rtx_code code = GET_CODE (op);
3254  if (mode != VOIDmode && GET_MODE (op) != mode)
3255    return 0;
3256  if (GET_RTX_CLASS (code) != '<')
3257    return 0;
3258  inmode = GET_MODE (XEXP (op, 0));
3259  if (inmode == CCFPmode || inmode == CCFPUmode)
3260    {
3261      enum rtx_code second_code, bypass_code;
3262      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3263      if (bypass_code != NIL || second_code != NIL)
3264	return 0;
3265      code = ix86_fp_compare_code_to_integer (code);
3266    }
3267  /* The i387 supports only a limited set of condition codes.  */
3268  switch (code)
3269    {
3270    case LTU: case GTU: case LEU: case GEU:
3271      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3272	return 1;
3273      return 0;
3274    case ORDERED: case UNORDERED:
3275    case EQ: case NE:
3276      return 1;
3277    default:
3278      return 0;
3279    }
3280}
3281
3282/* Return 1 if OP is a binary operator that can be promoted to a wider mode.  */
3283
3284int
3285promotable_binary_operator (op, mode)
3286     register rtx op;
3287     enum machine_mode mode ATTRIBUTE_UNUSED;
3288{
3289  switch (GET_CODE (op))
3290    {
3291    case MULT:
3292      /* Modern CPUs have the same latency for HImode and SImode
3293         multiplies, but the 386 and 486 do HImode multiplies faster.  */
3294      return ix86_cpu > PROCESSOR_I486;
3295    case PLUS:
3296    case AND:
3297    case IOR:
3298    case XOR:
3299    case ASHIFT:
3300      return 1;
3301    default:
3302      return 0;
3303    }
3304}
3305
3306/* Nearly general operand, but accept any const_double, since we wish
3307   to be able to drop them into memory rather than have them get pulled
3308   into registers.  */
3309
3310int
3311cmp_fp_expander_operand (op, mode)
3312     register rtx op;
3313     enum machine_mode mode;
3314{
3315  if (mode != VOIDmode && mode != GET_MODE (op))
3316    return 0;
3317  if (GET_CODE (op) == CONST_DOUBLE)
3318    return 1;
3319  return general_operand (op, mode);
3320}
3321
3322/* Match a DImode (on 64-bit), SImode or HImode register for a zero_extract.  */
3323
3324int
3325ext_register_operand (op, mode)
3326     register rtx op;
3327     enum machine_mode mode ATTRIBUTE_UNUSED;
3328{
3329  int regno;
3330  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3331      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3332    return 0;
3333
3334  if (!register_operand (op, VOIDmode))
3335    return 0;
3336
3337  /* Be careful to accept only registers having upper parts.  */
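  /* In this port hard registers 0..3 are %eax, %edx, %ecx and %ebx, the
     only ones with an addressable high byte (%ah, %dh, %ch, %bh);
     anything above LAST_VIRTUAL_REGISTER is a pseudo that may still be
     allocated to one of them.  */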
3338  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3339  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3340}
3341
3342/* Return 1 if this is a valid binary floating-point operation.
3343   OP is the expression matched, and MODE is its mode.  */
3344
3345int
3346binary_fp_operator (op, mode)
3347    register rtx op;
3348    enum machine_mode mode;
3349{
3350  if (mode != VOIDmode && mode != GET_MODE (op))
3351    return 0;
3352
3353  switch (GET_CODE (op))
3354    {
3355    case PLUS:
3356    case MINUS:
3357    case MULT:
3358    case DIV:
3359      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3360
3361    default:
3362      return 0;
3363    }
3364}
3365
3366int
3367mult_operator (op, mode)
3368    register rtx op;
3369    enum machine_mode mode ATTRIBUTE_UNUSED;
3370{
3371  return GET_CODE (op) == MULT;
3372}
3373
3374int
3375div_operator (op, mode)
3376    register rtx op;
3377    enum machine_mode mode ATTRIBUTE_UNUSED;
3378{
3379  return GET_CODE (op) == DIV;
3380}
3381
3382int
3383arith_or_logical_operator (op, mode)
3384      rtx op;
3385      enum machine_mode mode;
3386{
3387  return ((mode == VOIDmode || GET_MODE (op) == mode)
3388          && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3389              || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3390}
3391
3392/* Returns 1 if OP is a memory operand with a displacement.  */
3393
3394int
3395memory_displacement_operand (op, mode)
3396     register rtx op;
3397     enum machine_mode mode;
3398{
3399  struct ix86_address parts;
3400
3401  if (! memory_operand (op, mode))
3402    return 0;
3403
3404  if (! ix86_decompose_address (XEXP (op, 0), &parts))
3405    abort ();
3406
3407  return parts.disp != NULL_RTX;
3408}
3409
3410/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3411   re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3412
3413   ??? It seems likely that this will only work because cmpsi is an
3414   expander, and no actual insns use this.  */
3415
3416int
3417cmpsi_operand (op, mode)
3418      rtx op;
3419      enum machine_mode mode;
3420{
3421  if (nonimmediate_operand (op, mode))
3422    return 1;
3423
3424  if (GET_CODE (op) == AND
3425      && GET_MODE (op) == SImode
3426      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3427      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3428      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3429      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3430      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3431      && GET_CODE (XEXP (op, 1)) == CONST_INT)
3432    return 1;
3433
3434  return 0;
3435}
3436
3437/* Returns 1 if OP is a memory operand that cannot be represented by
3438   just the modRM byte.  */
3439
3440int
3441long_memory_operand (op, mode)
3442     register rtx op;
3443     enum machine_mode mode;
3444{
3445  if (! memory_operand (op, mode))
3446    return 0;
3447
3448  return memory_address_length (op) != 0;
3449}
3450
3451/* Return nonzero if the rtx is known to be aligned.  */
3452
3453int
3454aligned_operand (op, mode)
3455     rtx op;
3456     enum machine_mode mode;
3457{
3458  struct ix86_address parts;
3459
3460  if (!general_operand (op, mode))
3461    return 0;
3462
3463  /* Registers and immediate operands are always "aligned".  */
3464  if (GET_CODE (op) != MEM)
3465    return 1;
3466
3467  /* Don't even try to do any aligned optimizations with volatiles.  */
3468  if (MEM_VOLATILE_P (op))
3469    return 0;
3470
3471  op = XEXP (op, 0);
3472
3473  /* Pushes and pops are only valid on the stack pointer.  */
3474  if (GET_CODE (op) == PRE_DEC
3475      || GET_CODE (op) == POST_INC)
3476    return 1;
3477
3478  /* Decode the address.  */
3479  if (! ix86_decompose_address (op, &parts))
3480    abort ();
3481
3482  /* Look for some component that isn't known to be aligned.  */
3483  if (parts.index)
3484    {
3485      if (parts.scale < 4
3486	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3487	return 0;
3488    }
3489  if (parts.base)
3490    {
3491      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3492	return 0;
3493    }
3494  if (parts.disp)
3495    {
3496      if (GET_CODE (parts.disp) != CONST_INT
3497	  || (INTVAL (parts.disp) & 3) != 0)
3498	return 0;
3499    }
3500
3501  /* Didn't find one -- this must be an aligned address.  */
3502  return 1;
3503}
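
/* Illustration for aligned_operand: with %esp recorded as 32-bit aligned
   by REGNO_POINTER_ALIGN, a reference like 8(%esp) is accepted, while
   2(%esp) is rejected by the displacement check (INTVAL & 3) above.  */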
3504
3505/* Return true if the constant is something that can be loaded with
3506   a special instruction.  Only handle 0.0 and 1.0; others are less
3507   worthwhile.  */
3508
3509int
3510standard_80387_constant_p (x)
3511     rtx x;
3512{
3513  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3514    return -1;
3515  /* Note that the 80387 supports other constants, such as pi, that we
3516     should support too.  On some machines, these are much slower to load
3517     as a standard constant than to load from a double in memory.  */
3518  if (x == CONST0_RTX (GET_MODE (x)))
3519    return 1;
3520  if (x == CONST1_RTX (GET_MODE (x)))
3521    return 2;
3522  return 0;
3523}
3524
3525/* Return 1 if X is an FP constant we can load into an SSE register
3526   without using memory.  */
3527int
3528standard_sse_constant_p (x)
3529     rtx x;
3530{
3531  if (GET_CODE (x) != CONST_DOUBLE)
3532    return -1;
3533  return (x == CONST0_RTX (GET_MODE (x)));
3534}
3535
3536/* Returns 1 if OP contains a symbol reference.  */
3537
3538int
3539symbolic_reference_mentioned_p (op)
3540     rtx op;
3541{
3542  register const char *fmt;
3543  register int i;
3544
3545  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3546    return 1;
3547
3548  fmt = GET_RTX_FORMAT (GET_CODE (op));
3549  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3550    {
3551      if (fmt[i] == 'E')
3552	{
3553	  register int j;
3554
3555	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3556	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3557	      return 1;
3558	}
3559
3560      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3561	return 1;
3562    }
3563
3564  return 0;
3565}
3566
3567/* Return 1 if it is appropriate to emit `ret' instructions in the
3568   body of a function.  Do this only if the epilogue is simple, needing a
3569   couple of insns.  Prior to reloading, we can't tell how many registers
3570   must be saved, so return 0 then.  Return 0 if there is no frame
3571   marker to de-allocate.
3572
3573   If NON_SAVING_SETJMP is defined and true, then it is not possible
3574   for the epilogue to be simple, so return 0.  This is a special case
3575   since NON_SAVING_SETJMP will not cause regs_ever_live to change
3576   until final, but jump_optimize may need to know sooner if a
3577   `return' is OK.  */
3578
3579int
3580ix86_can_use_return_insn_p ()
3581{
3582  struct ix86_frame frame;
3583
3584#ifdef NON_SAVING_SETJMP
3585  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3586    return 0;
3587#endif
3588
3589  if (! reload_completed || frame_pointer_needed)
3590    return 0;
3591
3592  /* Don't allow more than 32768 bytes of arguments to be popped, since
3593     that's all we handle with one instruction.  */
3594  if (current_function_pops_args
3595      && current_function_args_size >= 32768)
3596    return 0;
3597
3598  ix86_compute_frame_layout (&frame);
3599  return frame.to_allocate == 0 && frame.nregs == 0;
3600}
3601
3602/* Return 1 if VALUE can be stored in the sign extended immediate field.  */
3603int
3604x86_64_sign_extended_value (value)
3605     rtx value;
3606{
3607  switch (GET_CODE (value))
3608    {
3609      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3610         to be at least 32 and thus all acceptable constants are
3611	 represented as CONST_INTs.  */
3612      case CONST_INT:
3613	if (HOST_BITS_PER_WIDE_INT == 32)
3614	  return 1;
3615	else
3616	  {
3617	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3618	    return trunc_int_for_mode (val, SImode) == val;
3619	  }
3620	break;
3621
3622      /* For certain code models, the symbolic references are known to fit.  */
3623      case SYMBOL_REF:
3624	return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3625
3626      /* For certain code models, the code is near as well.  */
3627      case LABEL_REF:
3628	return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3629
3630      /* We may also accept offsetted memory references in certain special
3631         cases.  */
3632      case CONST:
3633	if (GET_CODE (XEXP (value, 0)) == UNSPEC
3634	    && XVECLEN (XEXP (value, 0), 0) == 1
3635	    && XINT (XEXP (value, 0), 1) ==  15)
3636	  return 1;
3637	else if (GET_CODE (XEXP (value, 0)) == PLUS)
3638	  {
3639	    rtx op1 = XEXP (XEXP (value, 0), 0);
3640	    rtx op2 = XEXP (XEXP (value, 0), 1);
3641	    HOST_WIDE_INT offset;
3642
3643	    if (ix86_cmodel == CM_LARGE)
3644	      return 0;
3645	    if (GET_CODE (op2) != CONST_INT)
3646	      return 0;
3647	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
3648	    switch (GET_CODE (op1))
3649	      {
3650		case SYMBOL_REF:
3651		  /* For CM_SMALL assume that the last object is 1MB below the
3652		     end of the 31-bit boundary.  We may also accept pretty
3653		     large negative constants, knowing that all objects are
3654		     in the positive half of the address space.  */
3655		  if (ix86_cmodel == CM_SMALL
3656		      && offset < 1024*1024*1024
3657		      && trunc_int_for_mode (offset, SImode) == offset)
3658		    return 1;
3659		  /* For CM_KERNEL we know that all objects reside in the
3660		     negative half of the 32-bit address space.  We must not
3661		     accept negative offsets, since they may fall just outside
3662		     that range, but we may accept pretty large positive ones.  */
3663		  if (ix86_cmodel == CM_KERNEL
3664		      && offset > 0
3665		      && trunc_int_for_mode (offset, SImode) == offset)
3666		    return 1;
3667		  break;
3668		case LABEL_REF:
3669		  /* These conditions are similar to SYMBOL_REF ones, just the
3670		     constraints for code models differ.  */
3671		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3672		      && offset < 1024*1024*1024
3673		      && trunc_int_for_mode (offset, SImode) == offset)
3674		    return 1;
3675		  if (ix86_cmodel == CM_KERNEL
3676		      && offset > 0
3677		      && trunc_int_for_mode (offset, SImode) == offset)
3678		    return 1;
3679		  break;
3680		default:
3681		  return 0;
3682	      }
3683	  }
3684	return 0;
3685      default:
3686	return 0;
3687    }
3688}
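
/* Illustration for x86_64_sign_extended_value: the sign-extended
   immediate field holds values in [-2^31, 2^31 - 1], so e.g.
   (const_int 0x7fffffff) and (const_int -0x80000000) are accepted,
   while (const_int 0x80000000) is not, since sign extending its low
   32 bits yields a different value.  */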
3689
3690/* Return 1 if VALUE can be stored in the zero extended immediate field.  */
3691int
3692x86_64_zero_extended_value (value)
3693     rtx value;
3694{
3695  switch (GET_CODE (value))
3696    {
3697      case CONST_DOUBLE:
3698	if (HOST_BITS_PER_WIDE_INT == 32)
3699	  return  (GET_MODE (value) == VOIDmode
3700		   && !CONST_DOUBLE_HIGH (value));
3701	else
3702	  return 0;
3703      case CONST_INT:
3704	if (HOST_BITS_PER_WIDE_INT == 32)
3705	  return INTVAL (value) >= 0;
3706	else
3707	  return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3708	break;
3709
3710      /* For certain code models, the symbolic references are known to fit.  */
3711      case SYMBOL_REF:
3712	return ix86_cmodel == CM_SMALL;
3713
3714      /* For certain code models, the code is near as well.  */
3715      case LABEL_REF:
3716	return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3717
3718      /* We may also accept offsetted memory references in certain special
3719         cases.  */
3720      case CONST:
3721	if (GET_CODE (XEXP (value, 0)) == PLUS)
3722	  {
3723	    rtx op1 = XEXP (XEXP (value, 0), 0);
3724	    rtx op2 = XEXP (XEXP (value, 0), 1);
3725
3726	    if (ix86_cmodel == CM_LARGE)
3727	      return 0;
3728	    switch (GET_CODE (op1))
3729	      {
3730		case SYMBOL_REF:
3731		    return 0;
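		  /* Note: the return just above makes the offset checks
		     below unreachable; SYMBOL_REF offsets are currently
		     rejected here unconditionally.  */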
3732		  /* For small code model we may accept pretty large positive
3733		     offsets, since one bit is available for free.  Negative
3734		     offsets are limited by the size of NULL pointer area
3735		     specified by the ABI.  */
3736		  if (ix86_cmodel == CM_SMALL
3737		      && GET_CODE (op2) == CONST_INT
3738		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3739		      && (trunc_int_for_mode (INTVAL (op2), SImode)
3740			  == INTVAL (op2)))
3741		    return 1;
3742	          /* ??? For the kernel, we may accept adjustment of
3743		     -0x10000000, since we know that it will just convert
3744		     negative address space to positive, but perhaps this
3745		     is not worthwhile.  */
3746		  break;
3747		case LABEL_REF:
3748		  /* These conditions are similar to SYMBOL_REF ones, just the
3749		     constraints for code models differ.  */
3750		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3751		      && GET_CODE (op2) == CONST_INT
3752		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3753		      && (trunc_int_for_mode (INTVAL (op2), SImode)
3754			  == INTVAL (op2)))
3755		    return 1;
3756		  break;
3757		default:
3758		  return 0;
3759	      }
3760	  }
3761	return 0;
3762      default:
3763	return 0;
3764    }
3765}
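
/* Illustration for x86_64_zero_extended_value: the zero-extended
   immediate field holds values in [0, 2^32 - 1], so e.g. a value such
   as 0x80000000 is accepted here (its high 32 bits are zero) even
   though x86_64_sign_extended_value rejects it, while negative values
   are rejected.  */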
3766
3767/* Value should be nonzero if functions must have frame pointers.
3768   Zero means the frame pointer need not be set up (and parms may
3769   be accessed via the stack pointer) in functions that seem suitable.  */
3770
3771int
3772ix86_frame_pointer_required ()
3773{
3774  /* If we accessed previous frames, then the generated code expects
3775     to be able to access the saved ebp value in our frame.  */
3776  if (cfun->machine->accesses_prev_frame)
3777    return 1;
3778
3779  /* Several x86 OSes need a frame pointer for other reasons,
3780     usually pertaining to setjmp.  */
3781  if (SUBTARGET_FRAME_POINTER_REQUIRED)
3782    return 1;
3783
3784  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3785     the frame pointer by default.  Turn it back on now if we've not
3786     got a leaf function.  */
3787  if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3788    return 1;
3789
3790  return 0;
3791}
3792
3793/* Record that the current function accesses previous call frames.  */
3794
3795void
3796ix86_setup_frame_addresses ()
3797{
3798  cfun->machine->accesses_prev_frame = 1;
3799}
3800
3801static char pic_label_name[32];
3802
3803/* This function generates code for -fpic that loads %ebx with
3804   the return address of the caller and then returns.  */
3805
3806void
3807ix86_asm_file_end (file)
3808     FILE *file;
3809{
3810  rtx xops[2];
3811
3812  if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3813    return;
3814
3815  /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3816     to updating relocations to a section being discarded such that this
3817     doesn't work.  Ought to detect this at configure time.  */
3818#if 0
3819  /* The trick here is to create a linkonce section containing the
3820     pic label thunk, but to refer to it with an internal label.
3821     Because the label is internal, we don't have inter-dso name
3822     binding issues on hosts that don't support ".hidden".
3823
3824     In order to use these macros, however, we must create a fake
3825     function decl.  */
3826  if (targetm.have_named_sections)
3827    {
3828      tree decl = build_decl (FUNCTION_DECL,
3829			      get_identifier ("i686.get_pc_thunk"),
3830			      error_mark_node);
3831      DECL_ONE_ONLY (decl) = 1;
3832      UNIQUE_SECTION (decl, 0);
3833      named_section (decl, NULL);
3834    }
3835  else
3836#else
3837    text_section ();
3838#endif
3839
3840  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3841     internal (non-global) label that's being emitted, it didn't make
3842     sense to have .type information for local labels.   This caused
3843     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3844     me debug info for a label that you're declaring non-global?), so this
3845     was changed to call ASM_OUTPUT_LABEL() instead.  */
3846
3847  ASM_OUTPUT_LABEL (file, pic_label_name);
3848
3849  xops[0] = pic_offset_table_rtx;
3850  xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3851  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3852  output_asm_insn ("ret", xops);
3853}
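
/* For reference, the thunk emitted above looks roughly like this in
   AT&T syntax (the label name is illustrative; the PIC register is
   %ebx):

       .LPR0:
	       movl	(%esp), %ebx
	       ret
*/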
3854
3855void
3856load_pic_register ()
3857{
3858  rtx gotsym, pclab;
3859
3860  if (TARGET_64BIT)
3861    abort ();
3862
3863  gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3864
3865  if (TARGET_DEEP_BRANCH_PREDICTION)
3866    {
3867      if (! pic_label_name[0])
3868	ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3869      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3870    }
3871  else
3872    {
3873      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3874    }
3875
3876  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3877
3878  if (! TARGET_DEEP_BRANCH_PREDICTION)
3879    emit_insn (gen_popsi1 (pic_offset_table_rtx));
3880
3881  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3882}
3883
3884/* Generate an "push" pattern for input ARG.  */
3885
3886static rtx
3887gen_push (arg)
3888     rtx arg;
3889{
3890  return gen_rtx_SET (VOIDmode,
3891		      gen_rtx_MEM (Pmode,
3892				   gen_rtx_PRE_DEC (Pmode,
3893						    stack_pointer_rtx)),
3894		      arg);
3895}
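
/* E.g. on a 32-bit target, gen_push (gen_rtx_REG (SImode, 0)) builds
   the RTL

       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI 0))

   which matches the push instruction patterns.  */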
3896
3897/* Return 1 if we need to save REGNO.  */
3898static int
3899ix86_save_reg (regno, maybe_eh_return)
3900     int regno;
3901     int maybe_eh_return;
3902{
3903  if (regno == PIC_OFFSET_TABLE_REGNUM
3904      && (current_function_uses_pic_offset_table
3905	  || current_function_uses_const_pool
3906	  || current_function_calls_eh_return))
3907    return 1;
3908
3909  if (current_function_calls_eh_return && maybe_eh_return)
3910    {
3911      unsigned i;
3912      for (i = 0; ; i++)
3913	{
3914	  unsigned test = EH_RETURN_DATA_REGNO (i);
3915	  if (test == INVALID_REGNUM)
3916	    break;
3917	  if (test == (unsigned) regno)
3918	    return 1;
3919	}
3920    }
3921
3922  return (regs_ever_live[regno]
3923	  && !call_used_regs[regno]
3924	  && !fixed_regs[regno]
3925	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3926}
3927
3928/* Return number of registers to be saved on the stack.  */
3929
3930static int
3931ix86_nsaved_regs ()
3932{
3933  int nregs = 0;
3934  int regno;
3935
3936  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3937    if (ix86_save_reg (regno, true))
3938      nregs++;
3939  return nregs;
3940}
3941
3942/* Return the offset between two registers, one to be eliminated, and the other
3943   its replacement, at the start of a routine.  */
3944
3945HOST_WIDE_INT
3946ix86_initial_elimination_offset (from, to)
3947     int from;
3948     int to;
3949{
3950  struct ix86_frame frame;
3951  ix86_compute_frame_layout (&frame);
3952
3953  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3954    return frame.hard_frame_pointer_offset;
3955  else if (from == FRAME_POINTER_REGNUM
3956	   && to == HARD_FRAME_POINTER_REGNUM)
3957    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3958  else
3959    {
3960      if (to != STACK_POINTER_REGNUM)
3961	abort ();
3962      else if (from == ARG_POINTER_REGNUM)
3963	return frame.stack_pointer_offset;
3964      else if (from != FRAME_POINTER_REGNUM)
3965	abort ();
3966      else
3967	return frame.stack_pointer_offset - frame.frame_pointer_offset;
3968    }
3969}
3970
3971/* Fill the structure ix86_frame with info about the current function's frame.  */
3972
3973static void
3974ix86_compute_frame_layout (frame)
3975     struct ix86_frame *frame;
3976{
3977  HOST_WIDE_INT total_size;
3978  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3979  int offset;
3980  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3981  HOST_WIDE_INT size = get_frame_size ();
3982
3983  frame->nregs = ix86_nsaved_regs ();
3984  total_size = size;
3985
3986  /* Skip return value and save base pointer.  */
3987  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3988
3989  frame->hard_frame_pointer_offset = offset;
3990
3991  /* Do some sanity checking of stack_alignment_needed and
3992     preferred_alignment, since the i386 port is the only one using these
3993     features, and they may break easily.  */
3994
3995  if (size && !stack_alignment_needed)
3996    abort ();
3997  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3998    abort ();
3999  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4000    abort ();
4001  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4002    abort ();
4003
4004  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4005    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4006
4007  /* Register save area */
4008  offset += frame->nregs * UNITS_PER_WORD;
4009
4010  /* Va-arg area */
4011  if (ix86_save_varrargs_registers)
4012    {
4013      offset += X86_64_VARARGS_SIZE;
4014      frame->va_arg_size = X86_64_VARARGS_SIZE;
4015    }
4016  else
4017    frame->va_arg_size = 0;
4018
4019  /* Align start of frame for local function.  */
4020  frame->padding1 = ((offset + stack_alignment_needed - 1)
4021		     & -stack_alignment_needed) - offset;
4022
4023  offset += frame->padding1;
4024
4025  /* Frame pointer points here.  */
4026  frame->frame_pointer_offset = offset;
4027
4028  offset += size;
4029
4030  /* Add outgoing arguments area.  */
4031  if (ACCUMULATE_OUTGOING_ARGS)
4032    {
4033      offset += current_function_outgoing_args_size;
4034      frame->outgoing_arguments_size = current_function_outgoing_args_size;
4035    }
4036  else
4037    frame->outgoing_arguments_size = 0;
4038
4039  /* Align stack boundary.  */
4040  frame->padding2 = ((offset + preferred_alignment - 1)
4041		     & -preferred_alignment) - offset;
4042
4043  offset += frame->padding2;
4044
4045  /* We've reached the end of the stack frame.  */
4046  frame->stack_pointer_offset = offset;
4047
4048  /* Size prologue needs to allocate.  */
4049  frame->to_allocate =
4050    (size + frame->padding1 + frame->padding2
4051     + frame->outgoing_arguments_size + frame->va_arg_size);
4052
4053  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4054      && current_function_is_leaf)
4055    {
4056      frame->red_zone_size = frame->to_allocate;
4057      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4058	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4059    }
4060  else
4061    frame->red_zone_size = 0;
4062  frame->to_allocate -= frame->red_zone_size;
4063  frame->stack_pointer_offset -= frame->red_zone_size;
4064#if 0
4065  fprintf (stderr, "nregs: %i\n", frame->nregs);
4066  fprintf (stderr, "size: %i\n", size);
4067  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4068  fprintf (stderr, "padding1: %i\n", frame->padding1);
4069  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4070  fprintf (stderr, "padding2: %i\n", frame->padding2);
4071  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4072  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4073  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4074  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4075	   frame->hard_frame_pointer_offset);
4076  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4077#endif
4078}
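
/* A sketch of the layout computed above, from higher to lower
   addresses (offsets are measured from the return address slot):

       return address
       saved frame pointer (if frame_pointer_needed)   <- hard_frame_pointer_offset
       saved registers (nregs words)
       va-arg save area (va_arg_size)
       padding1 (aligns the locals)
       local variables                                 <- frame_pointer_offset
       outgoing arguments (if ACCUMULATE_OUTGOING_ARGS)
       padding2 (aligns to the preferred boundary)     <- stack_pointer_offset

   On 64-bit leaf functions part of this may live in the red zone and
   is subtracted from to_allocate at the end.  */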
4079
4080/* Emit code to save registers in the prologue.  */
4081
4082static void
4083ix86_emit_save_regs ()
4084{
4085  register int regno;
4086  rtx insn;
4087
4088  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4089    if (ix86_save_reg (regno, true))
4090      {
4091	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4092	RTX_FRAME_RELATED_P (insn) = 1;
4093      }
4094}
4095
4096/* Emit code to save registers using MOV insns.  The first register
4097   is stored at POINTER + OFFSET.  */
4098static void
4099ix86_emit_save_regs_using_mov (pointer, offset)
4100     rtx pointer;
4101     HOST_WIDE_INT offset;
4102{
4103  int regno;
4104  rtx insn;
4105
4106  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4107    if (ix86_save_reg (regno, true))
4108      {
4109	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4110					       Pmode, offset),
4111			       gen_rtx_REG (Pmode, regno));
4112	RTX_FRAME_RELATED_P (insn) = 1;
4113	offset += UNITS_PER_WORD;
4114      }
4115}
4116
4117/* Expand the prologue into a bunch of separate insns.  */
4118
4119void
4120ix86_expand_prologue ()
4121{
4122  rtx insn;
4123  int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
4124				  || current_function_uses_const_pool)
4125		      && !TARGET_64BIT);
4126  struct ix86_frame frame;
4127  int use_mov = 0;
4128  HOST_WIDE_INT allocate;
4129
4130  if (!optimize_size)
4131    {
4132      use_fast_prologue_epilogue
4133	 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4134      if (TARGET_PROLOGUE_USING_MOVE)
4135        use_mov = use_fast_prologue_epilogue;
4136    }
4137  ix86_compute_frame_layout (&frame);
4138
4139  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
4140     slower on all targets.  Also sdb doesn't like it.  */
4141
4142  if (frame_pointer_needed)
4143    {
4144      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4145      RTX_FRAME_RELATED_P (insn) = 1;
4146
4147      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4148      RTX_FRAME_RELATED_P (insn) = 1;
4149    }
4150
4151  allocate = frame.to_allocate;
4152  /* In case we are dealing with only a single register and an empty
4153     frame, a push is equivalent to the mov+add sequence.  */
4154  if (allocate == 0 && frame.nregs <= 1)
4155    use_mov = 0;
4156
4157  if (!use_mov)
4158    ix86_emit_save_regs ();
4159  else
4160    allocate += frame.nregs * UNITS_PER_WORD;
4161
4162  if (allocate == 0)
4163    ;
4164  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4165    {
4166      insn = emit_insn (gen_pro_epilogue_adjust_stack
4167			(stack_pointer_rtx, stack_pointer_rtx,
4168			 GEN_INT (-allocate)));
4169      RTX_FRAME_RELATED_P (insn) = 1;
4170    }
4171  else
4172    {
4173      /* ??? Is this only valid for Win32?  */
4174
4175      rtx arg0, sym;
4176
4177      if (TARGET_64BIT)
4178	abort ();
4179
4180      arg0 = gen_rtx_REG (SImode, 0);
4181      emit_move_insn (arg0, GEN_INT (allocate));
4182
4183      sym = gen_rtx_MEM (FUNCTION_MODE,
4184			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4185      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4186
4187      CALL_INSN_FUNCTION_USAGE (insn)
4188	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4189			     CALL_INSN_FUNCTION_USAGE (insn));
4190    }
4191  if (use_mov)
4192    {
4193      if (!frame_pointer_needed || !frame.to_allocate)
4194        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4195      else
4196        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4197				       -frame.nregs * UNITS_PER_WORD);
4198    }
4199
4200#ifdef SUBTARGET_PROLOGUE
4201  SUBTARGET_PROLOGUE;
4202#endif
4203
4204  if (pic_reg_used)
4205    load_pic_register ();
4206
4207  /* If we are profiling, make sure no instructions are scheduled before
4208     the call to mcount.  However, if -fpic, the above call will have
4209     done that.  */
4210  if (current_function_profile && ! pic_reg_used)
4211    emit_insn (gen_blockage ());
4212}
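
/* For a typical frame-pointer-using function with one call-saved
   register and 12 bytes of locals, the expansion above produces
   roughly:

       pushl	%ebp
       movl	%esp, %ebp
       pushl	%ebx
       subl	$12, %esp

   (with TARGET_PROLOGUE_USING_MOVE the push of %ebx becomes a mov
   into the allocated area instead).  */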
4213
4214/* Emit code to restore saved registers using MOV insns.  First register
4215   is restored from POINTER + OFFSET.  */
4216static void
4217ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4218     rtx pointer;
4219     int offset;
4220     int maybe_eh_return;
4221{
4222  int regno;
4223
4224  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4225    if (ix86_save_reg (regno, maybe_eh_return))
4226      {
4227	emit_move_insn (gen_rtx_REG (Pmode, regno),
4228			adjust_address (gen_rtx_MEM (Pmode, pointer),
4229					Pmode, offset));
4230	offset += UNITS_PER_WORD;
4231      }
4232}
4233
4234/* Restore function stack, frame, and registers.  */
4235
4236void
4237ix86_expand_epilogue (style)
4238     int style;
4239{
4240  int regno;
4241  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4242  struct ix86_frame frame;
4243  HOST_WIDE_INT offset;
4244
4245  ix86_compute_frame_layout (&frame);
4246
4247  /* Calculate start of saved registers relative to ebp.  Special care
4248     must be taken for the normal return case of a function using
4249     eh_return: the eax and edx registers are marked as saved, but not
4250     restored along this path.  */
4251  offset = frame.nregs;
4252  if (current_function_calls_eh_return && style != 2)
4253    offset -= 2;
4254  offset *= -UNITS_PER_WORD;
4255
4256  /* If we're only restoring one register and sp is not valid, then
4257     use a move instruction to restore the register, since it's less
4258     work than reloading sp and popping the register.
4259
4260     The default code results in a stack adjustment using an add/lea
4261     instruction, while this code results in a LEAVE instruction (or
4262     discrete equivalent), so it is profitable in some other cases as
4263     well.  Especially when there are no registers to restore.  We also
4264     use this code when TARGET_USE_LEAVE and there is exactly one
4265     register to pop.  This heuristic may need some tuning in the future.  */
4266  if ((!sp_valid && frame.nregs <= 1)
4267      || (TARGET_EPILOGUE_USING_MOVE
4268	  && use_fast_prologue_epilogue
4269	  && (frame.nregs > 1 || frame.to_allocate))
4270      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4271      || (frame_pointer_needed && TARGET_USE_LEAVE
4272	  && use_fast_prologue_epilogue && frame.nregs == 1)
4273      || current_function_calls_eh_return)
4274    {
4275      /* Restore registers.  We can use ebp or esp to address the memory
4276	 locations.  If both are available, default to ebp, since offsets
4277	 are known to be small.  The only exception is when esp points
4278	 directly to the end of the block of saved registers, where we may
4279	 simplify the addressing mode.  */
4280
4281      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4282	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4283					  frame.to_allocate, style == 2);
4284      else
4285	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4286					  offset, style == 2);
4287
4288      /* eh_return epilogues need %ecx added to the stack pointer.  */
4289      if (style == 2)
4290	{
4291	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4292
4293	  if (frame_pointer_needed)
4294	    {
4295	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4296	      tmp = plus_constant (tmp, UNITS_PER_WORD);
4297	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4298
4299	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4300	      emit_move_insn (hard_frame_pointer_rtx, tmp);
4301
4302	      emit_insn (gen_pro_epilogue_adjust_stack
4303			 (stack_pointer_rtx, sa, const0_rtx));
4304	    }
4305	  else
4306	    {
4307	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4308	      tmp = plus_constant (tmp, (frame.to_allocate
4309                                         + frame.nregs * UNITS_PER_WORD));
4310	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4311	    }
4312	}
4313      else if (!frame_pointer_needed)
4314	emit_insn (gen_pro_epilogue_adjust_stack
4315		   (stack_pointer_rtx, stack_pointer_rtx,
4316		    GEN_INT (frame.to_allocate
4317			     + frame.nregs * UNITS_PER_WORD)));
4318      /* If not an i386, mov & pop is faster than "leave".  */
4319      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4320	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4321      else
4322	{
4323	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4324						    hard_frame_pointer_rtx,
4325						    const0_rtx));
4326	  if (TARGET_64BIT)
4327	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4328	  else
4329	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4330	}
4331    }
4332  else
4333    {
4334      /* First step is to deallocate the stack frame so that we can
4335	 pop the registers.  */
4336      if (!sp_valid)
4337	{
4338	  if (!frame_pointer_needed)
4339	    abort ();
4340          emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4341						    hard_frame_pointer_rtx,
4342						    GEN_INT (offset)));
4343	}
4344      else if (frame.to_allocate)
4345	emit_insn (gen_pro_epilogue_adjust_stack
4346		   (stack_pointer_rtx, stack_pointer_rtx,
4347		    GEN_INT (frame.to_allocate)));
4348
4349      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4350	if (ix86_save_reg (regno, false))
4351	  {
4352	    if (TARGET_64BIT)
4353	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4354	    else
4355	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4356	  }
4357      if (frame_pointer_needed)
4358	{
4359	  /* Leave results in shorter dependency chains on CPUs that are
4360	     able to grok it fast.  */
4361	  if (TARGET_USE_LEAVE)
4362	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4363	  else if (TARGET_64BIT)
4364	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4365	  else
4366	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4367	}
4368    }
4369
4370  /* Sibcall epilogues don't want a return instruction.  */
4371  if (style == 0)
4372    return;
4373
4374  if (current_function_pops_args && current_function_args_size)
4375    {
4376      rtx popc = GEN_INT (current_function_pops_args);
4377
4378      /* The i386 can only pop 64K bytes.  If asked to pop more, pop the
4379	 return address, do an explicit add, and jump indirectly to the
4380	 caller.  */
4381
4382      if (current_function_pops_args >= 65536)
4383	{
4384	  rtx ecx = gen_rtx_REG (SImode, 2);
4385
4386	  /* There is no "pascal" calling convention in the 64-bit ABI.  */
4387	  if (TARGET_64BIT)
4388	    abort ();
4389
4390	  emit_insn (gen_popsi1 (ecx));
4391	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4392	  emit_jump_insn (gen_return_indirect_internal (ecx));
4393	}
4394      else
4395	emit_jump_insn (gen_return_pop_internal (popc));
4396    }
4397  else
4398    emit_jump_insn (gen_return_internal ());
4399}
4400
4401/* Extract the parts of an RTL expression that is a valid memory address
4402   for an instruction.  Return 0 if the structure of the address is
4403   grossly off.  Return -1 if the address contains an ASHIFT, so it is
4404   not strictly valid, but is still used for computing the length of a
4405   lea instruction.  */
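
/* For example, the address of the x86 operand 12(%ebx,%eax,4), i.e.
   schematically

       (plus (plus (mult (reg %eax) (const_int 4))
		   (reg %ebx))
	     (const_int 12))

   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12.  */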
4406
4407static int
4408ix86_decompose_address (addr, out)
4409     register rtx addr;
4410     struct ix86_address *out;
4411{
4412  rtx base = NULL_RTX;
4413  rtx index = NULL_RTX;
4414  rtx disp = NULL_RTX;
4415  HOST_WIDE_INT scale = 1;
4416  rtx scale_rtx = NULL_RTX;
4417  int retval = 1;
4418
4419  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4420    base = addr;
4421  else if (GET_CODE (addr) == PLUS)
4422    {
4423      rtx op0 = XEXP (addr, 0);
4424      rtx op1 = XEXP (addr, 1);
4425      enum rtx_code code0 = GET_CODE (op0);
4426      enum rtx_code code1 = GET_CODE (op1);
4427
4428      if (code0 == REG || code0 == SUBREG)
4429	{
4430	  if (code1 == REG || code1 == SUBREG)
4431	    index = op0, base = op1;	/* index + base */
4432	  else
4433	    base = op0, disp = op1;	/* base + displacement */
4434	}
4435      else if (code0 == MULT)
4436	{
4437	  index = XEXP (op0, 0);
4438	  scale_rtx = XEXP (op0, 1);
4439	  if (code1 == REG || code1 == SUBREG)
4440	    base = op1;			/* index*scale + base */
4441	  else
4442	    disp = op1;			/* index*scale + disp */
4443	}
4444      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4445	{
4446	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
4447	  scale_rtx = XEXP (XEXP (op0, 0), 1);
4448	  base = XEXP (op0, 1);
4449	  disp = op1;
4450	}
4451      else if (code0 == PLUS)
4452	{
4453	  index = XEXP (op0, 0);	/* index + base + disp */
4454	  base = XEXP (op0, 1);
4455	  disp = op1;
4456	}
4457      else
4458	return 0;
4459    }
4460  else if (GET_CODE (addr) == MULT)
4461    {
4462      index = XEXP (addr, 0);		/* index*scale */
4463      scale_rtx = XEXP (addr, 1);
4464    }
4465  else if (GET_CODE (addr) == ASHIFT)
4466    {
4467      rtx tmp;
4468
4469      /* We're called for lea too, which implements ashift on occasion.  */
4470      index = XEXP (addr, 0);
4471      tmp = XEXP (addr, 1);
4472      if (GET_CODE (tmp) != CONST_INT)
4473	return 0;
4474      scale = INTVAL (tmp);
4475      if ((unsigned HOST_WIDE_INT) scale > 3)
4476	return 0;
4477      scale = 1 << scale;
4478      retval = -1;
4479    }
4480  else
4481    disp = addr;			/* displacement */
4482
4483  /* Extract the integral value of scale.  */
4484  if (scale_rtx)
4485    {
4486      if (GET_CODE (scale_rtx) != CONST_INT)
4487	return 0;
4488      scale = INTVAL (scale_rtx);
4489    }
4490
4491  /* Allow the arg pointer and stack pointer as index if there is no scaling.  */
4492  if (base && index && scale == 1
4493      && (index == arg_pointer_rtx || index == frame_pointer_rtx
4494          || index == stack_pointer_rtx))
4495    {
4496      rtx tmp = base;
4497      base = index;
4498      index = tmp;
4499    }
4500
4501  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
4502  if ((base == hard_frame_pointer_rtx
4503       || base == frame_pointer_rtx
4504       || base == arg_pointer_rtx) && !disp)
4505    disp = const0_rtx;
4506
4507  /* Special case: on K6, [%esi] causes the instruction to be vector
4508     decoded.  Avoid this by transforming it to [%esi+0].  */
4509  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4510      && base && !index && !disp
4511      && REG_P (base)
4512      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4513    disp = const0_rtx;
4514
4515  /* Special case: encode reg+reg instead of reg*2.  */
4516  if (!base && index && scale && scale == 2)
4517    base = index, scale = 1;
4518
4519  /* Special case: scaling cannot be encoded without base or displacement.  */
4520  if (!base && !disp && index && scale != 1)
4521    disp = const0_rtx;
4522
4523  out->base = base;
4524  out->index = index;
4525  out->disp = disp;
4526  out->scale = scale;
4527
4528  return retval;
4529}
4530
4531/* Return cost of the memory address x.
4532   For i386, it is better to use a complex address than let gcc copy
4533   the address into a reg and make a new pseudo.  But not if the address
4534   requires two regs - that would mean more pseudos with longer
4535   lifetimes.  */
4536int
4537ix86_address_cost (x)
4538     rtx x;
4539{
4540  struct ix86_address parts;
4541  int cost = 1;
4542
4543  if (!ix86_decompose_address (x, &parts))
4544    abort ();
4545
4546  /* More complex memory references are better.  */
4547  if (parts.disp && parts.disp != const0_rtx)
4548    cost--;
4549
4550  /* Attempt to minimize number of registers in the address.  */
4551  if ((parts.base
4552       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4553      || (parts.index
4554	  && (!REG_P (parts.index)
4555	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4556    cost++;
4557
4558  if (parts.base
4559      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4560      && parts.index
4561      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4562      && parts.base != parts.index)
4563    cost++;
4564
4565  /* The AMD K6 doesn't like addresses with the ModR/M byte set to
4566     00_xxx_100b, since its predecode logic can't detect the length of
4567     such instructions and decoding degenerates to the vector decoder.
4568     Increase the cost of such addresses here.  The penalty is at least
4569     2 cycles.  It may be worthwhile to split or even refuse such addresses.
4570
4571     The following addressing modes are affected:
4572      [base+scale*index]
4573      [scale*index+disp]
4574      [base+index]
4575
4576     The first and last cases may be avoidable by explicitly coding a zero
4577     displacement into the memory address, but I don't have an AMD-K6
4578     machine handy to check this theory.  */
4579
4580  if (TARGET_K6
4581      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4582	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4583	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4584    cost += 10;
4585
4586  return cost;
4587}
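
/* E.g. with hard registers, (%ebp) costs 1 and 4(%ebp) costs 0 (a
   nonzero displacement makes the reference "more complex", which we
   prefer), while an address combining two distinct pseudo registers
   costs 3 before the K6 penalty is considered.  */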
4588
4589/* If X is a machine specific address (i.e. a symbol or label being
4590   referenced as a displacement from the GOT implemented using an
4591   UNSPEC), then return the base term.  Otherwise return X.  */
4592
4593rtx
4594ix86_find_base_term (x)
4595     rtx x;
4596{
4597  rtx term;
4598
4599  if (TARGET_64BIT)
4600    {
4601      if (GET_CODE (x) != CONST)
4602	return x;
4603      term = XEXP (x, 0);
4604      if (GET_CODE (term) == PLUS
4605	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
4606	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4607	term = XEXP (term, 0);
4608      if (GET_CODE (term) != UNSPEC
4609	  || XVECLEN (term, 0) != 1
4610	  || XINT (term, 1) !=  15)
4611	return x;
4612
4613      term = XVECEXP (term, 0, 0);
4614
4615      if (GET_CODE (term) != SYMBOL_REF
4616	  && GET_CODE (term) != LABEL_REF)
4617	return x;
4618
4619      return term;
4620    }
4621
4622  if (GET_CODE (x) != PLUS
4623      || XEXP (x, 0) != pic_offset_table_rtx
4624      || GET_CODE (XEXP (x, 1)) != CONST)
4625    return x;
4626
4627  term = XEXP (XEXP (x, 1), 0);
4628
4629  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4630    term = XEXP (term, 0);
4631
4632  if (GET_CODE (term) != UNSPEC
4633      || XVECLEN (term, 0) != 1
4634      || XINT (term, 1) !=  7)
4635    return x;
4636
4637  term = XVECEXP (term, 0, 0);
4638
4639  if (GET_CODE (term) != SYMBOL_REF
4640      && GET_CODE (term) != LABEL_REF)
4641    return x;
4642
4643  return term;
4644}
4645
4646/* Determine if a given CONST RTX is a valid memory displacement
4647   in PIC mode.  */
4648
4649int
4650legitimate_pic_address_disp_p (disp)
4651     register rtx disp;
4652{
4653  /* In 64bit mode we can allow direct addresses of symbols and labels
4654     when they are not dynamic symbols.  */
4655  if (TARGET_64BIT)
4656    {
4657      rtx x = disp;
4658      if (GET_CODE (disp) == CONST)
4659	x = XEXP (disp, 0);
4660      /* ??? Handle PIC code models */
4661      if (GET_CODE (x) == PLUS
4662	  && (GET_CODE (XEXP (x, 1)) == CONST_INT
4663	      && ix86_cmodel == CM_SMALL_PIC
4664	      && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4665	      && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4666	x = XEXP (x, 0);
4667      if (local_symbolic_operand (x, Pmode))
4668	return 1;
4669    }
4670  if (GET_CODE (disp) != CONST)
4671    return 0;
4672  disp = XEXP (disp, 0);
4673
4674  if (TARGET_64BIT)
4675    {
4676      /* It is unsafe to allow PLUS expressions.  This limits the allowed
4677         distance of GOT table references.  We should not need these anyway.  */
4678      if (GET_CODE (disp) != UNSPEC
4679	  || XVECLEN (disp, 0) != 1
4680	  || XINT (disp, 1) != 15)
4681	return 0;
4682
4683      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4684	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4685	return 0;
4686      return 1;
4687    }
4688
4689  if (GET_CODE (disp) == PLUS)
4690    {
4691      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4692	return 0;
4693      disp = XEXP (disp, 0);
4694    }
4695
4696  if (GET_CODE (disp) != UNSPEC
4697      || XVECLEN (disp, 0) != 1)
4698    return 0;
4699
4700  /* Must be @GOT or @GOTOFF.  */
4701  switch (XINT (disp, 1))
4702    {
4703    case 6: /* @GOT */
4704      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4705
4706    case 7: /* @GOTOFF */
4707      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4708    }
4709
4710  return 0;
4711}
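
/* For reference, the 32-bit displacement forms accepted above are,
   e.g.:

       (const (unspec [(symbol_ref "foo")] 6))		foo@GOT
       (const (unspec [(symbol_ref "bar")] 7))		bar@GOTOFF
       (const (plus (unspec [(symbol_ref "bar")] 7)
		    (const_int 4)))			bar@GOTOFF + 4

   where the @GOTOFF forms additionally require a local symbol.  */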
4712
4713/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4714   memory address for an instruction.  The MODE argument is the machine mode
4715   for the MEM expression that wants to use this address.
4716
4717   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
4718   convert common non-canonical forms to canonical form so that they will
4719   be recognized.  */
4720
4721int
4722legitimate_address_p (mode, addr, strict)
4723     enum machine_mode mode;
4724     register rtx addr;
4725     int strict;
4726{
4727  struct ix86_address parts;
4728  rtx base, index, disp;
4729  HOST_WIDE_INT scale;
4730  const char *reason = NULL;
4731  rtx reason_rtx = NULL_RTX;
4732
4733  if (TARGET_DEBUG_ADDR)
4734    {
4735      fprintf (stderr,
4736	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4737	       GET_MODE_NAME (mode), strict);
4738      debug_rtx (addr);
4739    }
4740
4741  if (ix86_decompose_address (addr, &parts) <= 0)
4742    {
4743      reason = "decomposition failed";
4744      goto report_error;
4745    }
4746
4747  base = parts.base;
4748  index = parts.index;
4749  disp = parts.disp;
4750  scale = parts.scale;
4751
4752  /* Validate base register.
4753
4754     Don't allow SUBREGs here; it can lead to spill failures when the base
4755     is one word out of a two word structure, which is represented internally
4756     as a DImode int.  */
4757
4758  if (base)
4759    {
4760      reason_rtx = base;
4761
4762      if (GET_CODE (base) != REG)
4763	{
4764	  reason = "base is not a register";
4765	  goto report_error;
4766	}
4767
4768      if (GET_MODE (base) != Pmode)
4769	{
4770	  reason = "base is not in Pmode";
4771	  goto report_error;
4772	}
4773
4774      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
4775	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
4776	{
4777	  reason = "base is not valid";
4778	  goto report_error;
4779	}
4780    }
4781
4782  /* Validate index register.
4783
4784     Don't allow SUBREGs here; it can lead to spill failures when the index
4785     is one word out of a two word structure, which is represented internally
4786     as a DImode int.  */
4787
4788  if (index)
4789    {
4790      reason_rtx = index;
4791
4792      if (GET_CODE (index) != REG)
4793	{
4794	  reason = "index is not a register";
4795	  goto report_error;
4796	}
4797
4798      if (GET_MODE (index) != Pmode)
4799	{
4800	  reason = "index is not in Pmode";
4801	  goto report_error;
4802	}
4803
4804      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
4805	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
4806	{
4807	  reason = "index is not valid";
4808	  goto report_error;
4809	}
4810    }
4811
4812  /* Validate scale factor.  */
4813  if (scale != 1)
4814    {
4815      reason_rtx = GEN_INT (scale);
4816      if (!index)
4817	{
4818	  reason = "scale without index";
4819	  goto report_error;
4820	}
4821
4822      if (scale != 2 && scale != 4 && scale != 8)
4823	{
4824	  reason = "scale is not a valid multiplier";
4825	  goto report_error;
4826	}
4827    }
4828
4829  /* Validate displacement.  */
4830  if (disp)
4831    {
4832      reason_rtx = disp;
4833
4834      if (!CONSTANT_ADDRESS_P (disp))
4835	{
4836	  reason = "displacement is not constant";
4837	  goto report_error;
4838	}
4839
4840      if (TARGET_64BIT)
4841	{
4842	  if (!x86_64_sign_extended_value (disp))
4843	    {
4844	      reason = "displacement is out of range";
4845	      goto report_error;
4846	    }
4847	}
4848      else
4849	{
4850	  if (GET_CODE (disp) == CONST_DOUBLE)
4851	    {
4852	      reason = "displacement is a const_double";
4853	      goto report_error;
4854	    }
4855	}
4856
4857      if (flag_pic && SYMBOLIC_CONST (disp))
4858	{
4859	  if (TARGET_64BIT && (index || base))
4860	    {
4861	      reason = "non-constant pic memory reference";
4862	      goto report_error;
4863	    }
4864	  if (! legitimate_pic_address_disp_p (disp))
4865	    {
4866	      reason = "displacement is an invalid pic construct";
4867	      goto report_error;
4868	    }
4869
4870          /* This code used to verify that a symbolic pic displacement
4871	     includes the pic_offset_table_rtx register.
4872
4873	     While this is a good idea, unfortunately these constructs may
4874	     be created by "adds using lea" optimization for incorrect
4875	     code like:
4876
4877	     int a;
4878	     int foo(int i)
4879	       {
4880	         return *(&a+i);
4881	       }
4882
4883	     This code is nonsensical, but results in addressing the
4884	     GOT table with a pic_offset_table_rtx base.  We can't
4885	     just refuse it easily, since it gets matched by the
4886	     "addsi3" pattern, which later gets split into a lea if the
4887	     output register differs from the input.  While this
4888	     could be handled by a separate addsi pattern for this case
4889	     that never results in a lea, disabling this test seems to
4890	     be the easier and correct fix for the crash.  */
4891	}
4892      else if (HALF_PIC_P ())
4893	{
4894	  if (! HALF_PIC_ADDRESS_P (disp)
4895	      || (base != NULL_RTX || index != NULL_RTX))
4896	    {
4897	      reason = "displacement is an invalid half-pic reference";
4898	      goto report_error;
4899	    }
4900	}
4901    }
4902
4903  /* Everything looks valid.  */
4904  if (TARGET_DEBUG_ADDR)
4905    fprintf (stderr, "Success.\n");
4906  return TRUE;
4907
4908report_error:
4909  if (TARGET_DEBUG_ADDR)
4910    {
4911      fprintf (stderr, "Error: %s\n", reason);
4912      debug_rtx (reason_rtx);
4913    }
4914  return FALSE;
4915}
4916
4917/* Return a unique alias set for the GOT.  */
4918
4919static HOST_WIDE_INT
4920ix86_GOT_alias_set ()
4921{
4922    static HOST_WIDE_INT set = -1;
4923    if (set == -1)
4924      set = new_alias_set ();
4925    return set;
4926}
4927
4928/* Return a legitimate reference for ORIG (an address) using the
4929   register REG.  If REG is 0, a new pseudo is generated.
4930
4931   There are two types of references that must be handled:
4932
4933   1. Global data references must load the address from the GOT, via
4934      the PIC reg.  An insn is emitted to do this load, and the reg is
4935      returned.
4936
4937   2. Static data references, constant pool addresses, and code labels
4938      compute the address as an offset from the GOT, whose base is in
4939      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
4940      differentiate them from global data objects.  The returned
4941      address is the PIC reg + an unspec constant.
4942
4943   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4944   reg also appears in the address.  */
4945
4946rtx
4947legitimize_pic_address (orig, reg)
4948     rtx orig;
4949     rtx reg;
4950{
4951  rtx addr = orig;
4952  rtx new = orig;
4953  rtx base;
4954
4955  if (local_symbolic_operand (addr, Pmode))
4956    {
4957      /* In 64bit mode we can address such objects directly.  */
4958      if (TARGET_64BIT)
4959	new = addr;
4960      else
4961	{
4962	  /* This symbol may be referenced via a displacement from the PIC
4963	     base address (@GOTOFF).  */
4964
4965	  current_function_uses_pic_offset_table = 1;
4966	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
4967	  new = gen_rtx_CONST (Pmode, new);
4968	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4969
4970	  if (reg != 0)
4971	    {
4972	      emit_move_insn (reg, new);
4973	      new = reg;
4974	    }
4975      	}
4976    }
4977  else if (GET_CODE (addr) == SYMBOL_REF)
4978    {
4979      if (TARGET_64BIT)
4980	{
4981	  current_function_uses_pic_offset_table = 1;
4982	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
4983	  new = gen_rtx_CONST (Pmode, new);
4984	  new = gen_rtx_MEM (Pmode, new);
4985	  RTX_UNCHANGING_P (new) = 1;
4986	  set_mem_alias_set (new, ix86_GOT_alias_set ());
4987
4988	  if (reg == 0)
4989	    reg = gen_reg_rtx (Pmode);
4990	  /* Use gen_movsi directly; otherwise the address is loaded
4991	     into a register for CSE.  We don't want to CSE these addresses;
4992	     instead we CSE the addresses loaded from the GOT table, so skip this.  */
4993	  emit_insn (gen_movsi (reg, new));
4994	  new = reg;
4995	}
4996      else
4997	{
4998	  /* This symbol must be referenced via a load from the
4999	     Global Offset Table (@GOT).  */
5000
5001	  current_function_uses_pic_offset_table = 1;
5002	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
5003	  new = gen_rtx_CONST (Pmode, new);
5004	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5005	  new = gen_rtx_MEM (Pmode, new);
5006	  RTX_UNCHANGING_P (new) = 1;
5007	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5008
5009	  if (reg == 0)
5010	    reg = gen_reg_rtx (Pmode);
5011	  emit_move_insn (reg, new);
5012	  new = reg;
5013	}
5014    }
5015  else
5016    {
5017      if (GET_CODE (addr) == CONST)
5018	{
5019	  addr = XEXP (addr, 0);
5020
5021	  /* We must match stuff we generated before.  Assume the only
5022	     unspecs that can get here are ours.  Not that we could do
5023	     anything with them anyway...  */
5024	  if (GET_CODE (addr) == UNSPEC
5025	      || (GET_CODE (addr) == PLUS
5026		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5027	    return orig;
5028	  if (GET_CODE (addr) != PLUS)
5029	    abort ();
5030	}
5031      if (GET_CODE (addr) == PLUS)
5032	{
5033	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5034
5035	  /* Check first to see if this is a constant offset from a @GOTOFF
5036	     symbol reference.  */
5037	  if (local_symbolic_operand (op0, Pmode)
5038	      && GET_CODE (op1) == CONST_INT)
5039	    {
5040	      if (!TARGET_64BIT)
5041		{
5042		  current_function_uses_pic_offset_table = 1;
5043		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
5044		  new = gen_rtx_PLUS (Pmode, new, op1);
5045		  new = gen_rtx_CONST (Pmode, new);
5046		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5047
5048		  if (reg != 0)
5049		    {
5050		      emit_move_insn (reg, new);
5051		      new = reg;
5052		    }
5053		}
5054	      else
5055		{
5056		  /* ??? We need to limit offsets here.  */
5057		}
5058	    }
5059	  else
5060	    {
5061	      base = legitimize_pic_address (XEXP (addr, 0), reg);
5062	      new  = legitimize_pic_address (XEXP (addr, 1),
5063					     base == reg ? NULL_RTX : reg);
5064
5065	      if (GET_CODE (new) == CONST_INT)
5066		new = plus_constant (base, INTVAL (new));
5067	      else
5068		{
5069		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5070		    {
5071		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5072		      new = XEXP (new, 1);
5073		    }
5074		  new = gen_rtx_PLUS (Pmode, base, new);
5075		}
5076	    }
5077	}
5078    }
5079  return new;
5080}
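
/* The following is an illustrative sketch, not part of the port: the two
   32-bit PIC shapes constructed above, rebuilt with the same constructors.
   UNSPEC 7 prints as @GOTOFF and UNSPEC 6 as @GOT (see
   output_pic_addr_const below); the helper names are hypothetical.  */
#if 0
static rtx
example_gotoff_form (sym)
     rtx sym;
{
  /* (plus %ebx (const (unspec [sym] 7)))  ==>  sym@GOTOFF(%ebx)  */
  rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), 7);
  return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
		       gen_rtx_CONST (Pmode, u));
}

static rtx
example_got_form (sym)
     rtx sym;
{
  /* (mem (plus %ebx (const (unspec [sym] 6))))  ==>  sym@GOT(%ebx)  */
  rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), 6);
  return gen_rtx_MEM (Pmode,
		      gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
				    gen_rtx_CONST (Pmode, u)));
}
#endif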
5081
5082/* Try machine-dependent ways of modifying an illegitimate address
5083   to be legitimate.  If we find one, return the new, valid address.
5084   This macro is used in only one place: `memory_address' in explow.c.
5085
5086   OLDX is the address as it was before break_out_memory_refs was called.
5087   In some cases it is useful to look at this to decide what needs to be done.
5088
5089   MODE and WIN are passed so that this macro can use
5090   GO_IF_LEGITIMATE_ADDRESS.
5091
5092   It is always safe for this macro to do nothing.  It exists to recognize
5093   opportunities to optimize the output.
5094
5095   For the 80386, we handle X+REG by loading X into a register R and
5096   using R+REG.  R will go in a general reg and indexing will be used.
5097   However, if REG is a broken-out memory address or multiplication,
5098   nothing needs to be done because REG can certainly go in a general reg.
5099
5100   When -fpic is used, special handling is needed for symbolic references.
5101   See comments by legitimize_pic_address in i386.c for details.  */
5102
5103rtx
5104legitimize_address (x, oldx, mode)
5105     register rtx x;
5106     register rtx oldx ATTRIBUTE_UNUSED;
5107     enum machine_mode mode;
5108{
5109  int changed = 0;
5110  unsigned log;
5111
5112  if (TARGET_DEBUG_ADDR)
5113    {
5114      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5115	       GET_MODE_NAME (mode));
5116      debug_rtx (x);
5117    }
5118
5119  if (flag_pic && SYMBOLIC_CONST (x))
5120    return legitimize_pic_address (x, 0);
5121
5122	  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
5123  if (GET_CODE (x) == ASHIFT
5124      && GET_CODE (XEXP (x, 1)) == CONST_INT
5125	      && (log = (unsigned) INTVAL (XEXP (x, 1))) < 4)
5126    {
5127      changed = 1;
5128      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5129			GEN_INT (1 << log));
5130    }
5131
5132  if (GET_CODE (x) == PLUS)
5133    {
5134      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
5135
5136      if (GET_CODE (XEXP (x, 0)) == ASHIFT
5137	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5138	  && (log = (unsigned) INTVAL (XEXP (XEXP (x, 0), 1))) < 4)
5139	{
5140	  changed = 1;
5141	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
5142				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5143				      GEN_INT (1 << log));
5144	}
5145
5146      if (GET_CODE (XEXP (x, 1)) == ASHIFT
5147	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5148	  && (log = (unsigned) INTVAL (XEXP (XEXP (x, 1), 1))) < 4)
5149	{
5150	  changed = 1;
5151	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
5152				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5153				      GEN_INT (1 << log));
5154	}
5155
5156      /* Put multiply first if it isn't already.  */
5157      if (GET_CODE (XEXP (x, 1)) == MULT)
5158	{
5159	  rtx tmp = XEXP (x, 0);
5160	  XEXP (x, 0) = XEXP (x, 1);
5161	  XEXP (x, 1) = tmp;
5162	  changed = 1;
5163	}
5164
5165      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5166	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
5167	 created by virtual register instantiation, register elimination, and
5168	 similar optimizations.  */
5169      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5170	{
5171	  changed = 1;
5172	  x = gen_rtx_PLUS (Pmode,
5173			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
5174					  XEXP (XEXP (x, 1), 0)),
5175			    XEXP (XEXP (x, 1), 1));
5176	}
5177
5178      /* Canonicalize
5179	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5180	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
5181      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5182	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5183	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5184	       && CONSTANT_P (XEXP (x, 1)))
5185	{
5186	  rtx constant;
5187	  rtx other = NULL_RTX;
5188
5189	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5190	    {
5191	      constant = XEXP (x, 1);
5192	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5193	    }
5194	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5195	    {
5196	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5197	      other = XEXP (x, 1);
5198	    }
5199	  else
5200	    constant = 0;
5201
5202	  if (constant)
5203	    {
5204	      changed = 1;
5205	      x = gen_rtx_PLUS (Pmode,
5206				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5207					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
5208				plus_constant (other, INTVAL (constant)));
5209	    }
5210	}
5211
5212      if (changed && legitimate_address_p (mode, x, FALSE))
5213	return x;
5214
5215      if (GET_CODE (XEXP (x, 0)) == MULT)
5216	{
5217	  changed = 1;
5218	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5219	}
5220
5221      if (GET_CODE (XEXP (x, 1)) == MULT)
5222	{
5223	  changed = 1;
5224	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5225	}
5226
5227      if (changed
5228	  && GET_CODE (XEXP (x, 1)) == REG
5229	  && GET_CODE (XEXP (x, 0)) == REG)
5230	return x;
5231
5232      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5233	{
5234	  changed = 1;
5235	  x = legitimize_pic_address (x, 0);
5236	}
5237
5238      if (changed && legitimate_address_p (mode, x, FALSE))
5239	return x;
5240
5241      if (GET_CODE (XEXP (x, 0)) == REG)
5242	{
5243	  register rtx temp = gen_reg_rtx (Pmode);
5244	  register rtx val  = force_operand (XEXP (x, 1), temp);
5245	  if (val != temp)
5246	    emit_move_insn (temp, val);
5247
5248	  XEXP (x, 1) = temp;
5249	  return x;
5250	}
5251
5252      else if (GET_CODE (XEXP (x, 1)) == REG)
5253	{
5254	  register rtx temp = gen_reg_rtx (Pmode);
5255	  register rtx val  = force_operand (XEXP (x, 0), temp);
5256	  if (val != temp)
5257	    emit_move_insn (temp, val);
5258
5259	  XEXP (x, 0) = temp;
5260	  return x;
5261	}
5262    }
5263
5264  return x;
5265}
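
/* An illustrative sketch (hypothetical helper): a worked example of the
   canonicalizations above.  Starting from
     (plus (ashift r1 (const_int 2)) (plus r2 (const_int 4)))
   the shift is rewritten to (mult r1 4) and the reassociation step yields
     (plus (plus (mult r1 4) r2) (const_int 4))
   which fits the SIB addressing mode, i.e. 4(%base,%index,4).  */
#if 0
static rtx
example_legitimize (r1, r2)
     rtx r1, r2;
{
  rtx addr = gen_rtx_PLUS (Pmode,
			   gen_rtx_ASHIFT (Pmode, r1, GEN_INT (2)),
			   gen_rtx_PLUS (Pmode, r2, GEN_INT (4)));
  return legitimize_address (addr, addr, SImode);
}
#endif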
5266
5267/* Print an integer constant expression in assembler syntax.  Addition
5268   and subtraction are the only arithmetic that may appear in these
5269   expressions.  FILE is the stdio stream to write to, X is the rtx, and
5270   CODE is the operand print code from the output string.  */
5271
5272static void
5273output_pic_addr_const (file, x, code)
5274     FILE *file;
5275     rtx x;
5276     int code;
5277{
5278  char buf[256];
5279
5280  switch (GET_CODE (x))
5281    {
5282    case PC:
5283      if (flag_pic)
5284	putc ('.', file);
5285      else
5286	abort ();
5287      break;
5288
5289    case SYMBOL_REF:
5290      assemble_name (file, XSTR (x, 0));
5291      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5292	fputs ("@PLT", file);
5293      break;
5294
5295    case LABEL_REF:
5296      x = XEXP (x, 0);
5297      /* FALLTHRU */
5298    case CODE_LABEL:
5299      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5300      assemble_name (asm_out_file, buf);
5301      break;
5302
5303    case CONST_INT:
5304      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5305      break;
5306
5307    case CONST:
5308      /* This used to output parentheses around the expression,
5309	 but that does not work on the 386 (either ATT or BSD assembler).  */
5310      output_pic_addr_const (file, XEXP (x, 0), code);
5311      break;
5312
5313    case CONST_DOUBLE:
5314      if (GET_MODE (x) == VOIDmode)
5315	{
5316	  /* We can use %d if the number is <32 bits and positive.  */
5317	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5318	    fprintf (file, "0x%lx%08lx",
5319		     (unsigned long) CONST_DOUBLE_HIGH (x),
5320		     (unsigned long) CONST_DOUBLE_LOW (x));
5321	  else
5322	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5323	}
5324      else
5325	/* We can't handle floating point constants;
5326	   PRINT_OPERAND must handle them.  */
5327	output_operand_lossage ("floating constant misused");
5328      break;
5329
5330    case PLUS:
5331      /* Some assemblers need integer constants to appear first.  */
5332      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5333	{
5334	  output_pic_addr_const (file, XEXP (x, 0), code);
5335	  putc ('+', file);
5336	  output_pic_addr_const (file, XEXP (x, 1), code);
5337	}
5338      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5339	{
5340	  output_pic_addr_const (file, XEXP (x, 1), code);
5341	  putc ('+', file);
5342	  output_pic_addr_const (file, XEXP (x, 0), code);
5343	}
5344      else
5345	abort ();
5346      break;
5347
5348    case MINUS:
5349      putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5350      output_pic_addr_const (file, XEXP (x, 0), code);
5351      putc ('-', file);
5352      output_pic_addr_const (file, XEXP (x, 1), code);
5353      putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5354      break;
5355
5356     case UNSPEC:
5357       if (XVECLEN (x, 0) != 1)
5358	abort ();
5359       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5360       switch (XINT (x, 1))
5361	{
5362	case 6:
5363	  fputs ("@GOT", file);
5364	  break;
5365	case 7:
5366	  fputs ("@GOTOFF", file);
5367	  break;
5368	case 8:
5369	  fputs ("@PLT", file);
5370	  break;
5371	case 15:
5372	  fputs ("@GOTPCREL(%RIP)", file);
5373	  break;
5374	default:
5375	  output_operand_lossage ("invalid UNSPEC as operand");
5376	  break;
5377	}
5378       break;
5379
5380    default:
5381      output_operand_lossage ("invalid expression as operand");
5382    }
5383}
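
/* An illustrative sketch (hypothetical helper): handing
   output_pic_addr_const a @GOTOFF reference built by hand.  For a
   SYMBOL_REF "sym" this prints "sym@GOTOFF" to FILE, per the UNSPEC 7
   case above.  */
#if 0
static void
example_print_gotoff (file, sym)
     FILE *file;
     rtx sym;
{
  rtx u = gen_rtx_CONST (Pmode,
			 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), 7));
  output_pic_addr_const (file, u, 0);
}
#endif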
5384
5385/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5386   We need to handle our special PIC relocations.  */
5387
5388void
5389i386_dwarf_output_addr_const (file, x)
5390     FILE *file;
5391     rtx x;
5392{
5393#ifdef ASM_QUAD
5394  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5395#else
5396  if (TARGET_64BIT)
5397    abort ();
5398  fprintf (file, "%s", ASM_LONG);
5399#endif
5400  if (flag_pic)
5401    output_pic_addr_const (file, x, '\0');
5402  else
5403    output_addr_const (file, x);
5404  fputc ('\n', file);
5405}
5406
5407/* In the name of slightly smaller debug output, and to cater to
5408	   general assembler lossage, recognize PIC+GOTOFF and turn it back
5409   into a direct symbol reference.  */
5410
5411rtx
5412i386_simplify_dwarf_addr (orig_x)
5413     rtx orig_x;
5414{
5415  rtx x = orig_x, y;
5416
5417  if (GET_CODE (x) == MEM)
5418    x = XEXP (x, 0);
5419
5420  if (TARGET_64BIT)
5421    {
5422      if (GET_CODE (x) != CONST
5423	  || GET_CODE (XEXP (x, 0)) != UNSPEC
5424	  || XINT (XEXP (x, 0), 1) != 15
5425	  || GET_CODE (orig_x) != MEM)
5426	return orig_x;
5427      return XVECEXP (XEXP (x, 0), 0, 0);
5428    }
5429
5430  if (GET_CODE (x) != PLUS
5431      || GET_CODE (XEXP (x, 1)) != CONST)
5432    return orig_x;
5433
5434  if (GET_CODE (XEXP (x, 0)) == REG
5435      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5436    /* %ebx + GOT/GOTOFF */
5437    y = NULL;
5438  else if (GET_CODE (XEXP (x, 0)) == PLUS)
5439    {
5440      /* %ebx + %reg * scale + GOT/GOTOFF */
5441      y = XEXP (x, 0);
5442      if (GET_CODE (XEXP (y, 0)) == REG
5443	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5444	y = XEXP (y, 1);
5445      else if (GET_CODE (XEXP (y, 1)) == REG
5446	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5447	y = XEXP (y, 0);
5448      else
5449	return orig_x;
5450      if (GET_CODE (y) != REG
5451	  && GET_CODE (y) != MULT
5452	  && GET_CODE (y) != ASHIFT)
5453	return orig_x;
5454    }
5455  else
5456    return orig_x;
5457
5458  x = XEXP (XEXP (x, 1), 0);
5459  if (GET_CODE (x) == UNSPEC
5460      && ((XINT (x, 1) == 6 && GET_CODE (orig_x) == MEM)
5461	  || (XINT (x, 1) == 7 && GET_CODE (orig_x) != MEM)))
5462    {
5463      if (y)
5464	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5465      return XVECEXP (x, 0, 0);
5466    }
5467
5468  if (GET_CODE (x) == PLUS
5469      && GET_CODE (XEXP (x, 0)) == UNSPEC
5470      && GET_CODE (XEXP (x, 1)) == CONST_INT
5471      && ((XINT (XEXP (x, 0), 1) == 6 && GET_CODE (orig_x) == MEM)
5472	  || (XINT (XEXP (x, 0), 1) == 7 && GET_CODE (orig_x) != MEM)))
5473    {
5474      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5475      if (y)
5476	return gen_rtx_PLUS (Pmode, y, x);
5477      return x;
5478    }
5479
5480  return orig_x;
5481}
5482
5483static void
5484put_condition_code (code, mode, reverse, fp, file)
5485     enum rtx_code code;
5486     enum machine_mode mode;
5487     int reverse, fp;
5488     FILE *file;
5489{
5490  const char *suffix;
5491
5492  if (mode == CCFPmode || mode == CCFPUmode)
5493    {
5494      enum rtx_code second_code, bypass_code;
5495      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5496      if (bypass_code != NIL || second_code != NIL)
5497	abort ();
5498      code = ix86_fp_compare_code_to_integer (code);
5499      mode = CCmode;
5500    }
5501  if (reverse)
5502    code = reverse_condition (code);
5503
5504  switch (code)
5505    {
5506    case EQ:
5507      suffix = "e";
5508      break;
5509    case NE:
5510      suffix = "ne";
5511      break;
5512    case GT:
5513      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5514	abort ();
5515      suffix = "g";
5516      break;
5517    case GTU:
5518	      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
5519		 Those same assemblers have the same but opposite lossage on cmov.  */
5520      if (mode != CCmode)
5521	abort ();
5522      suffix = fp ? "nbe" : "a";
5523      break;
5524    case LT:
5525      if (mode == CCNOmode || mode == CCGOCmode)
5526	suffix = "s";
5527      else if (mode == CCmode || mode == CCGCmode)
5528	suffix = "l";
5529      else
5530	abort ();
5531      break;
5532    case LTU:
5533      if (mode != CCmode)
5534	abort ();
5535      suffix = "b";
5536      break;
5537    case GE:
5538      if (mode == CCNOmode || mode == CCGOCmode)
5539	suffix = "ns";
5540      else if (mode == CCmode || mode == CCGCmode)
5541	suffix = "ge";
5542      else
5543	abort ();
5544      break;
5545    case GEU:
5546      /* ??? As above.  */
5547      if (mode != CCmode)
5548	abort ();
5549      suffix = fp ? "nb" : "ae";
5550      break;
5551    case LE:
5552      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5553	abort ();
5554      suffix = "le";
5555      break;
5556    case LEU:
5557      if (mode != CCmode)
5558	abort ();
5559      suffix = "be";
5560      break;
5561    case UNORDERED:
5562      suffix = fp ? "u" : "p";
5563      break;
5564    case ORDERED:
5565      suffix = fp ? "nu" : "np";
5566      break;
5567    default:
5568      abort ();
5569    }
5570  fputs (suffix, file);
5571}
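
/* An illustrative sketch (hypothetical helper): how a caller turns a
   comparison code into a "setCC" mnemonic.  A signed greater-than in
   CCmode selects the "g" suffix, so this prints "setg".  */
#if 0
static void
example_setcc_suffix (file)
     FILE *file;
{
  fputs ("set", file);
  put_condition_code (GT, CCmode, 0, 0, file);
}
#endif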
5572
5573void
5574print_reg (x, code, file)
5575     rtx x;
5576     int code;
5577     FILE *file;
5578{
5579  if (REGNO (x) == ARG_POINTER_REGNUM
5580      || REGNO (x) == FRAME_POINTER_REGNUM
5581      || REGNO (x) == FLAGS_REG
5582      || REGNO (x) == FPSR_REG)
5583    abort ();
5584
5585  if (ASSEMBLER_DIALECT == ASM_ATT  || USER_LABEL_PREFIX[0] == 0)
5586    putc ('%', file);
5587
5588  if (code == 'w' || MMX_REG_P (x))
5589    code = 2;
5590  else if (code == 'b')
5591    code = 1;
5592  else if (code == 'k')
5593    code = 4;
5594  else if (code == 'q')
5595    code = 8;
5596  else if (code == 'y')
5597    code = 3;
5598  else if (code == 'h')
5599    code = 0;
5600  else
5601    code = GET_MODE_SIZE (GET_MODE (x));
5602
5603	  /* Irritatingly, AMD extended registers use a different naming
5604	     convention from the normal registers.  */
5605  if (REX_INT_REG_P (x))
5606    {
5607      if (!TARGET_64BIT)
5608	abort ();
5609      switch (code)
5610	{
5611	  case 0:
5612	    error ("extended registers have no high halves");
5613	    break;
5614	  case 1:
5615	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5616	    break;
5617	  case 2:
5618	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5619	    break;
5620	  case 4:
5621	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5622	    break;
5623	  case 8:
5624	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5625	    break;
5626	  default:
5627	    error ("unsupported operand size for extended register");
5628	    break;
5629	}
5630      return;
5631    }
5632  switch (code)
5633    {
5634    case 3:
5635      if (STACK_TOP_P (x))
5636	{
5637	  fputs ("st(0)", file);
5638	  break;
5639	}
5640      /* FALLTHRU */
5641    case 8:
5642    case 4:
5643    case 12:
5644      if (! ANY_FP_REG_P (x))
5645	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5646      /* FALLTHRU */
5647    case 16:
5648    case 2:
5649      fputs (hi_reg_name[REGNO (x)], file);
5650      break;
5651    case 1:
5652      fputs (qi_reg_name[REGNO (x)], file);
5653      break;
5654    case 0:
5655      fputs (qi_high_reg_name[REGNO (x)], file);
5656      break;
5657    default:
5658      abort ();
5659    }
5660}
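
/* An illustrative sketch (hypothetical helper): one register printed at
   several widths through the size codes above.  For %eax this prints
   "%al", "%ax" and "%eax"; an extended register such as r8 prints
   "r8b", "r8w" and "r8d" through the REX branch.  */
#if 0
static void
example_print_widths (file, reg)
     FILE *file;
     rtx reg;
{
  print_reg (reg, 'b', file);	/* QImode name */
  print_reg (reg, 'w', file);	/* HImode name */
  print_reg (reg, 'k', file);	/* SImode name */
}
#endif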
5661
5662/* Meaning of CODE:
5663   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5664   C -- print opcode suffix for set/cmov insn.
5665   c -- like C, but print reversed condition
5666   F,f -- likewise, but for floating-point.
5667   O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
5668        nothing
5669   R -- print the prefix for register names.
5670   z -- print the opcode suffix for the size of the current operand.
5671   * -- print a star (in certain assembler syntax)
5672   A -- print an absolute memory reference.
5673   w -- print the operand as if it's a "word" (HImode) even if it isn't.
5674	   s -- print a shift double count, followed by the assembler's argument
5675	delimiter.
5676   b -- print the QImode name of the register for the indicated operand.
5677	%b0 would print %al if operands[0] is reg 0.
5678   w --  likewise, print the HImode name of the register.
5679   k --  likewise, print the SImode name of the register.
5680   q --  likewise, print the DImode name of the register.
5681   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5682   y -- print "st(0)" instead of "st" as a register.
5683   D -- print condition for SSE cmp instruction.
5684   P -- if PIC, print an @PLT suffix.
5685   X -- don't print any sort of PIC '@' suffix for a symbol.
5686 */
5687
5688void
5689print_operand (file, x, code)
5690     FILE *file;
5691     rtx x;
5692     int code;
5693{
5694  if (code)
5695    {
5696      switch (code)
5697	{
5698	case '*':
5699	  if (ASSEMBLER_DIALECT == ASM_ATT)
5700	    putc ('*', file);
5701	  return;
5702
5703	case 'A':
5704	  if (ASSEMBLER_DIALECT == ASM_ATT)
5705	    putc ('*', file);
5706	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
5707	    {
5708	      /* Intel syntax. For absolute addresses, registers should not
5709		 be surrounded by brackets.  */
5710	      if (GET_CODE (x) != REG)
5711		{
5712		  putc ('[', file);
5713		  PRINT_OPERAND (file, x, 0);
5714		  putc (']', file);
5715		  return;
5716		}
5717	    }
5718	  else
5719	    abort ();
5720
5721	  PRINT_OPERAND (file, x, 0);
5722	  return;
5723
5724
5725	case 'L':
5726	  if (ASSEMBLER_DIALECT == ASM_ATT)
5727	    putc ('l', file);
5728	  return;
5729
5730	case 'W':
5731	  if (ASSEMBLER_DIALECT == ASM_ATT)
5732	    putc ('w', file);
5733	  return;
5734
5735	case 'B':
5736	  if (ASSEMBLER_DIALECT == ASM_ATT)
5737	    putc ('b', file);
5738	  return;
5739
5740	case 'Q':
5741	  if (ASSEMBLER_DIALECT == ASM_ATT)
5742	    putc ('l', file);
5743	  return;
5744
5745	case 'S':
5746	  if (ASSEMBLER_DIALECT == ASM_ATT)
5747	    putc ('s', file);
5748	  return;
5749
5750	case 'T':
5751	  if (ASSEMBLER_DIALECT == ASM_ATT)
5752	    putc ('t', file);
5753	  return;
5754
5755	case 'z':
5756	  /* 387 opcodes don't get size suffixes if the operands are
5757	     registers.  */
5758	  if (STACK_REG_P (x))
5759	    return;
5760
5761	  /* Likewise if using Intel opcodes.  */
5762	  if (ASSEMBLER_DIALECT == ASM_INTEL)
5763	    return;
5764
5765	  /* Derive the opcode suffix from the size of the operand.  */
5766	  switch (GET_MODE_SIZE (GET_MODE (x)))
5767	    {
5768	    case 2:
5769#ifdef HAVE_GAS_FILDS_FISTS
5770	      putc ('s', file);
5771#endif
5772	      return;
5773
5774	    case 4:
5775	      if (GET_MODE (x) == SFmode)
5776		{
5777		  putc ('s', file);
5778		  return;
5779		}
5780	      else
5781		putc ('l', file);
5782	      return;
5783
5784	    case 12:
5785	    case 16:
5786	      putc ('t', file);
5787	      return;
5788
5789	    case 8:
5790	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5791		{
5792#ifdef GAS_MNEMONICS
5793		  putc ('q', file);
5794#else
5795		  putc ('l', file);
5796		  putc ('l', file);
5797#endif
5798		}
5799	      else
5800	        putc ('l', file);
5801	      return;
5802
5803	    default:
5804	      abort ();
5805	    }
5806
5807	case 'b':
5808	case 'w':
5809	case 'k':
5810	case 'q':
5811	case 'h':
5812	case 'y':
5813	case 'X':
5814	case 'P':
5815	  break;
5816
5817	case 's':
5818	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5819	    {
5820	      PRINT_OPERAND (file, x, 0);
5821	      putc (',', file);
5822	    }
5823	  return;
5824
5825	case 'D':
5826	  /* Little bit of braindamage here.  The SSE compare instructions
5827	     use completely different names for the comparisons than the
5828	     fp conditional moves do.  */
5829	  switch (GET_CODE (x))
5830	    {
5831	    case EQ:
5832	    case UNEQ:
5833	      fputs ("eq", file);
5834	      break;
5835	    case LT:
5836	    case UNLT:
5837	      fputs ("lt", file);
5838	      break;
5839	    case LE:
5840	    case UNLE:
5841	      fputs ("le", file);
5842	      break;
5843	    case UNORDERED:
5844	      fputs ("unord", file);
5845	      break;
5846	    case NE:
5847	    case LTGT:
5848	      fputs ("neq", file);
5849	      break;
5850	    case UNGE:
5851	    case GE:
5852	      fputs ("nlt", file);
5853	      break;
5854	    case UNGT:
5855	    case GT:
5856	      fputs ("nle", file);
5857	      break;
5858	    case ORDERED:
5859	      fputs ("ord", file);
5860	      break;
5861	    default:
5862	      abort ();
5863	      break;
5864	    }
5865	  return;
5866	case 'O':
5867#ifdef CMOV_SUN_AS_SYNTAX
5868	  if (ASSEMBLER_DIALECT == ASM_ATT)
5869	    {
5870	      switch (GET_MODE (x))
5871		{
5872		case HImode: putc ('w', file); break;
5873		case SImode:
5874		case SFmode: putc ('l', file); break;
5875		case DImode:
5876		case DFmode: putc ('q', file); break;
5877		default: abort ();
5878		}
5879	      putc ('.', file);
5880	    }
5881#endif
5882	  return;
5883	case 'C':
5884	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5885	  return;
5886	case 'F':
5887#ifdef CMOV_SUN_AS_SYNTAX
5888	  if (ASSEMBLER_DIALECT == ASM_ATT)
5889	    putc ('.', file);
5890#endif
5891	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5892	  return;
5893
5894	  /* Like above, but reverse condition */
5895	case 'c':
5896	  /* Check to see if argument to %c is really a constant
5897	     and not a condition code which needs to be reversed.  */
5898	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5899	    {
5900	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5901	      return;
5902	    }
5903	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5904	  return;
5905	case 'f':
5906#ifdef CMOV_SUN_AS_SYNTAX
5907	  if (ASSEMBLER_DIALECT == ASM_ATT)
5908	    putc ('.', file);
5909#endif
5910	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5911	  return;
5912	case '+':
5913	  {
5914	    rtx x;
5915
5916	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5917	      return;
5918
5919	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5920	    if (x)
5921	      {
5922		int pred_val = INTVAL (XEXP (x, 0));
5923
5924		if (pred_val < REG_BR_PROB_BASE * 45 / 100
5925		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
5926		  {
5927		    int taken = pred_val > REG_BR_PROB_BASE / 2;
5928		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
5929
5930		    /* Emit hints only in the case the default branch prediction
5931		       heuristics would fail.  */
5932		    if (taken != cputaken)
5933		      {
5934			/* We use 3e (DS) prefix for taken branches and
5935			   2e (CS) prefix for not taken branches.  */
5936			if (taken)
5937			  fputs ("ds ; ", file);
5938			else
5939			  fputs ("cs ; ", file);
5940		      }
5941		  }
5942	      }
5943	    return;
5944	  }
5945	default:
5946	    output_operand_lossage ("invalid operand code `%c'", code);
5947	}
5948    }
5949
5950  if (GET_CODE (x) == REG)
5951    {
5952      PRINT_REG (x, code, file);
5953    }
5954
5955  else if (GET_CODE (x) == MEM)
5956    {
5957      /* No `byte ptr' prefix for call instructions.  */
5958      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
5959	{
5960	  const char * size;
5961	  switch (GET_MODE_SIZE (GET_MODE (x)))
5962	    {
5963	    case 1: size = "BYTE"; break;
5964	    case 2: size = "WORD"; break;
5965	    case 4: size = "DWORD"; break;
5966	    case 8: size = "QWORD"; break;
5967	    case 12: size = "XWORD"; break;
5968	    case 16: size = "XMMWORD"; break;
5969	    default:
5970	      abort ();
5971	    }
5972
5973	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
5974	  if (code == 'b')
5975	    size = "BYTE";
5976	  else if (code == 'w')
5977	    size = "WORD";
5978	  else if (code == 'k')
5979	    size = "DWORD";
5980
5981	  fputs (size, file);
5982	  fputs (" PTR ", file);
5983	}
5984
5985      x = XEXP (x, 0);
5986      if (flag_pic && CONSTANT_ADDRESS_P (x))
5987	output_pic_addr_const (file, x, code);
5988      /* Avoid (%rip) for call operands.  */
5989	      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
5990	       && GET_CODE (x) != CONST_INT)
5991	output_addr_const (file, x);
5992      else
5993	output_address (x);
5994    }
5995
5996  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
5997    {
5998      REAL_VALUE_TYPE r;
5999      long l;
6000
6001      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6002      REAL_VALUE_TO_TARGET_SINGLE (r, l);
6003
6004      if (ASSEMBLER_DIALECT == ASM_ATT)
6005	putc ('$', file);
6006      fprintf (file, "0x%lx", l);
6007    }
6008
6009	  /* These float cases don't actually occur as immediate operands.  */
6010	  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6011    {
6012      REAL_VALUE_TYPE r;
6013      char dstr[30];
6014
6015      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6016      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6017      fprintf (file, "%s", dstr);
6018    }
6019
6020  else if (GET_CODE (x) == CONST_DOUBLE
6021	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6022    {
6023      REAL_VALUE_TYPE r;
6024      char dstr[30];
6025
6026      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6027      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6028      fprintf (file, "%s", dstr);
6029    }
6030  else
6031    {
6032      if (code != 'P')
6033	{
6034	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6035	    {
6036	      if (ASSEMBLER_DIALECT == ASM_ATT)
6037		putc ('$', file);
6038	    }
6039	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6040		   || GET_CODE (x) == LABEL_REF)
6041	    {
6042	      if (ASSEMBLER_DIALECT == ASM_ATT)
6043		putc ('$', file);
6044	      else
6045		fputs ("OFFSET FLAT:", file);
6046	    }
6047	}
6048      if (GET_CODE (x) == CONST_INT)
6049	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6050      else if (flag_pic)
6051	output_pic_addr_const (file, x, code);
6052      else
6053	output_addr_const (file, x);
6054    }
6055}
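
/* An illustrative sketch of the '+' hint decision above: predictions
   inside the 45%-55% band of REG_BR_PROB_BASE are considered too weak
   to hint, and outside it a segment prefix is emitted only when the
   prediction disagrees with the static rule (forward branches not
   taken, backward branches taken).  The helper name is hypothetical.  */
#if 0
static int
example_wants_branch_hint (pred_val, forward_p)
     int pred_val, forward_p;
{
  int taken, cputaken;

  if (pred_val >= REG_BR_PROB_BASE * 45 / 100
      && pred_val <= REG_BR_PROB_BASE * 55 / 100)
    return 0;			/* too close to 50-50 to bother */
  taken = pred_val > REG_BR_PROB_BASE / 2;
  cputaken = !forward_p;	/* hardware assumes backward taken */
  return taken != cputaken;
}
#endif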
6056
6057/* Print a memory operand whose address is ADDR.  */
6058
6059void
6060print_operand_address (file, addr)
6061     FILE *file;
6062     register rtx addr;
6063{
6064  struct ix86_address parts;
6065  rtx base, index, disp;
6066  int scale;
6067
6068  if (! ix86_decompose_address (addr, &parts))
6069    abort ();
6070
6071  base = parts.base;
6072  index = parts.index;
6073  disp = parts.disp;
6074  scale = parts.scale;
6075
6076  if (!base && !index)
6077    {
6078      /* Displacement only requires special attention.  */
6079	      /* A displacement-only address requires special attention.  */
6080      if (GET_CODE (disp) == CONST_INT)
6081	{
6082	  if (ASSEMBLER_DIALECT == ASM_INTEL)
6083	    {
6084	      if (USER_LABEL_PREFIX[0] == 0)
6085		putc ('%', file);
6086	      fputs ("ds:", file);
6087	    }
6088	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6089	}
6090      else if (flag_pic)
6091	output_pic_addr_const (file, addr, 0);
6092      else
6093	output_addr_const (file, addr);
6094
6095	      /* Use the one byte shorter RIP-relative addressing for 64bit mode.  */
6096      if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6097	fputs ("(%rip)", file);
6098    }
6099  else
6100    {
6101      if (ASSEMBLER_DIALECT == ASM_ATT)
6102	{
6103	  if (disp)
6104	    {
6105	      if (flag_pic)
6106		output_pic_addr_const (file, disp, 0);
6107	      else if (GET_CODE (disp) == LABEL_REF)
6108		output_asm_label (disp);
6109	      else
6110		output_addr_const (file, disp);
6111	    }
6112
6113	  putc ('(', file);
6114	  if (base)
6115	    PRINT_REG (base, 0, file);
6116	  if (index)
6117	    {
6118	      putc (',', file);
6119	      PRINT_REG (index, 0, file);
6120	      if (scale != 1)
6121		fprintf (file, ",%d", scale);
6122	    }
6123	  putc (')', file);
6124	}
6125      else
6126	{
6127	  rtx offset = NULL_RTX;
6128
6129	  if (disp)
6130	    {
6131	      /* Pull out the offset of a symbol; print any symbol itself.  */
6132	      if (GET_CODE (disp) == CONST
6133		  && GET_CODE (XEXP (disp, 0)) == PLUS
6134		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6135		{
6136		  offset = XEXP (XEXP (disp, 0), 1);
6137		  disp = gen_rtx_CONST (VOIDmode,
6138					XEXP (XEXP (disp, 0), 0));
6139		}
6140
6141	      if (flag_pic)
6142		output_pic_addr_const (file, disp, 0);
6143	      else if (GET_CODE (disp) == LABEL_REF)
6144		output_asm_label (disp);
6145	      else if (GET_CODE (disp) == CONST_INT)
6146		offset = disp;
6147	      else
6148		output_addr_const (file, disp);
6149	    }
6150
6151	  putc ('[', file);
6152	  if (base)
6153	    {
6154	      PRINT_REG (base, 0, file);
6155	      if (offset)
6156		{
6157		  if (INTVAL (offset) >= 0)
6158		    putc ('+', file);
6159		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6160		}
6161	    }
6162	  else if (offset)
6163	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6164	  else
6165	    putc ('0', file);
6166
6167	  if (index)
6168	    {
6169	      putc ('+', file);
6170	      PRINT_REG (index, 0, file);
6171	      if (scale != 1)
6172		fprintf (file, "*%d", scale);
6173	    }
6174	  putc (']', file);
6175	}
6176    }
6177}
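
/* An illustrative sketch (hypothetical helper, assuming a 32-bit target
   where hard register 0 is %eax and 3 is %ebx): printing a full
   base+index+scale+displacement address.  The address below comes out
   as "8(%ebx,%eax,4)" in AT&T syntax and "[ebx+8+eax*4]" in Intel
   syntax.  */
#if 0
static void
example_print_sib (file)
     FILE *file;
{
  rtx idx = gen_rtx_MULT (SImode, gen_rtx_REG (SImode, 0), GEN_INT (4));
  rtx addr = gen_rtx_PLUS (SImode,
			   gen_rtx_PLUS (SImode, idx,
					 gen_rtx_REG (SImode, 3)),
			   GEN_INT (8));
  print_operand_address (file, addr);
}
#endif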
6178
6179/* Split one or more DImode RTL references into pairs of SImode
6180   references.  The RTL can be REG, offsettable MEM, integer constant, or
6181   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
6182   split and "num" is its length.  lo_half and hi_half are output arrays
6183   that parallel "operands".  */
6184
6185void
6186split_di (operands, num, lo_half, hi_half)
6187     rtx operands[];
6188     int num;
6189     rtx lo_half[], hi_half[];
6190{
6191  while (num--)
6192    {
6193      rtx op = operands[num];
6194
6195	      /* simplify_subreg refuses to split volatile memory references,
6196	         but we still have to handle them.  */
6197      if (GET_CODE (op) == MEM)
6198	{
6199	  lo_half[num] = adjust_address (op, SImode, 0);
6200	  hi_half[num] = adjust_address (op, SImode, 4);
6201	}
6202      else
6203	{
6204	  lo_half[num] = simplify_gen_subreg (SImode, op,
6205					      GET_MODE (op) == VOIDmode
6206					      ? DImode : GET_MODE (op), 0);
6207	  hi_half[num] = simplify_gen_subreg (SImode, op,
6208					      GET_MODE (op) == VOIDmode
6209					      ? DImode : GET_MODE (op), 4);
6210	}
6211    }
6212}
6213/* Split one or more TImode RTL references into pairs of SImode
6214   references.  The RTL can be REG, offsettable MEM, integer constant, or
6215	   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
6216   split and "num" is its length.  lo_half and hi_half are output arrays
6217   that parallel "operands".  */
6218
6219void
6220split_ti (operands, num, lo_half, hi_half)
6221     rtx operands[];
6222     int num;
6223     rtx lo_half[], hi_half[];
6224{
6225  while (num--)
6226    {
6227      rtx op = operands[num];
6228
6229	      /* simplify_subreg refuses to split volatile memory references, but we
6230	         still have to handle them.  */
6231      if (GET_CODE (op) == MEM)
6232	{
6233	  lo_half[num] = adjust_address (op, DImode, 0);
6234	  hi_half[num] = adjust_address (op, DImode, 8);
6235	}
6236      else
6237	{
6238	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6239	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6240	}
6241    }
6242}
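
/* An illustrative sketch (hypothetical helper): the typical use of
   split_di, lowering a DImode move into two SImode moves.  A real
   caller must also worry about overlapping halves, which this sketch
   ignores.  */
#if 0
static void
example_split_dimode_move (dst, src)
     rtx dst, src;		/* both DImode */
{
  rtx operands[2], lo[2], hi[2];

  operands[0] = dst;
  operands[1] = src;
  split_di (operands, 2, lo, hi);
  emit_move_insn (lo[0], lo[1]);
  emit_move_insn (hi[0], hi[1]);
}
#endif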
6243
6244/* Output code to perform a 387 binary operation in INSN, one of PLUS,
6245   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
6246   is the expression of the binary operation.  The output may either be
6247   emitted here, or returned to the caller, like all output_* functions.
6248
6249   There is no guarantee that the operands are the same mode, as they
6250   might be within FLOAT or FLOAT_EXTEND expressions.  */
6251
6252#ifndef SYSV386_COMPAT
6253/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
6254   wants to fix the assemblers because that causes incompatibility
6255   with gcc.  No-one wants to fix gcc because that causes
6256   incompatibility with assemblers...  You can use the option of
6257   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
6258#define SYSV386_COMPAT 1
6259#endif
6260
6261const char *
6262output_387_binary_op (insn, operands)
6263     rtx insn;
6264     rtx *operands;
6265{
6266  static char buf[30];
6267  const char *p;
6268  const char *ssep;
6269  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6270
6271#ifdef ENABLE_CHECKING
6272  /* Even if we do not want to check the inputs, this documents input
6273	     constraints, which helps in understanding the following code.  */
6274  if (STACK_REG_P (operands[0])
6275      && ((REG_P (operands[1])
6276	   && REGNO (operands[0]) == REGNO (operands[1])
6277	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6278	  || (REG_P (operands[2])
6279	      && REGNO (operands[0]) == REGNO (operands[2])
6280	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6281      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6282    ; /* ok */
6283  else if (!is_sse)
6284    abort ();
6285#endif
6286
6287  switch (GET_CODE (operands[3]))
6288    {
6289    case PLUS:
6290      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6291	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6292	p = "fiadd";
6293      else
6294	p = "fadd";
6295      ssep = "add";
6296      break;
6297
6298    case MINUS:
6299      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6300	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6301	p = "fisub";
6302      else
6303	p = "fsub";
6304      ssep = "sub";
6305      break;
6306
6307    case MULT:
6308      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6309	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6310	p = "fimul";
6311      else
6312	p = "fmul";
6313      ssep = "mul";
6314      break;
6315
6316    case DIV:
6317      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6318	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6319	p = "fidiv";
6320      else
6321	p = "fdiv";
6322      ssep = "div";
6323      break;
6324
6325    default:
6326      abort ();
6327    }
6328
6329  if (is_sse)
6330   {
6331      strcpy (buf, ssep);
6332      if (GET_MODE (operands[0]) == SFmode)
6333	strcat (buf, "ss\t{%2, %0|%0, %2}");
6334      else
6335	strcat (buf, "sd\t{%2, %0|%0, %2}");
6336      return buf;
6337   }
6338  strcpy (buf, p);
6339
6340  switch (GET_CODE (operands[3]))
6341    {
6342    case MULT:
6343    case PLUS:
6344      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6345	{
6346	  rtx temp = operands[2];
6347	  operands[2] = operands[1];
6348	  operands[1] = temp;
6349	}
6350
6351	      /* We know operands[0] == operands[1].  */
6352
6353      if (GET_CODE (operands[2]) == MEM)
6354	{
6355	  p = "%z2\t%2";
6356	  break;
6357	}
6358
6359      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6360	{
6361	  if (STACK_TOP_P (operands[0]))
6362	    /* How is it that we are storing to a dead operand[2]?
6363	       Well, presumably operands[1] is dead too.  We can't
6364	       store the result to st(0) as st(0) gets popped on this
6365	       instruction.  Instead store to operands[2] (which I
6366	       think has to be st(1)).  st(1) will be popped later.
6367	       gcc <= 2.8.1 didn't have this check and generated
6368	       assembly code that the Unixware assembler rejected.  */
6369	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
6370	  else
6371	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
6372	  break;
6373	}
6374
6375      if (STACK_TOP_P (operands[0]))
6376	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
6377      else
6378	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
6379      break;
6380
6381    case MINUS:
6382    case DIV:
6383      if (GET_CODE (operands[1]) == MEM)
6384	{
6385	  p = "r%z1\t%1";
6386	  break;
6387	}
6388
6389      if (GET_CODE (operands[2]) == MEM)
6390	{
6391	  p = "%z2\t%2";
6392	  break;
6393	}
6394
6395      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6396	{
6397#if SYSV386_COMPAT
6398	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6399	     derived assemblers, confusingly reverse the direction of
6400	     the operation for fsub{r} and fdiv{r} when the
6401	     destination register is not st(0).  The Intel assembler
6402	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
6403	     figure out what the hardware really does.  */
6404	  if (STACK_TOP_P (operands[0]))
6405	    p = "{p\t%0, %2|rp\t%2, %0}";
6406	  else
6407	    p = "{rp\t%2, %0|p\t%0, %2}";
6408#else
6409	  if (STACK_TOP_P (operands[0]))
6410	    /* As above for fmul/fadd, we can't store to st(0).  */
6411	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
6412	  else
6413	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
6414#endif
6415	  break;
6416	}
6417
6418      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6419	{
6420#if SYSV386_COMPAT
6421	  if (STACK_TOP_P (operands[0]))
6422	    p = "{rp\t%0, %1|p\t%1, %0}";
6423	  else
6424	    p = "{p\t%1, %0|rp\t%0, %1}";
6425#else
6426	  if (STACK_TOP_P (operands[0]))
6427	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
6428	  else
6429	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
6430#endif
6431	  break;
6432	}
6433
6434      if (STACK_TOP_P (operands[0]))
6435	{
6436	  if (STACK_TOP_P (operands[1]))
6437	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
6438	  else
6439	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
6440	  break;
6441	}
6442      else if (STACK_TOP_P (operands[1]))
6443	{
6444#if SYSV386_COMPAT
6445	  p = "{\t%1, %0|r\t%0, %1}";
6446#else
6447	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
6448#endif
6449	}
6450      else
6451	{
6452#if SYSV386_COMPAT
6453	  p = "{r\t%2, %0|\t%0, %2}";
6454#else
6455	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
6456#endif
6457	}
6458      break;
6459
6460    default:
6461      abort ();
6462    }
6463
6464  strcat (buf, p);
6465  return buf;
6466}
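
/* An illustrative sketch (hypothetical helper, not the real output
   machinery): how the "{att|intel}" templates returned above are
   consumed.  Inside braces, text before '|' is emitted for AT&T
   syntax and text after it for Intel syntax; everything outside
   braces is emitted for both.  */
#if 0
static void
example_emit_dialect_template (file, tmpl, intel_p)
     FILE *file;
     const char *tmpl;
     int intel_p;
{
  const char *p;
  int skip = 0;

  for (p = tmpl; *p; p++)
    if (*p == '{')
      skip = intel_p;		/* the AT&T half comes first */
    else if (*p == '|')
      skip = !intel_p;
    else if (*p == '}')
      skip = 0;
    else if (!skip)
      putc (*p, file);
}
#endif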
6467
6468	/* Output code to initialize the control word copies used by the
6469	   trunc?f?i patterns.  NORMAL is set to the current control word, while
6470	   ROUND_DOWN is set to a control word that rounds toward zero (truncates).  */
6471void
6472emit_i387_cw_initialization (normal, round_down)
6473     rtx normal, round_down;
6474{
6475  rtx reg = gen_reg_rtx (HImode);
6476
6477  emit_insn (gen_x86_fnstcw_1 (normal));
6478  emit_move_insn (reg, normal);
6479  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6480      && !TARGET_64BIT)
6481    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6482  else
6483    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6484  emit_move_insn (round_down, reg);
6485}
6486
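
/* An illustrative sketch of the bit-twiddling above: the i387
   rounding-control field occupies bits 10 and 11 of the control word,
   and OR-ing in 0xc00 selects round-toward-zero, which is what the
   truncating conversions need.  The helper name is hypothetical.  */
#if 0
static unsigned short
example_truncating_cw (cw)
     unsigned short cw;		/* value stored by fnstcw */
{
  return cw | 0x0c00;		/* RC = 11b: chop */
}
#endif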
6487/* Output code for INSN to convert a float to a signed int.  OPERANDS
6488   are the insn operands.  The output may be [HSD]Imode and the input
6489   operand may be [SDX]Fmode.  */
6490
6491const char *
6492output_fix_trunc (insn, operands)
6493     rtx insn;
6494     rtx *operands;
6495{
6496  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6497  int dimode_p = GET_MODE (operands[0]) == DImode;
6498
6499  /* Jump through a hoop or two for DImode, since the hardware has no
6500     non-popping instruction.  We used to do this a different way, but
6501     that was somewhat fragile and broke with post-reload splitters.  */
6502  if (dimode_p && !stack_top_dies)
6503    output_asm_insn ("fld\t%y1", operands);
6504
6505  if (!STACK_TOP_P (operands[1]))
6506    abort ();
6507
6508  if (GET_CODE (operands[0]) != MEM)
6509    abort ();
6510
6511  output_asm_insn ("fldcw\t%3", operands);
6512  if (stack_top_dies || dimode_p)
6513    output_asm_insn ("fistp%z0\t%0", operands);
6514  else
6515    output_asm_insn ("fist%z0\t%0", operands);
6516  output_asm_insn ("fldcw\t%2", operands);
6517
6518  return "";
6519}
6520
6521/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
6522   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
6523   when fucom should be used.  */
6524
6525const char *
6526output_fp_compare (insn, operands, eflags_p, unordered_p)
6527     rtx insn;
6528     rtx *operands;
6529     int eflags_p, unordered_p;
6530{
6531  int stack_top_dies;
6532  rtx cmp_op0 = operands[0];
6533  rtx cmp_op1 = operands[1];
6534  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6535
6536  if (eflags_p == 2)
6537    {
6538      cmp_op0 = cmp_op1;
6539      cmp_op1 = operands[2];
6540    }
6541  if (is_sse)
6542    {
6543      if (GET_MODE (operands[0]) == SFmode)
6544	if (unordered_p)
6545	  return "ucomiss\t{%1, %0|%0, %1}";
6546	else
6547	  return "comiss\t{%1, %0|%0, %1}";
6548      else
6549	if (unordered_p)
6550	  return "ucomisd\t{%1, %0|%0, %1}";
6551	else
6552	  return "comisd\t{%1, %0|%0, %1}";
6553    }
6554
6555  if (! STACK_TOP_P (cmp_op0))
6556    abort ();
6557
6558  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6559
6560  if (STACK_REG_P (cmp_op1)
6561      && stack_top_dies
6562      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6563      && REGNO (cmp_op1) != FIRST_STACK_REG)
6564    {
6565	      /* If the top of the 387 stack dies, and the other operand
6566		 is also a stack register that dies, then this must be an
6567		 `fcompp' float compare.  */
6568
6569      if (eflags_p == 1)
6570	{
6571	  /* There is no double popping fcomi variant.  Fortunately,
6572	     eflags is immune from the fstp's cc clobbering.  */
6573	  if (unordered_p)
6574	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6575	  else
6576	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6577	  return "fstp\t%y0";
6578	}
6579      else
6580	{
6581	  if (eflags_p == 2)
6582	    {
6583	      if (unordered_p)
6584		return "fucompp\n\tfnstsw\t%0";
6585	      else
6586		return "fcompp\n\tfnstsw\t%0";
6587	    }
6588	  else
6589	    {
6590	      if (unordered_p)
6591		return "fucompp";
6592	      else
6593		return "fcompp";
6594	    }
6595	}
6596    }
6597  else
6598    {
6599      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
6600
6601      static const char * const alt[24] =
6602      {
6603	"fcom%z1\t%y1",
6604	"fcomp%z1\t%y1",
6605	"fucom%z1\t%y1",
6606	"fucomp%z1\t%y1",
6607
6608	"ficom%z1\t%y1",
6609	"ficomp%z1\t%y1",
6610	NULL,
6611	NULL,
6612
6613	"fcomi\t{%y1, %0|%0, %y1}",
6614	"fcomip\t{%y1, %0|%0, %y1}",
6615	"fucomi\t{%y1, %0|%0, %y1}",
6616	"fucomip\t{%y1, %0|%0, %y1}",
6617
6618	NULL,
6619	NULL,
6620	NULL,
6621	NULL,
6622
6623	"fcom%z2\t%y2\n\tfnstsw\t%0",
6624	"fcomp%z2\t%y2\n\tfnstsw\t%0",
6625	"fucom%z2\t%y2\n\tfnstsw\t%0",
6626	"fucomp%z2\t%y2\n\tfnstsw\t%0",
6627
6628	"ficom%z2\t%y2\n\tfnstsw\t%0",
6629	"ficomp%z2\t%y2\n\tfnstsw\t%0",
6630	NULL,
6631	NULL
6632      };
6633
6634      int mask;
6635      const char *ret;
6636
6637      mask  = eflags_p << 3;
6638      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6639      mask |= unordered_p << 1;
6640      mask |= stack_top_dies;
6641
6642      if (mask >= 24)
6643	abort ();
6644      ret = alt[mask];
6645      if (ret == NULL)
6646	abort ();
6647
6648      return ret;
6649    }
6650}
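
/* An illustrative sketch (hypothetical helper): decoding the alt[]
   index above.  For instance, fcomi requested (eflags_p == 1), a
   floating-point operand, an ordered compare and a dying top of stack
   give (1 << 3) | (0 << 2) | (0 << 1) | 1 == 9, i.e. "fcomip".  */
#if 0
static int
example_fp_compare_mask (eflags_p, int_op_p, unordered_p, dies_p)
     int eflags_p, int_op_p, unordered_p, dies_p;
{
  return (eflags_p << 3) | (int_op_p << 2)
	 | (unordered_p << 1) | dies_p;
}
#endif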
6651
6652void
6653ix86_output_addr_vec_elt (file, value)
6654     FILE *file;
6655     int value;
6656{
6657  const char *directive = ASM_LONG;
6658
6659  if (TARGET_64BIT)
6660    {
6661#ifdef ASM_QUAD
6662      directive = ASM_QUAD;
6663#else
6664      abort ();
6665#endif
6666    }
6667
6668  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6669}
6670
6671void
6672ix86_output_addr_diff_elt (file, value, rel)
6673     FILE *file;
6674     int value, rel;
6675{
6676  if (TARGET_64BIT)
6677    fprintf (file, "%s%s%d-.+(.-%s%d)\n",
6678	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
6679  else if (HAVE_AS_GOTOFF_IN_DATA)
6680    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6681  else
6682    asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6683		 ASM_LONG, LPREFIX, value);
6684}
6685
6686/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6687   for the target.  */
6688
6689void
6690ix86_expand_clear (dest)
6691     rtx dest;
6692{
6693  rtx tmp;
6694
6695  /* We play register width games, which are only valid after reload.  */
6696  if (!reload_completed)
6697    abort ();
6698
6699  /* Avoid HImode and its attendant prefix byte.  */
6700  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6701    dest = gen_rtx_REG (SImode, REGNO (dest));
6702
6703  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6704
6705  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
6706  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6707    {
6708      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6709      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6710    }
6711
6712  emit_insn (tmp);
6713}
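
/* An illustrative sketch (hypothetical helper, assuming hard register 0
   is %eax): clearing a register after reload.  When !TARGET_USE_MOV0 or
   optimizing for size this becomes "xorl %eax, %eax" with its attached
   flags clobber, otherwise "movl $0, %eax".  */
#if 0
static void
example_clear_eax ()
{
  /* Only valid after reload; ix86_expand_clear checks this.  */
  ix86_expand_clear (gen_rtx_REG (SImode, 0));
}
#endif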
6714
6715void
6716ix86_expand_move (mode, operands)
6717     enum machine_mode mode;
6718     rtx operands[];
6719{
6720  int strict = (reload_in_progress || reload_completed);
6721  rtx insn;
6722
6723  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6724    {
6725      /* Emit insns to move operands[1] into operands[0].  */
6726
6727      if (GET_CODE (operands[0]) == MEM)
6728	operands[1] = force_reg (Pmode, operands[1]);
6729      else
6730	{
6731	  rtx temp = operands[0];
6732	  if (GET_CODE (temp) != REG)
6733	    temp = gen_reg_rtx (Pmode);
6734	  temp = legitimize_pic_address (operands[1], temp);
6735	  if (temp == operands[0])
6736	    return;
6737	  operands[1] = temp;
6738	}
6739    }
6740  else
6741    {
6742      if (GET_CODE (operands[0]) == MEM
6743	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6744	      || !push_operand (operands[0], mode))
6745	  && GET_CODE (operands[1]) == MEM)
6746	operands[1] = force_reg (mode, operands[1]);
6747
6748      if (push_operand (operands[0], mode)
6749	  && ! general_no_elim_operand (operands[1], mode))
6750	operands[1] = copy_to_mode_reg (mode, operands[1]);
6751
6752	      /* Force large constants in 64bit compilation into registers
6753		 so that they get CSEed.  */
6754      if (TARGET_64BIT && mode == DImode
6755	  && immediate_operand (operands[1], mode)
6756	  && !x86_64_zero_extended_value (operands[1])
6757	  && !register_operand (operands[0], mode)
6758	  && optimize && !reload_completed && !reload_in_progress)
6759	operands[1] = copy_to_mode_reg (mode, operands[1]);
6760
6761      if (FLOAT_MODE_P (mode))
6762	{
6763	  /* If we are loading a floating point constant to a register,
6764	     force the value to memory now, since we'll get better code
6765	     out the back end.  */
6766
6767	  if (strict)
6768	    ;
6769	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
6770		   && register_operand (operands[0], mode))
6771	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6772	}
6773    }
6774
6775  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6776
6777  emit_insn (insn);
6778}
6779
6780void
6781ix86_expand_vector_move (mode, operands)
6782     enum machine_mode mode;
6783     rtx operands[];
6784{
6785  /* Force constants other than zero into memory.  We do not know how
6786     the instructions used to build constants modify the upper 64 bits
6787	     of the register; once we have that information we may be able
6788     to handle some of them more efficiently.  */
6789  if ((reload_in_progress | reload_completed) == 0
6790      && register_operand (operands[0], mode)
6791      && CONSTANT_P (operands[1]))
6792    {
6793      rtx addr = gen_reg_rtx (Pmode);
6794      emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
6795      operands[1] = gen_rtx_MEM (mode, addr);
6796    }
6797
6798  /* Make operand1 a register if it isn't already.  */
6799  if ((reload_in_progress | reload_completed) == 0
6800      && !register_operand (operands[0], mode)
6801      && !register_operand (operands[1], mode)
6802      && operands[1] != CONST0_RTX (mode))
6803    {
6804      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
6805      emit_move_insn (operands[0], temp);
6806      return;
6807    }
6808
6809  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
6810}
6811
6812/* Attempt to expand a binary operator.  Make the expansion closer to the
6813	   actual machine than just general_operand, which would allow 3 separate
6814   memory references (one output, two input) in a single insn.  */
6815
6816void
6817ix86_expand_binary_operator (code, mode, operands)
6818     enum rtx_code code;
6819     enum machine_mode mode;
6820     rtx operands[];
6821{
6822  int matching_memory;
6823  rtx src1, src2, dst, op, clob;
6824
6825  dst = operands[0];
6826  src1 = operands[1];
6827  src2 = operands[2];
6828
6829  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6830  if (GET_RTX_CLASS (code) == 'c'
6831      && (rtx_equal_p (dst, src2)
6832	  || immediate_operand (src1, mode)))
6833    {
6834      rtx temp = src1;
6835      src1 = src2;
6836      src2 = temp;
6837    }
6838
6839  /* If the destination is memory, and we do not have matching source
6840     operands, do things in registers.  */
6841  matching_memory = 0;
6842  if (GET_CODE (dst) == MEM)
6843    {
6844      if (rtx_equal_p (dst, src1))
6845	matching_memory = 1;
6846      else if (GET_RTX_CLASS (code) == 'c'
6847	       && rtx_equal_p (dst, src2))
6848	matching_memory = 2;
6849      else
6850	dst = gen_reg_rtx (mode);
6851    }
6852
6853  /* Both source operands cannot be in memory.  */
6854  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6855    {
6856      if (matching_memory != 2)
6857	src2 = force_reg (mode, src2);
6858      else
6859	src1 = force_reg (mode, src1);
6860    }
6861
6862	  /* If the operation is not commutative, source 1 cannot be a constant
6863	     or non-matching memory.  */
6864  if ((CONSTANT_P (src1)
6865       || (!matching_memory && GET_CODE (src1) == MEM))
6866      && GET_RTX_CLASS (code) != 'c')
6867    src1 = force_reg (mode, src1);
6868
6869  /* If optimizing, copy to regs to improve CSE */
6870  if (optimize && ! no_new_pseudos)
6871    {
6872      if (GET_CODE (dst) == MEM)
6873	dst = gen_reg_rtx (mode);
6874      if (GET_CODE (src1) == MEM)
6875	src1 = force_reg (mode, src1);
6876      if (GET_CODE (src2) == MEM)
6877	src2 = force_reg (mode, src2);
6878    }
6879
6880  /* Emit the instruction.  */
6881
6882  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6883  if (reload_in_progress)
6884    {
6885      /* Reload doesn't know about the flags register, and doesn't know that
6886         it doesn't want to clobber it.  We can only do this with PLUS.  */
6887      if (code != PLUS)
6888	abort ();
6889      emit_insn (op);
6890    }
6891  else
6892    {
6893      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6894      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6895    }
6896
6897  /* Fix up the destination if needed.  */
6898  if (dst != operands[0])
6899    emit_move_insn (operands[0], dst);
6900}
6901
6902/* Return TRUE or FALSE depending on whether the binary operator meets the
6903   appropriate constraints.  */
6904
6905int
6906ix86_binary_operator_ok (code, mode, operands)
6907     enum rtx_code code;
6908     enum machine_mode mode ATTRIBUTE_UNUSED;
6909     rtx operands[3];
6910{
6911  /* Both source operands cannot be in memory.  */
6912  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6913    return 0;
6914	  /* If the operation is not commutative, source 1 cannot be a constant.  */
6915  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6916    return 0;
6917  /* If the destination is memory, we must have a matching source operand.  */
6918  if (GET_CODE (operands[0]) == MEM
6919      && ! (rtx_equal_p (operands[0], operands[1])
6920	    || (GET_RTX_CLASS (code) == 'c'
6921		&& rtx_equal_p (operands[0], operands[2]))))
6922    return 0;
6923	  /* If the operation is not commutative and source 1 is memory, we must
6924	     have a matching destination.  */
6925  if (GET_CODE (operands[1]) == MEM
6926      && GET_RTX_CLASS (code) != 'c'
6927      && ! rtx_equal_p (operands[0], operands[1]))
6928    return 0;
6929  return 1;
6930}
6931
6932/* Attempt to expand a unary operator.  Make the expansion closer to the
6933	   actual machine than just general_operand, which would allow 2 separate
6934   memory references (one output, one input) in a single insn.  */
6935
6936void
6937ix86_expand_unary_operator (code, mode, operands)
6938     enum rtx_code code;
6939     enum machine_mode mode;
6940     rtx operands[];
6941{
6942  int matching_memory;
6943  rtx src, dst, op, clob;
6944
6945  dst = operands[0];
6946  src = operands[1];
6947
6948  /* If the destination is memory, and we do not have matching source
6949     operands, do things in registers.  */
6950  matching_memory = 0;
6951  if (GET_CODE (dst) == MEM)
6952    {
6953      if (rtx_equal_p (dst, src))
6954	matching_memory = 1;
6955      else
6956	dst = gen_reg_rtx (mode);
6957    }
6958
6959  /* When source operand is memory, destination must match.  */
6960  if (!matching_memory && GET_CODE (src) == MEM)
6961    src = force_reg (mode, src);
6962
6963  /* If optimizing, copy to regs to improve CSE */
6964  if (optimize && ! no_new_pseudos)
6965    {
6966      if (GET_CODE (dst) == MEM)
6967	dst = gen_reg_rtx (mode);
6968      if (GET_CODE (src) == MEM)
6969	src = force_reg (mode, src);
6970    }
6971
6972  /* Emit the instruction.  */
6973
6974  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6975  if (reload_in_progress || code == NOT)
6976    {
6977	      /* Reload doesn't know about the flags register, and doesn't know that
6978	         it doesn't want to clobber it.  Only NOT leaves the flags alone.  */
6979      if (code != NOT)
6980        abort ();
6981      emit_insn (op);
6982    }
6983  else
6984    {
6985      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6986      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6987    }
6988
6989  /* Fix up the destination if needed.  */
6990  if (dst != operands[0])
6991    emit_move_insn (operands[0], dst);
6992}
6993
6994/* Return TRUE or FALSE depending on whether the unary operator meets the
6995   appropriate constraints.  */
6996
6997int
6998ix86_unary_operator_ok (code, mode, operands)
6999     enum rtx_code code ATTRIBUTE_UNUSED;
7000     enum machine_mode mode ATTRIBUTE_UNUSED;
7001     rtx operands[2] ATTRIBUTE_UNUSED;
7002{
7003  /* If one of operands is memory, source and destination must match.  */
7004  if ((GET_CODE (operands[0]) == MEM
7005       || GET_CODE (operands[1]) == MEM)
7006      && ! rtx_equal_p (operands[0], operands[1]))
7007    return FALSE;
7008  return TRUE;
7009}
7010
7011/* Return TRUE or FALSE depending on whether the first SET in INSN
7012   has source and destination with matching CC modes, and that the
7013   CC mode is at least as constrained as REQ_MODE.  */
7014
7015int
7016ix86_match_ccmode (insn, req_mode)
7017     rtx insn;
7018     enum machine_mode req_mode;
7019{
7020  rtx set;
7021  enum machine_mode set_mode;
7022
7023  set = PATTERN (insn);
7024  if (GET_CODE (set) == PARALLEL)
7025    set = XVECEXP (set, 0, 0);
7026  if (GET_CODE (set) != SET)
7027    abort ();
7028  if (GET_CODE (SET_SRC (set)) != COMPARE)
7029    abort ();
7030
7031  set_mode = GET_MODE (SET_DEST (set));
7032  switch (set_mode)
7033    {
7034    case CCNOmode:
7035      if (req_mode != CCNOmode
7036	  && (req_mode != CCmode
7037	      || XEXP (SET_SRC (set), 1) != const0_rtx))
7038	return 0;
7039      break;
7040    case CCmode:
7041      if (req_mode == CCGCmode)
7042	return 0;
7043      /* FALLTHRU */
7044    case CCGCmode:
7045      if (req_mode == CCGOCmode || req_mode == CCNOmode)
7046	return 0;
7047      /* FALLTHRU */
7048    case CCGOCmode:
7049      if (req_mode == CCZmode)
7050	return 0;
7051      /* FALLTHRU */
7052    case CCZmode:
7053      break;
7054
7055    default:
7056      abort ();
7057    }
7058
7059  return (GET_MODE (SET_SRC (set)) == set_mode);
7060}
7061
7062/* Generate insn patterns to do an integer compare of OPERANDS.  */
7063
7064static rtx
7065ix86_expand_int_compare (code, op0, op1)
7066     enum rtx_code code;
7067     rtx op0, op1;
7068{
7069  enum machine_mode cmpmode;
7070  rtx tmp, flags;
7071
7072  cmpmode = SELECT_CC_MODE (code, op0, op1);
7073  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7074
7075  /* This is very simple, but making the interface the same as in the
7076     FP case makes the rest of the code easier.  */
7077  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7078  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7079
7080  /* Return the test that should be put into the flags user, i.e.
7081     the bcc, scc, or cmov instruction.  */
7082  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7083}
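
/* For example (a sketch; A and B are hypothetical pseudos), a signed
   less-than test selects CCGCmode and emits roughly

     (set (reg:CCGC 17) (compare:CCGC (reg A) (reg B)))

   while the rtx handed back for the flags user is

     (lt (reg:CCGC 17) (const_int 0))  */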
7084
7085/* Figure out whether to use ordered or unordered fp comparisons.
7086   Return the appropriate mode to use.  */
7087
7088enum machine_mode
7089ix86_fp_compare_mode (code)
7090     enum rtx_code code ATTRIBUTE_UNUSED;
7091{
7092  /* ??? In order to make all comparisons reversible, we do all comparisons
7093     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
7094     all forms of trapping and nontrapping comparisons, we can make inequality
7095     comparisons trapping again, since it results in better code when using
7096     FCOM based compares.  */
7097  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7098}
7099
7100enum machine_mode
7101ix86_cc_mode (code, op0, op1)
7102     enum rtx_code code;
7103     rtx op0, op1;
7104{
7105  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7106    return ix86_fp_compare_mode (code);
7107  switch (code)
7108    {
7109      /* Only zero flag is needed.  */
7110    case EQ:			/* ZF=0 */
7111    case NE:			/* ZF!=0 */
7112      return CCZmode;
7113      /* Codes needing carry flag.  */
7114    case GEU:			/* CF=0 */
7115    case GTU:			/* CF=0 & ZF=0 */
7116    case LTU:			/* CF=1 */
7117    case LEU:			/* CF=1 | ZF=1 */
7118      return CCmode;
7119      /* Codes possibly doable only with sign flag when
7120         comparing against zero.  */
7121    case GE:			/* SF=OF   or   SF=0 */
7122    case LT:			/* SF<>OF  or   SF=1 */
7123      if (op1 == const0_rtx)
7124	return CCGOCmode;
7125      else
7126	/* For other cases Carry flag is not required.  */
7127	return CCGCmode;
7128      /* Codes doable only with the sign flag when comparing
7129         against zero, but we lack a jump instruction for that,
7130         so we need to use relational tests against overflow,
7131         which thus needs to be zero.  */
7132    case GT:			/* ZF=0 & SF=OF */
7133    case LE:			/* ZF=1 | SF<>OF */
7134      if (op1 == const0_rtx)
7135	return CCNOmode;
7136      else
7137	return CCGCmode;
7138      /* The strcmp pattern does a (use flags), and combine may ask us
7139	 for the proper mode.  */
7140    case USE:
7141      return CCmode;
7142    default:
7143      abort ();
7144    }
7145}
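
/* A few concrete instances of the mapping above (sketch only):

     (eq a b)              -> CCZmode   (only ZF is inspected)
     (ltu a b)             -> CCmode    (carry flag required)
     (ge a (const_int 0))  -> CCGOCmode (sign flag alone suffices)
     (gt a b), b nonzero   -> CCGCmode  (full signed comparison)  */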
7146
7147/* Return true if we should use an FCOMI instruction for this fp comparison.  */
7148
7149int
7150ix86_use_fcomi_compare (code)
7151     enum rtx_code code ATTRIBUTE_UNUSED;
7152{
7153  enum rtx_code swapped_code = swap_condition (code);
7154  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7155	  || (ix86_fp_comparison_cost (swapped_code)
7156	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
7157}
7158
7159/* Swap, force into registers, or otherwise massage the two operands
7160   to a fp comparison.  The operands are updated in place; the new
7161   comparison code is returned.  */
7162
7163static enum rtx_code
7164ix86_prepare_fp_compare_args (code, pop0, pop1)
7165     enum rtx_code code;
7166     rtx *pop0, *pop1;
7167{
7168  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7169  rtx op0 = *pop0, op1 = *pop1;
7170  enum machine_mode op_mode = GET_MODE (op0);
7171  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7172
7173  /* All of the unordered compare instructions only work on registers.
7174     The same is true of the XFmode compare instructions.  The same is
7175     true of the fcomi compare instructions.  */
7176
7177  if (!is_sse
7178      && (fpcmp_mode == CCFPUmode
7179	  || op_mode == XFmode
7180	  || op_mode == TFmode
7181	  || ix86_use_fcomi_compare (code)))
7182    {
7183      op0 = force_reg (op_mode, op0);
7184      op1 = force_reg (op_mode, op1);
7185    }
7186  else
7187    {
7188      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
7189	 things around if they appear profitable, otherwise force op0
7190	 into a register.  */
7191
7192      if (standard_80387_constant_p (op0) == 0
7193	  || (GET_CODE (op0) == MEM
7194	      && ! (standard_80387_constant_p (op1) == 0
7195		    || GET_CODE (op1) == MEM)))
7196	{
7197	  rtx tmp;
7198	  tmp = op0, op0 = op1, op1 = tmp;
7199	  code = swap_condition (code);
7200	}
7201
7202      if (GET_CODE (op0) != REG)
7203	op0 = force_reg (op_mode, op0);
7204
7205      if (CONSTANT_P (op1))
7206	{
7207	  if (standard_80387_constant_p (op1))
7208	    op1 = force_reg (op_mode, op1);
7209	  else
7210	    op1 = validize_mem (force_const_mem (op_mode, op1));
7211	}
7212    }
7213
7214  /* Try to rearrange the comparison to make it cheaper.  */
7215  if (ix86_fp_comparison_cost (code)
7216      > ix86_fp_comparison_cost (swap_condition (code))
7217      && (GET_CODE (op1) == REG || !no_new_pseudos))
7218    {
7219      rtx tmp;
7220      tmp = op0, op0 = op1, op1 = tmp;
7221      code = swap_condition (code);
7222      if (GET_CODE (op0) != REG)
7223	op0 = force_reg (op_mode, op0);
7224    }
7225
7226  *pop0 = op0;
7227  *pop1 = op1;
7228  return code;
7229}
7230
7231/* Convert the comparison codes we use to represent an FP comparison to
7232   the integer code that will result in a proper branch.  Return UNKNOWN
7233   if no such code is available.  */
7234static enum rtx_code
7235ix86_fp_compare_code_to_integer (code)
7236     enum rtx_code code;
7237{
7238  switch (code)
7239    {
7240    case GT:
7241      return GTU;
7242    case GE:
7243      return GEU;
7244    case ORDERED:
7245    case UNORDERED:
7246      return code;
7247      break;
7248    case UNEQ:
7249      return EQ;
7250      break;
7251    case UNLT:
7252      return LTU;
7253      break;
7254    case UNLE:
7255      return LEU;
7256      break;
7257    case LTGT:
7258      return NE;
7259      break;
7260    default:
7261      return UNKNOWN;
7262    }
7263}
7264
7265/* Split comparison code CODE into comparisons we can do using branch
7266   instructions.  BYPASS_CODE is the comparison code for a branch that
7267   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
7268   is not required, its value is set to NIL.
7269   We never require more than two branches.  */
7270static void
7271ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7272     enum rtx_code code, *bypass_code, *first_code, *second_code;
7273{
7274  *first_code = code;
7275  *bypass_code = NIL;
7276  *second_code = NIL;
7277
7278  /* The fcomi comparison sets flags as follows:
7279
7280     cmp    ZF PF CF
7281     >      0  0  0
7282     <      0  0  1
7283     =      1  0  0
7284     un     1  1  1 */
7285
7286  switch (code)
7287    {
7288    case GT:			/* GTU - CF=0 & ZF=0 */
7289    case GE:			/* GEU - CF=0 */
7290    case ORDERED:		/* PF=0 */
7291    case UNORDERED:		/* PF=1 */
7292    case UNEQ:			/* EQ - ZF=1 */
7293    case UNLT:			/* LTU - CF=1 */
7294    case UNLE:			/* LEU - CF=1 | ZF=1 */
7295    case LTGT:			/* EQ - ZF=0 */
7296      break;
7297    case LT:			/* LTU - CF=1 - fails on unordered */
7298      *first_code = UNLT;
7299      *bypass_code = UNORDERED;
7300      break;
7301    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
7302      *first_code = UNLE;
7303      *bypass_code = UNORDERED;
7304      break;
7305    case EQ:			/* EQ - ZF=1 - fails on unordered */
7306      *first_code = UNEQ;
7307      *bypass_code = UNORDERED;
7308      break;
7309    case NE:			/* NE - ZF=0 - fails on unordered */
7310      *first_code = LTGT;
7311      *second_code = UNORDERED;
7312      break;
7313    case UNGE:			/* GEU - CF=0 - fails on unordered */
7314      *first_code = GE;
7315      *second_code = UNORDERED;
7316      break;
7317    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
7318      *first_code = GT;
7319      *second_code = UNORDERED;
7320      break;
7321    default:
7322      abort ();
7323    }
7324  if (!TARGET_IEEE_FP)
7325    {
7326      *second_code = NIL;
7327      *bypass_code = NIL;
7328    }
7329}
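
/* As an illustration (labels hypothetical): with TARGET_IEEE_FP a
   plain LT becomes an UNLT test guarded by an UNORDERED bypass, so an
   fcomi-based sequence looks roughly like

       fcomi   %st(1), %st
       jp      .Lbypass       # PF set: unordered, skip the real test
       jb      .Ltrue         # CF set: below, i.e. UNLT
   .Lbypass:                                                        */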
7330
7331/* Return the cost of a comparison done with fcom + arithmetic operations
7332   on AX.  All the following functions use instruction count as the cost
7333   metric.  In the future this should be tweaked to compute bytes for
7334   optimize_size and account for instruction performance on various CPUs.  */
7335static int
7336ix86_fp_comparison_arithmetics_cost (code)
7337     enum rtx_code code;
7338{
7339  if (!TARGET_IEEE_FP)
7340    return 4;
7341  /* The cost of code output by ix86_expand_fp_compare.  */
7342  switch (code)
7343    {
7344    case UNLE:
7345    case UNLT:
7346    case LTGT:
7347    case GT:
7348    case GE:
7349    case UNORDERED:
7350    case ORDERED:
7351    case UNEQ:
7352      return 4;
7353      break;
7354    case LT:
7355    case NE:
7356    case EQ:
7357    case UNGE:
7358      return 5;
7359      break;
7360    case LE:
7361    case UNGT:
7362      return 6;
7363      break;
7364    default:
7365      abort ();
7366    }
7367}
7368
7369/* Return cost of comparison done using fcomi operation.
7370   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
7371static int
7372ix86_fp_comparison_fcomi_cost (code)
7373     enum rtx_code code;
7374{
7375  enum rtx_code bypass_code, first_code, second_code;
7376  /* Return an arbitrarily high cost when the instruction is not supported -
7377     this prevents gcc from using it.  */
7378  if (!TARGET_CMOVE)
7379    return 1024;
7380  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7381  return (bypass_code != NIL || second_code != NIL) + 2;
7382}
7383
7384/* Return cost of comparison done using sahf operation.
7385   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
7386static int
7387ix86_fp_comparison_sahf_cost (code)
7388     enum rtx_code code;
7389{
7390  enum rtx_code bypass_code, first_code, second_code;
7391  /* Return an arbitrarily high cost when the instruction is not preferred -
7392     this keeps gcc from using it.  */
7393  if (!TARGET_USE_SAHF && !optimize_size)
7394    return 1024;
7395  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7396  return (bypass_code != NIL || second_code != NIL) + 3;
7397}
7398
7399/* Compute cost of the comparison done using any method.
7400   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
7401static int
7402ix86_fp_comparison_cost (code)
7403     enum rtx_code code;
7404{
7405  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7406  int min;
7407
7408  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7409  sahf_cost = ix86_fp_comparison_sahf_cost (code);
7410
7411  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7412  if (min > sahf_cost)
7413    min = sahf_cost;
7414  if (min > fcomi_cost)
7415    min = fcomi_cost;
7416  return min;
7417}
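
/* Worked instance of the metric (a sketch; assumes TARGET_IEEE_FP,
   TARGET_CMOVE, and a usable sahf): for code == LT the split above
   produces a bypass branch, so

     fcomi cost = 1 + 2 = 3,  sahf cost = 1 + 3 = 4,  arith cost = 5,

   and the fcomi variant wins as the minimum.  */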
7418
7419/* Generate insn patterns to do a floating point compare of OPERANDS.  */
7420
7421static rtx
7422ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7423     enum rtx_code code;
7424     rtx op0, op1, scratch;
7425     rtx *second_test;
7426     rtx *bypass_test;
7427{
7428  enum machine_mode fpcmp_mode, intcmp_mode;
7429  rtx tmp, tmp2;
7430  int cost = ix86_fp_comparison_cost (code);
7431  enum rtx_code bypass_code, first_code, second_code;
7432
7433  fpcmp_mode = ix86_fp_compare_mode (code);
7434  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7435
7436  if (second_test)
7437    *second_test = NULL_RTX;
7438  if (bypass_test)
7439    *bypass_test = NULL_RTX;
7440
7441  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7442
7443  /* Do fcomi/sahf based test when profitable.  */
7444  if ((bypass_code == NIL || bypass_test)
7445      && (second_code == NIL || second_test)
7446      && ix86_fp_comparison_arithmetics_cost (code) > cost)
7447    {
7448      if (TARGET_CMOVE)
7449	{
7450	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7451	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7452			     tmp);
7453	  emit_insn (tmp);
7454	}
7455      else
7456	{
7457	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7458	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7459	  if (!scratch)
7460	    scratch = gen_reg_rtx (HImode);
7461	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7462	  emit_insn (gen_x86_sahf_1 (scratch));
7463	}
7464
7465      /* The FP codes work out to act like unsigned.  */
7466      intcmp_mode = fpcmp_mode;
7467      code = first_code;
7468      if (bypass_code != NIL)
7469	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7470				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
7471				       const0_rtx);
7472      if (second_code != NIL)
7473	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7474				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
7475				       const0_rtx);
7476    }
7477  else
7478    {
7479      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
7480      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7481      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7482      if (!scratch)
7483	scratch = gen_reg_rtx (HImode);
7484      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7485
7486      /* In the unordered case, we have to check C2 for NaN's, which
7487	 doesn't happen to work out to anything nice combination-wise.
7488	 So do some bit twiddling on the value we've got in AH to come
7489	 up with an appropriate set of condition codes.  */
7490
7491      intcmp_mode = CCNOmode;
7492      switch (code)
7493	{
7494	case GT:
7495	case UNGT:
7496	  if (code == GT || !TARGET_IEEE_FP)
7497	    {
7498	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7499	      code = EQ;
7500	    }
7501	  else
7502	    {
7503	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7504	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7505	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7506	      intcmp_mode = CCmode;
7507	      code = GEU;
7508	    }
7509	  break;
7510	case LT:
7511	case UNLT:
7512	  if (code == LT && TARGET_IEEE_FP)
7513	    {
7514	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7515	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7516	      intcmp_mode = CCmode;
7517	      code = EQ;
7518	    }
7519	  else
7520	    {
7521	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7522	      code = NE;
7523	    }
7524	  break;
7525	case GE:
7526	case UNGE:
7527	  if (code == GE || !TARGET_IEEE_FP)
7528	    {
7529	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7530	      code = EQ;
7531	    }
7532	  else
7533	    {
7534	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7535	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7536					     GEN_INT (0x01)));
7537	      code = NE;
7538	    }
7539	  break;
7540	case LE:
7541	case UNLE:
7542	  if (code == LE && TARGET_IEEE_FP)
7543	    {
7544	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7545	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7546	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7547	      intcmp_mode = CCmode;
7548	      code = LTU;
7549	    }
7550	  else
7551	    {
7552	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7553	      code = NE;
7554	    }
7555	  break;
7556	case EQ:
7557	case UNEQ:
7558	  if (code == EQ && TARGET_IEEE_FP)
7559	    {
7560	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7561	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7562	      intcmp_mode = CCmode;
7563	      code = EQ;
7564	    }
7565	  else
7566	    {
7567	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7568	      code = NE;
7569	      break;
7570	    }
7571	  break;
7572	case NE:
7573	case LTGT:
7574	  if (code == NE && TARGET_IEEE_FP)
7575	    {
7576	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7577	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7578					     GEN_INT (0x40)));
7579	      code = NE;
7580	    }
7581	  else
7582	    {
7583	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7584	      code = EQ;
7585	    }
7586	  break;
7587
7588	case UNORDERED:
7589	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7590	  code = NE;
7591	  break;
7592	case ORDERED:
7593	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7594	  code = EQ;
7595	  break;
7596
7597	default:
7598	  abort ();
7599	}
7600    }
7601
7602  /* Return the test that should be put into the flags user, i.e.
7603     the bcc, scc, or cmov instruction.  */
7604  return gen_rtx_fmt_ee (code, VOIDmode,
7605			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7606			 const0_rtx);
7607}
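
/* Sketch of the fnstsw path above for UNORDERED (rough asm; label
   hypothetical).  C2, which shows up as 0x04 in %ah, is set exactly
   when the operands compare unordered:

       fcomp  %st(1)
       fnstsw %ax
       testb  $0x04, %ah
       jne    .Lunordered                                           */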
7608
7609rtx
7610ix86_expand_compare (code, second_test, bypass_test)
7611     enum rtx_code code;
7612     rtx *second_test, *bypass_test;
7613{
7614  rtx op0, op1, ret;
7615  op0 = ix86_compare_op0;
7616  op1 = ix86_compare_op1;
7617
7618  if (second_test)
7619    *second_test = NULL_RTX;
7620  if (bypass_test)
7621    *bypass_test = NULL_RTX;
7622
7623  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7624    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7625				  second_test, bypass_test);
7626  else
7627    ret = ix86_expand_int_compare (code, op0, op1);
7628
7629  return ret;
7630}
7631
7632/* Return true if the CODE will result in a nontrivial jump sequence.  */
7633bool
7634ix86_fp_jump_nontrivial_p (code)
7635    enum rtx_code code;
7636{
7637  enum rtx_code bypass_code, first_code, second_code;
7638  if (!TARGET_CMOVE)
7639    return true;
7640  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7641  return bypass_code != NIL || second_code != NIL;
7642}
7643
7644void
7645ix86_expand_branch (code, label)
7646     enum rtx_code code;
7647     rtx label;
7648{
7649  rtx tmp;
7650
7651  switch (GET_MODE (ix86_compare_op0))
7652    {
7653    case QImode:
7654    case HImode:
7655    case SImode:
7656      simple:
7657      tmp = ix86_expand_compare (code, NULL, NULL);
7658      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7659				  gen_rtx_LABEL_REF (VOIDmode, label),
7660				  pc_rtx);
7661      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7662      return;
7663
7664    case SFmode:
7665    case DFmode:
7666    case XFmode:
7667    case TFmode:
7668      {
7669	rtvec vec;
7670	int use_fcomi;
7671	enum rtx_code bypass_code, first_code, second_code;
7672
7673	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7674					     &ix86_compare_op1);
7675
7676	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7677
7678	/* Check whether we will use the natural sequence with one jump.  If
7679	   so, we can expand the jump early.  Otherwise, delay expansion by
7680	   creating a compound insn so as not to confuse the optimizers.  */
7681	if (bypass_code == NIL && second_code == NIL
7682	    && TARGET_CMOVE)
7683	  {
7684	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7685				  gen_rtx_LABEL_REF (VOIDmode, label),
7686				  pc_rtx, NULL_RTX);
7687	  }
7688	else
7689	  {
7690	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
7691				  ix86_compare_op0, ix86_compare_op1);
7692	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7693					gen_rtx_LABEL_REF (VOIDmode, label),
7694					pc_rtx);
7695	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7696
7697	    use_fcomi = ix86_use_fcomi_compare (code);
7698	    vec = rtvec_alloc (3 + !use_fcomi);
7699	    RTVEC_ELT (vec, 0) = tmp;
7700	    RTVEC_ELT (vec, 1)
7701	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7702	    RTVEC_ELT (vec, 2)
7703	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7704	    if (! use_fcomi)
7705	      RTVEC_ELT (vec, 3)
7706		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7707
7708	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7709	  }
7710	return;
7711      }
7712
7713    case DImode:
7714      if (TARGET_64BIT)
7715	goto simple;
7716      /* Expand DImode branch into multiple compare+branch.  */
7717      {
7718	rtx lo[2], hi[2], label2;
7719	enum rtx_code code1, code2, code3;
7720
7721	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7722	  {
7723	    tmp = ix86_compare_op0;
7724	    ix86_compare_op0 = ix86_compare_op1;
7725	    ix86_compare_op1 = tmp;
7726	    code = swap_condition (code);
7727	  }
7728	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7729	split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7730
7731	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7732	   avoid two branches.  This costs one extra insn, so disable when
7733	   optimizing for size.  */
7734
7735	if ((code == EQ || code == NE)
7736	    && (!optimize_size
7737	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
7738	  {
7739	    rtx xor0, xor1;
7740
7741	    xor1 = hi[0];
7742	    if (hi[1] != const0_rtx)
7743	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7744				   NULL_RTX, 0, OPTAB_WIDEN);
7745
7746	    xor0 = lo[0];
7747	    if (lo[1] != const0_rtx)
7748	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7749				   NULL_RTX, 0, OPTAB_WIDEN);
7750
7751	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7752				NULL_RTX, 0, OPTAB_WIDEN);
7753
7754	    ix86_compare_op0 = tmp;
7755	    ix86_compare_op1 = const0_rtx;
7756	    ix86_expand_branch (code, label);
7757	    return;
7758	  }
7759
7760	/* Otherwise, if we are doing a less-than or greater-or-equal-than
7761	   comparison, op1 is a constant, and the low word is zero, then we
7762	   can just examine the high word.  */
7763
7764	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7765	  switch (code)
7766	    {
7767	    case LT: case LTU: case GE: case GEU:
7768	      ix86_compare_op0 = hi[0];
7769	      ix86_compare_op1 = hi[1];
7770	      ix86_expand_branch (code, label);
7771	      return;
7772	    default:
7773	      break;
7774	    }
7775
7776	/* Otherwise, we need two or three jumps.  */
7777
7778	label2 = gen_label_rtx ();
7779
7780	code1 = code;
7781	code2 = swap_condition (code);
7782	code3 = unsigned_condition (code);
7783
7784	switch (code)
7785	  {
7786	  case LT: case GT: case LTU: case GTU:
7787	    break;
7788
7789	  case LE:   code1 = LT;  code2 = GT;  break;
7790	  case GE:   code1 = GT;  code2 = LT;  break;
7791	  case LEU:  code1 = LTU; code2 = GTU; break;
7792	  case GEU:  code1 = GTU; code2 = LTU; break;
7793
7794	  case EQ:   code1 = NIL; code2 = NE;  break;
7795	  case NE:   code2 = NIL; break;
7796
7797	  default:
7798	    abort ();
7799	  }
7800
7801	/*
7802	 * a < b =>
7803	 *    if (hi(a) < hi(b)) goto true;
7804	 *    if (hi(a) > hi(b)) goto false;
7805	 *    if (lo(a) < lo(b)) goto true;
7806	 *  false:
7807	 */
7808
7809	ix86_compare_op0 = hi[0];
7810	ix86_compare_op1 = hi[1];
7811
7812	if (code1 != NIL)
7813	  ix86_expand_branch (code1, label);
7814	if (code2 != NIL)
7815	  ix86_expand_branch (code2, label2);
7816
7817	ix86_compare_op0 = lo[0];
7818	ix86_compare_op1 = lo[1];
7819	ix86_expand_branch (code3, label);
7820
7821	if (code2 != NIL)
7822	  emit_label (label2);
7823	return;
7824      }
7825
7826    default:
7827      abort ();
7828    }
7829}
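
/* Sketch of the equality shortcut above for a DImode compare on a
   32-bit target (register assignment hypothetical):

       movl  hi0, %eax
       xorl  hi1, %eax        # eax = hi0 ^ hi1
       movl  lo0, %edx
       xorl  lo1, %edx        # edx = lo0 ^ lo1
       orl   %edx, %eax       # zero iff both halves are equal
       je    .Ltrue                                                 */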
7830
7831/* Split branch based on floating point condition.  */
7832void
7833ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7834     enum rtx_code code;
7835     rtx op1, op2, target1, target2, tmp;
7836{
7837  rtx second, bypass;
7838  rtx label = NULL_RTX;
7839  rtx condition;
7840  int bypass_probability = -1, second_probability = -1, probability = -1;
7841  rtx i;
7842
7843  if (target2 != pc_rtx)
7844    {
7845      rtx tmp = target2;
7846      code = reverse_condition_maybe_unordered (code);
7847      target2 = target1;
7848      target1 = tmp;
7849    }
7850
7851  condition = ix86_expand_fp_compare (code, op1, op2,
7852				      tmp, &second, &bypass);
7853
7854  if (split_branch_probability >= 0)
7855    {
7856      /* Distribute the probabilities across the jumps.
7857	 Assume that BYPASS and SECOND always test
7858	 for UNORDERED.  */
7859      probability = split_branch_probability;
7860
7861      /* A value of 1 is low enough that the probability does not need
7862	 to be updated.  Later we may run some experiments and see
7863	 whether unordered values are more frequent in practice.  */
7864      if (bypass)
7865	bypass_probability = 1;
7866      if (second)
7867	second_probability = 1;
7868    }
7869  if (bypass != NULL_RTX)
7870    {
7871      label = gen_label_rtx ();
7872      i = emit_jump_insn (gen_rtx_SET
7873			  (VOIDmode, pc_rtx,
7874			   gen_rtx_IF_THEN_ELSE (VOIDmode,
7875						 bypass,
7876						 gen_rtx_LABEL_REF (VOIDmode,
7877								    label),
7878						 pc_rtx)));
7879      if (bypass_probability >= 0)
7880	REG_NOTES (i)
7881	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
7882			       GEN_INT (bypass_probability),
7883			       REG_NOTES (i));
7884    }
7885  i = emit_jump_insn (gen_rtx_SET
7886		      (VOIDmode, pc_rtx,
7887		       gen_rtx_IF_THEN_ELSE (VOIDmode,
7888					     condition, target1, target2)));
7889  if (probability >= 0)
7890    REG_NOTES (i)
7891      = gen_rtx_EXPR_LIST (REG_BR_PROB,
7892			   GEN_INT (probability),
7893			   REG_NOTES (i));
7894  if (second != NULL_RTX)
7895    {
7896      i = emit_jump_insn (gen_rtx_SET
7897			  (VOIDmode, pc_rtx,
7898			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7899						 target2)));
7900      if (second_probability >= 0)
7901	REG_NOTES (i)
7902	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
7903			       GEN_INT (second_probability),
7904			       REG_NOTES (i));
7905    }
7906  if (label != NULL_RTX)
7907    emit_label (label);
7908}
7909
7910int
7911ix86_expand_setcc (code, dest)
7912     enum rtx_code code;
7913     rtx dest;
7914{
7915  rtx ret, tmp, tmpreg;
7916  rtx second_test, bypass_test;
7917
7918  if (GET_MODE (ix86_compare_op0) == DImode
7919      && !TARGET_64BIT)
7920    return 0; /* FAIL */
7921
7922  if (GET_MODE (dest) != QImode)
7923    abort ();
7924
7925  ret = ix86_expand_compare (code, &second_test, &bypass_test);
7926  PUT_MODE (ret, QImode);
7927
7928  tmp = dest;
7929  tmpreg = dest;
7930
7931  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7932  if (bypass_test || second_test)
7933    {
7934      rtx test = second_test;
7935      int bypass = 0;
7936      rtx tmp2 = gen_reg_rtx (QImode);
7937      if (bypass_test)
7938	{
7939	  if (second_test)
7940	    abort ();
7941	  test = bypass_test;
7942	  bypass = 1;
7943	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7944	}
7945      PUT_MODE (test, QImode);
7946      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7947
7948      if (bypass)
7949	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7950      else
7951	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7952    }
7953
7954  return 1; /* DONE */
7955}
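
/* Example (a sketch with hypothetical registers): an IEEE NE setcc
   needs the second UNORDERED test generated above, and the two partial
   results are combined with an or:

       fucomip %st(1), %st
       setne   %al            # the LTGT part
       setp    %cl            # the UNORDERED part
       orb     %cl, %al                                             */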
7956
7957int
7958ix86_expand_int_movcc (operands)
7959     rtx operands[];
7960{
7961  enum rtx_code code = GET_CODE (operands[1]), compare_code;
7962  rtx compare_seq, compare_op;
7963  rtx second_test, bypass_test;
7964  enum machine_mode mode = GET_MODE (operands[0]);
7965
7966  /* When the compare code is not LTU or GEU, we can not use the sbbl case.
7967     In case the comparison is done with an immediate, we can convert it to
7968     LTU or GEU by altering the integer.  */
7969
7970  if ((code == LEU || code == GTU)
7971      && GET_CODE (ix86_compare_op1) == CONST_INT
7972      && mode != HImode
7973      && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
7974      && GET_CODE (operands[2]) == CONST_INT
7975      && GET_CODE (operands[3]) == CONST_INT)
7976    {
7977      if (code == LEU)
7978	code = LTU;
7979      else
7980	code = GEU;
7981      ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
7982    }
7983
7984  start_sequence ();
7985  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
7986  compare_seq = gen_sequence ();
7987  end_sequence ();
7988
7989  compare_code = GET_CODE (compare_op);
7990
7991  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7992     HImode insns, we'd be swallowed in word prefix ops.  */
7993
7994  if (mode != HImode
7995      && (mode != DImode || TARGET_64BIT)
7996      && GET_CODE (operands[2]) == CONST_INT
7997      && GET_CODE (operands[3]) == CONST_INT)
7998    {
7999      rtx out = operands[0];
8000      HOST_WIDE_INT ct = INTVAL (operands[2]);
8001      HOST_WIDE_INT cf = INTVAL (operands[3]);
8002      HOST_WIDE_INT diff;
8003
8004      if ((compare_code == LTU || compare_code == GEU)
8005	  && !second_test && !bypass_test)
8006	{
8007
8008	  /* Detect overlap between destination and compare sources.  */
8009	  rtx tmp = out;
8010
8011	  /* To simplify the rest of the code, restrict to the GEU case.  */
8012	  if (compare_code == LTU)
8013	    {
8014	      int tmp = ct;
8015	      ct = cf;
8016	      cf = tmp;
8017	      compare_code = reverse_condition (compare_code);
8018	      code = reverse_condition (code);
8019	    }
8020	  diff = ct - cf;
8021
8022	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8023	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
8024	    tmp = gen_reg_rtx (mode);
8025
8026	  emit_insn (compare_seq);
8027	  if (mode == DImode)
8028	    emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8029	  else
8030	    emit_insn (gen_x86_movsicc_0_m1 (tmp));
8031
8032	  if (diff == 1)
8033	    {
8034	      /*
8035	       * cmpl op0,op1
8036	       * sbbl dest,dest
8037	       * [addl dest, ct]
8038	       *
8039	       * Size 5 - 8.
8040	       */
8041	      if (ct)
8042	       	tmp = expand_simple_binop (mode, PLUS,
8043					   tmp, GEN_INT (ct),
8044					   tmp, 1, OPTAB_DIRECT);
8045	    }
8046	  else if (cf == -1)
8047	    {
8048	      /*
8049	       * cmpl op0,op1
8050	       * sbbl dest,dest
8051	       * orl $ct, dest
8052	       *
8053	       * Size 8.
8054	       */
8055	      tmp = expand_simple_binop (mode, IOR,
8056					 tmp, GEN_INT (ct),
8057					 tmp, 1, OPTAB_DIRECT);
8058	    }
8059	  else if (diff == -1 && ct)
8060	    {
8061	      /*
8062	       * cmpl op0,op1
8063	       * sbbl dest,dest
8064	       * xorl $-1, dest
8065	       * [addl dest, cf]
8066	       *
8067	       * Size 8 - 11.
8068	       */
8069	      tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8070	      if (cf)
8071	       	tmp = expand_simple_binop (mode, PLUS,
8072					   tmp, GEN_INT (cf),
8073					   tmp, 1, OPTAB_DIRECT);
8074	    }
8075	  else
8076	    {
8077	      /*
8078	       * cmpl op0,op1
8079	       * sbbl dest,dest
8080	       * andl cf - ct, dest
8081	       * [addl dest, ct]
8082	       *
8083	       * Size 8 - 11.
8084	       */
8085	      tmp = expand_simple_binop (mode, AND,
8086					 tmp,
8087					 GEN_INT (trunc_int_for_mode
8088						  (cf - ct, mode)),
8089					 tmp, 1, OPTAB_DIRECT);
8090	      if (ct)
8091	       	tmp = expand_simple_binop (mode, PLUS,
8092					   tmp, GEN_INT (ct),
8093					   tmp, 1, OPTAB_DIRECT);
8094	    }
8095
8096	  if (tmp != out)
8097	    emit_move_insn (out, tmp);
8098
8099	  return 1; /* DONE */
8100	}
8101
8102      diff = ct - cf;
8103      if (diff < 0)
8104	{
8105	  HOST_WIDE_INT tmp;
8106	  tmp = ct, ct = cf, cf = tmp;
8107	  diff = -diff;
8108	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8109	    {
8110	      /* We may be reversing an unordered compare to a normal compare,
8111		 which is not valid in general (we may convert a non-trapping
8112		 condition to a trapping one); however, on i386 we currently
8113		 emit all comparisons unordered.  */
8114	      compare_code = reverse_condition_maybe_unordered (compare_code);
8115	      code = reverse_condition_maybe_unordered (code);
8116	    }
8117	  else
8118	    {
8119	      compare_code = reverse_condition (compare_code);
8120	      code = reverse_condition (code);
8121	    }
8122	}
8123      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8124	   || diff == 3 || diff == 5 || diff == 9)
8125	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8126	{
8127	  /*
8128	   * xorl dest,dest
8129	   * cmpl op1,op2
8130	   * setcc dest
8131	   * lea cf(dest*(ct-cf)),dest
8132	   *
8133	   * Size 14.
8134	   *
8135	   * This also catches the degenerate setcc-only case.
8136	   */
8137
8138	  rtx tmp;
8139	  int nops;
8140
8141	  out = emit_store_flag (out, code, ix86_compare_op0,
8142				 ix86_compare_op1, VOIDmode, 0, 1);
8143
8144	  nops = 0;
8145	  /* On x86_64 the lea instruction operates on Pmode, so we need to get
8146	     the arithmetic done in the proper mode to match.  */
8147	  if (diff == 1)
8148	    tmp = out;
8149	  else
8150	    {
8151	      rtx out1;
8152	      out1 = out;
8153	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8154	      nops++;
8155	      if (diff & 1)
8156		{
8157		  tmp = gen_rtx_PLUS (mode, tmp, out1);
8158		  nops++;
8159		}
8160	    }
8161	  if (cf != 0)
8162	    {
8163	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8164	      nops++;
8165	    }
8166	  if (tmp != out
8167	      && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8168	    {
8169	      if (nops == 1)
8170		{
8171		  rtx clob;
8172
8173		  clob = gen_rtx_REG (CCmode, FLAGS_REG);
8174		  clob = gen_rtx_CLOBBER (VOIDmode, clob);
8175
8176		  tmp = gen_rtx_SET (VOIDmode, out, tmp);
8177		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8178		  emit_insn (tmp);
8179		}
8180	      else
8181		emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8182	    }
8183	  if (out != operands[0])
8184	    emit_move_insn (operands[0], out);
8185
8186	  return 1; /* DONE */
8187	}
8188
8189      /*
8190       * General case:			Jumpful:
8191       *   xorl dest,dest		cmpl op1, op2
8192       *   cmpl op1, op2		movl ct, dest
8193       *   setcc dest			jcc 1f
8194       *   decl dest			movl cf, dest
8195       *   andl (cf-ct),dest		1:
8196       *   addl ct,dest
8197       *
8198       * Size 20.			Size 14.
8199       *
8200       * This is reasonably steep, but branch mispredict costs are
8201       * high on modern cpus, so consider failing only if optimizing
8202       * for space.
8203       *
8204       * %%% Parameterize branch_cost on the tuning architecture, then
8205       * use that.  The 80386 couldn't care less about mispredicts.
8206       */
8207
8208      if (!optimize_size && !TARGET_CMOVE)
8209	{
8210	  if (ct == 0)
8211	    {
8212	      ct = cf;
8213	      cf = 0;
8214	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8215		{
8216		  /* We may be reversing an unordered compare to a normal
8217		     compare, which is not valid in general (we may convert a
8218		     non-trapping condition to a trapping one); however, on
8219		     i386 we currently emit all comparisons unordered.  */
8220		  compare_code = reverse_condition_maybe_unordered (compare_code);
8221		  code = reverse_condition_maybe_unordered (code);
8222		}
8223	      else
8224		{
8225		  compare_code = reverse_condition (compare_code);
8226		  code = reverse_condition (code);
8227		}
8228	    }
8229
8230	  out = emit_store_flag (out, code, ix86_compare_op0,
8231				 ix86_compare_op1, VOIDmode, 0, 1);
8232
8233	  out = expand_simple_binop (mode, PLUS,
8234				     out, constm1_rtx,
8235				     out, 1, OPTAB_DIRECT);
8236	  out = expand_simple_binop (mode, AND,
8237				     out,
8238				     GEN_INT (trunc_int_for_mode
8239					      (cf - ct, mode)),
8240				     out, 1, OPTAB_DIRECT);
8241	  out = expand_simple_binop (mode, PLUS,
8242				     out, GEN_INT (ct),
8243				     out, 1, OPTAB_DIRECT);
8244	  if (out != operands[0])
8245	    emit_move_insn (operands[0], out);
8246
8247	  return 1; /* DONE */
8248	}
8249    }
8250
8251  if (!TARGET_CMOVE)
8252    {
8253      /* Try a few more things with specific constants and a variable.  */
8254
8255      optab op;
8256      rtx var, orig_out, out, tmp;
8257
8258      if (optimize_size)
8259	return 0; /* FAIL */
8260
8261      /* If one of the two operands is an interesting constant, load a
8262	 0/-1 constant with the code above and mask it in with a logical operation.  */
8263
8264      if (GET_CODE (operands[2]) == CONST_INT)
8265	{
8266	  var = operands[3];
8267	  if (INTVAL (operands[2]) == 0)
8268	    operands[3] = constm1_rtx, op = and_optab;
8269	  else if (INTVAL (operands[2]) == -1)
8270	    operands[3] = const0_rtx, op = ior_optab;
8271	  else
8272	    return 0; /* FAIL */
8273	}
8274      else if (GET_CODE (operands[3]) == CONST_INT)
8275	{
8276	  var = operands[2];
8277	  if (INTVAL (operands[3]) == 0)
8278	    operands[2] = constm1_rtx, op = and_optab;
8279	  else if (INTVAL (operands[3]) == -1)
8280	    operands[2] = const0_rtx, op = ior_optab;
8281	  else
8282	    return 0; /* FAIL */
8283	}
8284      else
8285        return 0; /* FAIL */
8286
8287      orig_out = operands[0];
8288      tmp = gen_reg_rtx (mode);
8289      operands[0] = tmp;
8290
8291      /* Recurse to get the constant loaded.  */
8292      if (ix86_expand_int_movcc (operands) == 0)
8293        return 0; /* FAIL */
8294
8295      /* Mask in the interesting variable.  */
8296      out = expand_binop (mode, op, var, tmp, orig_out, 0,
8297			  OPTAB_WIDEN);
8298      if (out != orig_out)
8299	emit_move_insn (orig_out, out);
8300
8301      return 1; /* DONE */
8302    }
8303
8304  /*
8305   * For comparison with above,
8306   *
8307   * movl cf,dest
8308   * movl ct,tmp
8309   * cmpl op1,op2
8310   * cmovcc tmp,dest
8311   *
8312   * Size 15.
8313   */
8314
8315  if (! nonimmediate_operand (operands[2], mode))
8316    operands[2] = force_reg (mode, operands[2]);
8317  if (! nonimmediate_operand (operands[3], mode))
8318    operands[3] = force_reg (mode, operands[3]);
8319
8320  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8321    {
8322      rtx tmp = gen_reg_rtx (mode);
8323      emit_move_insn (tmp, operands[3]);
8324      operands[3] = tmp;
8325    }
8326  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8327    {
8328      rtx tmp = gen_reg_rtx (mode);
8329      emit_move_insn (tmp, operands[2]);
8330      operands[2] = tmp;
8331    }
8332  if (! register_operand (operands[2], VOIDmode)
8333      && ! register_operand (operands[3], VOIDmode))
8334    operands[2] = force_reg (mode, operands[2]);
8335
8336  emit_insn (compare_seq);
8337  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8338			  gen_rtx_IF_THEN_ELSE (mode,
8339						compare_op, operands[2],
8340						operands[3])));
8341  if (bypass_test)
8342    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8343			    gen_rtx_IF_THEN_ELSE (mode,
8344				  bypass_test,
8345				  operands[3],
8346				  operands[0])));
8347  if (second_test)
8348    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8349			    gen_rtx_IF_THEN_ELSE (mode,
8350				  second_test,
8351				  operands[2],
8352				  operands[0])));
8353
8354  return 1; /* DONE */
8355}
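
/* Worked instance of the sbb idiom above (a sketch with hypothetical
   constants ct = 5, cf = 2; after restricting to GEU, cf - ct = -3):

       cmpl  %ebx, %eax       # CF set iff a < b (LTU)
       sbbl  %ecx, %ecx       # ecx = -1 on LTU, 0 on GEU
       andl  $-3, %ecx        # ecx = -3 on LTU, 0 on GEU
       addl  $5, %ecx         # ecx =  2 (cf) on LTU, 5 (ct) on GEU  */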
8356
8357int
8358ix86_expand_fp_movcc (operands)
8359     rtx operands[];
8360{
8361  enum rtx_code code;
8362  rtx tmp;
8363  rtx compare_op, second_test, bypass_test;
8364
8365  /* For SF/DFmode conditional moves based on comparisons
8366     in the same mode, we may want to use SSE min/max instructions.  */
8367  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
8368       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
8369      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8370      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
8371      && (!TARGET_IEEE_FP
8372	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8373      /* We may be called from the post-reload splitter.  */
8374      && (!REG_P (operands[0])
8375	  || SSE_REG_P (operands[0])
8376	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8377    {
8378      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8379      code = GET_CODE (operands[1]);
8380
8381      /* See if we have a (cross) match between the comparison operands
8382         and the conditional move operands.  */
8383      if (rtx_equal_p (operands[2], op1))
8384	{
8385	  rtx tmp = op0;
8386	  op0 = op1;
8387	  op1 = tmp;
8388	  code = reverse_condition_maybe_unordered (code);
8389	}
8390      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8391	{
8392	  /* Check for min operation.  */
8393	  if (code == LT)
8394	    {
8395	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8396	       if (memory_operand (op0, VOIDmode))
8397		 op0 = force_reg (GET_MODE (operands[0]), op0);
8398	       if (GET_MODE (operands[0]) == SFmode)
8399		 emit_insn (gen_minsf3 (operands[0], op0, op1));
8400	       else
8401		 emit_insn (gen_mindf3 (operands[0], op0, op1));
8402	       return 1;
8403	    }
8404	  /* Check for max operation.  */
8405	  if (code == GT)
8406	    {
8407	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8408	       if (memory_operand (op0, VOIDmode))
8409		 op0 = force_reg (GET_MODE (operands[0]), op0);
8410	       if (GET_MODE (operands[0]) == SFmode)
8411		 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8412	       else
8413		 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8414	       return 1;
8415	    }
8416	}
8417      /* Arrange for the condition to be an sse_comparison_operator.  In
8418	 case we are in non-IEEE mode, try to canonicalize the destination
8419	 operand to be first in the comparison - this helps reload avoid
8420	 extra moves.  */
8421      if (!sse_comparison_operator (operands[1], VOIDmode)
8422	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8423	{
8424	  rtx tmp = ix86_compare_op0;
8425	  ix86_compare_op0 = ix86_compare_op1;
8426	  ix86_compare_op1 = tmp;
8427	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8428					VOIDmode, ix86_compare_op0,
8429					ix86_compare_op1);
8430	}
8431      /* Similarly, try to arrange for the result to be the first operand
8432	 of the conditional move.  We also don't support the NE comparison
8433	 on SSE, so try to avoid it.  */
8434      if ((rtx_equal_p (operands[0], operands[3])
8435	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8436	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8437	{
8438	  rtx tmp = operands[2];
8439	  operands[2] = operands[3];
8440	  operands[3] = tmp;
8441	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8442					  (GET_CODE (operands[1])),
8443					VOIDmode, ix86_compare_op0,
8444					ix86_compare_op1);
8445	}
8446      if (GET_MODE (operands[0]) == SFmode)
8447	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8448				    operands[2], operands[3],
8449				    ix86_compare_op0, ix86_compare_op1));
8450      else
8451	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8452				    operands[2], operands[3],
8453				    ix86_compare_op0, ix86_compare_op1));
8454      return 1;
8455    }
8456
8457  /* The floating point conditional move instructions don't directly
8458     support conditions resulting from a signed integer comparison.  */
8459
8460  code = GET_CODE (operands[1]);
8461  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8462
8466  if (!fcmov_comparison_operator (compare_op, VOIDmode))
8467    {
8468      if (second_test != NULL || bypass_test != NULL)
8469	abort ();
8470      tmp = gen_reg_rtx (QImode);
8471      ix86_expand_setcc (code, tmp);
8472      code = NE;
8473      ix86_compare_op0 = tmp;
8474      ix86_compare_op1 = const0_rtx;
8475      compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
8476    }
8477  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8478    {
8479      tmp = gen_reg_rtx (GET_MODE (operands[0]));
8480      emit_move_insn (tmp, operands[3]);
8481      operands[3] = tmp;
8482    }
8483  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8484    {
8485      tmp = gen_reg_rtx (GET_MODE (operands[0]));
8486      emit_move_insn (tmp, operands[2]);
8487      operands[2] = tmp;
8488    }
8489
8490  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8491			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8492				compare_op,
8493				operands[2],
8494				operands[3])));
8495  if (bypass_test)
8496    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8497			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8498				  bypass_test,
8499				  operands[3],
8500				  operands[0])));
8501  if (second_test)
8502    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8503			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8504				  second_test,
8505				  operands[2],
8506				  operands[0])));
8507
8508  return 1;
8509}
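
/* Example of the SSE min/max shortcut above: with SSE math enabled,
       x = (a < b) ? a : b
   in SFmode matches the comparison operands, code is LT, and a single
   instruction results (registers hypothetical):

       minss %xmm1, %xmm0  */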
8510
8511/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
8512   works for floating point parameters and non-offsettable memories.
8513   For pushes, it returns just stack offsets; the values will be saved
8514   in the right order.  At most three parts are generated.  */
8515
8516static int
8517ix86_split_to_parts (operand, parts, mode)
8518     rtx operand;
8519     rtx *parts;
8520     enum machine_mode mode;
8521{
8522  int size;
8523
8524  if (!TARGET_64BIT)
8525    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8526  else
8527    size = (GET_MODE_SIZE (mode) + 4) / 8;
8528
8529  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8530    abort ();
8531  if (size < 2 || size > 3)
8532    abort ();
8533
8534  /* Optimize constant pool references to immediates.  This is used by fp
8535     moves, which force all constants to memory to allow combining.  */
8536
8537  if (GET_CODE (operand) == MEM
8538      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8539      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8540    operand = get_pool_constant (XEXP (operand, 0));
8541
8542  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8543    {
8544      /* The only non-offsettable memories we handle are pushes.  */
8545      if (! push_operand (operand, VOIDmode))
8546	abort ();
8547
8548      operand = copy_rtx (operand);
8549      PUT_MODE (operand, Pmode);
8550      parts[0] = parts[1] = parts[2] = operand;
8551    }
8552  else if (!TARGET_64BIT)
8553    {
8554      if (mode == DImode)
8555	split_di (&operand, 1, &parts[0], &parts[1]);
8556      else
8557	{
8558	  if (REG_P (operand))
8559	    {
8560	      if (!reload_completed)
8561		abort ();
8562	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8563	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8564	      if (size == 3)
8565		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8566	    }
8567	  else if (offsettable_memref_p (operand))
8568	    {
8569	      operand = adjust_address (operand, SImode, 0);
8570	      parts[0] = operand;
8571	      parts[1] = adjust_address (operand, SImode, 4);
8572	      if (size == 3)
8573		parts[2] = adjust_address (operand, SImode, 8);
8574	    }
8575	  else if (GET_CODE (operand) == CONST_DOUBLE)
8576	    {
8577	      REAL_VALUE_TYPE r;
8578	      long l[4];
8579
8580	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8581	      switch (mode)
8582		{
8583		case XFmode:
8584		case TFmode:
8585		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8586		  parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8587		  break;
8588		case DFmode:
8589		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8590		  break;
8591		default:
8592		  abort ();
8593		}
8594	      parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
8595	      parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
8596	    }
8597	  else
8598	    abort ();
8599	}
8600    }
8601  else
8602    {
8603      if (mode == TImode)
8604	split_ti (&operand, 1, &parts[0], &parts[1]);
8605      if (mode == XFmode || mode == TFmode)
8606	{
8607	  if (REG_P (operand))
8608	    {
8609	      if (!reload_completed)
8610		abort ();
8611	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8612	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8613	    }
8614	  else if (offsettable_memref_p (operand))
8615	    {
8616	      operand = adjust_address (operand, DImode, 0);
8617	      parts[0] = operand;
8618	      parts[1] = adjust_address (operand, SImode, 8);
8619	    }
8620	  else if (GET_CODE (operand) == CONST_DOUBLE)
8621	    {
8622	      REAL_VALUE_TYPE r;
8623	      long l[3];
8624
8625	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8626	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8627	      /* Do not use a shift by 32, to avoid a warning on 32-bit systems.  */
8628	      if (HOST_BITS_PER_WIDE_INT >= 64)
8629	        parts[0]
8630		  = GEN_INT (trunc_int_for_mode
8631		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8632		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
8633		       DImode));
8634	      else
8635	        parts[0] = immed_double_const (l[0], l[1], DImode);
8636	      parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8637	    }
8638	  else
8639	    abort ();
8640	}
8641    }
8642
8643  return size;
8644}
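
/* For instance (sketch), the DFmode constant 1.0 on a 32-bit target
   comes back as two SImode immediates holding the IEEE double bit
   pattern, low word first:

     parts[0] = (const_int 0x00000000)
     parts[1] = (const_int 0x3ff00000)

   while an offsettable DFmode memory splits into the same address at
   offsets 0 and 4.  */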
8645
8646/* Emit insns to perform a move or push of DI, DF, and XF values.
8647   Operands 2-4 contain the input values in the correct order;
8648   operands 5-7 contain the output values.  */
8650
8651void
8652ix86_split_long_move (operands)
8653     rtx operands[];
8654{
8655  rtx part[2][3];
8656  int nparts;
8657  int push = 0;
8658  int collisions = 0;
8659  enum machine_mode mode = GET_MODE (operands[0]);
8660
8661  /* The DFmode expanders may ask us to move a double.
8662     For a 64-bit target this is a single move.  By hiding that
8663     fact here we simplify the i386.md splitters.  */
8664  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8665    {
8666      /* Optimize constant pool references to immediates.  This is used by
8667	 fp moves, which force all constants to memory to allow combining.  */
8668
8669      if (GET_CODE (operands[1]) == MEM
8670	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8671	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8672	operands[1] = get_pool_constant (XEXP (operands[1], 0));
8673      if (push_operand (operands[0], VOIDmode))
8674	{
8675	  operands[0] = copy_rtx (operands[0]);
8676	  PUT_MODE (operands[0], Pmode);
8677	}
8678      else
8679        operands[0] = gen_lowpart (DImode, operands[0]);
8680      operands[1] = gen_lowpart (DImode, operands[1]);
8681      emit_move_insn (operands[0], operands[1]);
8682      return;
8683    }
8684
8685  /* The only non-offsettable memory we handle is a push.  */
8686  if (push_operand (operands[0], VOIDmode))
8687    push = 1;
8688  else if (GET_CODE (operands[0]) == MEM
8689	   && ! offsettable_memref_p (operands[0]))
8690    abort ();
8691
8692  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8693  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8694
8695  /* When emitting a push, take care of source operands on the stack.  */
8696  if (push && GET_CODE (operands[1]) == MEM
8697      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8698    {
8699      if (nparts == 3)
8700	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8701				     XEXP (part[1][2], 0));
8702      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8703				   XEXP (part[1][1], 0));
8704    }
8705
8706  /* We need to do the copy in the right order in case an address register
8707     of the source overlaps the destination.  */
8708  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8709    {
8710      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8711	collisions++;
8712      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8713	collisions++;
8714      if (nparts == 3
8715	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8716	collisions++;
8717
8718      /* Collision in the middle part can be handled by reordering.  */
8719      if (collisions == 1 && nparts == 3
8720	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8721	{
8722	  rtx tmp;
8723	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8724	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8725	}
8726
8727      /* If there are more collisions, we can't handle them by reordering.
8728	 Do an lea to the last part and use only one colliding move.  */
8729      else if (collisions > 1)
8730	{
8731	  collisions = 1;
8732	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8733				  XEXP (part[1][0], 0)));
8734	  part[1][0] = change_address (part[1][0],
8735				       TARGET_64BIT ? DImode : SImode,
8736				       part[0][nparts - 1]);
8737	  part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8738	  if (nparts == 3)
8739	    part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8740	}
8741    }
8742
8743  if (push)
8744    {
8745      if (!TARGET_64BIT)
8746	{
8747	  if (nparts == 3)
8748	    {
8749	      /* We use only the first 12 bytes of the TFmode value, but for
8750		 pushing we are required to adjust the stack as if we were
8751		 pushing a real 16-byte value.  */
8752	      if (mode == TFmode && !TARGET_64BIT)
8753		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8754				       GEN_INT (-4)));
8755	      emit_move_insn (part[0][2], part[1][2]);
8756	    }
8757	}
8758      else
8759	{
8760	  /* In 64-bit mode we don't have a 32-bit push available.  If this is
8761	     a register, that is OK - we will just use the larger counterpart.
8762	     We also retype memory - this comes from an attempt to avoid the
8763	     REX prefix when moving the second half of a TFmode value.  */
8764	  if (GET_MODE (part[1][1]) == SImode)
8765	    {
8766	      if (GET_CODE (part[1][1]) == MEM)
8767		part[1][1] = adjust_address (part[1][1], DImode, 0);
8768	      else if (REG_P (part[1][1]))
8769		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8770	      else
8771		abort ();
8772	      if (GET_MODE (part[1][0]) == SImode)
8773		part[1][0] = part[1][1];
8774	    }
8775	}
8776      emit_move_insn (part[0][1], part[1][1]);
8777      emit_move_insn (part[0][0], part[1][0]);
8778      return;
8779    }
8780
8781  /* Choose the correct order so as not to overwrite the source before it is copied.  */
8782  if ((REG_P (part[0][0])
8783       && REG_P (part[1][1])
8784       && (REGNO (part[0][0]) == REGNO (part[1][1])
8785	   || (nparts == 3
8786	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
8787      || (collisions > 0
8788	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8789    {
8790      if (nparts == 3)
8791	{
8792	  operands[2] = part[0][2];
8793	  operands[3] = part[0][1];
8794	  operands[4] = part[0][0];
8795	  operands[5] = part[1][2];
8796	  operands[6] = part[1][1];
8797	  operands[7] = part[1][0];
8798	}
8799      else
8800	{
8801	  operands[2] = part[0][1];
8802	  operands[3] = part[0][0];
8803	  operands[5] = part[1][1];
8804	  operands[6] = part[1][0];
8805	}
8806    }
8807  else
8808    {
8809      if (nparts == 3)
8810	{
8811	  operands[2] = part[0][0];
8812	  operands[3] = part[0][1];
8813	  operands[4] = part[0][2];
8814	  operands[5] = part[1][0];
8815	  operands[6] = part[1][1];
8816	  operands[7] = part[1][2];
8817	}
8818      else
8819	{
8820	  operands[2] = part[0][0];
8821	  operands[3] = part[0][1];
8822	  operands[5] = part[1][0];
8823	  operands[6] = part[1][1];
8824	}
8825    }
8826  emit_move_insn (operands[2], operands[5]);
8827  emit_move_insn (operands[3], operands[6]);
8828  if (nparts == 3)
8829    emit_move_insn (operands[4], operands[7]);
8830
8831  return;
8832}
8833
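/* (Editorial note on the three DImode shift splitters that follow: each
   expands a 64-bit shift into SImode operations on the low and high
   halves.  For a constant count >= 32 the value simply moves between
   halves, with any remaining shift applied to a single half; for a
   constant count < 32 an shld/shrd pair does the job.  For a variable
   count the hardware truncates the count to 5 bits, so the extra
   conditional adjustment emitted via gen_x86_shift_adj_* fixes up the
   32..63 range, using a conditional move when TARGET_CMOVE permits.)  */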
8834void
8835ix86_split_ashldi (operands, scratch)
8836     rtx *operands, scratch;
8837{
8838  rtx low[2], high[2];
8839  int count;
8840
8841  if (GET_CODE (operands[2]) == CONST_INT)
8842    {
8843      split_di (operands, 2, low, high);
8844      count = INTVAL (operands[2]) & 63;
8845
8846      if (count >= 32)
8847	{
8848	  emit_move_insn (high[0], low[1]);
8849	  emit_move_insn (low[0], const0_rtx);
8850
8851	  if (count > 32)
8852	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8853	}
8854      else
8855	{
8856	  if (!rtx_equal_p (operands[0], operands[1]))
8857	    emit_move_insn (operands[0], operands[1]);
8858	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8859	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8860	}
8861    }
8862  else
8863    {
8864      if (!rtx_equal_p (operands[0], operands[1]))
8865	emit_move_insn (operands[0], operands[1]);
8866
8867      split_di (operands, 1, low, high);
8868
8869      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8870      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8871
8872      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8873	{
8874	  if (! no_new_pseudos)
8875	    scratch = force_reg (SImode, const0_rtx);
8876	  else
8877	    emit_move_insn (scratch, const0_rtx);
8878
8879	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8880					  scratch));
8881	}
8882      else
8883	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8884    }
8885}
8886
8887void
8888ix86_split_ashrdi (operands, scratch)
8889     rtx *operands, scratch;
8890{
8891  rtx low[2], high[2];
8892  int count;
8893
8894  if (GET_CODE (operands[2]) == CONST_INT)
8895    {
8896      split_di (operands, 2, low, high);
8897      count = INTVAL (operands[2]) & 63;
8898
8899      if (count >= 32)
8900	{
8901	  emit_move_insn (low[0], high[1]);
8902
8903	  if (! reload_completed)
8904	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8905	  else
8906	    {
8907	      emit_move_insn (high[0], low[0]);
8908	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8909	    }
8910
8911	  if (count > 32)
8912	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
8913	}
8914      else
8915	{
8916	  if (!rtx_equal_p (operands[0], operands[1]))
8917	    emit_move_insn (operands[0], operands[1]);
8918	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8919	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
8920	}
8921    }
8922  else
8923    {
8924      if (!rtx_equal_p (operands[0], operands[1]))
8925	emit_move_insn (operands[0], operands[1]);
8926
8927      split_di (operands, 1, low, high);
8928
8929      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8930      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8931
8932      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8933	{
8934	  if (! no_new_pseudos)
8935	    scratch = gen_reg_rtx (SImode);
8936	  emit_move_insn (scratch, high[0]);
8937	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8938	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8939					  scratch));
8940	}
8941      else
8942	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
8943    }
8944}
8945
8946void
8947ix86_split_lshrdi (operands, scratch)
8948     rtx *operands, scratch;
8949{
8950  rtx low[2], high[2];
8951  int count;
8952
8953  if (GET_CODE (operands[2]) == CONST_INT)
8954    {
8955      split_di (operands, 2, low, high);
8956      count = INTVAL (operands[2]) & 63;
8957
8958      if (count >= 32)
8959	{
8960	  emit_move_insn (low[0], high[1]);
8961	  emit_move_insn (high[0], const0_rtx);
8962
8963	  if (count > 32)
8964	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
8965	}
8966      else
8967	{
8968	  if (!rtx_equal_p (operands[0], operands[1]))
8969	    emit_move_insn (operands[0], operands[1]);
8970	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8971	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
8972	}
8973    }
8974  else
8975    {
8976      if (!rtx_equal_p (operands[0], operands[1]))
8977	emit_move_insn (operands[0], operands[1]);
8978
8979      split_di (operands, 1, low, high);
8980
8981      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8982      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8983
8984      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
8985      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8986	{
8987	  if (! no_new_pseudos)
8988	    scratch = force_reg (SImode, const0_rtx);
8989	  else
8990	    emit_move_insn (scratch, const0_rtx);
8991
8992	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8993					  scratch));
8994	}
8995      else
8996	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
8997    }
8998}
8999
9000/* Helper function for the string operations below.  Test whether
9001   (VARIABLE & VALUE) == 0 and, if so, jump to the returned label.  */
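/* (An editorial sketch of what the helper emits, for
   ix86_expand_aligntest (destreg, 2):

	andl   $2, %tmp		# tmp = destreg & 2
	je     .Llabel		# aligned: skip the fix-up code

   The caller emits the fix-up on the fall-through path and then emits
   the returned label itself.)  */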
9002static rtx
9003ix86_expand_aligntest (variable, value)
9004     rtx variable;
9005     int value;
9006{
9007  rtx label = gen_label_rtx ();
9008  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9009  if (GET_MODE (variable) == DImode)
9010    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9011  else
9012    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9013  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9014			   1, label);
9015  return label;
9016}
9017
9018/* Decrement COUNTREG by VALUE.  */
9019static void
9020ix86_adjust_counter (countreg, value)
9021     rtx countreg;
9022     HOST_WIDE_INT value;
9023{
9024  if (GET_MODE (countreg) == DImode)
9025    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9026  else
9027    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9028}
9029
9030/* Zero extend the possibly-SImode EXP to a Pmode register.  */
9031rtx
9032ix86_zero_extend_to_Pmode (exp)
9033   rtx exp;
9034{
9035  rtx r;
9036  if (GET_MODE (exp) == VOIDmode)
9037    return force_reg (Pmode, exp);
9038  if (GET_MODE (exp) == Pmode)
9039    return copy_to_mode_reg (Pmode, exp);
9040  r = gen_reg_rtx (Pmode);
9041  emit_insn (gen_zero_extendsidi2 (r, exp));
9042  return r;
9043}
9044
9045/* Expand string move (memcpy) operation.  Use i386 string operations when
9046   profitable.  expand_clrstr contains similar code.  */
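/* (Editorial overview of the strategy below, inferred from the code:
   when optimizing for size, a plain "rep ; movsb" is emitted; when the
   byte count is a compile-time constant and the operands are aligned
   (or the copy is small), "rep ; movsl" -- "rep ; movsq" on 64-bit --
   is emitted, followed by at most one SImode, one HImode and one QImode
   move for the remainder; otherwise the destination is aligned
   byte-by-byte first, and when nothing is known about the alignment the
   expander punts to the library call by returning 0.)  */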
9047int
9048ix86_expand_movstr (dst, src, count_exp, align_exp)
9049     rtx dst, src, count_exp, align_exp;
9050{
9051  rtx srcreg, destreg, countreg;
9052  enum machine_mode counter_mode;
9053  HOST_WIDE_INT align = 0;
9054  unsigned HOST_WIDE_INT count = 0;
9055  rtx insns;
9056
9057  start_sequence ();
9058
9059  if (GET_CODE (align_exp) == CONST_INT)
9060    align = INTVAL (align_exp);
9061
9062  /* This simple hack avoids all inlining code and simplifies code below.  */
9063  if (!TARGET_ALIGN_STRINGOPS)
9064    align = 64;
9065
9066  if (GET_CODE (count_exp) == CONST_INT)
9067    count = INTVAL (count_exp);
9068
9069  /* Figure out the proper mode for the counter.  For 32bit targets it is
9070     always SImode; for 64bit targets use SImode when possible, otherwise
9071     DImode.  Set COUNT to the number of bytes copied when it is known.  */
9072  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9073      || x86_64_zero_extended_value (count_exp))
9074    counter_mode = SImode;
9075  else
9076    counter_mode = DImode;
9077
9078  if (counter_mode != SImode && counter_mode != DImode)
9079    abort ();
9080
9081  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9082  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9083
9084  emit_insn (gen_cld ());
9085
9086  /* When optimizing for size, emit a simple rep ; movsb instruction for
9087     counts not divisible by 4.  */
9088
9089  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9090    {
9091      countreg = ix86_zero_extend_to_Pmode (count_exp);
9092      if (TARGET_64BIT)
9093	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9094				        destreg, srcreg, countreg));
9095      else
9096	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9097				  destreg, srcreg, countreg));
9098    }
9099
9100  /* For constant aligned (or small unaligned) copies use rep movsl
9101     followed by code copying the rest.  For PentiumPro ensure 8 byte
9102     alignment to allow rep movsl acceleration.  */
9103
9104  else if (count != 0
9105	   && (align >= 8
9106	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9107	       || optimize_size || count < (unsigned int) 64))
9108    {
9109      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9110      if (count & ~(size - 1))
9111	{
9112	  countreg = copy_to_mode_reg (counter_mode,
9113				       GEN_INT ((count >> (size == 4 ? 2 : 3))
9114						& (TARGET_64BIT ? -1 : 0x3fffffff)));
9115	  countreg = ix86_zero_extend_to_Pmode (countreg);
9116	  if (size == 4)
9117	    {
9118	      if (TARGET_64BIT)
9119		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9120					        destreg, srcreg, countreg));
9121	      else
9122		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9123					  destreg, srcreg, countreg));
9124	    }
9125	  else
9126	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9127					    destreg, srcreg, countreg));
9128	}
9129      if (size == 8 && (count & 0x04))
9130	emit_insn (gen_strmovsi (destreg, srcreg));
9131      if (count & 0x02)
9132	emit_insn (gen_strmovhi (destreg, srcreg));
9133      if (count & 0x01)
9134	emit_insn (gen_strmovqi (destreg, srcreg));
9135    }
9136  /* The generic code based on the glibc implementation:
9137     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9138     allowing accelerated copying there)
9139     - copy the data using rep movsl
9140     - copy the rest.  */
9141  else
9142    {
9143      rtx countreg2;
9144      rtx label = NULL;
9145
9146      /* In case we don't know anything about the alignment, default to the
9147         library version, since it is usually equally fast and results in
9148         shorter code.  */
9149      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9150	{
9151	  end_sequence ();
9152	  return 0;
9153	}
9154
9155      if (TARGET_SINGLE_STRINGOP)
9156	emit_insn (gen_cld ());
9157
9158      countreg2 = gen_reg_rtx (Pmode);
9159      countreg = copy_to_mode_reg (counter_mode, count_exp);
9160
9161      /* We don't use loops to align the destination or to copy parts
9162         smaller than 4 bytes, because gcc is able to optimize such code
9163         better (in the case the destination or the count really is
9164         aligned, gcc is often able to predict the branches) and also it
9165         is friendlier to hardware branch prediction.
9166
9167         Using loops is beneficial for the generic case, because we can
9168         handle small counts using the loops.  Many CPUs (such as the
9169         Athlon) have large REP prefix setup costs.
9170
9171         This is quite costly.  Maybe we can revisit this decision later
9172         or add some customizability to this code.  */
9173
9174      if (count == 0
9175	  && align < (TARGET_PENTIUMPRO && (count == 0
9176					    || count >= (unsigned int) 260)
9177		      ? 8 : UNITS_PER_WORD))
9178	{
9179	  label = gen_label_rtx ();
9180	  emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9181				   LEU, 0, counter_mode, 1, label);
9182	}
9183      if (align <= 1)
9184	{
9185	  rtx label = ix86_expand_aligntest (destreg, 1);
9186	  emit_insn (gen_strmovqi (destreg, srcreg));
9187	  ix86_adjust_counter (countreg, 1);
9188	  emit_label (label);
9189	  LABEL_NUSES (label) = 1;
9190	}
9191      if (align <= 2)
9192	{
9193	  rtx label = ix86_expand_aligntest (destreg, 2);
9194	  emit_insn (gen_strmovhi (destreg, srcreg));
9195	  ix86_adjust_counter (countreg, 2);
9196	  emit_label (label);
9197	  LABEL_NUSES (label) = 1;
9198	}
9199      if (align <= 4
9200	  && ((TARGET_PENTIUMPRO && (count == 0
9201				     || count >= (unsigned int) 260))
9202	      || TARGET_64BIT))
9203	{
9204	  rtx label = ix86_expand_aligntest (destreg, 4);
9205	  emit_insn (gen_strmovsi (destreg, srcreg));
9206	  ix86_adjust_counter (countreg, 4);
9207	  emit_label (label);
9208	  LABEL_NUSES (label) = 1;
9209	}
9210
9211      if (!TARGET_SINGLE_STRINGOP)
9212	emit_insn (gen_cld ());
9213      if (TARGET_64BIT)
9214	{
9215	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9216				  GEN_INT (3)));
9217	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9218					  destreg, srcreg, countreg2));
9219	}
9220      else
9221	{
9222	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9223	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9224				    destreg, srcreg, countreg2));
9225	}
9226
9227      if (label)
9228	{
9229	  emit_label (label);
9230	  LABEL_NUSES (label) = 1;
9231	}
9232      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9233	emit_insn (gen_strmovsi (destreg, srcreg));
9234      if ((align <= 4 || count == 0) && TARGET_64BIT)
9235	{
9236	  rtx label = ix86_expand_aligntest (countreg, 4);
9237	  emit_insn (gen_strmovsi (destreg, srcreg));
9238	  emit_label (label);
9239	  LABEL_NUSES (label) = 1;
9240	}
9241      if (align > 2 && count != 0 && (count & 2))
9242	emit_insn (gen_strmovhi (destreg, srcreg));
9243      if (align <= 2 || count == 0)
9244	{
9245	  rtx label = ix86_expand_aligntest (countreg, 2);
9246	  emit_insn (gen_strmovhi (destreg, srcreg));
9247	  emit_label (label);
9248	  LABEL_NUSES (label) = 1;
9249	}
9250      if (align > 1 && count != 0 && (count & 1))
9251	emit_insn (gen_strmovqi (destreg, srcreg));
9252      if (align <= 1 || count == 0)
9253	{
9254	  rtx label = ix86_expand_aligntest (countreg, 1);
9255	  emit_insn (gen_strmovqi (destreg, srcreg));
9256	  emit_label (label);
9257	  LABEL_NUSES (label) = 1;
9258	}
9259    }
9260
9261  insns = get_insns ();
9262  end_sequence ();
9263
9264  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9265  emit_insns (insns);
9266  return 1;
9267}
9268
9269/* Expand string clear operation (bzero).  Use i386 string operations when
9270   profitable.  expand_movstr contains similar code.  */
9271int
9272ix86_expand_clrstr (src, count_exp, align_exp)
9273     rtx src, count_exp, align_exp;
9274{
9275  rtx destreg, zeroreg, countreg;
9276  enum machine_mode counter_mode;
9277  HOST_WIDE_INT align = 0;
9278  unsigned HOST_WIDE_INT count = 0;
9279
9280  if (GET_CODE (align_exp) == CONST_INT)
9281    align = INTVAL (align_exp);
9282
9283  /* This simple hack avoids all inlining code and simplifies code below.  */
9284  if (!TARGET_ALIGN_STRINGOPS)
9285    align = 32;
9286
9287  if (GET_CODE (count_exp) == CONST_INT)
9288    count = INTVAL (count_exp);
9289  /* Figure out the proper mode for the counter.  For 32bit targets it is
9290     always SImode; for 64bit targets use SImode when possible, otherwise
9291     DImode.  Set COUNT to the number of bytes cleared when it is known.  */
9292  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9293      || x86_64_zero_extended_value (count_exp))
9294    counter_mode = SImode;
9295  else
9296    counter_mode = DImode;
9297
9298  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9299
9300  emit_insn (gen_cld ());
9301
9302  /* When optimizing for size, emit a simple rep ; stosb instruction for
9303     counts not divisible by 4.  */
9304
9305  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9306    {
9307      countreg = ix86_zero_extend_to_Pmode (count_exp);
9308      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9309      if (TARGET_64BIT)
9310	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9311				         destreg, countreg));
9312      else
9313	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9314				   destreg, countreg));
9315    }
9316  else if (count != 0
9317	   && (align >= 8
9318	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9319	       || optimize_size || count < (unsigned int) 64))
9320    {
9321      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9322      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9323      if (count & ~(size - 1))
9324	{
9325	  countreg = copy_to_mode_reg (counter_mode,
9326				       GEN_INT ((count >> (size == 4 ? 2 : 3))
9327						& (TARGET_64BIT ? -1 : 0x3fffffff)));
9328	  countreg = ix86_zero_extend_to_Pmode (countreg);
9329	  if (size == 4)
9330	    {
9331	      if (TARGET_64BIT)
9332		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9333					         destreg, countreg));
9334	      else
9335		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9336					   destreg, countreg));
9337	    }
9338	  else
9339	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9340					     destreg, countreg));
9341	}
9342      if (size == 8 && (count & 0x04))
9343	emit_insn (gen_strsetsi (destreg,
9344				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9345      if (count & 0x02)
9346	emit_insn (gen_strsethi (destreg,
9347				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9348      if (count & 0x01)
9349	emit_insn (gen_strsetqi (destreg,
9350				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9351    }
9352  else
9353    {
9354      rtx countreg2;
9355      rtx label = NULL;
9356
9357      /* In case we don't know anything about the alignment, default to the
9358         library version, since it is usually equally fast and results in
9359         shorter code.  */
9360      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9361	return 0;
9362
9363      if (TARGET_SINGLE_STRINGOP)
9364	emit_insn (gen_cld ());
9365
9366      countreg2 = gen_reg_rtx (Pmode);
9367      countreg = copy_to_mode_reg (counter_mode, count_exp);
9368      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9369
9370      if (count == 0
9371	  && align < (TARGET_PENTIUMPRO && (count == 0
9372					    || count >= (unsigned int) 260)
9373		      ? 8 : UNITS_PER_WORD))
9374	{
9375	  label = gen_label_rtx ();
9376	  emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9377				   LEU, 0, counter_mode, 1, label);
9378	}
9379      if (align <= 1)
9380	{
9381	  rtx label = ix86_expand_aligntest (destreg, 1);
9382	  emit_insn (gen_strsetqi (destreg,
9383				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
9384	  ix86_adjust_counter (countreg, 1);
9385	  emit_label (label);
9386	  LABEL_NUSES (label) = 1;
9387	}
9388      if (align <= 2)
9389	{
9390	  rtx label = ix86_expand_aligntest (destreg, 2);
9391	  emit_insn (gen_strsethi (destreg,
9392				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
9393	  ix86_adjust_counter (countreg, 2);
9394	  emit_label (label);
9395	  LABEL_NUSES (label) = 1;
9396	}
9397      if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9398					      || count >= (unsigned int) 260))
9399	{
9400	  rtx label = ix86_expand_aligntest (destreg, 4);
9401	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9402					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9403					     : zeroreg)));
9404	  ix86_adjust_counter (countreg, 4);
9405	  emit_label (label);
9406	  LABEL_NUSES (label) = 1;
9407	}
9408
9409      if (!TARGET_SINGLE_STRINGOP)
9410	emit_insn (gen_cld ());
9411      if (TARGET_64BIT)
9412	{
9413	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9414				  GEN_INT (3)));
9415	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9416					   destreg, countreg2));
9417	}
9418      else
9419	{
9420	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9421	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9422				     destreg, countreg2));
9423	}
9424
9425      if (label)
9426	{
9427	  emit_label (label);
9428	  LABEL_NUSES (label) = 1;
9429	}
9430      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9431	emit_insn (gen_strsetsi (destreg,
9432				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9433      if (TARGET_64BIT && (align <= 4 || count == 0))
9434	{
9435	  rtx label = ix86_expand_aligntest (countreg, 4);
9436	  emit_insn (gen_strsetsi (destreg,
9437				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
9438	  emit_label (label);
9439	  LABEL_NUSES (label) = 1;
9440	}
9441      if (align > 2 && count != 0 && (count & 2))
9442	emit_insn (gen_strsethi (destreg,
9443				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9444      if (align <= 2 || count == 0)
9445	{
9446	  rtx label = ix86_expand_aligntest (countreg, 2);
9447	  emit_insn (gen_strsethi (destreg,
9448				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
9449	  emit_label (label);
9450	  LABEL_NUSES (label) = 1;
9451	}
9452      if (align > 1 && count != 0 && (count & 1))
9453	emit_insn (gen_strsetqi (destreg,
9454				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9455      if (align <= 1 || count == 0)
9456	{
9457	  rtx label = ix86_expand_aligntest (countreg, 1);
9458	  emit_insn (gen_strsetqi (destreg,
9459				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
9460	  emit_label (label);
9461	  LABEL_NUSES (label) = 1;
9462	}
9463    }
9464  return 1;
9465}
9466/* Expand strlen.  */
9467int
9468ix86_expand_strlen (out, src, eoschar, align)
9469     rtx out, src, eoschar, align;
9470{
9471  rtx addr, scratch1, scratch2, scratch3, scratch4;
9472
9473  /* The generic case of the strlen expander is long.  Avoid its
9474     expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
9475
9476  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9477      && !TARGET_INLINE_ALL_STRINGOPS
9478      && !optimize_size
9479      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9480    return 0;
9481
9482  addr = force_reg (Pmode, XEXP (src, 0));
9483  scratch1 = gen_reg_rtx (Pmode);
9484
9485  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9486      && !optimize_size)
9487    {
9488      /* Well, it seems that some optimizer does not combine a call like
9489         foo(strlen(bar), strlen(bar));
9490         when the move and the subtraction are done here.  It does
9491         calculate the length just once when these instructions are done
9492         inside of output_strlen_unroll().  But since &bar[strlen(bar)] is
9493         often used, and this uses one fewer register for the lifetime of
9494         output_strlen_unroll(), I think this is better.  */
9495
9496      emit_move_insn (out, addr);
9497
9498      ix86_expand_strlensi_unroll_1 (out, align);
9499
9500      /* strlensi_unroll_1 returns the address of the zero at the end of
9501         the string, like memchr(), so compute the length by subtracting
9502         the start address.  */
9503      if (TARGET_64BIT)
9504	emit_insn (gen_subdi3 (out, out, addr));
9505      else
9506	emit_insn (gen_subsi3 (out, out, addr));
9507    }
9508  else
9509    {
9510      scratch2 = gen_reg_rtx (Pmode);
9511      scratch3 = gen_reg_rtx (Pmode);
9512      scratch4 = force_reg (Pmode, constm1_rtx);
9513
9514      emit_move_insn (scratch3, addr);
9515      eoschar = force_reg (QImode, eoschar);
9516
9517      emit_insn (gen_cld ());
9518      if (TARGET_64BIT)
9519	{
9520	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9521					 align, scratch4, scratch3));
9522	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9523	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9524	}
9525      else
9526	{
9527	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9528				     align, scratch4, scratch3));
9529	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9530	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9531	}
9532    }
9533  return 1;
9534}
9535
9536/* Expand the appropriate insns for doing strlen if not just doing
9537   repnz; scasb
9538
9539   out = result, initialized with the start address
9540   align_rtx = alignment of the address.
9541   scratch = scratch register, initialized with the start address when
9542	not aligned, otherwise undefined
9543
9544   This is just the body.  It needs the initializations mentioned above
9545   and some address computing at the end.  These things are done in i386.md.  */
9546
9547static void
9548ix86_expand_strlensi_unroll_1 (out, align_rtx)
9549     rtx out, align_rtx;
9550{
9551  int align;
9552  rtx tmp;
9553  rtx align_2_label = NULL_RTX;
9554  rtx align_3_label = NULL_RTX;
9555  rtx align_4_label = gen_label_rtx ();
9556  rtx end_0_label = gen_label_rtx ();
9557  rtx mem;
9558  rtx tmpreg = gen_reg_rtx (SImode);
9559  rtx scratch = gen_reg_rtx (SImode);
9560
9561  align = 0;
9562  if (GET_CODE (align_rtx) == CONST_INT)
9563    align = INTVAL (align_rtx);
9564
9565  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
9566
9567  /* Is there a known alignment and is it less than 4?  */
9568  if (align < 4)
9569    {
9570      rtx scratch1 = gen_reg_rtx (Pmode);
9571      emit_move_insn (scratch1, out);
9572      /* Is there a known alignment and is it not 2? */
9573      if (align != 2)
9574	{
9575	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9576	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9577
9578	  /* Leave just the 3 lower bits.  */
9579	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9580				    NULL_RTX, 0, OPTAB_WIDEN);
9581
9582	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9583				   Pmode, 1, align_4_label);
9584	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9585				   Pmode, 1, align_2_label);
9586	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9587				   Pmode, 1, align_3_label);
9588	}
9589      else
9590        {
9591	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
9592	     check whether it is aligned to a 4-byte boundary.  */
9593
9594	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9595				    NULL_RTX, 0, OPTAB_WIDEN);
9596
9597	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9598				   Pmode, 1, align_4_label);
9599        }
9600
9601      mem = gen_rtx_MEM (QImode, out);
9602
9603      /* Now compare the bytes.  */
9604
9605      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
9606      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9607			       QImode, 1, end_0_label);
9608
9609      /* Increment the address.  */
9610      if (TARGET_64BIT)
9611	emit_insn (gen_adddi3 (out, out, const1_rtx));
9612      else
9613	emit_insn (gen_addsi3 (out, out, const1_rtx));
9614
9615      /* Not needed with an alignment of 2.  */
9616      if (align != 2)
9617	{
9618	  emit_label (align_2_label);
9619
9620	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9621				   end_0_label);
9622
9623	  if (TARGET_64BIT)
9624	    emit_insn (gen_adddi3 (out, out, const1_rtx));
9625	  else
9626	    emit_insn (gen_addsi3 (out, out, const1_rtx));
9627
9628	  emit_label (align_3_label);
9629	}
9630
9631      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9632			       end_0_label);
9633
9634      if (TARGET_64BIT)
9635	emit_insn (gen_adddi3 (out, out, const1_rtx));
9636      else
9637	emit_insn (gen_addsi3 (out, out, const1_rtx));
9638    }
9639
9640  /* Generate the loop to check 4 bytes at a time.  It is not a good idea
9641     to align this loop; doing so only enlarges the code without speeding
9642     it up.  */
9643  emit_label (align_4_label);
9644
9645  mem = gen_rtx_MEM (SImode, out);
9646  emit_move_insn (scratch, mem);
9647  if (TARGET_64BIT)
9648    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9649  else
9650    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9651
9652  /* This formula yields a nonzero result iff one of the bytes is zero.
9653     This saves three branches inside the loop and many cycles.  */
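  /* (An editorial worked example, not from the original source: the
     sequence below computes

	tmpreg = (scratch - 0x01010101) & ~scratch & 0x80808080

     For scratch = 0x64636261 ("abcd") this is
	0x63626160 & 0x9b9c9d9e & 0x80808080 == 0,
     so the loop continues.  For scratch = 0x00636261 ("abc" plus a NUL
     in the top byte) it is
	0xff626160 & 0xff9c9d9e & 0x80808080 == 0x80000000,
     flagging the zero byte: subtracting 1 turns a zero byte into 0xff,
     setting its 0x80 marker bit, while the ~scratch mask filters out
     bytes whose high bit was already set.)  */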
9654
9655  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9656  emit_insn (gen_one_cmplsi2 (scratch, scratch));
9657  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9658  emit_insn (gen_andsi3 (tmpreg, tmpreg,
9659			 GEN_INT (trunc_int_for_mode
9660				  (0x80808080, SImode))));
9661  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
9662			   align_4_label);
9663
9664  if (TARGET_CMOVE)
9665    {
9666       rtx reg = gen_reg_rtx (SImode);
9667       rtx reg2 = gen_reg_rtx (Pmode);
9668       emit_move_insn (reg, tmpreg);
9669       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9670
9671       /* If zero is not in the first two bytes, move two bytes forward.  */
9672       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9673       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9674       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9675       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9676			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
9677						     reg,
9678						     tmpreg)));
9679       /* Emit lea manually to avoid clobbering of flags.  */
9680       emit_insn (gen_rtx_SET (SImode, reg2,
9681			       gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9682
9683       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9684       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9685       emit_insn (gen_rtx_SET (VOIDmode, out,
9686			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9687						     reg2,
9688						     out)));
9689
9690    }
9691  else
9692    {
9693       rtx end_2_label = gen_label_rtx ();
9694       /* Is zero in the first two bytes? */
9695
9696       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9697       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9698       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9699       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9700                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9701                            pc_rtx);
9702       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9703       JUMP_LABEL (tmp) = end_2_label;
9704
9705       /* Not in the first two.  Move two bytes forward.  */
9706       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9707       if (TARGET_64BIT)
9708	 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9709       else
9710	 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9711
9712       emit_label (end_2_label);
9713
9714    }
9715
9716  /* Avoid branch in fixing the byte.  */
9717  tmpreg = gen_lowpart (QImode, tmpreg);
9718  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9719  if (TARGET_64BIT)
9720    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9721  else
9722    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
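  /* (Editorial note: at this point OUT is four bytes past the start of
     the two-byte group known to contain the zero byte.  The doubling
     add copies the marker bit of that group's first byte into the
     carry flag, and the subtract-with-borrow then backs OUT up by 4 or
     3, landing it exactly on the terminating zero without a branch.)  */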
9723
9724  emit_label (end_0_label);
9725}
9726
9727/* Clear stack slot assignments remembered from previous functions.
9728   This is called from INIT_EXPANDERS once before RTL is emitted for each
9729   function.  */
9730
9731static void
9732ix86_init_machine_status (p)
9733     struct function *p;
9734{
9735  p->machine = (struct machine_function *)
9736    xcalloc (1, sizeof (struct machine_function));
9737}
9738
9739/* Mark machine specific bits of P for GC.  */
9740static void
9741ix86_mark_machine_status (p)
9742     struct function *p;
9743{
9744  struct machine_function *machine = p->machine;
9745  enum machine_mode mode;
9746  int n;
9747
9748  if (! machine)
9749    return;
9750
9751  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9752       mode = (enum machine_mode) ((int) mode + 1))
9753    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9754      ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9755}
9756
9757static void
9758ix86_free_machine_status (p)
9759     struct function *p;
9760{
9761  free (p->machine);
9762  p->machine = NULL;
9763}
9764
9765/* Return a MEM corresponding to a stack slot with mode MODE.
9766   Allocate a new slot if necessary.
9767
9768   The RTL for a function can have several slots available: N is
9769   which slot to use.  */
9770
9771rtx
9772assign_386_stack_local (mode, n)
9773     enum machine_mode mode;
9774     int n;
9775{
9776  if (n < 0 || n >= MAX_386_STACK_LOCALS)
9777    abort ();
9778
9779  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9780    ix86_stack_locals[(int) mode][n]
9781      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9782
9783  return ix86_stack_locals[(int) mode][n];
9784}
9785
9786/* Calculate the length of the memory address in the instruction
9787   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
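/* (Editorial examples; the value is the number of address bytes this
   function reports, beyond the opcode and modrm byte:

	(%eax)		-> 0	register indirect
	(%esp), (%ebp)	-> 1	these force the longer modrm forms
	12(%eax)	-> 1	disp8
	0x12345678	-> 4	direct addressing, disp32 only
	4(%eax,%ebx,2)	-> 2	SIB byte plus disp8 )  */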
9788
9789static int
9790memory_address_length (addr)
9791     rtx addr;
9792{
9793  struct ix86_address parts;
9794  rtx base, index, disp;
9795  int len;
9796
9797  if (GET_CODE (addr) == PRE_DEC
9798      || GET_CODE (addr) == POST_INC
9799      || GET_CODE (addr) == PRE_MODIFY
9800      || GET_CODE (addr) == POST_MODIFY)
9801    return 0;
9802
9803  if (! ix86_decompose_address (addr, &parts))
9804    abort ();
9805
9806  base = parts.base;
9807  index = parts.index;
9808  disp = parts.disp;
9809  len = 0;
9810
9811  /* Register Indirect.  */
9812  if (base && !index && !disp)
9813    {
9814      /* Special cases: ebp and esp need the two-byte modrm form.  */
9815      if (addr == stack_pointer_rtx
9816	  || addr == arg_pointer_rtx
9817	  || addr == frame_pointer_rtx
9818	  || addr == hard_frame_pointer_rtx)
9819	len = 1;
9820    }
9821
9822  /* Direct Addressing.  */
9823  else if (disp && !base && !index)
9824    len = 4;
9825
9826  else
9827    {
9828      /* Find the length of the displacement constant.  */
9829      if (disp)
9830	{
9831	  if (GET_CODE (disp) == CONST_INT
9832	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9833	    len = 1;
9834	  else
9835	    len = 4;
9836	}
9837
9838      /* An index requires the two-byte modrm form.  */
9839      if (index)
9840	len += 1;
9841    }
9842
9843  return len;
9844}
9845
9846/* Compute default value for the "length_immediate" attribute.  When
9847   SHORTFORM is set, expect that the insn has an 8bit immediate alternative.  */
9848int
9849ix86_attr_length_immediate_default (insn, shortform)
9850     rtx insn;
9851     int shortform;
9852{
9853  int len = 0;
9854  int i;
9855  extract_insn_cached (insn);
9856  for (i = recog_data.n_operands - 1; i >= 0; --i)
9857    if (CONSTANT_P (recog_data.operand[i]))
9858      {
9859	if (len)
9860	  abort ();
9861	if (shortform
9862	    && GET_CODE (recog_data.operand[i]) == CONST_INT
9863	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9864	  len = 1;
9865	else
9866	  {
9867	    switch (get_attr_mode (insn))
9868	      {
9869		case MODE_QI:
9870		  len+=1;
9871		  break;
9872		case MODE_HI:
9873		  len+=2;
9874		  break;
9875		case MODE_SI:
9876		  len+=4;
9877		  break;
9878		/* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
9879		case MODE_DI:
9880		  len+=4;
9881		  break;
9882		default:
9883		  fatal_insn ("unknown insn mode", insn);
9884	      }
9885	  }
9886      }
9887  return len;
9888}
9889/* Compute default value for "length_address" attribute.  */
9890int
9891ix86_attr_length_address_default (insn)
9892     rtx insn;
9893{
9894  int i;
9895  extract_insn_cached (insn);
9896  for (i = recog_data.n_operands - 1; i >= 0; --i)
9897    if (GET_CODE (recog_data.operand[i]) == MEM)
9898      return memory_address_length (XEXP (recog_data.operand[i], 0));
9902  return 0;
9903}
9904
9905/* Return the maximum number of instructions a cpu can issue.  */
9906
9907static int
9908ix86_issue_rate ()
9909{
9910  switch (ix86_cpu)
9911    {
9912    case PROCESSOR_PENTIUM:
9913    case PROCESSOR_K6:
9914      return 2;
9915
9916    case PROCESSOR_PENTIUMPRO:
9917    case PROCESSOR_PENTIUM4:
9918    case PROCESSOR_ATHLON:
9919      return 3;
9920
9921    default:
9922      return 1;
9923    }
9924}
9925
9926/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
9927   by DEP_INSN and nothing else set by DEP_INSN.  */
9928
9929static int
9930ix86_flags_dependant (insn, dep_insn, insn_type)
9931     rtx insn, dep_insn;
9932     enum attr_type insn_type;
9933{
9934  rtx set, set2;
9935
9936  /* Simplify the test for uninteresting insns.  */
9937  if (insn_type != TYPE_SETCC
9938      && insn_type != TYPE_ICMOV
9939      && insn_type != TYPE_FCMOV
9940      && insn_type != TYPE_IBR)
9941    return 0;
9942
9943  if ((set = single_set (dep_insn)) != 0)
9944    {
9945      set = SET_DEST (set);
9946      set2 = NULL_RTX;
9947    }
9948  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9949	   && XVECLEN (PATTERN (dep_insn), 0) == 2
9950	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9951	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9952    {
9953      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9954      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
9955    }
9956  else
9957    return 0;
9958
9959  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9960    return 0;
9961
9962  /* This test is true if the dependent insn reads the flags but
9963     not any other potentially set register.  */
9964  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9965    return 0;
9966
9967  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9968    return 0;
9969
9970  return 1;
9971}
9972
9973/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9974   address with operands set by DEP_INSN.  */
9975
9976static int
9977ix86_agi_dependant (insn, dep_insn, insn_type)
9978     rtx insn, dep_insn;
9979     enum attr_type insn_type;
9980{
9981  rtx addr;
9982
9983  if (insn_type == TYPE_LEA
9984      && TARGET_PENTIUM)
9985    {
9986      addr = PATTERN (insn);
9987      if (GET_CODE (addr) == SET)
9988	;
9989      else if (GET_CODE (addr) == PARALLEL
9990	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9991	addr = XVECEXP (addr, 0, 0);
9992      else
9993	abort ();
9994      addr = SET_SRC (addr);
9995    }
9996  else
9997    {
9998      int i;
9999      extract_insn_cached (insn);
10000      for (i = recog_data.n_operands - 1; i >= 0; --i)
10001	if (GET_CODE (recog_data.operand[i]) == MEM)
10002	  {
10003	    addr = XEXP (recog_data.operand[i], 0);
10004	    goto found;
10005	  }
10006      return 0;
10007    found:;
10008    }
10009
10010  return modified_in_p (addr, dep_insn);
10011}
10012
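/* (Editorial note: this is the target hook behind ADJUST_COST -- it
   tweaks the scheduling cost of the dependency LINK between INSN and
   DEP_INSN on a per-CPU basis.)  */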
10013static int
10014ix86_adjust_cost (insn, link, dep_insn, cost)
10015     rtx insn, link, dep_insn;
10016     int cost;
10017{
10018  enum attr_type insn_type, dep_insn_type;
10019  enum attr_memory memory, dep_memory;
10020  rtx set, set2;
10021  int dep_insn_code_number;
10022
10023  /* Anti and output dependencies have zero cost on all CPUs.  */
10024  if (REG_NOTE_KIND (link) != 0)
10025    return 0;
10026
10027  dep_insn_code_number = recog_memoized (dep_insn);
10028
10029  /* If we can't recognize the insns, we can't really do anything.  */
10030  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10031    return cost;
10032
10033  insn_type = get_attr_type (insn);
10034  dep_insn_type = get_attr_type (dep_insn);
10035
10036  switch (ix86_cpu)
10037    {
10038    case PROCESSOR_PENTIUM:
10039      /* Address Generation Interlock adds a cycle of latency.  */
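      /* (An editorial example: the classic AGI stall is "addl $4, %ebx"
	 immediately followed by "movl (%ebx), %eax" -- on the Pentium the
	 load's address generation must wait an extra cycle for %ebx.)  */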
10040      if (ix86_agi_dependant (insn, dep_insn, insn_type))
10041	cost += 1;
10042
10043      /* ??? Compares pair with jump/setcc.  */
10044      if (ix86_flags_dependant (insn, dep_insn, insn_type))
10045	cost = 0;
10046
10047      /* Floating point stores require the value to be ready one cycle earlier.  */
10048      if (insn_type == TYPE_FMOV
10049	  && get_attr_memory (insn) == MEMORY_STORE
10050	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
10051	cost += 1;
10052      break;
10053
10054    case PROCESSOR_PENTIUMPRO:
10055      memory = get_attr_memory (insn);
10056      dep_memory = get_attr_memory (dep_insn);
10057
10058      /* Since we can't represent delayed latencies of load+operation,
10059	 increase the cost here for non-imov insns.  */
10060      if (dep_insn_type != TYPE_IMOV
10061          && dep_insn_type != TYPE_FMOV
10062          && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10063	cost += 1;
10064
10065      /* INT->FP conversion is expensive.  */
10066      if (get_attr_fp_int_src (dep_insn))
10067	cost += 5;
10068
10069      /* There is one cycle extra latency between an FP op and a store.  */
10070      if (insn_type == TYPE_FMOV
10071	  && (set = single_set (dep_insn)) != NULL_RTX
10072	  && (set2 = single_set (insn)) != NULL_RTX
10073	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10074	  && GET_CODE (SET_DEST (set2)) == MEM)
10075	cost += 1;
10076
10077      /* Show ability of reorder buffer to hide latency of load by executing
10078	 in parallel with previous instruction in case
10079	 previous instruction is not needed to compute the address.  */
10080      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10081	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
10082 	{
10083	  /* Claim moves to take one cycle, as the core can issue one load
10084	     at a time and the next load can start a cycle later.  */
10085	  if (dep_insn_type == TYPE_IMOV
10086	      || dep_insn_type == TYPE_FMOV)
10087	    cost = 1;
10088	  else if (cost > 1)
10089	    cost--;
10090	}
10091      break;
10092
10093    case PROCESSOR_K6:
10094      memory = get_attr_memory (insn);
10095      dep_memory = get_attr_memory (dep_insn);
10096      /* The esp dependency is resolved before the instruction is really
10097         finished.  */
10098      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10099	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10100	return 1;
10101
10102      /* Since we can't represent delayed latencies of load+operation,
10103	 increase the cost here for non-imov insns.  */
10104      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10105	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10106
10107      /* INT->FP conversion is expensive.  */
10108      if (get_attr_fp_int_src (dep_insn))
10109	cost += 5;
10110
10111      /* Show ability of reorder buffer to hide latency of load by executing
10112	 in parallel with previous instruction in case
10113	 previous instruction is not needed to compute the address.  */
10114      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10115	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
10116 	{
10117	  /* Claim moves to take one cycle, as the core can issue one load
10118	     at a time and the next load can start a cycle later.  */
10119	  if (dep_insn_type == TYPE_IMOV
10120	      || dep_insn_type == TYPE_FMOV)
10121	    cost = 1;
10122	  else if (cost > 2)
10123	    cost -= 2;
10124	  else
10125	    cost = 1;
10126	}
10127      break;
10128
10129    case PROCESSOR_ATHLON:
10130      memory = get_attr_memory (insn);
10131      dep_memory = get_attr_memory (dep_insn);
10132
10133      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10134	{
10135	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10136	    cost += 2;
10137	  else
10138	    cost += 3;
10139        }
10140      /* Show ability of reorder buffer to hide latency of load by executing
10141	 in parallel with previous instruction in case
10142	 previous instruction is not needed to compute the address.  */
10143      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10144	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
10145 	{
10146	  /* Claim moves to take one cycle, as the core can issue one load
10147	     at a time and the next load can start a cycle later.  */
10148	  if (dep_insn_type == TYPE_IMOV
10149	      || dep_insn_type == TYPE_FMOV)
10150	    cost = 0;
10151	  else if (cost >= 3)
10152	    cost -= 3;
10153	  else
10154	    cost = 0;
10155	}
      break;

10157    default:
10158      break;
10159    }
10160
10161  return cost;
10162}
10163
10164static union
10165{
10166  struct ppro_sched_data
10167  {
10168    rtx decode[3];
10169    int issued_this_cycle;
10170  } ppro;
10171} ix86_sched_data;
10172
10173static int
10174ix86_safe_length (insn)
10175     rtx insn;
10176{
10177  if (recog_memoized (insn) >= 0)
10178    return get_attr_length (insn);
10179  else
10180    return 128;
10181}
10182
10183static int
10184ix86_safe_length_prefix (insn)
10185     rtx insn;
10186{
10187  if (recog_memoized (insn) >= 0)
10188    return get_attr_length (insn);
10189  else
10190    return 0;
10191}
10192
10193static enum attr_memory
10194ix86_safe_memory (insn)
10195     rtx insn;
10196{
10197  if (recog_memoized (insn) >= 0)
10198    return get_attr_memory (insn);
10199  else
10200    return MEMORY_UNKNOWN;
10201}
10202
10203static enum attr_pent_pair
10204ix86_safe_pent_pair (insn)
10205     rtx insn;
10206{
10207  if (recog_memoized (insn) >= 0)
10208    return get_attr_pent_pair (insn);
10209  else
10210    return PENT_PAIR_NP;
10211}
10212
10213static enum attr_ppro_uops
10214ix86_safe_ppro_uops (insn)
10215     rtx insn;
10216{
10217  if (recog_memoized (insn) >= 0)
10218    return get_attr_ppro_uops (insn);
10219  else
10220    return PPRO_UOPS_MANY;
10221}
10222
10223static void
10224ix86_dump_ppro_packet (dump)
10225     FILE *dump;
10226{
10227  if (ix86_sched_data.ppro.decode[0])
10228    {
10229      fprintf (dump, "PPRO packet: %d",
10230	       INSN_UID (ix86_sched_data.ppro.decode[0]));
10231      if (ix86_sched_data.ppro.decode[1])
10232	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10233      if (ix86_sched_data.ppro.decode[2])
10234	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10235      fputc ('\n', dump);
10236    }
10237}
10238
10239/* We're beginning a new block.  Initialize data structures as necessary.  */
10240
10241static void
10242ix86_sched_init (dump, sched_verbose, veclen)
10243     FILE *dump ATTRIBUTE_UNUSED;
10244     int sched_verbose ATTRIBUTE_UNUSED;
10245     int veclen ATTRIBUTE_UNUSED;
10246{
10247  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10248}
10249
10250/* Shift INSN to SLOT, and shift everything else down.  */
10251
10252static void
10253ix86_reorder_insn (insnp, slot)
10254     rtx *insnp, *slot;
10255{
10256  if (insnp != slot)
10257    {
10258      rtx insn = *insnp;
10259      do
10260	insnp[0] = insnp[1];
10261      while (++insnp != slot);
10262      *insnp = insn;
10263    }
10264}
10265
10266/* Find an instruction with the given pairability and the minimal number of
10267   cycles lost to the fact that the CPU waits for both pipelines to finish
10268   before reading the next instructions.  Also take care that the two
10269   instructions together cannot exceed 7 bytes.  */
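/* (Editorial note: the Pentium issues into two pipes, U and V.  The
   pent_pair attribute used below roughly classifies insns as pairable
   in either pipe (UV), only in the U pipe (PU), only in the V pipe
   (PV), or not pairable at all (NP).)  */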
10270
10271static rtx *
10272ix86_pent_find_pair (e_ready, ready, type, first)
10273     rtx *e_ready;
10274     rtx *ready;
10275     enum attr_pent_pair type;
10276     rtx first;
10277{
10278  int mincycles, cycles;
10279  enum attr_pent_pair tmp;
10280  enum attr_memory memory;
10281  rtx *insnp, *bestinsnp = NULL;
10282
10283  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10284    return NULL;
10285
10286  memory = ix86_safe_memory (first);
10287  cycles = result_ready_cost (first);
10288  mincycles = INT_MAX;
10289
10290  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10291    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10292	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
10293      {
10294	enum attr_memory second_memory;
10295	int secondcycles, currentcycles;
10296
10297	second_memory = ix86_safe_memory (*insnp);
10298	secondcycles = result_ready_cost (*insnp);
10299	currentcycles = abs (cycles - secondcycles);
10300
10301	if (secondcycles >= 1 && cycles >= 1)
10302	  {
10303	    /* Two read/modify/write instructions together take two
10304	       cycles longer.  */
10305	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10306	      currentcycles += 2;
10307
10308	    /* A read/modify/write instruction followed by a read/modify
10309	       one takes one cycle longer.  */
10310	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10311	        && tmp != PENT_PAIR_UV
10312	        && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10313	      currentcycles += 1;
10314	  }
10315	if (currentcycles < mincycles)
10316	  bestinsnp = insnp, mincycles = currentcycles;
10317      }
10318
10319  return bestinsnp;
10320}
10321
10322/* Subroutines of ix86_sched_reorder.  */
10323
10324static void
10325ix86_sched_reorder_pentium (ready, e_ready)
10326     rtx *ready;
10327     rtx *e_ready;
10328{
10329  enum attr_pent_pair pair1, pair2;
10330  rtx *insnp;
10331
10332  /* This wouldn't be necessary if Haifa knew that static insn ordering
10333     mattered for which pipe an insn is issued to.  So we have to make
10334     some minor rearrangements.  */
10335
10336  pair1 = ix86_safe_pent_pair (*e_ready);
10337
10338  /* If the first insn is non-pairable, let it be.  */
10339  if (pair1 == PENT_PAIR_NP)
10340    return;
10341
10342  pair2 = PENT_PAIR_NP;
10343  insnp = 0;
10344
10345  /* If the first insn is UV or PV pairable, search for a PU
10346     insn to go with.  */
10347  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
10348    {
10349      insnp = ix86_pent_find_pair (e_ready-1, ready,
10350				   PENT_PAIR_PU, *e_ready);
10351      if (insnp)
10352	pair2 = PENT_PAIR_PU;
10353    }
10354
10355  /* If the first insn is PU or UV pairable, search for a PV
10356     insn to go with.  */
10357  if (pair2 == PENT_PAIR_NP
10358      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10359    {
10360      insnp = ix86_pent_find_pair (e_ready-1, ready,
10361				   PENT_PAIR_PV, *e_ready);
10362      if (insnp)
10363	pair2 = PENT_PAIR_PV;
10364    }
10365
10366  /* If the first insn is pairable, search for a UV
10367     insn to go with.  */
10368  if (pair2 == PENT_PAIR_NP)
10369    {
10370      insnp = ix86_pent_find_pair (e_ready-1, ready,
10371				   PENT_PAIR_UV, *e_ready);
10372      if (insnp)
10373	pair2 = PENT_PAIR_UV;
10374    }
10375
10376  if (pair2 == PENT_PAIR_NP)
10377    return;
10378
10379  /* Found something!  Decide if we need to swap the order.  */
10380  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10381      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10382	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10383	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10384    ix86_reorder_insn (insnp, e_ready);
10385  else
10386    ix86_reorder_insn (insnp, e_ready - 1);
10387}
10388
10389static void
10390ix86_sched_reorder_ppro (ready, e_ready)
10391     rtx *ready;
10392     rtx *e_ready;
10393{
10394  rtx decode[3];
10395  enum attr_ppro_uops cur_uops;
10396  int issued_this_cycle;
10397  rtx *insnp;
10398  int i;
10399
10400  /* At this point .ppro.decode contains the state of the three
10401     decoders from last "cycle".  That is, those insns that were
10402     actually independent.  But here we're scheduling for the
10403     decoder, and we may find things that are decodable in the
10404     same cycle.  */
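  /* (Editorial note: this mirrors the PPro's 4-1-1 decode template --
     per cycle, decoder 0 handles an insn of up to four uops while
     decoders 1 and 2 take single-uop insns only; hence decode[0] gets a
     PPRO_UOPS_FEW/MANY insn and slots 1-2 get PPRO_UOPS_ONE insns.)  */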
10405
10406  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10407  issued_this_cycle = 0;
10408
10409  insnp = e_ready;
10410  cur_uops = ix86_safe_ppro_uops (*insnp);
10411
10412  /* If the decoders are empty, and we've a complex insn at the
10413     head of the priority queue, let it issue without complaint.  */
10414  if (decode[0] == NULL)
10415    {
10416      if (cur_uops == PPRO_UOPS_MANY)
10417	{
10418	  decode[0] = *insnp;
10419	  goto ppro_done;
10420	}
10421
10422      /* Otherwise, search for a 2-4 uop insn to issue.  */
10423      while (cur_uops != PPRO_UOPS_FEW)
10424	{
10425	  if (insnp == ready)
10426	    break;
10427	  cur_uops = ix86_safe_ppro_uops (*--insnp);
10428	}
10429
10430      /* If so, move it to the head of the line.  */
10431      if (cur_uops == PPRO_UOPS_FEW)
10432	ix86_reorder_insn (insnp, e_ready);
10433
10434      /* Issue the head of the queue.  */
10435      issued_this_cycle = 1;
10436      decode[0] = *e_ready--;
10437    }
10438
10439  /* Look for simple insns to fill in the other two slots.  */
10440  for (i = 1; i < 3; ++i)
10441    if (decode[i] == NULL)
10442      {
10443	if (ready >= e_ready)
10444	  goto ppro_done;
10445
10446	insnp = e_ready;
10447	cur_uops = ix86_safe_ppro_uops (*insnp);
10448	while (cur_uops != PPRO_UOPS_ONE)
10449	  {
10450	    if (insnp == ready)
10451	      break;
10452	    cur_uops = ix86_safe_ppro_uops (*--insnp);
10453	  }
10454
10455	/* Found one.  Move it to the head of the queue and issue it.  */
10456	if (cur_uops == PPRO_UOPS_ONE)
10457	  {
10458	    ix86_reorder_insn (insnp, e_ready);
10459	    decode[i] = *e_ready--;
10460	    issued_this_cycle++;
10461	    continue;
10462	  }
10463
10464	/* ??? Didn't find one.  Ideally, here we would do a lazy split
10465	   of 2-uop insns, issue one and queue the other.  */
10466      }
10467
10468 ppro_done:
10469  if (issued_this_cycle == 0)
10470    issued_this_cycle = 1;
10471  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10472}
10473
10474/* We are about to begin issuing insns for this clock cycle.
10475   Override the default sort algorithm to better slot instructions.  */
10476static int
10477ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10478     FILE *dump ATTRIBUTE_UNUSED;
10479     int sched_verbose ATTRIBUTE_UNUSED;
10480     rtx *ready;
10481     int *n_readyp;
10482     int clock_var ATTRIBUTE_UNUSED;
10483{
10484  int n_ready = *n_readyp;
10485  rtx *e_ready = ready + n_ready - 1;
10486
10487  if (n_ready < 2)
10488    goto out;
10489
10490  switch (ix86_cpu)
10491    {
10492    default:
10493      break;
10494
10495    case PROCESSOR_PENTIUM:
10496      ix86_sched_reorder_pentium (ready, e_ready);
10497      break;
10498
10499    case PROCESSOR_PENTIUMPRO:
10500      ix86_sched_reorder_ppro (ready, e_ready);
10501      break;
10502    }
10503
10504out:
10505  return ix86_issue_rate ();
10506}
10507
10508/* We are about to issue INSN.  Return the number of insns left on the
10509   ready queue that can be issued this cycle.  */
10510
10511static int
10512ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10513     FILE *dump;
10514     int sched_verbose;
10515     rtx insn;
10516     int can_issue_more;
10517{
10518  int i;
10519  switch (ix86_cpu)
10520    {
10521    default:
10522      return can_issue_more - 1;
10523
10524    case PROCESSOR_PENTIUMPRO:
10525      {
10526	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10527
10528	if (uops == PPRO_UOPS_MANY)
10529	  {
10530	    if (sched_verbose)
10531	      ix86_dump_ppro_packet (dump);
10532	    ix86_sched_data.ppro.decode[0] = insn;
10533	    ix86_sched_data.ppro.decode[1] = NULL;
10534	    ix86_sched_data.ppro.decode[2] = NULL;
10535	    if (sched_verbose)
10536	      ix86_dump_ppro_packet (dump);
10537	    ix86_sched_data.ppro.decode[0] = NULL;
10538	  }
10539	else if (uops == PPRO_UOPS_FEW)
10540	  {
10541	    if (sched_verbose)
10542	      ix86_dump_ppro_packet (dump);
10543	    ix86_sched_data.ppro.decode[0] = insn;
10544	    ix86_sched_data.ppro.decode[1] = NULL;
10545	    ix86_sched_data.ppro.decode[2] = NULL;
10546	  }
10547	else
10548	  {
10549	    for (i = 0; i < 3; ++i)
10550	      if (ix86_sched_data.ppro.decode[i] == NULL)
10551		{
10552		  ix86_sched_data.ppro.decode[i] = insn;
10553		  break;
10554		}
10555	    if (i == 3)
10556	      abort ();
10557	    if (i == 2)
10558	      {
10559	        if (sched_verbose)
10560	          ix86_dump_ppro_packet (dump);
10561		ix86_sched_data.ppro.decode[0] = NULL;
10562		ix86_sched_data.ppro.decode[1] = NULL;
10563		ix86_sched_data.ppro.decode[2] = NULL;
10564	      }
10565	  }
10566      }
10567      return --ix86_sched_data.ppro.issued_this_cycle;
10568    }
10569}
10570
10571/* Walk through INSNS and look for MEM references whose address is DSTREG or
10572   SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
10573   appropriate.  */
10574
10575void
10576ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10577     rtx insns;
10578     rtx dstref, srcref, dstreg, srcreg;
10579{
10580  rtx insn;
10581
10582  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10583    if (INSN_P (insn))
10584      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10585				 dstreg, srcreg);
10586}
10587
10588/* Subroutine of above to actually do the updating by recursively walking
10589   the rtx.  */
10590
10591static void
10592ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10593     rtx x;
10594     rtx dstref, srcref, dstreg, srcreg;
10595{
10596  enum rtx_code code = GET_CODE (x);
10597  const char *format_ptr = GET_RTX_FORMAT (code);
10598  int i, j;
10599
10600  if (code == MEM && XEXP (x, 0) == dstreg)
10601    MEM_COPY_ATTRIBUTES (x, dstref);
10602  else if (code == MEM && XEXP (x, 0) == srcreg)
10603    MEM_COPY_ATTRIBUTES (x, srcref);
10604
10605  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10606    {
10607      if (*format_ptr == 'e')
10608	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10609				   dstreg, srcreg);
10610      else if (*format_ptr == 'E')
10611	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10612	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10613				     dstreg, srcreg);
10614    }
10615}
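
/* Illustrative example (not from the sources): after a block move is
   expanded through fresh pointer registers, an insn such as

     (set (mem:SI (reg:SI 64)) (reg:SI 66))

   with (reg:SI 64) == DSTREG picks up the alias set and other memory
   attributes of DSTREF here, so later passes retain the aliasing
   information of the original destination.  (Register numbers are
   made up for the example.)  */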
10616
10617/* Compute the alignment given to a constant that is being placed in memory.
10618   EXP is the constant and ALIGN is the alignment that the object would
10619   ordinarily have.
10620   The value of this function is used instead of that alignment to align
10621   the object.  */
10622
10623int
10624ix86_constant_alignment (exp, align)
10625     tree exp;
10626     int align;
10627{
10628  if (TREE_CODE (exp) == REAL_CST)
10629    {
10630      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10631	return 64;
10632      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10633	return 128;
10634    }
10635  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10636	   && align < 256)
10637    return 256;
10638
10639  return align;
10640}
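
/* Worked example (illustrative): a DFmode REAL_CST that would get the
   default 32-bit alignment is raised to 64 above, and a STRING_CST of
   31 or more bytes is raised to 256 bits so block operations can use
   wide aligned accesses; shorter strings keep ALIGN unchanged.  */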
10641
10642/* Compute the alignment for a static variable.
10643   TYPE is the data type, and ALIGN is the alignment that
10644   the object would ordinarily have.  The value of this function is used
10645   instead of that alignment to align the object.  */
10646
10647int
10648ix86_data_alignment (type, align)
10649     tree type;
10650     int align;
10651{
10652  if (AGGREGATE_TYPE_P (type)
10653       && TYPE_SIZE (type)
10654       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10655       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10656	   || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10657    return 256;
10658
10659  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
10660     to a 16-byte boundary.  */
10661  if (TARGET_64BIT)
10662    {
10663      if (AGGREGATE_TYPE_P (type)
10664	   && TYPE_SIZE (type)
10665	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10666	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10667	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10668	return 128;
10669    }
10670
10671  if (TREE_CODE (type) == ARRAY_TYPE)
10672    {
10673      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10674	return 64;
10675      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10676	return 128;
10677    }
10678  else if (TREE_CODE (type) == COMPLEX_TYPE)
10679    {
10681      if (TYPE_MODE (type) == DCmode && align < 64)
10682	return 64;
10683      if (TYPE_MODE (type) == XCmode && align < 128)
10684	return 128;
10685    }
10686  else if ((TREE_CODE (type) == RECORD_TYPE
10687	    || TREE_CODE (type) == UNION_TYPE
10688	    || TREE_CODE (type) == QUAL_UNION_TYPE)
10689	   && TYPE_FIELDS (type))
10690    {
10691      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10692	return 64;
10693      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10694	return 128;
10695    }
10696  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10697	   || TREE_CODE (type) == INTEGER_TYPE)
10698    {
10699      if (TYPE_MODE (type) == DFmode && align < 64)
10700	return 64;
10701      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10702	return 128;
10703    }
10704
10705  return align;
10706}
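
/* Worked example (illustrative): "static double d[4]" has a TYPE_SIZE
   of 256 bits, so the aggregate test above raises a smaller ALIGN to
   256; a plain "static double x" is not an aggregate, but the
   REAL_TYPE test still raises a 32-bit ALIGN to 64.  */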
10707
10708/* Compute the alignment for a local variable.
10709   TYPE is the data type, and ALIGN is the alignment that
10710   the object would ordinarily have.  The value of this macro is used
10711   instead of that alignment to align the object.  */
10712
10713int
10714ix86_local_alignment (type, align)
10715     tree type;
10716     int align;
10717{
10718  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
10719     to a 16-byte boundary; note that TYPE_SIZE below is in bits.  */
10720  if (TARGET_64BIT)
10721    {
10722      if (AGGREGATE_TYPE_P (type)
10723	   && TYPE_SIZE (type)
10724	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10725	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10726	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10727	return 128;
10728    }
10729  if (TREE_CODE (type) == ARRAY_TYPE)
10730    {
10731      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10732	return 64;
10733      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10734	return 128;
10735    }
10736  else if (TREE_CODE (type) == COMPLEX_TYPE)
10737    {
10738      if (TYPE_MODE (type) == DCmode && align < 64)
10739	return 64;
10740      if (TYPE_MODE (type) == XCmode && align < 128)
10741	return 128;
10742    }
10743  else if ((TREE_CODE (type) == RECORD_TYPE
10744	    || TREE_CODE (type) == UNION_TYPE
10745	    || TREE_CODE (type) == QUAL_UNION_TYPE)
10746	   && TYPE_FIELDS (type))
10747    {
10748      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10749	return 64;
10750      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10751	return 128;
10752    }
10753  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10754	   || TREE_CODE (type) == INTEGER_TYPE)
10755    {
10757      if (TYPE_MODE (type) == DFmode && align < 64)
10758	return 64;
10759      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10760	return 128;
10761    }
10762  return align;
10763}
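
/* Worked example (illustrative): on x86-64 a local "char buf[32]"
   (TYPE_SIZE == 256 bits, i.e. at least 16 bytes) gets 128-bit stack
   alignment from the test above, while "char buf[8]" keeps whatever
   alignment it already had.  */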
10764
10765/* Emit RTL insns to initialize the variable parts of a trampoline.
10766   FNADDR is an RTX for the address of the function's pure code.
10767   CXT is an RTX for the static chain value for the function.  */
10768void
10769x86_initialize_trampoline (tramp, fnaddr, cxt)
10770     rtx tramp, fnaddr, cxt;
10771{
10772  if (!TARGET_64BIT)
10773    {
10774      /* Compute offset from the end of the jmp to the target function.  */
10775      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10776			       plus_constant (tramp, 10),
10777			       NULL_RTX, 1, OPTAB_DIRECT);
10778      emit_move_insn (gen_rtx_MEM (QImode, tramp),
10779		      GEN_INT (trunc_int_for_mode (0xb9, QImode)));
10780      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10781      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10782		      GEN_INT (trunc_int_for_mode (0xe9, QImode)));
10783      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10784    }
10785  else
10786    {
10787      int offset = 0;
10788      /* Try to load the address using the shorter movl instead of movabs.
10789         We may want to support movq for kernel mode, but the kernel does not
10790         use trampolines at the moment.  */
10791      if (x86_64_zero_extended_value (fnaddr))
10792	{
10793	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
10794	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10795			  GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
10796	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10797			  gen_lowpart (SImode, fnaddr));
10798	  offset += 6;
10799	}
10800      else
10801	{
10802	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10803			  GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
10804	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10805			  fnaddr);
10806	  offset += 10;
10807	}
10808      /* Load static chain using movabs to r10.  */
10809      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10810		      GEN_INT (trunc_int_for_mode (0xba49, HImode)));
10811      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10812		      cxt);
10813      offset += 10;
10814      /* Jump to r11.  */
10815      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10816		      GEN_INT (trunc_int_for_mode (0xff49, HImode)));
10817      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10818		      GEN_INT (trunc_int_for_mode (0xe3, QImode)));
10819      offset += 3;
10820      if (offset > TRAMPOLINE_SIZE)
10821	abort ();
10822    }
10823}
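
/* For reference (encodings inferred from the constants above;
   immediates are stored little-endian):

   32-bit trampoline, 10 bytes:
     0:  b9 <cxt32>     movl   $CXT, %ecx
     5:  e9 <disp32>    jmp    FNADDR      disp = FNADDR - (TRAMP + 10)

   64-bit trampoline:
     41 bb <imm32>      movl   $FNADDR, %r11d   (zero-extended form)
     49 bb <imm64>      movabs $FNADDR, %r11    (general form)
     49 ba <imm64>      movabs $CXT, %r10
     49 ff e3           jmp    *%r11  */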
10824
10825#define def_builtin(MASK, NAME, TYPE, CODE)				\
10826do {									\
10827  if ((MASK) & target_flags)						\
10828    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL);	\
10829} while (0)
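
/* For illustration, a call such as

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   registers the builtin only when MASK_SSE is present in target_flags,
   so builtins for disabled instruction sets never become visible.  */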
10830
10831struct builtin_description
10832{
10833  const unsigned int mask;
10834  const enum insn_code icode;
10835  const char *const name;
10836  const enum ix86_builtins code;
10837  const enum rtx_code comparison;
10838  const unsigned int flag;
10839};
10840
10841static const struct builtin_description bdesc_comi[] =
10842{
10843  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10844  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10845  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10846  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10847  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10848  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10849  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10850  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10851  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10852  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10853  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10854  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
10855};
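
/* Note on the comparison/flag columns above: a nonzero flag marks a
   predicate the hardware lacks directly; the expander swaps the two
   operands, so e.g. __builtin_ia32_comigt (a, b) is emitted as a
   COMISS testing b < a.  */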
10856
10857static const struct builtin_description bdesc_2arg[] =
10858{
10859  /* SSE */
10860  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10861  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10862  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10863  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10864  { MASK_SSE, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10865  { MASK_SSE, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10866  { MASK_SSE, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10867  { MASK_SSE, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10868
10869  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10870  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10871  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10872  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10873  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10874  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10875  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10876  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10877  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10878  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10879  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10880  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10881  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10882  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10883  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10884  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10885  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10886  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10887  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10888  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10889  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10890  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10891  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10892  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10893
10894  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10895  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10896  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10897  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10898
10899  { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10900  { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10901  { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10902  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10903  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
10904
10905  /* MMX */
10906  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10907  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10908  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10909  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10910  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10911  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10912
10913  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10914  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10915  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10916  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10917  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10918  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10919  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10920  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10921
10922  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10923  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10924  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
10925
10926  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10927  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10928  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10929  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10930
10931  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10932  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10933
10934  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10935  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10936  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10937  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10938  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10939  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10940
10941  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10942  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10943  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10944  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
10945
10946  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10947  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10948  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10949  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10950  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10951  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
10952
10953  /* Special.  */
10954  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10955  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10956  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10957
10958  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10959  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10960
10961  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10962  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10963  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10964  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10965  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10966  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10967
10968  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10969  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10970  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10971  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10972  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10973  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10974
10975  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10976  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10977  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10978  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10979
10980  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
10981  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
10982
10983};
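
/* Entries above with a null name are skipped by the registration loop
   in ix86_init_mmx_sse_builtins (they get their prototypes from
   explicit def_builtin calls there); the table still supplies the
   icode when ix86_expand_builtin matches on the IX86_BUILTIN_* code.  */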
10984
10985static const struct builtin_description bdesc_1arg[] =
10986{
10987  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
10988  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
10989
10990  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
10991  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
10992  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
10993
10994  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
10995  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
10996  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
10997  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
10998
10999};
11000
11001void
11002ix86_init_builtins ()
11003{
11004  if (TARGET_MMX)
11005    ix86_init_mmx_sse_builtins ();
11006}
11007
11008/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
11009   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
11010   builtins.  */
11011static void
11012ix86_init_mmx_sse_builtins ()
11013{
11014  const struct builtin_description * d;
11015  size_t i;
11016  tree endlink = void_list_node;
11017
11018  tree pchar_type_node = build_pointer_type (char_type_node);
11019  tree pfloat_type_node = build_pointer_type (float_type_node);
11020  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11021  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11022
11023  /* Comparisons.  */
11024  tree int_ftype_v4sf_v4sf
11025    = build_function_type (integer_type_node,
11026			   tree_cons (NULL_TREE, V4SF_type_node,
11027				      tree_cons (NULL_TREE,
11028						 V4SF_type_node,
11029						 endlink)));
11030  tree v4si_ftype_v4sf_v4sf
11031    = build_function_type (V4SI_type_node,
11032			   tree_cons (NULL_TREE, V4SF_type_node,
11033				      tree_cons (NULL_TREE,
11034						 V4SF_type_node,
11035						 endlink)));
11036  /* MMX/SSE/integer conversions.  */
11037  tree int_ftype_v4sf
11038    = build_function_type (integer_type_node,
11039			   tree_cons (NULL_TREE, V4SF_type_node,
11040				      endlink));
11041  tree int_ftype_v8qi
11042    = build_function_type (integer_type_node,
11043			   tree_cons (NULL_TREE, V8QI_type_node,
11044				      endlink));
11045  tree v4sf_ftype_v4sf_int
11046    = build_function_type (V4SF_type_node,
11047			   tree_cons (NULL_TREE, V4SF_type_node,
11048				      tree_cons (NULL_TREE, integer_type_node,
11049						 endlink)));
11050  tree v4sf_ftype_v4sf_v2si
11051    = build_function_type (V4SF_type_node,
11052			   tree_cons (NULL_TREE, V4SF_type_node,
11053				      tree_cons (NULL_TREE, V2SI_type_node,
11054						 endlink)));
11055  tree int_ftype_v4hi_int
11056    = build_function_type (integer_type_node,
11057			   tree_cons (NULL_TREE, V4HI_type_node,
11058				      tree_cons (NULL_TREE, integer_type_node,
11059						 endlink)));
11060  tree v4hi_ftype_v4hi_int_int
11061    = build_function_type (V4HI_type_node,
11062			   tree_cons (NULL_TREE, V4HI_type_node,
11063				      tree_cons (NULL_TREE, integer_type_node,
11064						 tree_cons (NULL_TREE,
11065							    integer_type_node,
11066							    endlink))));
11067  /* Miscellaneous.  */
11068  tree v8qi_ftype_v4hi_v4hi
11069    = build_function_type (V8QI_type_node,
11070			   tree_cons (NULL_TREE, V4HI_type_node,
11071				      tree_cons (NULL_TREE, V4HI_type_node,
11072						 endlink)));
11073  tree v4hi_ftype_v2si_v2si
11074    = build_function_type (V4HI_type_node,
11075			   tree_cons (NULL_TREE, V2SI_type_node,
11076				      tree_cons (NULL_TREE, V2SI_type_node,
11077						 endlink)));
11078  tree v4sf_ftype_v4sf_v4sf_int
11079    = build_function_type (V4SF_type_node,
11080			   tree_cons (NULL_TREE, V4SF_type_node,
11081				      tree_cons (NULL_TREE, V4SF_type_node,
11082						 tree_cons (NULL_TREE,
11083							    integer_type_node,
11084							    endlink))));
11085  tree v4hi_ftype_v8qi_v8qi
11086    = build_function_type (V4HI_type_node,
11087			   tree_cons (NULL_TREE, V8QI_type_node,
11088				      tree_cons (NULL_TREE, V8QI_type_node,
11089						 endlink)));
11090  tree v2si_ftype_v4hi_v4hi
11091    = build_function_type (V2SI_type_node,
11092			   tree_cons (NULL_TREE, V4HI_type_node,
11093				      tree_cons (NULL_TREE, V4HI_type_node,
11094						 endlink)));
11095  tree v4hi_ftype_v4hi_int
11096    = build_function_type (V4HI_type_node,
11097			   tree_cons (NULL_TREE, V4HI_type_node,
11098				      tree_cons (NULL_TREE, integer_type_node,
11099						 endlink)));
11100  tree v4hi_ftype_v4hi_di
11101    = build_function_type (V4HI_type_node,
11102			   tree_cons (NULL_TREE, V4HI_type_node,
11103				      tree_cons (NULL_TREE,
11104						 long_long_integer_type_node,
11105						 endlink)));
11106  tree v2si_ftype_v2si_di
11107    = build_function_type (V2SI_type_node,
11108			   tree_cons (NULL_TREE, V2SI_type_node,
11109				      tree_cons (NULL_TREE,
11110						 long_long_integer_type_node,
11111						 endlink)));
11112  tree void_ftype_void
11113    = build_function_type (void_type_node, endlink);
11114  tree void_ftype_unsigned
11115    = build_function_type (void_type_node,
11116			   tree_cons (NULL_TREE, unsigned_type_node,
11117				      endlink));
11118  tree unsigned_ftype_void
11119    = build_function_type (unsigned_type_node, endlink);
11120  tree di_ftype_void
11121    = build_function_type (long_long_unsigned_type_node, endlink);
11122  tree v4sf_ftype_void
11123    = build_function_type (V4SF_type_node, endlink);
11124  tree v2si_ftype_v4sf
11125    = build_function_type (V2SI_type_node,
11126			   tree_cons (NULL_TREE, V4SF_type_node,
11127				      endlink));
11128  /* Loads/stores.  */
11129  tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11130				  tree_cons (NULL_TREE, V8QI_type_node,
11131					     tree_cons (NULL_TREE,
11132							pchar_type_node,
11133							endlink)));
11134  tree void_ftype_v8qi_v8qi_pchar
11135    = build_function_type (void_type_node, maskmovq_args);
11136  tree v4sf_ftype_pfloat
11137    = build_function_type (V4SF_type_node,
11138			   tree_cons (NULL_TREE, pfloat_type_node,
11139				      endlink));
11140  /* @@@ the type is bogus */
11141  tree v4sf_ftype_v4sf_pv2si
11142    = build_function_type (V4SF_type_node,
11143			   tree_cons (NULL_TREE, V4SF_type_node,
11144				      tree_cons (NULL_TREE, pv2si_type_node,
11145						 endlink)));
11146  tree void_ftype_pv2si_v4sf
11147    = build_function_type (void_type_node,
11148			   tree_cons (NULL_TREE, pv2si_type_node,
11149				      tree_cons (NULL_TREE, V4SF_type_node,
11150						 endlink)));
11151  tree void_ftype_pfloat_v4sf
11152    = build_function_type (void_type_node,
11153			   tree_cons (NULL_TREE, pfloat_type_node,
11154				      tree_cons (NULL_TREE, V4SF_type_node,
11155						 endlink)));
11156  tree void_ftype_pdi_di
11157    = build_function_type (void_type_node,
11158			   tree_cons (NULL_TREE, pdi_type_node,
11159				      tree_cons (NULL_TREE,
11160						 long_long_unsigned_type_node,
11161						 endlink)));
11162  /* Normal vector unops.  */
11163  tree v4sf_ftype_v4sf
11164    = build_function_type (V4SF_type_node,
11165			   tree_cons (NULL_TREE, V4SF_type_node,
11166				      endlink));
11167
11168  /* Normal vector binops.  */
11169  tree v4sf_ftype_v4sf_v4sf
11170    = build_function_type (V4SF_type_node,
11171			   tree_cons (NULL_TREE, V4SF_type_node,
11172				      tree_cons (NULL_TREE, V4SF_type_node,
11173						 endlink)));
11174  tree v8qi_ftype_v8qi_v8qi
11175    = build_function_type (V8QI_type_node,
11176			   tree_cons (NULL_TREE, V8QI_type_node,
11177				      tree_cons (NULL_TREE, V8QI_type_node,
11178						 endlink)));
11179  tree v4hi_ftype_v4hi_v4hi
11180    = build_function_type (V4HI_type_node,
11181			   tree_cons (NULL_TREE, V4HI_type_node,
11182				      tree_cons (NULL_TREE, V4HI_type_node,
11183						 endlink)));
11184  tree v2si_ftype_v2si_v2si
11185    = build_function_type (V2SI_type_node,
11186			   tree_cons (NULL_TREE, V2SI_type_node,
11187				      tree_cons (NULL_TREE, V2SI_type_node,
11188						 endlink)));
11189  tree di_ftype_di_di
11190    = build_function_type (long_long_unsigned_type_node,
11191			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
11192				      tree_cons (NULL_TREE,
11193						 long_long_unsigned_type_node,
11194						 endlink)));
11195
11196  tree v2si_ftype_v2sf
11197    = build_function_type (V2SI_type_node,
11198                           tree_cons (NULL_TREE, V2SF_type_node,
11199                                      endlink));
11200  tree v2sf_ftype_v2si
11201    = build_function_type (V2SF_type_node,
11202                           tree_cons (NULL_TREE, V2SI_type_node,
11203                                      endlink));
11204  tree v2si_ftype_v2si
11205    = build_function_type (V2SI_type_node,
11206                           tree_cons (NULL_TREE, V2SI_type_node,
11207                                      endlink));
11208  tree v2sf_ftype_v2sf
11209    = build_function_type (V2SF_type_node,
11210                           tree_cons (NULL_TREE, V2SF_type_node,
11211                                      endlink));
11212  tree v2sf_ftype_v2sf_v2sf
11213    = build_function_type (V2SF_type_node,
11214                           tree_cons (NULL_TREE, V2SF_type_node,
11215                                      tree_cons (NULL_TREE,
11216                                                 V2SF_type_node,
11217                                                 endlink)));
11218  tree v2si_ftype_v2sf_v2sf
11219    = build_function_type (V2SI_type_node,
11220                           tree_cons (NULL_TREE, V2SF_type_node,
11221                                      tree_cons (NULL_TREE,
11222                                                 V2SF_type_node,
11223                                                 endlink)));
11224
11225  /* Add all builtins that are more or less simple operations on two
11226     operands.  */
11227  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11228    {
11229      /* Use one of the operands; the target can have a different mode for
11230	 mask-generating compares.  */
11231      enum machine_mode mode;
11232      tree type;
11233
11234      if (d->name == 0)
11235	continue;
11236      mode = insn_data[d->icode].operand[1].mode;
11237
11238      switch (mode)
11239	{
11240	case V4SFmode:
11241	  type = v4sf_ftype_v4sf_v4sf;
11242	  break;
11243	case V8QImode:
11244	  type = v8qi_ftype_v8qi_v8qi;
11245	  break;
11246	case V4HImode:
11247	  type = v4hi_ftype_v4hi_v4hi;
11248	  break;
11249	case V2SImode:
11250	  type = v2si_ftype_v2si_v2si;
11251	  break;
11252	case DImode:
11253	  type = di_ftype_di_di;
11254	  break;
11255
11256	default:
11257	  abort ();
11258	}
11259
11260      /* Override for comparisons.  */
11261      if (d->icode == CODE_FOR_maskcmpv4sf3
11262	  || d->icode == CODE_FOR_maskncmpv4sf3
11263	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
11264	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11265	type = v4si_ftype_v4sf_v4sf;
11266
11267      def_builtin (d->mask, d->name, type, d->code);
11268    }
11269
11270  /* Add the remaining MMX insns with somewhat more complicated types.  */
11271  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11272  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11273  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11274  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11275  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11276  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11277  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11278
11279  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11280  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11281  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11282
11283  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11284  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11285
11286  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11287  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11288
11289  /* comi/ucomi insns.  */
11290  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11291    def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11292
11293  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11294  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11295  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11296
11297  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11298  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11299  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11300  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11301  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11302  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11303
11304  def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
11305  def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
11306  def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
11307  def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
11308
11309  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11310  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11311
11312  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11313
11314  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11315  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11316  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11317  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11318  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11319  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11320
11321  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11322  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11323  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11324  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11325
11326  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11327  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11328  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11329  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11330
11331  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11332
11333  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11334
11335  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11336  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11337  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11338  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11339  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11340  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11341
11342  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11343
11344  /* Original 3DNow!  */
11345  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11346  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11347  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11348  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11349  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11350  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11351  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11352  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11353  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11354  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11355  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11356  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11357  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11358  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11359  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11360  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11361  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11362  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11363  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11364  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11365
11366  /* 3DNow! extension as used in the Athlon CPU.  */
11367  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11368  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11369  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11370  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11371  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11372  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11373
11374  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
11375}
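
/* Illustrative user-level view (not part of the compiler): with -msse
   the builtins registered above are reachable from C roughly as

     typedef float v4sf __attribute__ ((mode (V4SF)));

     v4sf
     scale (v4sf a, v4sf b)
     {
       return __builtin_ia32_mulps (a, b);
     }

   and <xmmintrin.h> wraps them in the _mm_* interface.  */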
11376
11377/* Errors in the source file can cause expand_expr to return const0_rtx
11378   where we expect a vector.  To avoid crashing, use one of the vector
11379   clear instructions.  */
11380static rtx
11381safe_vector_operand (x, mode)
11382     rtx x;
11383     enum machine_mode mode;
11384{
11385  if (x != const0_rtx)
11386    return x;
11387  x = gen_reg_rtx (mode);
11388
11389  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11390    emit_insn (gen_mmx_clrdi (mode == DImode ? x
11391			      : gen_rtx_SUBREG (DImode, x, 0)));
11392  else
11393    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
11394				: gen_rtx_SUBREG (V4SFmode, x, 0)));
11395  return x;
11396}
11397
11398/* Subroutine of ix86_expand_builtin to take care of binop insns.  */
11399
11400static rtx
11401ix86_expand_binop_builtin (icode, arglist, target)
11402     enum insn_code icode;
11403     tree arglist;
11404     rtx target;
11405{
11406  rtx pat;
11407  tree arg0 = TREE_VALUE (arglist);
11408  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11409  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11410  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11411  enum machine_mode tmode = insn_data[icode].operand[0].mode;
11412  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11413  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11414
11415  if (VECTOR_MODE_P (mode0))
11416    op0 = safe_vector_operand (op0, mode0);
11417  if (VECTOR_MODE_P (mode1))
11418    op1 = safe_vector_operand (op1, mode1);
11419
11420  if (! target
11421      || GET_MODE (target) != tmode
11422      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11423    target = gen_reg_rtx (tmode);
11424
11425  /* In case the insn wants input operands in modes different from
11426     the result, abort.  */
11427  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11428    abort ();
11429
11430  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11431    op0 = copy_to_mode_reg (mode0, op0);
11432  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11433    op1 = copy_to_mode_reg (mode1, op1);
11434
11435  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11436     yet one of the two must not be a memory.  This is normally enforced
11437     by expanders, but we didn't bother to create one here.  */
11438  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11439    op0 = copy_to_mode_reg (mode0, op0);
11440
11441  pat = GEN_FCN (icode) (target, op0, op1);
11442  if (! pat)
11443    return 0;
11444  emit_insn (pat);
11445  return target;
11446}
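
/* Illustrative trace: for __builtin_ia32_addps the icode is
   CODE_FOR_addv4sf3, so the code above copies any operand the
   predicates reject into a fresh V4SF register and emits roughly

     (set (reg:V4SF target) (plus:V4SF (reg:V4SF op0) (reg:V4SF op1)))

   (a sketch of the named pattern's shape, not its exact RTL).  */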
11447
11448/* In type_for_mode we restrict the ability to create TImode types
11449   to hosts with a 64-bit HOST_WIDE_INT.  So we've defined the SSE logicals
11450   to have a V4SFmode signature.  Convert them in-place to TImode.  */
11451
11452static rtx
11453ix86_expand_timode_binop_builtin (icode, arglist, target)
11454     enum insn_code icode;
11455     tree arglist;
11456     rtx target;
11457{
11458  rtx pat;
11459  tree arg0 = TREE_VALUE (arglist);
11460  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11461  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11462  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11463
11464  op0 = gen_lowpart (TImode, op0);
11465  op1 = gen_lowpart (TImode, op1);
11466  target = gen_reg_rtx (TImode);
11467
11468  if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
11469    op0 = copy_to_mode_reg (TImode, op0);
11470  if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
11471    op1 = copy_to_mode_reg (TImode, op1);
11472
11473  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11474     yet one of the two must not be a memory.  This is normally enforced
11475     by expanders, but we didn't bother to create one here.  */
11476  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
11477    op0 = copy_to_mode_reg (TImode, op0);
11478
11479  pat = GEN_FCN (icode) (target, op0, op1);
11480  if (! pat)
11481    return 0;
11482  emit_insn (pat);
11483
11484  return gen_lowpart (V4SFmode, target);
11485}
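
/* Illustrative note: IX86_BUILTIN_ANDPS reaches here with V4SFmode
   operands; gen_lowpart merely re-interprets the registers as TImode
   for CODE_FOR_sse_andti3, and the final gen_lowpart views the TImode
   result as V4SF again, so the mode changes normally cost no insns.  */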
11486
11487/* Subroutine of ix86_expand_builtin to take care of stores.  */
11488
11489static rtx
11490ix86_expand_store_builtin (icode, arglist)
11491     enum insn_code icode;
11492     tree arglist;
11493{
11494  rtx pat;
11495  tree arg0 = TREE_VALUE (arglist);
11496  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11497  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11498  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11499  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11500  enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11501
11502  if (VECTOR_MODE_P (mode1))
11503    op1 = safe_vector_operand (op1, mode1);
11504
11505  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11506
11507  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11508    op1 = copy_to_mode_reg (mode1, op1);
11509
11510  pat = GEN_FCN (icode) (op0, op1);
11511  if (pat)
11512    emit_insn (pat);
11513  return 0;
11514}
11515
11516/* Subroutine of ix86_expand_builtin to take care of unop insns.  */
11517
11518static rtx
11519ix86_expand_unop_builtin (icode, arglist, target, do_load)
11520     enum insn_code icode;
11521     tree arglist;
11522     rtx target;
11523     int do_load;
11524{
11525  rtx pat;
11526  tree arg0 = TREE_VALUE (arglist);
11527  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11528  enum machine_mode tmode = insn_data[icode].operand[0].mode;
11529  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11530
11531  if (! target
11532      || GET_MODE (target) != tmode
11533      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11534    target = gen_reg_rtx (tmode);
11535  if (do_load)
11536    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11537  else
11538    {
11539      if (VECTOR_MODE_P (mode0))
11540	op0 = safe_vector_operand (op0, mode0);
11541
11542      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11543	op0 = copy_to_mode_reg (mode0, op0);
11544    }
11545
11546  pat = GEN_FCN (icode) (target, op0);
11547  if (! pat)
11548    return 0;
11549  emit_insn (pat);
11550  return target;
11551}
11552
11553/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11554   sqrtss, rsqrtss, rcpss.  */
11555
11556static rtx
11557ix86_expand_unop1_builtin (icode, arglist, target)
11558     enum insn_code icode;
11559     tree arglist;
11560     rtx target;
11561{
11562  rtx pat;
11563  tree arg0 = TREE_VALUE (arglist);
11564  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11565  enum machine_mode tmode = insn_data[icode].operand[0].mode;
11566  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11567
11568  if (! target
11569      || GET_MODE (target) != tmode
11570      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11571    target = gen_reg_rtx (tmode);
11572
11573  if (VECTOR_MODE_P (mode0))
11574    op0 = safe_vector_operand (op0, mode0);
11575
11576  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11577    op0 = copy_to_mode_reg (mode0, op0);
11578
11579  op1 = op0;
11580  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
11581    op1 = copy_to_mode_reg (mode0, op1);
11582
11583  pat = GEN_FCN (icode) (target, op0, op1);
11584  if (! pat)
11585    return 0;
11586  emit_insn (pat);
11587  return target;
11588}
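
/* Illustrative note: the vm* patterns used here take a second V4SF
   operand supplying the untouched upper three elements, so passing
   OP0 twice makes e.g. __builtin_ia32_sqrtss (x) compute the square
   root of the low element of X while merging back X's own upper
   elements.  */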
11589
11590/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
11591
11592static rtx
11593ix86_expand_sse_compare (d, arglist, target)
11594     const struct builtin_description *d;
11595     tree arglist;
11596     rtx target;
11597{
11598  rtx pat;
11599  tree arg0 = TREE_VALUE (arglist);
11600  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11601  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11602  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11603  rtx op2;
11604  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11605  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11606  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11607  enum rtx_code comparison = d->comparison;
11608
11609  if (VECTOR_MODE_P (mode0))
11610    op0 = safe_vector_operand (op0, mode0);
11611  if (VECTOR_MODE_P (mode1))
11612    op1 = safe_vector_operand (op1, mode1);
11613
11614  /* Swap operands if we have a comparison that isn't available in
11615     hardware.  */
11616  if (d->flag)
11617    {
11618      rtx tmp = gen_reg_rtx (mode1);
11619      emit_move_insn (tmp, op1);
11620      op1 = op0;
11621      op0 = tmp;
11622    }
11623
11624  if (! target
11625      || GET_MODE (target) != tmode
11626      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
11627    target = gen_reg_rtx (tmode);
11628
11629  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11630    op0 = copy_to_mode_reg (mode0, op0);
11631  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11632    op1 = copy_to_mode_reg (mode1, op1);
11633
11634  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11635  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11636  if (! pat)
11637    return 0;
11638  emit_insn (pat);
11639  return target;
11640}
11641
11642/* Subroutine of ix86_expand_builtin to take care of comi insns.  */
11643
11644static rtx
11645ix86_expand_sse_comi (d, arglist, target)
11646     const struct builtin_description *d;
11647     tree arglist;
11648     rtx target;
11649{
11650  rtx pat;
11651  tree arg0 = TREE_VALUE (arglist);
11652  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11653  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11654  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11655  rtx op2;
11656  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11657  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11658  enum rtx_code comparison = d->comparison;
11659
11660  if (VECTOR_MODE_P (mode0))
11661    op0 = safe_vector_operand (op0, mode0);
11662  if (VECTOR_MODE_P (mode1))
11663    op1 = safe_vector_operand (op1, mode1);
11664
11665  /* Swap operands if we have a comparison that isn't available in
11666     hardware.  */
11667  if (d->flag)
11668    {
11669      rtx tmp = op1;
11670      op1 = op0;
11671      op0 = tmp;
11672    }
11673
11674  target = gen_reg_rtx (SImode);
11675  emit_move_insn (target, const0_rtx);
11676  target = gen_rtx_SUBREG (QImode, target, 0);
11677
11678  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11679    op0 = copy_to_mode_reg (mode0, op0);
11680  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11681    op1 = copy_to_mode_reg (mode1, op1);
11682
11683  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11684  pat = GEN_FCN (d->icode) (op0, op1, op2);
11685  if (! pat)
11686    return 0;
11687  emit_insn (pat);
11688  emit_insn (gen_rtx_SET (VOIDmode,
11689			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
11690			  gen_rtx_fmt_ee (comparison, QImode,
11691					  gen_rtx_REG (CCmode, FLAGS_REG),
11692					  const0_rtx)));
11693
11694  return SUBREG_REG (target);
11695}
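
/* Illustrative trace: __builtin_ia32_comigt (a, b) has d->flag set,
   so the operands are swapped and the comi pattern tests b < a; the
   SET of the STRICT_LOW_PART above then turns the resulting flags
   into a boolean byte in the low part of the SImode temporary.  */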
11696
11697/* Expand an expression EXP that calls a built-in function,
11698   with result going to TARGET if that's convenient
11699   (and in mode MODE if that's convenient).
11700   SUBTARGET may be used as the target for computing one of EXP's operands.
11701   IGNORE is nonzero if the value is to be ignored.  */
11702
11703rtx
11704ix86_expand_builtin (exp, target, subtarget, mode, ignore)
11705     tree exp;
11706     rtx target;
11707     rtx subtarget ATTRIBUTE_UNUSED;
11708     enum machine_mode mode ATTRIBUTE_UNUSED;
11709     int ignore ATTRIBUTE_UNUSED;
11710{
11711  const struct builtin_description *d;
11712  size_t i;
11713  enum insn_code icode;
11714  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
11715  tree arglist = TREE_OPERAND (exp, 1);
11716  tree arg0, arg1, arg2;
11717  rtx op0, op1, op2, pat;
11718  enum machine_mode tmode, mode0, mode1, mode2;
11719  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11720
11721  switch (fcode)
11722    {
11723    case IX86_BUILTIN_EMMS:
11724      emit_insn (gen_emms ());
11725      return 0;
11726
11727    case IX86_BUILTIN_SFENCE:
11728      emit_insn (gen_sfence ());
11729      return 0;
11730
11731    case IX86_BUILTIN_PEXTRW:
11732      icode = CODE_FOR_mmx_pextrw;
11733      arg0 = TREE_VALUE (arglist);
11734      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11735      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11736      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11737      tmode = insn_data[icode].operand[0].mode;
11738      mode0 = insn_data[icode].operand[1].mode;
11739      mode1 = insn_data[icode].operand[2].mode;
11740
11741      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11742	op0 = copy_to_mode_reg (mode0, op0);
11743      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11744	{
11745	  /* @@@ better error message */
11746	  error ("selector must be an immediate");
11747	  return gen_reg_rtx (tmode);
11748	}
11749      if (target == 0
11750	  || GET_MODE (target) != tmode
11751	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11752	target = gen_reg_rtx (tmode);
11753      pat = GEN_FCN (icode) (target, op0, op1);
11754      if (! pat)
11755	return 0;
11756      emit_insn (pat);
11757      return target;
11758
11759    case IX86_BUILTIN_PINSRW:
11760      icode = CODE_FOR_mmx_pinsrw;
11761      arg0 = TREE_VALUE (arglist);
11762      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11763      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11764      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11765      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11766      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11767      tmode = insn_data[icode].operand[0].mode;
11768      mode0 = insn_data[icode].operand[1].mode;
11769      mode1 = insn_data[icode].operand[2].mode;
11770      mode2 = insn_data[icode].operand[3].mode;
11771
11772      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11773	op0 = copy_to_mode_reg (mode0, op0);
11774      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11775	op1 = copy_to_mode_reg (mode1, op1);
11776      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11777	{
11778	  /* @@@ better error message */
11779	  error ("selector must be an immediate");
11780	  return const0_rtx;
11781	}
11782      if (target == 0
11783	  || GET_MODE (target) != tmode
11784	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11785	target = gen_reg_rtx (tmode);
11786      pat = GEN_FCN (icode) (target, op0, op1, op2);
11787      if (! pat)
11788	return 0;
11789      emit_insn (pat);
11790      return target;
11791
11792    case IX86_BUILTIN_MASKMOVQ:
11793      icode = TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq;
11794      /* Note the arg order is different from the operand order.  */
11795      arg1 = TREE_VALUE (arglist);
11796      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
11797      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11798      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11799      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11800      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11801      mode0 = insn_data[icode].operand[0].mode;
11802      mode1 = insn_data[icode].operand[1].mode;
11803      mode2 = insn_data[icode].operand[2].mode;
11804
11805      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
11806	op0 = copy_to_mode_reg (mode0, op0);
11807      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11808	op1 = copy_to_mode_reg (mode1, op1);
11809      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
11810	op2 = copy_to_mode_reg (mode2, op2);
11811      pat = GEN_FCN (icode) (op0, op1, op2);
11812      if (! pat)
11813	return 0;
11814      emit_insn (pat);
11815      return 0;
11816
11817    case IX86_BUILTIN_SQRTSS:
11818      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
11819    case IX86_BUILTIN_RSQRTSS:
11820      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
11821    case IX86_BUILTIN_RCPSS:
11822      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
11823
11824    case IX86_BUILTIN_ANDPS:
11825      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
11826					       arglist, target);
11827    case IX86_BUILTIN_ANDNPS:
11828      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
11829					       arglist, target);
11830    case IX86_BUILTIN_ORPS:
11831      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
11832					       arglist, target);
11833    case IX86_BUILTIN_XORPS:
11834      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
11835					       arglist, target);
11836
11837    case IX86_BUILTIN_LOADAPS:
11838      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
11839
11840    case IX86_BUILTIN_LOADUPS:
11841      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
11842
11843    case IX86_BUILTIN_STOREAPS:
11844      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
11845    case IX86_BUILTIN_STOREUPS:
11846      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
11847
11848    case IX86_BUILTIN_LOADSS:
11849      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
11850
11851    case IX86_BUILTIN_STORESS:
11852      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
11853
11854    case IX86_BUILTIN_LOADHPS:
11855    case IX86_BUILTIN_LOADLPS:
11856      icode = (fcode == IX86_BUILTIN_LOADHPS
11857	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11858      arg0 = TREE_VALUE (arglist);
11859      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11860      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11861      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11862      tmode = insn_data[icode].operand[0].mode;
11863      mode0 = insn_data[icode].operand[1].mode;
11864      mode1 = insn_data[icode].operand[2].mode;
11865
11866      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11867	op0 = copy_to_mode_reg (mode0, op0);
11868      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
11869      if (target == 0
11870	  || GET_MODE (target) != tmode
11871	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11872	target = gen_reg_rtx (tmode);
11873      pat = GEN_FCN (icode) (target, op0, op1);
11874      if (! pat)
11875	return 0;
11876      emit_insn (pat);
11877      return target;
11878
11879    case IX86_BUILTIN_STOREHPS:
11880    case IX86_BUILTIN_STORELPS:
11881      icode = (fcode == IX86_BUILTIN_STOREHPS
11882	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11883      arg0 = TREE_VALUE (arglist);
11884      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11885      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11886      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11887      mode0 = insn_data[icode].operand[1].mode;
11888      mode1 = insn_data[icode].operand[2].mode;
11889
11890      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11891      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11892	op1 = copy_to_mode_reg (mode1, op1);
11893
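      /* The movhps/movlps patterns are vec_merge operations whose
	 destination also appears as a source operand, so for the store
	 forms the memory operand is passed twice.  */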
11894      pat = GEN_FCN (icode) (op0, op0, op1);
11895      if (! pat)
11896	return 0;
11897      emit_insn (pat);
11898      return 0;
11899
11900    case IX86_BUILTIN_MOVNTPS:
11901      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
11902    case IX86_BUILTIN_MOVNTQ:
11903      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
11904
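    /* The ldmxcsr and stmxcsr insns take only memory operands, so both
       builtins bounce the value through a stack slot.  */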
11905    case IX86_BUILTIN_LDMXCSR:
11906      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11907      target = assign_386_stack_local (SImode, 0);
11908      emit_move_insn (target, op0);
11909      emit_insn (gen_ldmxcsr (target));
11910      return 0;
11911
11912    case IX86_BUILTIN_STMXCSR:
11913      target = assign_386_stack_local (SImode, 0);
11914      emit_insn (gen_stmxcsr (target));
11915      return copy_to_mode_reg (SImode, target);
11916
11917    case IX86_BUILTIN_SHUFPS:
11918      icode = CODE_FOR_sse_shufps;
11919      arg0 = TREE_VALUE (arglist);
11920      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11921      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11922      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11923      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11924      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11925      tmode = insn_data[icode].operand[0].mode;
11926      mode0 = insn_data[icode].operand[1].mode;
11927      mode1 = insn_data[icode].operand[2].mode;
11928      mode2 = insn_data[icode].operand[3].mode;
11929
11930      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11931	op0 = copy_to_mode_reg (mode0, op0);
11932      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11933	op1 = copy_to_mode_reg (mode1, op1);
11934      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11935	{
11936	  /* @@@ better error message */
11937	  error ("mask must be an immediate");
11938	  return gen_reg_rtx (tmode);
11939	}
11940      if (target == 0
11941	  || GET_MODE (target) != tmode
11942	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11943	target = gen_reg_rtx (tmode);
11944      pat = GEN_FCN (icode) (target, op0, op1, op2);
11945      if (! pat)
11946	return 0;
11947      emit_insn (pat);
11948      return target;
11949
11950    case IX86_BUILTIN_PSHUFW:
11951      icode = CODE_FOR_mmx_pshufw;
11952      arg0 = TREE_VALUE (arglist);
11953      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11954      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11955      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11956      tmode = insn_data[icode].operand[0].mode;
11957      mode1 = insn_data[icode].operand[1].mode;
11958      mode2 = insn_data[icode].operand[2].mode;
11959
11960      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
11961	op0 = copy_to_mode_reg (mode1, op0);
11962      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
11963	{
11964	  /* @@@ better error message */
11965	  error ("mask must be an immediate");
11966	  return const0_rtx;
11967	}
11968      if (target == 0
11969	  || GET_MODE (target) != tmode
11970	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11971	target = gen_reg_rtx (tmode);
11972      pat = GEN_FCN (icode) (target, op0, op1);
11973      if (! pat)
11974	return 0;
11975      emit_insn (pat);
11976      return target;
11977
11978    case IX86_BUILTIN_FEMMS:
11979      emit_insn (gen_femms ());
11980      return NULL_RTX;
11981
11982    case IX86_BUILTIN_PAVGUSB:
11983      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
11984
11985    case IX86_BUILTIN_PF2ID:
11986      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
11987
11988    case IX86_BUILTIN_PFACC:
11989      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
11990
11991    case IX86_BUILTIN_PFADD:
11992     return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
11993
11994    case IX86_BUILTIN_PFCMPEQ:
11995      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
11996
11997    case IX86_BUILTIN_PFCMPGE:
11998      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
11999
12000    case IX86_BUILTIN_PFCMPGT:
12001      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
12002
12003    case IX86_BUILTIN_PFMAX:
12004      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
12005
12006    case IX86_BUILTIN_PFMIN:
12007      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
12008
12009    case IX86_BUILTIN_PFMUL:
12010      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
12011
12012    case IX86_BUILTIN_PFRCP:
12013      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
12014
12015    case IX86_BUILTIN_PFRCPIT1:
12016      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
12017
12018    case IX86_BUILTIN_PFRCPIT2:
12019      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
12020
12021    case IX86_BUILTIN_PFRSQIT1:
12022      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
12023
12024    case IX86_BUILTIN_PFRSQRT:
12025      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
12026
12027    case IX86_BUILTIN_PFSUB:
12028      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
12029
12030    case IX86_BUILTIN_PFSUBR:
12031      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
12032
12033    case IX86_BUILTIN_PI2FD:
12034      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
12035
12036    case IX86_BUILTIN_PMULHRW:
12037      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
12038
12039    case IX86_BUILTIN_PF2IW:
12040      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
12041
12042    case IX86_BUILTIN_PFNACC:
12043      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
12044
12045    case IX86_BUILTIN_PFPNACC:
12046      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
12047
12048    case IX86_BUILTIN_PI2FW:
12049      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
12050
12051    case IX86_BUILTIN_PSWAPDSI:
12052      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
12053
12054    case IX86_BUILTIN_PSWAPDSF:
12055      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
12056
12057    case IX86_BUILTIN_SSE_ZERO:
12058      target = gen_reg_rtx (V4SFmode);
12059      emit_insn (gen_sse_clrv4sf (target));
12060      return target;
12061
12062    case IX86_BUILTIN_MMX_ZERO:
12063      target = gen_reg_rtx (DImode);
12064      emit_insn (gen_mmx_clrdi (target));
12065      return target;
12066
12067    default:
12068      break;
12069    }
12070
12071  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
12072    if (d->code == fcode)
12073      {
12074	/* Compares are treated specially.  */
12075	if (d->icode == CODE_FOR_maskcmpv4sf3
12076	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
12077	    || d->icode == CODE_FOR_maskncmpv4sf3
12078	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12079	  return ix86_expand_sse_compare (d, arglist, target);
12080
12081	return ix86_expand_binop_builtin (d->icode, arglist, target);
12082      }
12083
12084  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
12085    if (d->code == fcode)
12086      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
12087
12088  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
12089    if (d->code == fcode)
12090      return ix86_expand_sse_comi (d, arglist, target);
12091
12092  /* @@@ Should really do something sensible here.  */
12093  return 0;
12094}
12095
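/* An illustrative (not compiled) example of the immediate checks in the
   SHUFPS case above: the mask argument of __builtin_ia32_shufps must be
   a compile-time constant, so the first function below expands cleanly
   while the second hits the "mask must be an immediate" error.  */
#if 0
typedef float v4sf __attribute__ ((mode (V4SF)));

v4sf
shuffle_ok (v4sf a, v4sf b)
{
  return __builtin_ia32_shufps (a, b, 0xB1);	/* immediate mask: OK */
}

v4sf
shuffle_bad (v4sf a, v4sf b, int mask)
{
  return __builtin_ia32_shufps (a, b, mask);	/* variable mask: error */
}
#endif
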
12096/* Store OPERAND to memory after reload is completed.  This means
12097   that we can't easily use assign_stack_local.  */
12098rtx
12099ix86_force_to_memory (mode, operand)
12100     enum machine_mode mode;
12101     rtx operand;
12102{
12103  rtx result;
12104  if (!reload_completed)
12105    abort ();
12106  if (TARGET_64BIT && TARGET_RED_ZONE)
12107    {
12108      result = gen_rtx_MEM (mode,
12109			    gen_rtx_PLUS (Pmode,
12110					  stack_pointer_rtx,
12111					  GEN_INT (-RED_ZONE_SIZE)));
12112      emit_move_insn (result, operand);
12113    }
12114  else if (TARGET_64BIT && !TARGET_RED_ZONE)
12115    {
12116      switch (mode)
12117	{
12118	case HImode:
12119	case SImode:
12120	  operand = gen_lowpart (DImode, operand);
12121	  /* FALLTHRU */
12122	case DImode:
12123	  emit_insn (
12124		      gen_rtx_SET (VOIDmode,
12125				   gen_rtx_MEM (DImode,
12126						gen_rtx_PRE_DEC (DImode,
12127							stack_pointer_rtx)),
12128				   operand));
12129	  break;
12130	default:
12131	  abort ();
12132	}
12133      result = gen_rtx_MEM (mode, stack_pointer_rtx);
12134    }
12135  else
12136    {
12137      switch (mode)
12138	{
12139	case DImode:
12140	  {
12141	    rtx operands[2];
12142	    split_di (&operand, 1, operands, operands + 1);
12143	    emit_insn (
12144			gen_rtx_SET (VOIDmode,
12145				     gen_rtx_MEM (SImode,
12146						  gen_rtx_PRE_DEC (Pmode,
12147							stack_pointer_rtx)),
12148				     operands[1]));
12149	    emit_insn (
12150			gen_rtx_SET (VOIDmode,
12151				     gen_rtx_MEM (SImode,
12152						  gen_rtx_PRE_DEC (Pmode,
12153							stack_pointer_rtx)),
12154				     operands[0]));
12155	  }
12156	  break;
12157	case HImode:
12158	  /* It is better to store HImode values as SImode.  */
12159	  if (!TARGET_PARTIAL_REG_STALL)
12160	    operand = gen_lowpart (SImode, operand);
12161	  /* FALLTHRU */
12162	case SImode:
12163	  emit_insn (
12164		      gen_rtx_SET (VOIDmode,
12165				   gen_rtx_MEM (GET_MODE (operand),
12166						gen_rtx_PRE_DEC (SImode,
12167							stack_pointer_rtx)),
12168				   operand));
12169	  break;
12170	default:
12171	  abort ();
12172	}
12173      result = gen_rtx_MEM (mode, stack_pointer_rtx);
12174    }
12175  return result;
12176}
12177
12178/* Release the stack space used by ix86_force_to_memory above.  */
12179void
12180ix86_free_from_memory (mode)
12181     enum machine_mode mode;
12182{
12183  if (!TARGET_64BIT || !TARGET_RED_ZONE)
12184    {
12185      int size;
12186
12187      if (mode == DImode || TARGET_64BIT)
12188	size = 8;
12189      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12190	size = 2;
12191      else
12192	size = 4;
12193      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
12194         to a pop or add instruction if registers are available.  */
12195      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12196			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12197					    GEN_INT (size))));
12198    }
12199}
12200
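/* An illustrative (not compiled) sketch of how the two helpers above are
   meant to be paired after reload; gen_frob_hi is hypothetical.  */
#if 0
  {
    rtx mem = ix86_force_to_memory (HImode, operands[1]);
    emit_insn (gen_frob_hi (operands[0], mem));	/* hypothetical insn */
    ix86_free_from_memory (HImode);
  }
#endif
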
12201/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12202   QImode must go into class Q_REGS.
12203   Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and
12204   movdf to do mem-to-mem moves through integer regs.  */
12205enum reg_class
12206ix86_preferred_reload_class (x, class)
12207     rtx x;
12208     enum reg_class class;
12209{
12210  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12211    {
12212      /* SSE can't load any constant directly yet.  */
12213      if (SSE_CLASS_P (class))
12214	return NO_REGS;
12215      /* Floats can load 0 and 1.  */
12216      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12217	{
12218	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
12219	  if (MAYBE_SSE_CLASS_P (class))
12220	    return (reg_class_subset_p (class, GENERAL_REGS)
12221		    ? GENERAL_REGS : FLOAT_REGS);
12222	  else
12223	    return class;
12224	}
12225      /* General regs can load everything.  */
12226      if (reg_class_subset_p (class, GENERAL_REGS))
12227	return GENERAL_REGS;
12228      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
12229      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12230	return NO_REGS;
12231    }
12232  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12233    return NO_REGS;
12234  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12235    return Q_REGS;
12236  return class;
12237}
12238
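/* Illustrative (not compiled) queries against the function above: 0.0
   and 1.0 have fldz/fld1 encodings, so they may stay in FLOAT_REGS,
   while SSE classes can load no constant directly.  */
#if 0
  ix86_preferred_reload_class (CONST1_RTX (DFmode), FLOAT_REGS);
						/* -> FLOAT_REGS */
  ix86_preferred_reload_class (CONST1_RTX (DFmode), SSE_REGS);
						/* -> NO_REGS */
#endif
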
12239/* If we are copying between general and FP registers, we need a memory
12240   location. The same is true for SSE and MMX registers.
12241
12242   The macro can't work reliably when one of the CLASSES is a class containing
12243   registers from multiple units (SSE, MMX, integer).  We avoid this by never
12244   combining those units in a single alternative in the machine description.
12245   Ensure that this constraint holds to avoid unexpected surprises.
12246
12247   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12248   enforce these sanity checks.  */
12249int
12250ix86_secondary_memory_needed (class1, class2, mode, strict)
12251     enum reg_class class1, class2;
12252     enum machine_mode mode;
12253     int strict;
12254{
12255  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12256      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12257      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12258      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12259      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12260      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
12261    {
12262      if (strict)
12263	abort ();
12264      else
12265	return 1;
12266    }
12267  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12268	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12269	      && (mode) != SImode)
12270	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12271	      && (mode) != SImode));
12272}
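
/* Illustrative (not compiled) queries: an SImode move between SSE and
   integer registers can use movd directly, while a DImode move between
   the same classes must pass through memory.  */
#if 0
  ix86_secondary_memory_needed (SSE_REGS, GENERAL_REGS, SImode, 1);  /* 0 */
  ix86_secondary_memory_needed (SSE_REGS, GENERAL_REGS, DImode, 1);  /* 1 */
#endif
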
12273/* Return the cost of moving data from a register in class CLASS1 to
12274   one in class CLASS2.
12275
12276   It is not required that the cost always equal 2 when CLASS1 is the same as
12277   CLASS2; on some machines it is expensive to move between registers if they
12278   are not general registers.  */
12279int
12280ix86_register_move_cost (mode, class1, class2)
12281     enum machine_mode mode;
12282     enum reg_class class1, class2;
12283{
12284  /* If we require secondary memory, compute the cost of the store followed
12285     by the load.  When copying from a general purpose register we may emit
12286     multiple stores followed by a single load, causing a memory size mismatch
12287     stall.  Count this as an arbitrarily high cost of 20.  */
12288  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12289    {
12290      int add_cost = 0;
12291      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
12292	  add_cost = 20;
12293      return (MEMORY_MOVE_COST (mode, class1, 0)
12294	      + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
12295    }
12296  /* Moves between SSE/MMX and integer unit are expensive.  */
12297  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12298      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12299    return ix86_cost->mmxsse_to_integer;
12300  if (MAYBE_FLOAT_CLASS_P (class1))
12301    return ix86_cost->fp_move;
12302  if (MAYBE_SSE_CLASS_P (class1))
12303    return ix86_cost->sse_move;
12304  if (MAYBE_MMX_CLASS_P (class1))
12305    return ix86_cost->mmx_move;
12306  return 2;
12307}
12308
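/* A worked example of the secondary-memory path above, using the
   size_cost table for illustration: copying DFmode from GENERAL_REGS to
   FLOAT_REGS stores two SImode words and loads one DFmode value, and
   GENERAL_REGS needs more hard registers than FLOAT_REGS for DFmode, so
   the cost is 2*int_store[2] + fp_load[1] + 20 = 4 + 2 + 20 = 26.  */
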
12309/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
12310int
12311ix86_hard_regno_mode_ok (regno, mode)
12312     int regno;
12313     enum machine_mode mode;
12314{
12315  /* The flags register, and only the flags register, can hold CCmode values.  */
12316  if (CC_REGNO_P (regno))
12317    return GET_MODE_CLASS (mode) == MODE_CC;
12318  if (GET_MODE_CLASS (mode) == MODE_CC
12319      || GET_MODE_CLASS (mode) == MODE_RANDOM
12320      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12321    return 0;
12322  if (FP_REGNO_P (regno))
12323    return VALID_FP_MODE_P (mode);
12324  if (SSE_REGNO_P (regno))
12325    return VALID_SSE_REG_MODE (mode);
12326  if (MMX_REGNO_P (regno))
12327    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
12328  /* We handle both integer and float values in the general purpose registers.
12329     In the future we should be able to handle vector modes as well.  */
12330  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12331    return 0;
12332  /* Take care with QImode values - they can live in non-QI regs, but then
12333     they do cause partial register stalls.  */
12334  if (regno < 4 || mode != QImode || TARGET_64BIT)
12335    return 1;
12336  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12337}
12338
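/* Illustrative (not compiled) queries: on 32-bit targets only
   %eax/%edx/%ecx/%ebx (regnos 0-3) have directly addressable QImode
   lowparts; other integer registers take QImode only during reload or
   when partial register stalls are not a concern.  */
#if 0
  ix86_hard_regno_mode_ok (0, QImode);	/* %eax: always 1 */
  ix86_hard_regno_mode_ok (4, QImode);	/* %esi: 0 before reload when
					   TARGET_PARTIAL_REG_STALL */
#endif
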
12339/* Return the cost of moving data of mode MODE between a
12340   register and memory.  A value of 2 is the default; this cost is
12341   relative to those in `REGISTER_MOVE_COST'.
12342
12343   If moving between registers and memory is more expensive than
12344   between two registers, you should define this macro to express the
12345   relative cost.
12346
12347   Also model the increased cost of moving QImode values in non-Q_REGS
12348   classes.
12349 */
12350int
12351ix86_memory_move_cost (mode, class, in)
12352     enum machine_mode mode;
12353     enum reg_class class;
12354     int in;
12355{
12356  if (FLOAT_CLASS_P (class))
12357    {
12358      int index;
12359      switch (mode)
12360	{
12361	  case SFmode:
12362	    index = 0;
12363	    break;
12364	  case DFmode:
12365	    index = 1;
12366	    break;
12367	  case XFmode:
12368	  case TFmode:
12369	    index = 2;
12370	    break;
12371	  default:
12372	    return 100;
12373	}
12374      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12375    }
12376  if (SSE_CLASS_P (class))
12377    {
12378      int index;
12379      switch (GET_MODE_SIZE (mode))
12380	{
12381	  case 4:
12382	    index = 0;
12383	    break;
12384	  case 8:
12385	    index = 1;
12386	    break;
12387	  case 16:
12388	    index = 2;
12389	    break;
12390	  default:
12391	    return 100;
12392	}
12393      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12394    }
12395  if (MMX_CLASS_P (class))
12396    {
12397      int index;
12398      switch (GET_MODE_SIZE (mode))
12399	{
12400	  case 4:
12401	    index = 0;
12402	    break;
12403	  case 8:
12404	    index = 1;
12405	    break;
12406	  default:
12407	    return 100;
12408	}
12409      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
12410    }
12411  switch (GET_MODE_SIZE (mode))
12412    {
12413      case 1:
12414	if (in)
12415	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12416		  : ix86_cost->movzbl_load);
12417	else
12418	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12419		  : ix86_cost->int_store[0] + 4);
12420	break;
12421      case 2:
12422	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12423      default:
12424	/* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
12425	if (mode == TFmode)
12426	  mode = XFmode;
12427	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
12428		* (int) GET_MODE_SIZE (mode) / 4);
12429    }
12430}
12431
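/* Worked examples of the computation above, using the size_cost table
   for illustration: a QImode store to a non-Q_REGS class costs
   int_store[0] + 4 = 6; a TFmode load into GENERAL_REGS is moved as
   XFmode, which occupies 12 bytes here, i.e. three 32-bit loads,
   3 * int_load[2] = 6.  */
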
12432#ifdef DO_GLOBAL_CTORS_BODY
12433static void
12434ix86_svr3_asm_out_constructor (symbol, priority)
12435     rtx symbol;
12436     int priority ATTRIBUTE_UNUSED;
12437{
12438  init_section ();
12439  fputs ("\tpushl $", asm_out_file);
12440  assemble_name (asm_out_file, XSTR (symbol, 0));
12441  fputc ('\n', asm_out_file);
12442}
12443#endif
12444
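/* Illustratively, for a constructor symbol "ctor" the routine above
   emits "pushl $ctor" into the .init section; DO_GLOBAL_CTORS_BODY is
   then responsible for calling the pushed addresses at startup.  */
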
12445/* Order the registers for the register allocator.  */
12446
12447void
12448x86_order_regs_for_local_alloc ()
12449{
12450   int pos = 0;
12451   int i;
12452
12453   /* First allocate the local general purpose registers.  */
12454   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
12455     if (GENERAL_REGNO_P (i) && call_used_regs[i])
12456	reg_alloc_order [pos++] = i;
12457
12458   /* Global general purpose registers.  */
12459   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
12460     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
12461	reg_alloc_order [pos++] = i;
12462
12463   /* x87 registers come first when we do FP math with them, i.e.
12464      when SSE math is disabled.  */
12465   if (!TARGET_SSE_MATH)
12466     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
12467       reg_alloc_order [pos++] = i;
12468
12469   /* SSE registers.  */
12470   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
12471     reg_alloc_order [pos++] = i;
12472   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
12473     reg_alloc_order [pos++] = i;
12474
12475   /* x87 registers.  */
12476   if (TARGET_SSE_MATH)
12477     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
12478       reg_alloc_order [pos++] = i;
12479
12480   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
12481     reg_alloc_order [pos++] = i;
12482
12483   /* Initialize the rest of the array, as some registers are never
12484      allocated at all.  */
12485   while (pos < FIRST_PSEUDO_REGISTER)
12486     reg_alloc_order [pos++] = 0;
12487}
12488
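/* A small (not compiled) debugging aid: dump the allocation order
   computed by the function above.  */
#if 0
  {
    int i;
    x86_order_regs_for_local_alloc ();
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      fprintf (stderr, "%2d: %s\n", i, reg_names[reg_alloc_order[i]]);
  }
#endif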