i386.c revision 90284
1/* Subroutines used for code generation on IA-32.
2   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3   2002 Free Software Foundation, Inc.
4
5This file is part of GNU CC.
6
7GNU CC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GNU CC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU CC; see the file COPYING.  If not, write to
19the Free Software Foundation, 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA.  */
21
22
23/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 90284 2002-02-06 05:01:29Z obrien $ */
24
25
26#include "config.h"
27#include "system.h"
28#include "rtl.h"
29#include "tree.h"
30#include "tm_p.h"
31#include "regs.h"
32#include "hard-reg-set.h"
33#include "real.h"
34#include "insn-config.h"
35#include "conditions.h"
36#include "output.h"
37#include "insn-attr.h"
38#include "flags.h"
39#include "except.h"
40#include "function.h"
41#include "recog.h"
42#include "expr.h"
43#include "optabs.h"
44#include "toplev.h"
45#include "basic-block.h"
46#include "ggc.h"
47#include "target.h"
48#include "target-def.h"
49
50#ifndef CHECK_STACK_LIMIT
51#define CHECK_STACK_LIMIT (-1)
52#endif
53
54#warning NEED TO REVISIT "PIC_REG_USED" AND -mprofiler-epilogue SUPPORT
55#if 0
56#define PIC_REG_USED 					\
57  (flag_pic && (current_function_uses_pic_offset_table	\
58		|| current_function_uses_const_pool	\
59		|| profile_flag || profile_block_flag))
60#endif
61
62/* Processor costs (relative to an add) */
63static const
64struct processor_costs size_cost = {	/* costs for tunning for size */
65  2,					/* cost of an add instruction */
66  3,					/* cost of a lea instruction */
67  2,					/* variable shift costs */
68  3,					/* constant shift costs */
69  3,					/* cost of starting a multiply */
70  0,					/* cost of multiply per each bit set */
71  3,					/* cost of a divide/mod */
72  3,					/* cost of movsx */
73  3,					/* cost of movzx */
74  0,					/* "large" insn */
75  2,					/* MOVE_RATIO */
76  2,					/* cost for loading QImode using movzbl */
77  {2, 2, 2},				/* cost of loading integer registers
78					   in QImode, HImode and SImode.
79					   Relative to reg-reg move (2).  */
80  {2, 2, 2},				/* cost of storing integer registers */
81  2,					/* cost of reg,reg fld/fst */
82  {2, 2, 2},				/* cost of loading fp registers
83					   in SFmode, DFmode and XFmode */
84  {2, 2, 2},				/* cost of loading integer registers */
85  3,					/* cost of moving MMX register */
86  {3, 3},				/* cost of loading MMX registers
87					   in SImode and DImode */
88  {3, 3},				/* cost of storing MMX registers
89					   in SImode and DImode */
90  3,					/* cost of moving SSE register */
91  {3, 3, 3},				/* cost of loading SSE registers
92					   in SImode, DImode and TImode */
93  {3, 3, 3},				/* cost of storing SSE registers
94					   in SImode, DImode and TImode */
95  3,					/* MMX or SSE register to integer */
96  0,					/* size of prefetch block */
97  0,					/* number of parallel prefetches */
98};
99/* Processor costs (relative to an add) */
100static const
101struct processor_costs i386_cost = {	/* 386 specific costs */
102  1,					/* cost of an add instruction */
103  1,					/* cost of a lea instruction */
104  3,					/* variable shift costs */
105  2,					/* constant shift costs */
106  6,					/* cost of starting a multiply */
107  1,					/* cost of multiply per each bit set */
108  23,					/* cost of a divide/mod */
109  3,					/* cost of movsx */
110  2,					/* cost of movzx */
111  15,					/* "large" insn */
112  3,					/* MOVE_RATIO */
113  4,					/* cost for loading QImode using movzbl */
114  {2, 4, 2},				/* cost of loading integer registers
115					   in QImode, HImode and SImode.
116					   Relative to reg-reg move (2).  */
117  {2, 4, 2},				/* cost of storing integer registers */
118  2,					/* cost of reg,reg fld/fst */
119  {8, 8, 8},				/* cost of loading fp registers
120					   in SFmode, DFmode and XFmode */
121  {8, 8, 8},				/* cost of loading integer registers */
122  2,					/* cost of moving MMX register */
123  {4, 8},				/* cost of loading MMX registers
124					   in SImode and DImode */
125  {4, 8},				/* cost of storing MMX registers
126					   in SImode and DImode */
127  2,					/* cost of moving SSE register */
128  {4, 8, 16},				/* cost of loading SSE registers
129					   in SImode, DImode and TImode */
130  {4, 8, 16},				/* cost of storing SSE registers
131					   in SImode, DImode and TImode */
132  3,					/* MMX or SSE register to integer */
133  0,					/* size of prefetch block */
134  0,					/* number of parallel prefetches */
135};
136
137static const
138struct processor_costs i486_cost = {	/* 486 specific costs */
139  1,					/* cost of an add instruction */
140  1,					/* cost of a lea instruction */
141  3,					/* variable shift costs */
142  2,					/* constant shift costs */
143  12,					/* cost of starting a multiply */
144  1,					/* cost of multiply per each bit set */
145  40,					/* cost of a divide/mod */
146  3,					/* cost of movsx */
147  2,					/* cost of movzx */
148  15,					/* "large" insn */
149  3,					/* MOVE_RATIO */
150  4,					/* cost for loading QImode using movzbl */
151  {2, 4, 2},				/* cost of loading integer registers
152					   in QImode, HImode and SImode.
153					   Relative to reg-reg move (2).  */
154  {2, 4, 2},				/* cost of storing integer registers */
155  2,					/* cost of reg,reg fld/fst */
156  {8, 8, 8},				/* cost of loading fp registers
157					   in SFmode, DFmode and XFmode */
158  {8, 8, 8},				/* cost of loading integer registers */
159  2,					/* cost of moving MMX register */
160  {4, 8},				/* cost of loading MMX registers
161					   in SImode and DImode */
162  {4, 8},				/* cost of storing MMX registers
163					   in SImode and DImode */
164  2,					/* cost of moving SSE register */
165  {4, 8, 16},				/* cost of loading SSE registers
166					   in SImode, DImode and TImode */
167  {4, 8, 16},				/* cost of storing SSE registers
168					   in SImode, DImode and TImode */
169  3,					/* MMX or SSE register to integer */
170  0,					/* size of prefetch block */
171  0,					/* number of parallel prefetches */
172};
173
174static const
175struct processor_costs pentium_cost = {
176  1,					/* cost of an add instruction */
177  1,					/* cost of a lea instruction */
178  4,					/* variable shift costs */
179  1,					/* constant shift costs */
180  11,					/* cost of starting a multiply */
181  0,					/* cost of multiply per each bit set */
182  25,					/* cost of a divide/mod */
183  3,					/* cost of movsx */
184  2,					/* cost of movzx */
185  8,					/* "large" insn */
186  6,					/* MOVE_RATIO */
187  6,					/* cost for loading QImode using movzbl */
188  {2, 4, 2},				/* cost of loading integer registers
189					   in QImode, HImode and SImode.
190					   Relative to reg-reg move (2).  */
191  {2, 4, 2},				/* cost of storing integer registers */
192  2,					/* cost of reg,reg fld/fst */
193  {2, 2, 6},				/* cost of loading fp registers
194					   in SFmode, DFmode and XFmode */
195  {4, 4, 6},				/* cost of loading integer registers */
196  8,					/* cost of moving MMX register */
197  {8, 8},				/* cost of loading MMX registers
198					   in SImode and DImode */
199  {8, 8},				/* cost of storing MMX registers
200					   in SImode and DImode */
201  2,					/* cost of moving SSE register */
202  {4, 8, 16},				/* cost of loading SSE registers
203					   in SImode, DImode and TImode */
204  {4, 8, 16},				/* cost of storing SSE registers
205					   in SImode, DImode and TImode */
206  3,					/* MMX or SSE register to integer */
207  0,					/* size of prefetch block */
208  0,					/* number of parallel prefetches */
209};
210
211static const
212struct processor_costs pentiumpro_cost = {
213  1,					/* cost of an add instruction */
214  1,					/* cost of a lea instruction */
215  1,					/* variable shift costs */
216  1,					/* constant shift costs */
217  4,					/* cost of starting a multiply */
218  0,					/* cost of multiply per each bit set */
219  17,					/* cost of a divide/mod */
220  1,					/* cost of movsx */
221  1,					/* cost of movzx */
222  8,					/* "large" insn */
223  6,					/* MOVE_RATIO */
224  2,					/* cost for loading QImode using movzbl */
225  {4, 4, 4},				/* cost of loading integer registers
226					   in QImode, HImode and SImode.
227					   Relative to reg-reg move (2).  */
228  {2, 2, 2},				/* cost of storing integer registers */
229  2,					/* cost of reg,reg fld/fst */
230  {2, 2, 6},				/* cost of loading fp registers
231					   in SFmode, DFmode and XFmode */
232  {4, 4, 6},				/* cost of loading integer registers */
233  2,					/* cost of moving MMX register */
234  {2, 2},				/* cost of loading MMX registers
235					   in SImode and DImode */
236  {2, 2},				/* cost of storing MMX registers
237					   in SImode and DImode */
238  2,					/* cost of moving SSE register */
239  {2, 2, 8},				/* cost of loading SSE registers
240					   in SImode, DImode and TImode */
241  {2, 2, 8},				/* cost of storing SSE registers
242					   in SImode, DImode and TImode */
243  3,					/* MMX or SSE register to integer */
244  32,					/* size of prefetch block */
245  6,					/* number of parallel prefetches */
246};
247
248static const
249struct processor_costs k6_cost = {
250  1,					/* cost of an add instruction */
251  2,					/* cost of a lea instruction */
252  1,					/* variable shift costs */
253  1,					/* constant shift costs */
254  3,					/* cost of starting a multiply */
255  0,					/* cost of multiply per each bit set */
256  18,					/* cost of a divide/mod */
257  2,					/* cost of movsx */
258  2,					/* cost of movzx */
259  8,					/* "large" insn */
260  4,					/* MOVE_RATIO */
261  3,					/* cost for loading QImode using movzbl */
262  {4, 5, 4},				/* cost of loading integer registers
263					   in QImode, HImode and SImode.
264					   Relative to reg-reg move (2).  */
265  {2, 3, 2},				/* cost of storing integer registers */
266  4,					/* cost of reg,reg fld/fst */
267  {6, 6, 6},				/* cost of loading fp registers
268					   in SFmode, DFmode and XFmode */
269  {4, 4, 4},				/* cost of loading integer registers */
270  2,					/* cost of moving MMX register */
271  {2, 2},				/* cost of loading MMX registers
272					   in SImode and DImode */
273  {2, 2},				/* cost of storing MMX registers
274					   in SImode and DImode */
275  2,					/* cost of moving SSE register */
276  {2, 2, 8},				/* cost of loading SSE registers
277					   in SImode, DImode and TImode */
278  {2, 2, 8},				/* cost of storing SSE registers
279					   in SImode, DImode and TImode */
280  6,					/* MMX or SSE register to integer */
281  32,					/* size of prefetch block */
282  1,					/* number of parallel prefetches */
283};
284
285static const
286struct processor_costs athlon_cost = {
287  1,					/* cost of an add instruction */
288  2,					/* cost of a lea instruction */
289  1,					/* variable shift costs */
290  1,					/* constant shift costs */
291  5,					/* cost of starting a multiply */
292  0,					/* cost of multiply per each bit set */
293  42,					/* cost of a divide/mod */
294  1,					/* cost of movsx */
295  1,					/* cost of movzx */
296  8,					/* "large" insn */
297  9,					/* MOVE_RATIO */
298  4,					/* cost for loading QImode using movzbl */
299  {4, 5, 4},				/* cost of loading integer registers
300					   in QImode, HImode and SImode.
301					   Relative to reg-reg move (2).  */
302  {2, 3, 2},				/* cost of storing integer registers */
303  4,					/* cost of reg,reg fld/fst */
304  {6, 6, 20},				/* cost of loading fp registers
305					   in SFmode, DFmode and XFmode */
306  {4, 4, 16},				/* cost of loading integer registers */
307  2,					/* cost of moving MMX register */
308  {2, 2},				/* cost of loading MMX registers
309					   in SImode and DImode */
310  {2, 2},				/* cost of storing MMX registers
311					   in SImode and DImode */
312  2,					/* cost of moving SSE register */
313  {2, 2, 8},				/* cost of loading SSE registers
314					   in SImode, DImode and TImode */
315  {2, 2, 8},				/* cost of storing SSE registers
316					   in SImode, DImode and TImode */
317  6,					/* MMX or SSE register to integer */
318  64,					/* size of prefetch block */
319  6,					/* number of parallel prefetches */
320};
321
322static const
323struct processor_costs pentium4_cost = {
324  1,					/* cost of an add instruction */
325  1,					/* cost of a lea instruction */
326  8,					/* variable shift costs */
327  8,					/* constant shift costs */
328  30,					/* cost of starting a multiply */
329  0,					/* cost of multiply per each bit set */
330  112,					/* cost of a divide/mod */
331  1,					/* cost of movsx */
332  1,					/* cost of movzx */
333  16,					/* "large" insn */
334  6,					/* MOVE_RATIO */
335  2,					/* cost for loading QImode using movzbl */
336  {4, 5, 4},				/* cost of loading integer registers
337					   in QImode, HImode and SImode.
338					   Relative to reg-reg move (2).  */
339  {2, 3, 2},				/* cost of storing integer registers */
340  2,					/* cost of reg,reg fld/fst */
341  {2, 2, 6},				/* cost of loading fp registers
342					   in SFmode, DFmode and XFmode */
343  {4, 4, 6},				/* cost of loading integer registers */
344  2,					/* cost of moving MMX register */
345  {2, 2},				/* cost of loading MMX registers
346					   in SImode and DImode */
347  {2, 2},				/* cost of storing MMX registers
348					   in SImode and DImode */
349  12,					/* cost of moving SSE register */
350  {12, 12, 12},				/* cost of loading SSE registers
351					   in SImode, DImode and TImode */
352  {2, 2, 8},				/* cost of storing SSE registers
353					   in SImode, DImode and TImode */
354  10,					/* MMX or SSE register to integer */
355  64,					/* size of prefetch block */
356  6,					/* number of parallel prefetches */
357};
358
359const struct processor_costs *ix86_cost = &pentium_cost;
360
361/* Processor feature/optimization bitmasks.  */
362#define m_386 (1<<PROCESSOR_I386)
363#define m_486 (1<<PROCESSOR_I486)
364#define m_PENT (1<<PROCESSOR_PENTIUM)
365#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
366#define m_K6  (1<<PROCESSOR_K6)
367#define m_ATHLON  (1<<PROCESSOR_ATHLON)
368#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
369
370const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
371const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
372const int x86_zero_extend_with_and = m_486 | m_PENT;
373const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
374const int x86_double_with_add = ~m_386;
375const int x86_use_bit_test = m_386;
376const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
377const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
378const int x86_3dnow_a = m_ATHLON;
379const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
380const int x86_branch_hints = m_PENT4;
381const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
382const int x86_partial_reg_stall = m_PPRO;
383const int x86_use_loop = m_K6;
384const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
385const int x86_use_mov0 = m_K6;
386const int x86_use_cltd = ~(m_PENT | m_K6);
387const int x86_read_modify_write = ~m_PENT;
388const int x86_read_modify = ~(m_PENT | m_PPRO);
389const int x86_split_long_moves = m_PPRO;
390const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
391const int x86_single_stringop = m_386 | m_PENT4;
392const int x86_qimode_math = ~(0);
393const int x86_promote_qi_regs = 0;
394const int x86_himode_math = ~(m_PPRO);
395const int x86_promote_hi_regs = m_PPRO;
396const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
397const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
398const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
399const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
400const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
401const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
402const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
403const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
404const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
405const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
406const int x86_decompose_lea = m_PENT4;
407
408/* In case the avreage insn count for single function invocation is
409   lower than this constant, emit fast (but longer) prologue and
410   epilogue code.  */
411#define FAST_PROLOGUE_INSN_COUNT 30
412/* Set by prologue expander and used by epilogue expander to determine
413   the style used.  */
414static int use_fast_prologue_epilogue;
415
416#define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
417
418static const char *const hi_reg_name[] = HI_REGISTER_NAMES; /* names for 16 bit regs */
419static const char *const qi_reg_name[] = QI_REGISTER_NAMES; /* names for 8 bit regs (low) */
420static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; /* names for 8 bit regs (high) */
421
422/* Array of the smallest class containing reg number REGNO, indexed by
423   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
424
425enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
426{
427  /* ax, dx, cx, bx */
428  AREG, DREG, CREG, BREG,
429  /* si, di, bp, sp */
430  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
431  /* FP registers */
432  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
433  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
434  /* arg pointer */
435  NON_Q_REGS,
436  /* flags, fpsr, dirflag, frame */
437  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
438  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
439  SSE_REGS, SSE_REGS,
440  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
441  MMX_REGS, MMX_REGS,
442  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
443  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
444  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
445  SSE_REGS, SSE_REGS,
446};
447
448/* The "default" register map used in 32bit mode.  */
449
450int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
451{
452  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
453  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
454  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
455  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
456  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
457  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
458  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
459};
460
461static int const x86_64_int_parameter_registers[6] = {5 /*RDI*/, 4 /*RSI*/,
462					        1 /*RDX*/, 2 /*RCX*/,
463					        FIRST_REX_INT_REG /*R8 */,
464					        FIRST_REX_INT_REG + 1 /*R9 */};
465static int const x86_64_int_return_registers[4] = {0 /*RAX*/, 1 /*RDI*/, 5, 4};
466
467/* The "default" register map used in 64bit mode.  */
468int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
469{
470  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
471  33, 34, 35, 36, 37, 38, 39, 40	/* fp regs */
472  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
473  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
474  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
475  8,9,10,11,12,13,14,15,		/* extended integer registers */
476  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
477};
478
479/* Define the register numbers to be used in Dwarf debugging information.
480   The SVR4 reference port C compiler uses the following register numbers
481   in its Dwarf output code:
482	0 for %eax (gcc regno = 0)
483	1 for %ecx (gcc regno = 2)
484	2 for %edx (gcc regno = 1)
485	3 for %ebx (gcc regno = 3)
486	4 for %esp (gcc regno = 7)
487	5 for %ebp (gcc regno = 6)
488	6 for %esi (gcc regno = 4)
489	7 for %edi (gcc regno = 5)
490   The following three DWARF register numbers are never generated by
491   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
492   believes these numbers have these meanings.
493	8  for %eip    (no gcc equivalent)
494	9  for %eflags (gcc regno = 17)
495	10 for %trapno (no gcc equivalent)
496   It is not at all clear how we should number the FP stack registers
497   for the x86 architecture.  If the version of SDB on x86/svr4 were
498   a bit less brain dead with respect to floating-point then we would
499   have a precedent to follow with respect to DWARF register numbers
500   for x86 FP registers, but the SDB on x86/svr4 is so completely
501   broken with respect to FP registers that it is hardly worth thinking
502   of it as something to strive for compatibility with.
503   The version of x86/svr4 SDB I have at the moment does (partially)
504   seem to believe that DWARF register number 11 is associated with
505   the x86 register %st(0), but that's about all.  Higher DWARF
506   register numbers don't seem to be associated with anything in
507   particular, and even for DWARF regno 11, SDB only seems to under-
508   stand that it should say that a variable lives in %st(0) (when
509   asked via an `=' command) if we said it was in DWARF regno 11,
510   but SDB still prints garbage when asked for the value of the
511   variable in question (via a `/' command).
512   (Also note that the labels SDB prints for various FP stack regs
513   when doing an `x' command are all wrong.)
514   Note that these problems generally don't affect the native SVR4
515   C compiler because it doesn't allow the use of -O with -g and
516   because when it is *not* optimizing, it allocates a memory
517   location for each floating-point variable, and the memory
518   location is what gets described in the DWARF AT_location
519   attribute for the variable in question.
520   Regardless of the severe mental illness of the x86/svr4 SDB, we
521   do something sensible here and we use the following DWARF
522   register numbers.  Note that these are all stack-top-relative
523   numbers.
524	11 for %st(0) (gcc regno = 8)
525	12 for %st(1) (gcc regno = 9)
526	13 for %st(2) (gcc regno = 10)
527	14 for %st(3) (gcc regno = 11)
528	15 for %st(4) (gcc regno = 12)
529	16 for %st(5) (gcc regno = 13)
530	17 for %st(6) (gcc regno = 14)
531	18 for %st(7) (gcc regno = 15)
532*/
533int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
534{
535  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
536  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
537  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
538  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
539  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
540  -1, -1, -1, -1, -1, -1, -1, -1,	/* extemded integer registers */
541  -1, -1, -1, -1, -1, -1, -1, -1,	/* extemded SSE registers */
542};
543
544/* Test and compare insns in i386.md store the information needed to
545   generate branch and scc insns here.  */
546
547rtx ix86_compare_op0 = NULL_RTX;
548rtx ix86_compare_op1 = NULL_RTX;
549
550#define MAX_386_STACK_LOCALS 3
551/* Size of the register save area.  */
552#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
553
554/* Define the structure for the machine field in struct function.  */
555struct machine_function
556{
557  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
558  int save_varrargs_registers;
559  int accesses_prev_frame;
560};
561
562#define ix86_stack_locals (cfun->machine->stack_locals)
563#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
564
565/* Structure describing stack frame layout.
566   Stack grows downward:
567
568   [arguments]
569					      <- ARG_POINTER
570   saved pc
571
572   saved frame pointer if frame_pointer_needed
573					      <- HARD_FRAME_POINTER
574   [saved regs]
575
576   [padding1]          \
577		        )
578   [va_arg registers]  (
579		        > to_allocate	      <- FRAME_POINTER
580   [frame]	       (
581		        )
582   [padding2]	       /
583  */
584struct ix86_frame
585{
586  int nregs;
587  int padding1;
588  int va_arg_size;
589  HOST_WIDE_INT frame;
590  int padding2;
591  int outgoing_arguments_size;
592  int red_zone_size;
593
594  HOST_WIDE_INT to_allocate;
595  /* The offsets relative to ARG_POINTER.  */
596  HOST_WIDE_INT frame_pointer_offset;
597  HOST_WIDE_INT hard_frame_pointer_offset;
598  HOST_WIDE_INT stack_pointer_offset;
599};
600
601/* Used to enable/disable debugging features.  */
602const char *ix86_debug_arg_string, *ix86_debug_addr_string;
603/* Code model option as passed by user.  */
604const char *ix86_cmodel_string;
605/* Parsed value.  */
606enum cmodel ix86_cmodel;
607/* Asm dialect.  */
608const char *ix86_asm_string;
609enum asm_dialect ix86_asm_dialect = ASM_ATT;
610
611/* which cpu are we scheduling for */
612enum processor_type ix86_cpu;
613
614/* which unit we are generating floating point math for */
615enum fpmath_unit ix86_fpmath;
616
617/* which instruction set architecture to use.  */
618int ix86_arch;
619
620/* Strings to hold which cpu and instruction set architecture  to use.  */
621const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
622const char *ix86_arch_string;		/* for -march=<xxx> */
623const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */
624
625/* # of registers to use to pass arguments.  */
626const char *ix86_regparm_string;
627
628/* true if sse prefetch instruction is not NOOP.  */
629int x86_prefetch_sse;
630
631/* ix86_regparm_string as a number */
632int ix86_regparm;
633
634/* Alignment to use for loops and jumps:  */
635
636/* Power of two alignment for loops.  */
637const char *ix86_align_loops_string;
638
639/* Power of two alignment for non-loop jumps.  */
640const char *ix86_align_jumps_string;
641
642/* Power of two alignment for stack boundary in bytes.  */
643const char *ix86_preferred_stack_boundary_string;
644
645/* Preferred alignment for stack boundary in bits.  */
646int ix86_preferred_stack_boundary;
647
648/* Values 1-5: see jump.c */
649int ix86_branch_cost;
650const char *ix86_branch_cost_string;
651
652/* Power of two alignment for functions.  */
653const char *ix86_align_funcs_string;
654
655/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
656static char internal_label_prefix[16];
657static int internal_label_prefix_len;
658
659static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
660static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
661static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
662				       int, int, FILE *));
663static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
664static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
665							   rtx *, rtx *));
666static rtx gen_push PARAMS ((rtx));
667static int memory_address_length PARAMS ((rtx addr));
668static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
669static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
670static int ix86_safe_length PARAMS ((rtx));
671static enum attr_memory ix86_safe_memory PARAMS ((rtx));
672static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
673static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
674static void ix86_dump_ppro_packet PARAMS ((FILE *));
675static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
676static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
677					 rtx));
678static void ix86_init_machine_status PARAMS ((struct function *));
679static void ix86_mark_machine_status PARAMS ((struct function *));
680static void ix86_free_machine_status PARAMS ((struct function *));
681static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
682static int ix86_safe_length_prefix PARAMS ((rtx));
683static int ix86_nsaved_regs PARAMS ((void));
684static void ix86_emit_save_regs PARAMS ((void));
685static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
686static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
687static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
688static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
689static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
690static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
691static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
692static rtx ix86_expand_aligntest PARAMS ((rtx, int));
693static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
694static int ix86_issue_rate PARAMS ((void));
695static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
696static void ix86_sched_init PARAMS ((FILE *, int, int));
697static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
698static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
699static void ix86_init_mmx_sse_builtins PARAMS ((void));
700
701struct ix86_address
702{
703  rtx base, index, disp;
704  HOST_WIDE_INT scale;
705};
706
707static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
708
709struct builtin_description;
710static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
711					 tree, rtx));
712static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
713					    tree, rtx));
714static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
715static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
716static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
717static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
718						     tree, rtx));
719static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
720static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
721static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
722static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
723					      enum rtx_code *,
724					      enum rtx_code *,
725					      enum rtx_code *));
726static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
727					  rtx *, rtx *));
728static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
729static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
730static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
731static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
732static int ix86_save_reg PARAMS ((int, int));
733static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
734static int ix86_comp_type_attributes PARAMS ((tree, tree));
735const struct attribute_spec ix86_attribute_table[];
736static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
737static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
738
739#ifdef DO_GLOBAL_CTORS_BODY
740static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
741#endif
742
743/* Register class used for passing given 64bit part of the argument.
744   These represent classes as documented by the PS ABI, with the exception
745   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
746   use SF or DFmode move instead of DImode to avoid reformating penalties.
747
748   Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
749   whenever possible (upper half does contain padding).
750 */
751enum x86_64_reg_class
752  {
753    X86_64_NO_CLASS,
754    X86_64_INTEGER_CLASS,
755    X86_64_INTEGERSI_CLASS,
756    X86_64_SSE_CLASS,
757    X86_64_SSESF_CLASS,
758    X86_64_SSEDF_CLASS,
759    X86_64_SSEUP_CLASS,
760    X86_64_X87_CLASS,
761    X86_64_X87UP_CLASS,
762    X86_64_MEMORY_CLASS
763  };
764static const char * const x86_64_reg_class_name[] =
765   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
766
767#define MAX_CLASSES 4
768static int classify_argument PARAMS ((enum machine_mode, tree,
769				      enum x86_64_reg_class [MAX_CLASSES],
770				      int));
771static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
772				     int *));
773static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
774					const int *, int));
775static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
776						    enum x86_64_reg_class));
777
778/* Initialize the GCC target structure.  */
779#undef TARGET_ATTRIBUTE_TABLE
780#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
781#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
782#  undef TARGET_MERGE_DECL_ATTRIBUTES
783#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
784#endif
785
786#undef TARGET_COMP_TYPE_ATTRIBUTES
787#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
788
789#undef TARGET_INIT_BUILTINS
790#define TARGET_INIT_BUILTINS ix86_init_builtins
791
792#undef TARGET_EXPAND_BUILTIN
793#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
794
795#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
796   static void ix86_osf_output_function_prologue PARAMS ((FILE *,
797							  HOST_WIDE_INT));
798#  undef TARGET_ASM_FUNCTION_PROLOGUE
799#  define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
800#endif
801
802#undef TARGET_ASM_OPEN_PAREN
803#define TARGET_ASM_OPEN_PAREN ""
804#undef TARGET_ASM_CLOSE_PAREN
805#define TARGET_ASM_CLOSE_PAREN ""
806
807#undef TARGET_ASM_ALIGNED_HI_OP
808#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
809#undef TARGET_ASM_ALIGNED_SI_OP
810#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
811#ifdef ASM_QUAD
812#undef TARGET_ASM_ALIGNED_DI_OP
813#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
814#endif
815
816#undef TARGET_ASM_UNALIGNED_HI_OP
817#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
818#undef TARGET_ASM_UNALIGNED_SI_OP
819#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
820#undef TARGET_ASM_UNALIGNED_DI_OP
821#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
822
823#undef TARGET_SCHED_ADJUST_COST
824#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
825#undef TARGET_SCHED_ISSUE_RATE
826#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
827#undef TARGET_SCHED_VARIABLE_ISSUE
828#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
829#undef TARGET_SCHED_INIT
830#define TARGET_SCHED_INIT ix86_sched_init
831#undef TARGET_SCHED_REORDER
832#define TARGET_SCHED_REORDER ix86_sched_reorder
833
834struct gcc_target targetm = TARGET_INITIALIZER;
835
836/* Sometimes certain combinations of command options do not make
837   sense on a particular target machine.  You can define a macro
838   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
839   defined, is executed once just after all the command options have
840   been parsed.
841
842   Don't use this macro to turn on various extra optimizations for
843   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
844
845void
846override_options ()
847{
848  int i;
849  /* Comes from final.c -- no real reason to change it.  */
850#define MAX_CODE_ALIGN 16
851
852  static struct ptt
853    {
854      const struct processor_costs *cost;	/* Processor costs */
855      const int target_enable;			/* Target flags to enable.  */
856      const int target_disable;			/* Target flags to disable.  */
857      const int align_loop;			/* Default alignments.  */
858      const int align_loop_max_skip;
859      const int align_jump;
860      const int align_jump_max_skip;
861      const int align_func;
862      const int branch_cost;
863    }
864  const processor_target_table[PROCESSOR_max] =
865    {
866      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
867      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
868      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
869      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
870      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
871      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
872      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
873    };
874
875  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
876  static struct pta
877    {
878      const char *const name;		/* processor name or nickname.  */
879      const enum processor_type processor;
880      const enum pta_flags
881	{
882	  PTA_SSE = 1,
883	  PTA_SSE2 = 2,
884	  PTA_MMX = 4,
885	  PTA_PREFETCH_SSE = 8,
886	  PTA_3DNOW = 16,
887	  PTA_3DNOW_A = 64
888	} flags;
889    }
890  const processor_alias_table[] =
891    {
892      {"i386", PROCESSOR_I386, 0},
893      {"i486", PROCESSOR_I486, 0},
894      {"i586", PROCESSOR_PENTIUM, 0},
895      {"pentium", PROCESSOR_PENTIUM, 0},
896      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
897      {"i686", PROCESSOR_PENTIUMPRO, 0},
898      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
899      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
900      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
901      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
902				       PTA_MMX | PTA_PREFETCH_SSE},
903      {"k6", PROCESSOR_K6, PTA_MMX},
904      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
905      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
906      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
907				   | PTA_3DNOW_A},
908      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
909					 | PTA_3DNOW | PTA_3DNOW_A},
910      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
911				    | PTA_3DNOW_A | PTA_SSE},
912      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
913				      | PTA_3DNOW_A | PTA_SSE},
914      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
915				      | PTA_3DNOW_A | PTA_SSE},
916    };
917
918  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
919
920#ifdef SUBTARGET_OVERRIDE_OPTIONS
921  SUBTARGET_OVERRIDE_OPTIONS;
922#endif
923
924  if (!ix86_cpu_string && ix86_arch_string)
925    ix86_cpu_string = ix86_arch_string;
926  if (!ix86_cpu_string)
927    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
928  if (!ix86_arch_string)
929    ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
930
931  if (ix86_cmodel_string != 0)
932    {
933      if (!strcmp (ix86_cmodel_string, "small"))
934	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
935      else if (flag_pic)
936	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
937      else if (!strcmp (ix86_cmodel_string, "32"))
938	ix86_cmodel = CM_32;
939      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
940	ix86_cmodel = CM_KERNEL;
941      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
942	ix86_cmodel = CM_MEDIUM;
943      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
944	ix86_cmodel = CM_LARGE;
945      else
946	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
947    }
948  else
949    {
950      ix86_cmodel = CM_32;
951      if (TARGET_64BIT)
952	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
953    }
954  if (ix86_asm_string != 0)
955    {
956      if (!strcmp (ix86_asm_string, "intel"))
957	ix86_asm_dialect = ASM_INTEL;
958      else if (!strcmp (ix86_asm_string, "att"))
959	ix86_asm_dialect = ASM_ATT;
960      else
961	error ("bad value (%s) for -masm= switch", ix86_asm_string);
962    }
963  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
964    error ("code model `%s' not supported in the %s bit mode",
965	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
966  if (ix86_cmodel == CM_LARGE)
967    sorry ("code model `large' not supported yet");
968  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
969    sorry ("%i-bit mode not compiled in",
970	   (target_flags & MASK_64BIT) ? 64 : 32);
971
972  for (i = 0; i < pta_size; i++)
973    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
974      {
975	ix86_arch = processor_alias_table[i].processor;
976	/* Default cpu tuning to the architecture.  */
977	ix86_cpu = ix86_arch;
978	if (processor_alias_table[i].flags & PTA_MMX
979	    && !(target_flags & MASK_MMX_SET))
980	  target_flags |= MASK_MMX;
981	if (processor_alias_table[i].flags & PTA_3DNOW
982	    && !(target_flags & MASK_3DNOW_SET))
983	  target_flags |= MASK_3DNOW;
984	if (processor_alias_table[i].flags & PTA_3DNOW_A
985	    && !(target_flags & MASK_3DNOW_A_SET))
986	  target_flags |= MASK_3DNOW_A;
987	if (processor_alias_table[i].flags & PTA_SSE
988	    && !(target_flags & MASK_SSE_SET))
989	  target_flags |= MASK_SSE;
990	if (processor_alias_table[i].flags & PTA_SSE2
991	    && !(target_flags & MASK_SSE2_SET))
992	  target_flags |= MASK_SSE2;
993	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
994	  x86_prefetch_sse = true;
995	break;
996      }
997
998  if (i == pta_size)
999    error ("bad value (%s) for -march= switch", ix86_arch_string);
1000
1001  for (i = 0; i < pta_size; i++)
1002    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1003      {
1004	ix86_cpu = processor_alias_table[i].processor;
1005	break;
1006      }
1007  if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1008    x86_prefetch_sse = true;
1009  if (i == pta_size)
1010    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1011
1012  if (optimize_size)
1013    ix86_cost = &size_cost;
1014  else
1015    ix86_cost = processor_target_table[ix86_cpu].cost;
1016  target_flags |= processor_target_table[ix86_cpu].target_enable;
1017  target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1018
1019  /* Arrange to set up i386_stack_locals for all functions.  */
1020  init_machine_status = ix86_init_machine_status;
1021  mark_machine_status = ix86_mark_machine_status;
1022  free_machine_status = ix86_free_machine_status;
1023
1024  /* Validate -mregparm= value.  */
1025  if (ix86_regparm_string)
1026    {
1027      i = atoi (ix86_regparm_string);
1028      if (i < 0 || i > REGPARM_MAX)
1029	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1030      else
1031	ix86_regparm = i;
1032    }
1033  else
1034   if (TARGET_64BIT)
1035     ix86_regparm = REGPARM_MAX;
1036
1037  /* If the user has provided any of the -malign-* options,
1038     warn and use that value only if -falign-* is not set.
1039     Remove this code in GCC 3.2 or later.  */
1040  if (ix86_align_loops_string)
1041    {
1042      warning ("-malign-loops is obsolete, use -falign-loops");
1043      if (align_loops == 0)
1044	{
1045	  i = atoi (ix86_align_loops_string);
1046	  if (i < 0 || i > MAX_CODE_ALIGN)
1047	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1048	  else
1049	    align_loops = 1 << i;
1050	}
1051    }
1052
1053  if (ix86_align_jumps_string)
1054    {
1055      warning ("-malign-jumps is obsolete, use -falign-jumps");
1056      if (align_jumps == 0)
1057	{
1058	  i = atoi (ix86_align_jumps_string);
1059	  if (i < 0 || i > MAX_CODE_ALIGN)
1060	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1061	  else
1062	    align_jumps = 1 << i;
1063	}
1064    }
1065
1066  if (ix86_align_funcs_string)
1067    {
1068      warning ("-malign-functions is obsolete, use -falign-functions");
1069      if (align_functions == 0)
1070	{
1071	  i = atoi (ix86_align_funcs_string);
1072	  if (i < 0 || i > MAX_CODE_ALIGN)
1073	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1074	  else
1075	    align_functions = 1 << i;
1076	}
1077    }
1078
1079  /* Default align_* from the processor table.  */
1080  if (align_loops == 0)
1081    {
1082      align_loops = processor_target_table[ix86_cpu].align_loop;
1083      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1084    }
1085  if (align_jumps == 0)
1086    {
1087      align_jumps = processor_target_table[ix86_cpu].align_jump;
1088      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1089    }
1090  if (align_functions == 0)
1091    {
1092      align_functions = processor_target_table[ix86_cpu].align_func;
1093    }
1094
1095  /* Validate -mpreferred-stack-boundary= value, or provide default.
1096     The default of 128 bits is for Pentium III's SSE __m128, but we
1097     don't want additional code to keep the stack aligned when
1098     optimizing for code size.  */
1099  ix86_preferred_stack_boundary = (optimize_size
1100				   ? TARGET_64BIT ? 64 : 32
1101				   : 128);
1102  if (ix86_preferred_stack_boundary_string)
1103    {
1104      i = atoi (ix86_preferred_stack_boundary_string);
1105      if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1106	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1107	       TARGET_64BIT ? 3 : 2);
1108      else
1109	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1110    }
1111
1112  /* Validate -mbranch-cost= value, or provide default.  */
1113  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1114  if (ix86_branch_cost_string)
1115    {
1116      i = atoi (ix86_branch_cost_string);
1117      if (i < 0 || i > 5)
1118	error ("-mbranch-cost=%d is not between 0 and 5", i);
1119      else
1120	ix86_branch_cost = i;
1121    }
1122
1123  /* Keep nonleaf frame pointers.  */
1124  if (TARGET_OMIT_LEAF_FRAME_POINTER)
1125    flag_omit_frame_pointer = 1;
1126
1127  /* If we're doing fast math, we don't care about comparison order
1128     wrt NaNs.  This lets us use a shorter comparison sequence.  */
1129  if (flag_unsafe_math_optimizations)
1130    target_flags &= ~MASK_IEEE_FP;
1131
1132  if (TARGET_64BIT)
1133    {
1134      if (TARGET_ALIGN_DOUBLE)
1135	error ("-malign-double makes no sense in the 64bit mode");
1136      if (TARGET_RTD)
1137	error ("-mrtd calling convention not supported in the 64bit mode");
1138      /* Enable by default the SSE and MMX builtins.  */
1139      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1140      ix86_fpmath = FPMATH_SSE;
1141     }
1142  else
1143    ix86_fpmath = FPMATH_387;
1144
1145  if (ix86_fpmath_string != 0)
1146    {
1147      if (! strcmp (ix86_fpmath_string, "387"))
1148	ix86_fpmath = FPMATH_387;
1149      else if (! strcmp (ix86_fpmath_string, "sse"))
1150	{
1151	  if (!TARGET_SSE)
1152	    {
1153	      warning ("SSE instruction set disabled, using 387 arithmetics");
1154	      ix86_fpmath = FPMATH_387;
1155	    }
1156	  else
1157	    ix86_fpmath = FPMATH_SSE;
1158	}
1159      else if (! strcmp (ix86_fpmath_string, "387,sse")
1160	       || ! strcmp (ix86_fpmath_string, "sse,387"))
1161	{
1162	  if (!TARGET_SSE)
1163	    {
1164	      warning ("SSE instruction set disabled, using 387 arithmetics");
1165	      ix86_fpmath = FPMATH_387;
1166	    }
1167	  else if (!TARGET_80387)
1168	    {
1169	      warning ("387 instruction set disabled, using SSE arithmetics");
1170	      ix86_fpmath = FPMATH_SSE;
1171	    }
1172	  else
1173	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
1174	}
1175      else
1176	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1177    }
1178
1179  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1180     on by -msse.  */
1181  if (TARGET_SSE)
1182    {
1183      target_flags |= MASK_MMX;
1184      x86_prefetch_sse = true;
1185    }
1186
1187  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1188  if (TARGET_3DNOW)
1189    {
1190      target_flags |= MASK_MMX;
1191      /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1192	 extensions it adds.  */
1193      if (x86_3dnow_a & (1 << ix86_arch))
1194	target_flags |= MASK_3DNOW_A;
1195    }
1196  if ((x86_accumulate_outgoing_args & CPUMASK)
1197      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1198      && !optimize_size)
1199    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1200
1201  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
1202  {
1203    char *p;
1204    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1205    p = strchr (internal_label_prefix, 'X');
1206    internal_label_prefix_len = p - internal_label_prefix;
1207    *p = '\0';
1208  }
1209}
1210
1211void
1212optimization_options (level, size)
1213     int level;
1214     int size ATTRIBUTE_UNUSED;
1215{
1216  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
1217     make the problem with not enough registers even worse.  */
1218#ifdef INSN_SCHEDULING
1219  if (level > 1)
1220    flag_schedule_insns = 0;
1221#endif
1222  if (TARGET_64BIT && optimize >= 1)
1223    flag_omit_frame_pointer = 1;
1224  if (TARGET_64BIT)
1225    {
1226      flag_pcc_struct_return = 0;
1227      flag_asynchronous_unwind_tables = 1;
1228    }
1229}
1230
1231/* Table of valid machine attributes.  */
1232const struct attribute_spec ix86_attribute_table[] =
1233{
1234  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1235  /* Stdcall attribute says callee is responsible for popping arguments
1236     if they are not variable.  */
1237  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1238  /* Cdecl attribute says the callee is a normal C declaration */
1239  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1240  /* Regparm attribute specifies how many integer arguments are to be
1241     passed in registers.  */
1242  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
1243#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1244  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1245  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1246  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
1247#endif
1248  { NULL,        0, 0, false, false, false, NULL }
1249};
1250
1251/* Handle a "cdecl" or "stdcall" attribute;
1252   arguments as in struct attribute_spec.handler.  */
1253static tree
1254ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1255     tree *node;
1256     tree name;
1257     tree args ATTRIBUTE_UNUSED;
1258     int flags ATTRIBUTE_UNUSED;
1259     bool *no_add_attrs;
1260{
1261  if (TREE_CODE (*node) != FUNCTION_TYPE
1262      && TREE_CODE (*node) != METHOD_TYPE
1263      && TREE_CODE (*node) != FIELD_DECL
1264      && TREE_CODE (*node) != TYPE_DECL)
1265    {
1266      warning ("`%s' attribute only applies to functions",
1267	       IDENTIFIER_POINTER (name));
1268      *no_add_attrs = true;
1269    }
1270
1271  if (TARGET_64BIT)
1272    {
1273      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1274      *no_add_attrs = true;
1275    }
1276
1277  return NULL_TREE;
1278}
1279
1280/* Handle a "regparm" attribute;
1281   arguments as in struct attribute_spec.handler.  */
1282static tree
1283ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1284     tree *node;
1285     tree name;
1286     tree args;
1287     int flags ATTRIBUTE_UNUSED;
1288     bool *no_add_attrs;
1289{
1290  if (TREE_CODE (*node) != FUNCTION_TYPE
1291      && TREE_CODE (*node) != METHOD_TYPE
1292      && TREE_CODE (*node) != FIELD_DECL
1293      && TREE_CODE (*node) != TYPE_DECL)
1294    {
1295      warning ("`%s' attribute only applies to functions",
1296	       IDENTIFIER_POINTER (name));
1297      *no_add_attrs = true;
1298    }
1299  else
1300    {
1301      tree cst;
1302
1303      cst = TREE_VALUE (args);
1304      if (TREE_CODE (cst) != INTEGER_CST)
1305	{
1306	  warning ("`%s' attribute requires an integer constant argument",
1307		   IDENTIFIER_POINTER (name));
1308	  *no_add_attrs = true;
1309	}
1310      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1311	{
1312	  warning ("argument to `%s' attribute larger than %d",
1313		   IDENTIFIER_POINTER (name), REGPARM_MAX);
1314	  *no_add_attrs = true;
1315	}
1316    }
1317
1318  return NULL_TREE;
1319}
1320
1321#if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1322
1323/* Generate the assembly code for function entry.  FILE is a stdio
1324   stream to output the code to.  SIZE is an int: how many units of
1325   temporary storage to allocate.
1326
1327   Refer to the array `regs_ever_live' to determine which registers to
1328   save; `regs_ever_live[I]' is nonzero if register number I is ever
1329   used in the function.  This function is responsible for knowing
1330   which registers should not be saved even if used.
1331
1332   We override it here to allow for the new profiling code to go before
1333   the prologue and the old mcount code to go after the prologue (and
1334   after %ebx has been set up for ELF shared library support).  */
1335
1336static void
1337ix86_osf_output_function_prologue (file, size)
1338     FILE *file;
1339     HOST_WIDE_INT size;
1340{
1341  const char *prefix = "";
1342  const char *const lprefix = LPREFIX;
1343  int labelno = profile_label_no;
1344
1345#ifdef OSF_OS
1346
1347  if (TARGET_UNDERSCORES)
1348    prefix = "_";
1349
1350  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1351    {
1352      if (!flag_pic && !HALF_PIC_P ())
1353	{
1354	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1355	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1356	}
1357
1358      else if (HALF_PIC_P ())
1359	{
1360	  rtx symref;
1361
1362	  HALF_PIC_EXTERNAL ("_mcount_ptr");
1363	  symref = HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode,
1364						     "_mcount_ptr"));
1365
1366	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1367	  fprintf (file, "\tmovl %s%s,%%eax\n", prefix,
1368		   XSTR (symref, 0));
1369	  fprintf (file, "\tcall *(%%eax)\n");
1370	}
1371
1372      else
1373	{
1374	  static int call_no = 0;
1375
1376	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1377	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1378	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1379		   lprefix, call_no++);
1380	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1381		   lprefix, labelno);
1382	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1383		   prefix);
1384	  fprintf (file, "\tcall *(%%eax)\n");
1385	}
1386    }
1387
1388#else  /* !OSF_OS */
1389
1390  if (current_function_profile && OSF_PROFILE_BEFORE_PROLOGUE)
1391    {
1392      if (!flag_pic)
1393	{
1394	  fprintf (file, "\tmovl $%sP%d,%%edx\n", lprefix, labelno);
1395	  fprintf (file, "\tcall *%s_mcount_ptr\n", prefix);
1396	}
1397
1398      else
1399	{
1400	  static int call_no = 0;
1401
1402	  fprintf (file, "\tcall %sPc%d\n", lprefix, call_no);
1403	  fprintf (file, "%sPc%d:\tpopl %%eax\n", lprefix, call_no);
1404	  fprintf (file, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1405		   lprefix, call_no++);
1406	  fprintf (file, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1407		   lprefix, labelno);
1408	  fprintf (file, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1409		   prefix);
1410	  fprintf (file, "\tcall *(%%eax)\n");
1411	}
1412    }
1413#endif /* !OSF_OS */
1414
1415  function_prologue (file, size);
1416}
1417
1418#endif  /* OSF_OS || TARGET_OSF1ELF */
1419
1420/* Return 0 if the attributes for two types are incompatible, 1 if they
1421   are compatible, and 2 if they are nearly compatible (which causes a
1422   warning to be generated).  */
1423
1424static int
1425ix86_comp_type_attributes (type1, type2)
1426     tree type1;
1427     tree type2;
1428{
1429  /* Check for mismatch of non-default calling convention.  */
1430  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1431
1432  if (TREE_CODE (type1) != FUNCTION_TYPE)
1433    return 1;
1434
1435  /* Check for mismatched return types (cdecl vs stdcall).  */
1436  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1437      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1438    return 0;
1439  return 1;
1440}
1441
1442/* Value is the number of bytes of arguments automatically
1443   popped when returning from a subroutine call.
1444   FUNDECL is the declaration node of the function (as a tree),
1445   FUNTYPE is the data type of the function (as a tree),
1446   or for a library call it is an identifier node for the subroutine name.
1447   SIZE is the number of bytes of arguments passed on the stack.
1448
1449   On the 80386, the RTD insn may be used to pop them if the number
1450     of args is fixed, but if the number is variable then the caller
1451     must pop them all.  RTD can't be used for library calls now
1452     because the library is compiled with the Unix compiler.
1453   Use of RTD is a selectable option, since it is incompatible with
1454   standard Unix calling sequences.  If the option is not selected,
1455   the caller must always pop the args.
1456
1457   The attribute stdcall is equivalent to RTD on a per module basis.  */
1458
1459int
1460ix86_return_pops_args (fundecl, funtype, size)
1461     tree fundecl;
1462     tree funtype;
1463     int size;
1464{
1465  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1466
1467    /* Cdecl functions override -mrtd, and never pop the stack.  */
1468  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1469
1470    /* Stdcall functions will pop the stack if not variable args.  */
1471    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1472      rtd = 1;
1473
1474    if (rtd
1475        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1476	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1477		== void_type_node)))
1478      return size;
1479  }
1480
1481  /* Lose any fake structure return argument.  */
1482  if (aggregate_value_p (TREE_TYPE (funtype))
1483      && !TARGET_64BIT)
1484    return GET_MODE_SIZE (Pmode);
1485
1486    return 0;
1487}
1488
1489/* Argument support functions.  */
1490
1491/* Return true when register may be used to pass function parameters.  */
1492bool
1493ix86_function_arg_regno_p (regno)
1494     int regno;
1495{
1496  int i;
1497  if (!TARGET_64BIT)
1498    return (regno < REGPARM_MAX
1499	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1500  if (SSE_REGNO_P (regno) && TARGET_SSE)
1501    return true;
1502  /* RAX is used as hidden argument to va_arg functions.  */
1503  if (!regno)
1504    return true;
1505  for (i = 0; i < REGPARM_MAX; i++)
1506    if (regno == x86_64_int_parameter_registers[i])
1507      return true;
1508  return false;
1509}
1510
1511/* Initialize a variable CUM of type CUMULATIVE_ARGS
1512   for a call to a function whose data type is FNTYPE.
1513   For a library call, FNTYPE is 0.  */
1514
1515void
1516init_cumulative_args (cum, fntype, libname)
1517     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
1518     tree fntype;		/* tree ptr for function decl */
1519     rtx libname;		/* SYMBOL_REF of library name or 0 */
1520{
1521  static CUMULATIVE_ARGS zero_cum;
1522  tree param, next_param;
1523
1524  if (TARGET_DEBUG_ARG)
1525    {
1526      fprintf (stderr, "\ninit_cumulative_args (");
1527      if (fntype)
1528	fprintf (stderr, "fntype code = %s, ret code = %s",
1529		 tree_code_name[(int) TREE_CODE (fntype)],
1530		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1531      else
1532	fprintf (stderr, "no fntype");
1533
1534      if (libname)
1535	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1536    }
1537
1538  *cum = zero_cum;
1539
1540  /* Set up the number of registers to use for passing arguments.  */
1541  cum->nregs = ix86_regparm;
1542  cum->sse_nregs = SSE_REGPARM_MAX;
1543  if (fntype && !TARGET_64BIT)
1544    {
1545      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1546
1547      if (attr)
1548	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1549    }
1550  cum->maybe_vaarg = false;
1551
1552  /* Determine if this function has variable arguments.  This is
1553     indicated by the last argument being 'void_type_mode' if there
1554     are no variable arguments.  If there are variable arguments, then
1555     we won't pass anything in registers */
1556
1557  if (cum->nregs)
1558    {
1559      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1560	   param != 0; param = next_param)
1561	{
1562	  next_param = TREE_CHAIN (param);
1563	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1564	    {
1565	      if (!TARGET_64BIT)
1566		cum->nregs = 0;
1567	      cum->maybe_vaarg = true;
1568	    }
1569	}
1570    }
1571  if ((!fntype && !libname)
1572      || (fntype && !TYPE_ARG_TYPES (fntype)))
1573    cum->maybe_vaarg = 1;
1574
1575  if (TARGET_DEBUG_ARG)
1576    fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1577
1578  return;
1579}
1580
1581/* x86-64 register passing impleemntation.  See x86-64 ABI for details.  Goal
1582   of this code is to classify each 8bytes of incoming argument by the register
1583   class and assign registers accordingly.  */
1584
1585/* Return the union class of CLASS1 and CLASS2.
1586   See the x86-64 PS ABI for details.  */
1587
1588static enum x86_64_reg_class
1589merge_classes (class1, class2)
1590     enum x86_64_reg_class class1, class2;
1591{
1592  /* Rule #1: If both classes are equal, this is the resulting class.  */
1593  if (class1 == class2)
1594    return class1;
1595
1596  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1597     the other class.  */
1598  if (class1 == X86_64_NO_CLASS)
1599    return class2;
1600  if (class2 == X86_64_NO_CLASS)
1601    return class1;
1602
1603  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
1604  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1605    return X86_64_MEMORY_CLASS;
1606
1607  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
1608  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1609      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1610    return X86_64_INTEGERSI_CLASS;
1611  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1612      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1613    return X86_64_INTEGER_CLASS;
1614
1615  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
1616  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1617      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1618    return X86_64_MEMORY_CLASS;
1619
1620  /* Rule #6: Otherwise class SSE is used.  */
1621  return X86_64_SSE_CLASS;
1622}
1623
1624/* Classify the argument of type TYPE and mode MODE.
1625   CLASSES will be filled by the register class used to pass each word
1626   of the operand.  The number of words is returned.  In case the parameter
1627   should be passed in memory, 0 is returned. As a special case for zero
1628   sized containers, classes[0] will be NO_CLASS and 1 is returned.
1629
1630   BIT_OFFSET is used internally for handling records and specifies offset
1631   of the offset in bits modulo 256 to avoid overflow cases.
1632
1633   See the x86-64 PS ABI for details.
1634*/
1635
1636static int
1637classify_argument (mode, type, classes, bit_offset)
1638     enum machine_mode mode;
1639     tree type;
1640     enum x86_64_reg_class classes[MAX_CLASSES];
1641     int bit_offset;
1642{
1643  int bytes =
1644    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1645  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1646
1647  if (type && AGGREGATE_TYPE_P (type))
1648    {
1649      int i;
1650      tree field;
1651      enum x86_64_reg_class subclasses[MAX_CLASSES];
1652
1653      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
1654      if (bytes > 16)
1655	return 0;
1656
1657      for (i = 0; i < words; i++)
1658	classes[i] = X86_64_NO_CLASS;
1659
1660      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
1661	 signalize memory class, so handle it as special case.  */
1662      if (!words)
1663	{
1664	  classes[0] = X86_64_NO_CLASS;
1665	  return 1;
1666	}
1667
1668      /* Classify each field of record and merge classes.  */
1669      if (TREE_CODE (type) == RECORD_TYPE)
1670	{
1671	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1672	    {
1673	      if (TREE_CODE (field) == FIELD_DECL)
1674		{
1675		  int num;
1676
1677		  /* Bitfields are always classified as integer.  Handle them
1678		     early, since later code would consider them to be
1679		     misaligned integers.  */
1680		  if (DECL_BIT_FIELD (field))
1681		    {
1682		      for (i = int_bit_position (field) / 8 / 8;
1683			   i < (int_bit_position (field)
1684			        + tree_low_cst (DECL_SIZE (field), 0)
1685			       	+ 63) / 8 / 8; i++)
1686			classes[i] =
1687			  merge_classes (X86_64_INTEGER_CLASS,
1688					 classes[i]);
1689		    }
1690		  else
1691		    {
1692		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1693					       TREE_TYPE (field), subclasses,
1694					       (int_bit_position (field)
1695						+ bit_offset) % 256);
1696		      if (!num)
1697			return 0;
1698		      for (i = 0; i < num; i++)
1699			{
1700			  int pos =
1701			    (int_bit_position (field) + bit_offset) / 8 / 8;
1702			  classes[i + pos] =
1703			    merge_classes (subclasses[i], classes[i + pos]);
1704			}
1705		    }
1706		}
1707	    }
1708	}
1709      /* Arrays are handled as small records.  */
1710      else if (TREE_CODE (type) == ARRAY_TYPE)
1711	{
1712	  int num;
1713	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1714				   TREE_TYPE (type), subclasses, bit_offset);
1715	  if (!num)
1716	    return 0;
1717
1718	  /* The partial classes are now full classes.  */
1719	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1720	    subclasses[0] = X86_64_SSE_CLASS;
1721	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1722	    subclasses[0] = X86_64_INTEGER_CLASS;
1723
1724	  for (i = 0; i < words; i++)
1725	    classes[i] = subclasses[i % num];
1726	}
1727      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
1728      else if (TREE_CODE (type) == UNION_TYPE)
1729	{
1730	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1731	    {
1732	      if (TREE_CODE (field) == FIELD_DECL)
1733		{
1734		  int num;
1735		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1736					   TREE_TYPE (field), subclasses,
1737					   bit_offset);
1738		  if (!num)
1739		    return 0;
1740		  for (i = 0; i < num; i++)
1741		    classes[i] = merge_classes (subclasses[i], classes[i]);
1742		}
1743	    }
1744	}
1745      else
1746	abort ();
1747
1748      /* Final merger cleanup.  */
1749      for (i = 0; i < words; i++)
1750	{
1751	  /* If one class is MEMORY, everything should be passed in
1752	     memory.  */
1753	  if (classes[i] == X86_64_MEMORY_CLASS)
1754	    return 0;
1755
1756	  /* The X86_64_SSEUP_CLASS should be always preceded by
1757	     X86_64_SSE_CLASS.  */
1758	  if (classes[i] == X86_64_SSEUP_CLASS
1759	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1760	    classes[i] = X86_64_SSE_CLASS;
1761
1762	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
1763	  if (classes[i] == X86_64_X87UP_CLASS
1764	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1765	    classes[i] = X86_64_SSE_CLASS;
1766	}
1767      return words;
1768    }
1769
1770  /* Compute alignment needed.  We align all types to natural boundaries with
1771     exception of XFmode that is aligned to 64bits.  */
1772  if (mode != VOIDmode && mode != BLKmode)
1773    {
1774      int mode_alignment = GET_MODE_BITSIZE (mode);
1775
1776      if (mode == XFmode)
1777	mode_alignment = 128;
1778      else if (mode == XCmode)
1779	mode_alignment = 256;
1780      /* Misaligned fields are always returned in memory.  */
1781      if (bit_offset % mode_alignment)
1782	return 0;
1783    }
1784
1785  /* Classification of atomic types.  */
1786  switch (mode)
1787    {
1788    case DImode:
1789    case SImode:
1790    case HImode:
1791    case QImode:
1792    case CSImode:
1793    case CHImode:
1794    case CQImode:
1795      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1796	classes[0] = X86_64_INTEGERSI_CLASS;
1797      else
1798	classes[0] = X86_64_INTEGER_CLASS;
1799      return 1;
1800    case CDImode:
1801    case TImode:
1802      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1803      return 2;
1804    case CTImode:
1805      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1806      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1807      return 4;
1808    case SFmode:
1809      if (!(bit_offset % 64))
1810	classes[0] = X86_64_SSESF_CLASS;
1811      else
1812	classes[0] = X86_64_SSE_CLASS;
1813      return 1;
1814    case DFmode:
1815      classes[0] = X86_64_SSEDF_CLASS;
1816      return 1;
1817    case TFmode:
1818      classes[0] = X86_64_X87_CLASS;
1819      classes[1] = X86_64_X87UP_CLASS;
1820      return 2;
1821    case TCmode:
1822      classes[0] = X86_64_X87_CLASS;
1823      classes[1] = X86_64_X87UP_CLASS;
1824      classes[2] = X86_64_X87_CLASS;
1825      classes[3] = X86_64_X87UP_CLASS;
1826      return 4;
1827    case DCmode:
1828      classes[0] = X86_64_SSEDF_CLASS;
1829      classes[1] = X86_64_SSEDF_CLASS;
1830      return 2;
1831    case SCmode:
1832      classes[0] = X86_64_SSE_CLASS;
1833      return 1;
1834    case BLKmode:
1835      return 0;
1836    default:
1837      abort ();
1838    }
1839}
1840
1841/* Examine the argument and return set number of register required in each
1842   class.  Return 0 iff parameter should be passed in memory.  */
1843static int
1844examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1845     enum machine_mode mode;
1846     tree type;
1847     int *int_nregs, *sse_nregs;
1848     int in_return;
1849{
1850  enum x86_64_reg_class class[MAX_CLASSES];
1851  int n = classify_argument (mode, type, class, 0);
1852
1853  *int_nregs = 0;
1854  *sse_nregs = 0;
1855  if (!n)
1856    return 0;
1857  for (n--; n >= 0; n--)
1858    switch (class[n])
1859      {
1860      case X86_64_INTEGER_CLASS:
1861      case X86_64_INTEGERSI_CLASS:
1862	(*int_nregs)++;
1863	break;
1864      case X86_64_SSE_CLASS:
1865      case X86_64_SSESF_CLASS:
1866      case X86_64_SSEDF_CLASS:
1867	(*sse_nregs)++;
1868	break;
1869      case X86_64_NO_CLASS:
1870      case X86_64_SSEUP_CLASS:
1871	break;
1872      case X86_64_X87_CLASS:
1873      case X86_64_X87UP_CLASS:
1874	if (!in_return)
1875	  return 0;
1876	break;
1877      case X86_64_MEMORY_CLASS:
1878	abort ();
1879      }
1880  return 1;
1881}
1882/* Construct container for the argument used by GCC interface.  See
1883   FUNCTION_ARG for the detailed description.  */
1884static rtx
1885construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1886     enum machine_mode mode;
1887     tree type;
1888     int in_return;
1889     int nintregs, nsseregs;
1890     const int * intreg;
1891     int sse_regno;
1892{
1893  enum machine_mode tmpmode;
1894  int bytes =
1895    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1896  enum x86_64_reg_class class[MAX_CLASSES];
1897  int n;
1898  int i;
1899  int nexps = 0;
1900  int needed_sseregs, needed_intregs;
1901  rtx exp[MAX_CLASSES];
1902  rtx ret;
1903
1904  n = classify_argument (mode, type, class, 0);
1905  if (TARGET_DEBUG_ARG)
1906    {
1907      if (!n)
1908	fprintf (stderr, "Memory class\n");
1909      else
1910	{
1911	  fprintf (stderr, "Classes:");
1912	  for (i = 0; i < n; i++)
1913	    {
1914	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1915	    }
1916	   fprintf (stderr, "\n");
1917	}
1918    }
1919  if (!n)
1920    return NULL;
1921  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1922    return NULL;
1923  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1924    return NULL;
1925
1926  /* First construct simple cases.  Avoid SCmode, since we want to use
1927     single register to pass this type.  */
1928  if (n == 1 && mode != SCmode)
1929    switch (class[0])
1930      {
1931      case X86_64_INTEGER_CLASS:
1932      case X86_64_INTEGERSI_CLASS:
1933	return gen_rtx_REG (mode, intreg[0]);
1934      case X86_64_SSE_CLASS:
1935      case X86_64_SSESF_CLASS:
1936      case X86_64_SSEDF_CLASS:
1937	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1938      case X86_64_X87_CLASS:
1939	return gen_rtx_REG (mode, FIRST_STACK_REG);
1940      case X86_64_NO_CLASS:
1941	/* Zero sized array, struct or class.  */
1942	return NULL;
1943      default:
1944	abort ();
1945      }
1946  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1947    return gen_rtx_REG (TImode, SSE_REGNO (sse_regno));
1948  if (n == 2
1949      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1950    return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1951  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1952      && class[1] == X86_64_INTEGER_CLASS
1953      && (mode == CDImode || mode == TImode)
1954      && intreg[0] + 1 == intreg[1])
1955    return gen_rtx_REG (mode, intreg[0]);
1956  if (n == 4
1957      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1958      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1959    return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1960
1961  /* Otherwise figure out the entries of the PARALLEL.  */
1962  for (i = 0; i < n; i++)
1963    {
1964      switch (class[i])
1965        {
1966	  case X86_64_NO_CLASS:
1967	    break;
1968	  case X86_64_INTEGER_CLASS:
1969	  case X86_64_INTEGERSI_CLASS:
1970	    /* Merge TImodes on aligned occassions here too.  */
1971	    if (i * 8 + 8 > bytes)
1972	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1973	    else if (class[i] == X86_64_INTEGERSI_CLASS)
1974	      tmpmode = SImode;
1975	    else
1976	      tmpmode = DImode;
1977	    /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
1978	    if (tmpmode == BLKmode)
1979	      tmpmode = DImode;
1980	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1981					       gen_rtx_REG (tmpmode, *intreg),
1982					       GEN_INT (i*8));
1983	    intreg++;
1984	    break;
1985	  case X86_64_SSESF_CLASS:
1986	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1987					       gen_rtx_REG (SFmode,
1988							    SSE_REGNO (sse_regno)),
1989					       GEN_INT (i*8));
1990	    sse_regno++;
1991	    break;
1992	  case X86_64_SSEDF_CLASS:
1993	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1994					       gen_rtx_REG (DFmode,
1995							    SSE_REGNO (sse_regno)),
1996					       GEN_INT (i*8));
1997	    sse_regno++;
1998	    break;
1999	  case X86_64_SSE_CLASS:
2000	    if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2001	      tmpmode = TImode, i++;
2002	    else
2003	      tmpmode = DImode;
2004	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2005					       gen_rtx_REG (tmpmode,
2006							    SSE_REGNO (sse_regno)),
2007					       GEN_INT (i*8));
2008	    sse_regno++;
2009	    break;
2010	  default:
2011	    abort ();
2012	}
2013    }
2014  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2015  for (i = 0; i < nexps; i++)
2016    XVECEXP (ret, 0, i) = exp [i];
2017  return ret;
2018}
2019
2020/* Update the data in CUM to advance over an argument
2021   of mode MODE and data type TYPE.
2022   (TYPE is null for libcalls where that information may not be available.)  */
2023
2024void
2025function_arg_advance (cum, mode, type, named)
2026     CUMULATIVE_ARGS *cum;	/* current arg information */
2027     enum machine_mode mode;	/* current arg mode */
2028     tree type;			/* type of the argument or 0 if lib support */
2029     int named;			/* whether or not the argument was named */
2030{
2031  int bytes =
2032    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2033  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2034
2035  if (TARGET_DEBUG_ARG)
2036    fprintf (stderr,
2037	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2038	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2039  if (TARGET_64BIT)
2040    {
2041      int int_nregs, sse_nregs;
2042      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2043	cum->words += words;
2044      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2045	{
2046	  cum->nregs -= int_nregs;
2047	  cum->sse_nregs -= sse_nregs;
2048	  cum->regno += int_nregs;
2049	  cum->sse_regno += sse_nregs;
2050	}
2051      else
2052	cum->words += words;
2053    }
2054  else
2055    {
2056      if (TARGET_SSE && mode == TImode)
2057	{
2058	  cum->sse_words += words;
2059	  cum->sse_nregs -= 1;
2060	  cum->sse_regno += 1;
2061	  if (cum->sse_nregs <= 0)
2062	    {
2063	      cum->sse_nregs = 0;
2064	      cum->sse_regno = 0;
2065	    }
2066	}
2067      else
2068	{
2069	  cum->words += words;
2070	  cum->nregs -= words;
2071	  cum->regno += words;
2072
2073	  if (cum->nregs <= 0)
2074	    {
2075	      cum->nregs = 0;
2076	      cum->regno = 0;
2077	    }
2078	}
2079    }
2080  return;
2081}
2082
2083/* Define where to put the arguments to a function.
2084   Value is zero to push the argument on the stack,
2085   or a hard register in which to store the argument.
2086
2087   MODE is the argument's machine mode.
2088   TYPE is the data type of the argument (as a tree).
2089    This is null for libcalls where that information may
2090    not be available.
2091   CUM is a variable of type CUMULATIVE_ARGS which gives info about
2092    the preceding args and about the function being called.
2093   NAMED is nonzero if this argument is a named parameter
2094    (otherwise it is an extra parameter matching an ellipsis).  */
2095
2096rtx
2097function_arg (cum, mode, type, named)
2098     CUMULATIVE_ARGS *cum;	/* current arg information */
2099     enum machine_mode mode;	/* current arg mode */
2100     tree type;			/* type of the argument or 0 if lib support */
2101     int named;			/* != 0 for normal args, == 0 for ... args */
2102{
2103  rtx ret   = NULL_RTX;
2104  int bytes =
2105    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2106  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2107
2108  /* Handle an hidden AL argument containing number of registers for varargs
2109     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
2110     any AL settings.  */
2111  if (mode == VOIDmode)
2112    {
2113      if (TARGET_64BIT)
2114	return GEN_INT (cum->maybe_vaarg
2115			? (cum->sse_nregs < 0
2116			   ? SSE_REGPARM_MAX
2117			   : cum->sse_regno)
2118			: -1);
2119      else
2120	return constm1_rtx;
2121    }
2122  if (TARGET_64BIT)
2123    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2124			       &x86_64_int_parameter_registers [cum->regno],
2125			       cum->sse_regno);
2126  else
2127    switch (mode)
2128      {
2129	/* For now, pass fp/complex values on the stack.  */
2130      default:
2131	break;
2132
2133      case BLKmode:
2134      case DImode:
2135      case SImode:
2136      case HImode:
2137      case QImode:
2138	if (words <= cum->nregs)
2139	  ret = gen_rtx_REG (mode, cum->regno);
2140	break;
2141      case TImode:
2142	if (cum->sse_nregs)
2143	  ret = gen_rtx_REG (mode, cum->sse_regno);
2144	break;
2145      }
2146
2147  if (TARGET_DEBUG_ARG)
2148    {
2149      fprintf (stderr,
2150	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2151	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2152
2153      if (ret)
2154	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
2155      else
2156	fprintf (stderr, ", stack");
2157
2158      fprintf (stderr, " )\n");
2159    }
2160
2161  return ret;
2162}
2163
2164/* Gives the alignment boundary, in bits, of an argument with the specified mode
2165   and type.   */
2166
2167int
2168ix86_function_arg_boundary (mode, type)
2169     enum machine_mode mode;
2170     tree type;
2171{
2172  int align;
2173  if (!TARGET_64BIT)
2174    return PARM_BOUNDARY;
2175  if (type)
2176    align = TYPE_ALIGN (type);
2177  else
2178    align = GET_MODE_ALIGNMENT (mode);
2179  if (align < PARM_BOUNDARY)
2180    align = PARM_BOUNDARY;
2181  if (align > 128)
2182    align = 128;
2183  return align;
2184}
2185
2186/* Return true if N is a possible register number of function value.  */
2187bool
2188ix86_function_value_regno_p (regno)
2189     int regno;
2190{
2191  if (!TARGET_64BIT)
2192    {
2193      return ((regno) == 0
2194	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2195	      || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2196    }
2197  return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2198	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2199	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2200}
2201
2202/* Define how to find the value returned by a function.
2203   VALTYPE is the data type of the value (as a tree).
2204   If the precise function being called is known, FUNC is its FUNCTION_DECL;
2205   otherwise, FUNC is 0.  */
2206rtx
2207ix86_function_value (valtype)
2208     tree valtype;
2209{
2210  if (TARGET_64BIT)
2211    {
2212      rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2213				     REGPARM_MAX, SSE_REGPARM_MAX,
2214				     x86_64_int_return_registers, 0);
2215      /* For zero sized structures, construct_continer return NULL, but we need
2216         to keep rest of compiler happy by returning meaningfull value.  */
2217      if (!ret)
2218	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2219      return ret;
2220    }
2221  else
2222    return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2223}
2224
2225/* Return false iff type is returned in memory.  */
2226int
2227ix86_return_in_memory (type)
2228     tree type;
2229{
2230  int needed_intregs, needed_sseregs;
2231  if (TARGET_64BIT)
2232    {
2233      return !examine_argument (TYPE_MODE (type), type, 1,
2234				&needed_intregs, &needed_sseregs);
2235    }
2236  else
2237    {
2238      if (TYPE_MODE (type) == BLKmode
2239	  || (VECTOR_MODE_P (TYPE_MODE (type))
2240	      && int_size_in_bytes (type) == 8)
2241	  || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2242	      && TYPE_MODE (type) != TFmode
2243	      && !VECTOR_MODE_P (TYPE_MODE (type))))
2244	return 1;
2245      return 0;
2246    }
2247}
2248
2249/* Define how to find the value returned by a library function
2250   assuming the value has mode MODE.  */
2251rtx
2252ix86_libcall_value (mode)
2253   enum machine_mode mode;
2254{
2255  if (TARGET_64BIT)
2256    {
2257      switch (mode)
2258	{
2259	  case SFmode:
2260	  case SCmode:
2261	  case DFmode:
2262	  case DCmode:
2263	    return gen_rtx_REG (mode, FIRST_SSE_REG);
2264	  case TFmode:
2265	  case TCmode:
2266	    return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2267	  default:
2268	    return gen_rtx_REG (mode, 0);
2269	}
2270    }
2271  else
2272   return gen_rtx_REG (mode, VALUE_REGNO (mode));
2273}
2274
2275/* Create the va_list data type.  */
2276
2277tree
2278ix86_build_va_list ()
2279{
2280  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2281
2282  /* For i386 we use plain pointer to argument area.  */
2283  if (!TARGET_64BIT)
2284    return build_pointer_type (char_type_node);
2285
2286  record = make_lang_type (RECORD_TYPE);
2287  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2288
2289  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2290		      unsigned_type_node);
2291  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2292		      unsigned_type_node);
2293  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2294		      ptr_type_node);
2295  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2296		      ptr_type_node);
2297
2298  DECL_FIELD_CONTEXT (f_gpr) = record;
2299  DECL_FIELD_CONTEXT (f_fpr) = record;
2300  DECL_FIELD_CONTEXT (f_ovf) = record;
2301  DECL_FIELD_CONTEXT (f_sav) = record;
2302
2303  TREE_CHAIN (record) = type_decl;
2304  TYPE_NAME (record) = type_decl;
2305  TYPE_FIELDS (record) = f_gpr;
2306  TREE_CHAIN (f_gpr) = f_fpr;
2307  TREE_CHAIN (f_fpr) = f_ovf;
2308  TREE_CHAIN (f_ovf) = f_sav;
2309
2310  layout_type (record);
2311
2312  /* The correct type is an array type of one element.  */
2313  return build_array_type (record, build_index_type (size_zero_node));
2314}
2315
2316/* Perform any needed actions needed for a function that is receiving a
2317   variable number of arguments.
2318
2319   CUM is as above.
2320
2321   MODE and TYPE are the mode and type of the current parameter.
2322
2323   PRETEND_SIZE is a variable that should be set to the amount of stack
2324   that must be pushed by the prolog to pretend that our caller pushed
2325   it.
2326
2327   Normally, this macro will push all remaining incoming registers on the
2328   stack and set PRETEND_SIZE to the length of the registers pushed.  */
2329
2330void
2331ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2332     CUMULATIVE_ARGS *cum;
2333     enum machine_mode mode;
2334     tree type;
2335     int *pretend_size ATTRIBUTE_UNUSED;
2336     int no_rtl;
2337
2338{
2339  CUMULATIVE_ARGS next_cum;
2340  rtx save_area = NULL_RTX, mem;
2341  rtx label;
2342  rtx label_ref;
2343  rtx tmp_reg;
2344  rtx nsse_reg;
2345  int set;
2346  tree fntype;
2347  int stdarg_p;
2348  int i;
2349
2350  if (!TARGET_64BIT)
2351    return;
2352
2353  /* Indicate to allocate space on the stack for varargs save area.  */
2354  ix86_save_varrargs_registers = 1;
2355
2356  fntype = TREE_TYPE (current_function_decl);
2357  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2358	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2359		  != void_type_node));
2360
2361  /* For varargs, we do not want to skip the dummy va_dcl argument.
2362     For stdargs, we do want to skip the last named argument.  */
2363  next_cum = *cum;
2364  if (stdarg_p)
2365    function_arg_advance (&next_cum, mode, type, 1);
2366
2367  if (!no_rtl)
2368    save_area = frame_pointer_rtx;
2369
2370  set = get_varargs_alias_set ();
2371
2372  for (i = next_cum.regno; i < ix86_regparm; i++)
2373    {
2374      mem = gen_rtx_MEM (Pmode,
2375			 plus_constant (save_area, i * UNITS_PER_WORD));
2376      set_mem_alias_set (mem, set);
2377      emit_move_insn (mem, gen_rtx_REG (Pmode,
2378					x86_64_int_parameter_registers[i]));
2379    }
2380
2381  if (next_cum.sse_nregs)
2382    {
2383      /* Now emit code to save SSE registers.  The AX parameter contains number
2384	 of SSE parameter regsiters used to call this function.  We use
2385	 sse_prologue_save insn template that produces computed jump across
2386	 SSE saves.  We need some preparation work to get this working.  */
2387
2388      label = gen_label_rtx ();
2389      label_ref = gen_rtx_LABEL_REF (Pmode, label);
2390
2391      /* Compute address to jump to :
2392         label - 5*eax + nnamed_sse_arguments*5  */
2393      tmp_reg = gen_reg_rtx (Pmode);
2394      nsse_reg = gen_reg_rtx (Pmode);
2395      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2396      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2397			      gen_rtx_MULT (Pmode, nsse_reg,
2398					    GEN_INT (4))));
2399      if (next_cum.sse_regno)
2400	emit_move_insn
2401	  (nsse_reg,
2402	   gen_rtx_CONST (DImode,
2403			  gen_rtx_PLUS (DImode,
2404					label_ref,
2405					GEN_INT (next_cum.sse_regno * 4))));
2406      else
2407	emit_move_insn (nsse_reg, label_ref);
2408      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2409
2410      /* Compute address of memory block we save into.  We always use pointer
2411	 pointing 127 bytes after first byte to store - this is needed to keep
2412	 instruction size limited by 4 bytes.  */
2413      tmp_reg = gen_reg_rtx (Pmode);
2414      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2415			      plus_constant (save_area,
2416					     8 * REGPARM_MAX + 127)));
2417      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2418      set_mem_alias_set (mem, set);
2419      set_mem_align (mem, BITS_PER_WORD);
2420
2421      /* And finally do the dirty job!  */
2422      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2423					GEN_INT (next_cum.sse_regno), label));
2424    }
2425
2426}
2427
2428/* Implement va_start.  */
2429
2430void
2431ix86_va_start (stdarg_p, valist, nextarg)
2432     int stdarg_p;
2433     tree valist;
2434     rtx nextarg;
2435{
2436  HOST_WIDE_INT words, n_gpr, n_fpr;
2437  tree f_gpr, f_fpr, f_ovf, f_sav;
2438  tree gpr, fpr, ovf, sav, t;
2439
2440  /* Only 64bit target needs something special.  */
2441  if (!TARGET_64BIT)
2442    {
2443      std_expand_builtin_va_start (stdarg_p, valist, nextarg);
2444      return;
2445    }
2446
2447  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2448  f_fpr = TREE_CHAIN (f_gpr);
2449  f_ovf = TREE_CHAIN (f_fpr);
2450  f_sav = TREE_CHAIN (f_ovf);
2451
2452  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2453  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2454  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2455  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2456  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2457
2458  /* Count number of gp and fp argument registers used.  */
2459  words = current_function_args_info.words;
2460  n_gpr = current_function_args_info.regno;
2461  n_fpr = current_function_args_info.sse_regno;
2462
2463  if (TARGET_DEBUG_ARG)
2464    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2465	     (int) words, (int) n_gpr, (int) n_fpr);
2466
2467  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2468	     build_int_2 (n_gpr * 8, 0));
2469  TREE_SIDE_EFFECTS (t) = 1;
2470  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2471
2472  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2473	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2474  TREE_SIDE_EFFECTS (t) = 1;
2475  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2476
2477  /* Find the overflow area.  */
2478  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2479  if (words != 0)
2480    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2481	       build_int_2 (words * UNITS_PER_WORD, 0));
2482  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2483  TREE_SIDE_EFFECTS (t) = 1;
2484  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2485
2486  /* Find the register save area.
2487     Prologue of the function save it right above stack frame.  */
2488  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2489  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2490  TREE_SIDE_EFFECTS (t) = 1;
2491  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2492}
2493
2494/* Implement va_arg.  */
2495rtx
2496ix86_va_arg (valist, type)
2497     tree valist, type;
2498{
2499  static int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2500  tree f_gpr, f_fpr, f_ovf, f_sav;
2501  tree gpr, fpr, ovf, sav, t;
2502  int size, rsize;
2503  rtx lab_false, lab_over = NULL_RTX;
2504  rtx addr_rtx, r;
2505  rtx container;
2506
2507  /* Only 64bit target needs something special.  */
2508  if (!TARGET_64BIT)
2509    {
2510      return std_expand_builtin_va_arg (valist, type);
2511    }
2512
2513  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2514  f_fpr = TREE_CHAIN (f_gpr);
2515  f_ovf = TREE_CHAIN (f_fpr);
2516  f_sav = TREE_CHAIN (f_ovf);
2517
2518  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2519  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2520  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2521  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2522  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2523
2524  size = int_size_in_bytes (type);
2525  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2526
2527  container = construct_container (TYPE_MODE (type), type, 0,
2528				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2529  /*
2530   * Pull the value out of the saved registers ...
2531   */
2532
2533  addr_rtx = gen_reg_rtx (Pmode);
2534
2535  if (container)
2536    {
2537      rtx int_addr_rtx, sse_addr_rtx;
2538      int needed_intregs, needed_sseregs;
2539      int need_temp;
2540
2541      lab_over = gen_label_rtx ();
2542      lab_false = gen_label_rtx ();
2543
2544      examine_argument (TYPE_MODE (type), type, 0,
2545		        &needed_intregs, &needed_sseregs);
2546
2547
2548      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2549		   || TYPE_ALIGN (type) > 128);
2550
2551      /* In case we are passing structure, verify that it is consetuctive block
2552         on the register save area.  If not we need to do moves.  */
2553      if (!need_temp && !REG_P (container))
2554	{
2555	  /* Verify that all registers are strictly consetuctive  */
2556	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2557	    {
2558	      int i;
2559
2560	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2561		{
2562		  rtx slot = XVECEXP (container, 0, i);
2563		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2564		      || INTVAL (XEXP (slot, 1)) != i * 16)
2565		    need_temp = 1;
2566		}
2567	    }
2568	  else
2569	    {
2570	      int i;
2571
2572	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2573		{
2574		  rtx slot = XVECEXP (container, 0, i);
2575		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2576		      || INTVAL (XEXP (slot, 1)) != i * 8)
2577		    need_temp = 1;
2578		}
2579	    }
2580	}
2581      if (!need_temp)
2582	{
2583	  int_addr_rtx = addr_rtx;
2584	  sse_addr_rtx = addr_rtx;
2585	}
2586      else
2587	{
2588	  int_addr_rtx = gen_reg_rtx (Pmode);
2589	  sse_addr_rtx = gen_reg_rtx (Pmode);
2590	}
2591      /* First ensure that we fit completely in registers.  */
2592      if (needed_intregs)
2593	{
2594	  emit_cmp_and_jump_insns (expand_expr
2595				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2596				   GEN_INT ((REGPARM_MAX - needed_intregs +
2597					     1) * 8), GE, const1_rtx, SImode,
2598				   1, lab_false);
2599	}
2600      if (needed_sseregs)
2601	{
2602	  emit_cmp_and_jump_insns (expand_expr
2603				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2604				   GEN_INT ((SSE_REGPARM_MAX -
2605					     needed_sseregs + 1) * 16 +
2606					    REGPARM_MAX * 8), GE, const1_rtx,
2607				   SImode, 1, lab_false);
2608	}
2609
2610      /* Compute index to start of area used for integer regs.  */
2611      if (needed_intregs)
2612	{
2613	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2614	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2615	  if (r != int_addr_rtx)
2616	    emit_move_insn (int_addr_rtx, r);
2617	}
2618      if (needed_sseregs)
2619	{
2620	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2621	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2622	  if (r != sse_addr_rtx)
2623	    emit_move_insn (sse_addr_rtx, r);
2624	}
2625      if (need_temp)
2626	{
2627	  int i;
2628	  rtx mem;
2629
2630	  /* Never use the memory itself, as it has the alias set.  */
2631	  addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2632	  mem = gen_rtx_MEM (BLKmode, addr_rtx);
2633	  set_mem_alias_set (mem, get_varargs_alias_set ());
2634	  set_mem_align (mem, BITS_PER_UNIT);
2635
2636	  for (i = 0; i < XVECLEN (container, 0); i++)
2637	    {
2638	      rtx slot = XVECEXP (container, 0, i);
2639	      rtx reg = XEXP (slot, 0);
2640	      enum machine_mode mode = GET_MODE (reg);
2641	      rtx src_addr;
2642	      rtx src_mem;
2643	      int src_offset;
2644	      rtx dest_mem;
2645
2646	      if (SSE_REGNO_P (REGNO (reg)))
2647		{
2648		  src_addr = sse_addr_rtx;
2649		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2650		}
2651	      else
2652		{
2653		  src_addr = int_addr_rtx;
2654		  src_offset = REGNO (reg) * 8;
2655		}
2656	      src_mem = gen_rtx_MEM (mode, src_addr);
2657	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
2658	      src_mem = adjust_address (src_mem, mode, src_offset);
2659	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2660	      emit_move_insn (dest_mem, src_mem);
2661	    }
2662	}
2663
2664      if (needed_intregs)
2665	{
2666	  t =
2667	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2668		   build_int_2 (needed_intregs * 8, 0));
2669	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2670	  TREE_SIDE_EFFECTS (t) = 1;
2671	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2672	}
2673      if (needed_sseregs)
2674	{
2675	  t =
2676	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2677		   build_int_2 (needed_sseregs * 16, 0));
2678	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2679	  TREE_SIDE_EFFECTS (t) = 1;
2680	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2681	}
2682
2683      emit_jump_insn (gen_jump (lab_over));
2684      emit_barrier ();
2685      emit_label (lab_false);
2686    }
2687
2688  /* ... otherwise out of the overflow area.  */
2689
2690  /* Care for on-stack alignment if needed.  */
2691  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2692    t = ovf;
2693  else
2694    {
2695      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2696      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2697      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2698    }
2699  t = save_expr (t);
2700
2701  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2702  if (r != addr_rtx)
2703    emit_move_insn (addr_rtx, r);
2704
2705  t =
2706    build (PLUS_EXPR, TREE_TYPE (t), t,
2707	   build_int_2 (rsize * UNITS_PER_WORD, 0));
2708  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2709  TREE_SIDE_EFFECTS (t) = 1;
2710  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2711
2712  if (container)
2713    emit_label (lab_over);
2714
2715  return addr_rtx;
2716}
2717
2718/* Return nonzero if OP is general operand representable on x86_64.  */
2719
2720int
2721x86_64_general_operand (op, mode)
2722     rtx op;
2723     enum machine_mode mode;
2724{
2725  if (!TARGET_64BIT)
2726    return general_operand (op, mode);
2727  if (nonimmediate_operand (op, mode))
2728    return 1;
2729  return x86_64_sign_extended_value (op);
2730}
2731
2732/* Return nonzero if OP is general operand representable on x86_64
2733   as either sign extended or zero extended constant.  */
2734
2735int
2736x86_64_szext_general_operand (op, mode)
2737     rtx op;
2738     enum machine_mode mode;
2739{
2740  if (!TARGET_64BIT)
2741    return general_operand (op, mode);
2742  if (nonimmediate_operand (op, mode))
2743    return 1;
2744  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2745}
2746
2747/* Return nonzero if OP is nonmemory operand representable on x86_64.  */
2748
2749int
2750x86_64_nonmemory_operand (op, mode)
2751     rtx op;
2752     enum machine_mode mode;
2753{
2754  if (!TARGET_64BIT)
2755    return nonmemory_operand (op, mode);
2756  if (register_operand (op, mode))
2757    return 1;
2758  return x86_64_sign_extended_value (op);
2759}
2760
2761/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns.  */
2762
2763int
2764x86_64_movabs_operand (op, mode)
2765     rtx op;
2766     enum machine_mode mode;
2767{
2768  if (!TARGET_64BIT || !flag_pic)
2769    return nonmemory_operand (op, mode);
2770  if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2771    return 1;
2772  if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2773    return 1;
2774  return 0;
2775}
2776
2777/* Return nonzero if OP is nonmemory operand representable on x86_64.  */
2778
2779int
2780x86_64_szext_nonmemory_operand (op, mode)
2781     rtx op;
2782     enum machine_mode mode;
2783{
2784  if (!TARGET_64BIT)
2785    return nonmemory_operand (op, mode);
2786  if (register_operand (op, mode))
2787    return 1;
2788  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2789}
2790
2791/* Return nonzero if OP is immediate operand representable on x86_64.  */
2792
2793int
2794x86_64_immediate_operand (op, mode)
2795     rtx op;
2796     enum machine_mode mode;
2797{
2798  if (!TARGET_64BIT)
2799    return immediate_operand (op, mode);
2800  return x86_64_sign_extended_value (op);
2801}
2802
2803/* Return nonzero if OP is immediate operand representable on x86_64.  */
2804
2805int
2806x86_64_zext_immediate_operand (op, mode)
2807     rtx op;
2808     enum machine_mode mode ATTRIBUTE_UNUSED;
2809{
2810  return x86_64_zero_extended_value (op);
2811}
2812
2813/* Return nonzero if OP is (const_int 1), else return zero.  */
2814
2815int
2816const_int_1_operand (op, mode)
2817     rtx op;
2818     enum machine_mode mode ATTRIBUTE_UNUSED;
2819{
2820  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2821}
2822
2823/* Returns 1 if OP is either a symbol reference or a sum of a symbol
2824   reference and a constant.  */
2825
2826int
2827symbolic_operand (op, mode)
2828     register rtx op;
2829     enum machine_mode mode ATTRIBUTE_UNUSED;
2830{
2831  switch (GET_CODE (op))
2832    {
2833    case SYMBOL_REF:
2834    case LABEL_REF:
2835      return 1;
2836
2837    case CONST:
2838      op = XEXP (op, 0);
2839      if (GET_CODE (op) == SYMBOL_REF
2840	  || GET_CODE (op) == LABEL_REF
2841	  || (GET_CODE (op) == UNSPEC
2842	      && (XINT (op, 1) == 6
2843		  || XINT (op, 1) == 7
2844		  || XINT (op, 1) == 15)))
2845	return 1;
2846      if (GET_CODE (op) != PLUS
2847	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
2848	return 0;
2849
2850      op = XEXP (op, 0);
2851      if (GET_CODE (op) == SYMBOL_REF
2852	  || GET_CODE (op) == LABEL_REF)
2853	return 1;
2854      /* Only @GOTOFF gets offsets.  */
2855      if (GET_CODE (op) != UNSPEC
2856	  || XINT (op, 1) != 7)
2857	return 0;
2858
2859      op = XVECEXP (op, 0, 0);
2860      if (GET_CODE (op) == SYMBOL_REF
2861	  || GET_CODE (op) == LABEL_REF)
2862	return 1;
2863      return 0;
2864
2865    default:
2866      return 0;
2867    }
2868}
2869
2870/* Return true if the operand contains a @GOT or @GOTOFF reference.  */
2871
2872int
2873pic_symbolic_operand (op, mode)
2874     register rtx op;
2875     enum machine_mode mode ATTRIBUTE_UNUSED;
2876{
2877  if (GET_CODE (op) != CONST)
2878    return 0;
2879  op = XEXP (op, 0);
2880  if (TARGET_64BIT)
2881    {
2882      if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2883	return 1;
2884    }
2885  else
2886    {
2887      if (GET_CODE (op) == UNSPEC)
2888	return 1;
2889      if (GET_CODE (op) != PLUS
2890	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
2891	return 0;
2892      op = XEXP (op, 0);
2893      if (GET_CODE (op) == UNSPEC)
2894	return 1;
2895    }
2896  return 0;
2897}
2898
2899/* Return true if OP is a symbolic operand that resolves locally.  */
2900
2901static int
2902local_symbolic_operand (op, mode)
2903     rtx op;
2904     enum machine_mode mode ATTRIBUTE_UNUSED;
2905{
2906  if (GET_CODE (op) == LABEL_REF)
2907    return 1;
2908
2909  if (GET_CODE (op) == CONST
2910      && GET_CODE (XEXP (op, 0)) == PLUS
2911      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2912    op = XEXP (XEXP (op, 0), 0);
2913
2914  if (GET_CODE (op) != SYMBOL_REF)
2915    return 0;
2916
2917  /* These we've been told are local by varasm and encode_section_info
2918     respectively.  */
2919  if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2920    return 1;
2921
2922  /* There is, however, a not insubstantial body of code in the rest of
2923     the compiler that assumes it can just stick the results of
2924     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
2925  /* ??? This is a hack.  Should update the body of the compiler to
2926     always create a DECL an invoke ENCODE_SECTION_INFO.  */
2927  if (strncmp (XSTR (op, 0), internal_label_prefix,
2928	       internal_label_prefix_len) == 0)
2929    return 1;
2930
2931  return 0;
2932}
2933
2934/* Test for a valid operand for a call instruction.  Don't allow the
2935   arg pointer register or virtual regs since they may decay into
2936   reg + const, which the patterns can't handle.  */
2937
2938int
2939call_insn_operand (op, mode)
2940     rtx op;
2941     enum machine_mode mode ATTRIBUTE_UNUSED;
2942{
2943  /* Disallow indirect through a virtual register.  This leads to
2944     compiler aborts when trying to eliminate them.  */
2945  if (GET_CODE (op) == REG
2946      && (op == arg_pointer_rtx
2947	  || op == frame_pointer_rtx
2948	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
2949	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
2950    return 0;
2951
2952  /* Disallow `call 1234'.  Due to varying assembler lameness this
2953     gets either rejected or translated to `call .+1234'.  */
2954  if (GET_CODE (op) == CONST_INT)
2955    return 0;
2956
2957  /* Explicitly allow SYMBOL_REF even if pic.  */
2958  if (GET_CODE (op) == SYMBOL_REF)
2959    return 1;
2960
2961  /* Half-pic doesn't allow anything but registers and constants.
2962     We've just taken care of the later.  */
2963  if (HALF_PIC_P ())
2964    return register_operand (op, Pmode);
2965
2966  /* Otherwise we can allow any general_operand in the address.  */
2967  return general_operand (op, Pmode);
2968}
2969
2970int
2971constant_call_address_operand (op, mode)
2972     rtx op;
2973     enum machine_mode mode ATTRIBUTE_UNUSED;
2974{
2975  if (GET_CODE (op) == CONST
2976      && GET_CODE (XEXP (op, 0)) == PLUS
2977      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2978    op = XEXP (XEXP (op, 0), 0);
2979  return GET_CODE (op) == SYMBOL_REF;
2980}
2981
2982/* Match exactly zero and one.  */
2983
2984int
2985const0_operand (op, mode)
2986     register rtx op;
2987     enum machine_mode mode;
2988{
2989  return op == CONST0_RTX (mode);
2990}
2991
2992int
2993const1_operand (op, mode)
2994     register rtx op;
2995     enum machine_mode mode ATTRIBUTE_UNUSED;
2996{
2997  return op == const1_rtx;
2998}
2999
3000/* Match 2, 4, or 8.  Used for leal multiplicands.  */
3001
3002int
3003const248_operand (op, mode)
3004     register rtx op;
3005     enum machine_mode mode ATTRIBUTE_UNUSED;
3006{
3007  return (GET_CODE (op) == CONST_INT
3008	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3009}
3010
3011/* True if this is a constant appropriate for an increment or decremenmt.  */
3012
3013int
3014incdec_operand (op, mode)
3015     register rtx op;
3016     enum machine_mode mode ATTRIBUTE_UNUSED;
3017{
3018  /* On Pentium4, the inc and dec operations causes extra dependency on flag
3019     registers, since carry flag is not set.  */
3020  if (TARGET_PENTIUM4 && !optimize_size)
3021    return 0;
3022  return op == const1_rtx || op == constm1_rtx;
3023}
3024
3025/* Return nonzero if OP is acceptable as operand of DImode shift
3026   expander.  */
3027
3028int
3029shiftdi_operand (op, mode)
3030     rtx op;
3031     enum machine_mode mode ATTRIBUTE_UNUSED;
3032{
3033  if (TARGET_64BIT)
3034    return nonimmediate_operand (op, mode);
3035  else
3036    return register_operand (op, mode);
3037}
3038
3039/* Return false if this is the stack pointer, or any other fake
3040   register eliminable to the stack pointer.  Otherwise, this is
3041   a register operand.
3042
3043   This is used to prevent esp from being used as an index reg.
3044   Which would only happen in pathological cases.  */
3045
3046int
3047reg_no_sp_operand (op, mode)
3048     register rtx op;
3049     enum machine_mode mode;
3050{
3051  rtx t = op;
3052  if (GET_CODE (t) == SUBREG)
3053    t = SUBREG_REG (t);
3054  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3055    return 0;
3056
3057  return register_operand (op, mode);
3058}
3059
3060int
3061mmx_reg_operand (op, mode)
3062     register rtx op;
3063     enum machine_mode mode ATTRIBUTE_UNUSED;
3064{
3065  return MMX_REG_P (op);
3066}
3067
3068/* Return false if this is any eliminable register.  Otherwise
3069   general_operand.  */
3070
3071int
3072general_no_elim_operand (op, mode)
3073     register rtx op;
3074     enum machine_mode mode;
3075{
3076  rtx t = op;
3077  if (GET_CODE (t) == SUBREG)
3078    t = SUBREG_REG (t);
3079  if (t == arg_pointer_rtx || t == frame_pointer_rtx
3080      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3081      || t == virtual_stack_dynamic_rtx)
3082    return 0;
3083  if (REG_P (t)
3084      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3085      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3086    return 0;
3087
3088  return general_operand (op, mode);
3089}
3090
3091/* Return false if this is any eliminable register.  Otherwise
3092   register_operand or const_int.  */
3093
3094int
3095nonmemory_no_elim_operand (op, mode)
3096     register rtx op;
3097     enum machine_mode mode;
3098{
3099  rtx t = op;
3100  if (GET_CODE (t) == SUBREG)
3101    t = SUBREG_REG (t);
3102  if (t == arg_pointer_rtx || t == frame_pointer_rtx
3103      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3104      || t == virtual_stack_dynamic_rtx)
3105    return 0;
3106
3107  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3108}
3109
3110/* Return true if op is a Q_REGS class register.  */
3111
3112int
3113q_regs_operand (op, mode)
3114     register rtx op;
3115     enum machine_mode mode;
3116{
3117  if (mode != VOIDmode && GET_MODE (op) != mode)
3118    return 0;
3119  if (GET_CODE (op) == SUBREG)
3120    op = SUBREG_REG (op);
3121  return QI_REG_P (op);
3122}
3123
3124/* Return true if op is a NON_Q_REGS class register.  */
3125
3126int
3127non_q_regs_operand (op, mode)
3128     register rtx op;
3129     enum machine_mode mode;
3130{
3131  if (mode != VOIDmode && GET_MODE (op) != mode)
3132    return 0;
3133  if (GET_CODE (op) == SUBREG)
3134    op = SUBREG_REG (op);
3135  return NON_QI_REG_P (op);
3136}
3137
3138/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3139   insns.  */
3140int
3141sse_comparison_operator (op, mode)
3142     rtx op;
3143     enum machine_mode mode ATTRIBUTE_UNUSED;
3144{
3145  enum rtx_code code = GET_CODE (op);
3146  switch (code)
3147    {
3148    /* Operations supported directly.  */
3149    case EQ:
3150    case LT:
3151    case LE:
3152    case UNORDERED:
3153    case NE:
3154    case UNGE:
3155    case UNGT:
3156    case ORDERED:
3157      return 1;
3158    /* These are equivalent to ones above in non-IEEE comparisons.  */
3159    case UNEQ:
3160    case UNLT:
3161    case UNLE:
3162    case LTGT:
3163    case GE:
3164    case GT:
3165      return !TARGET_IEEE_FP;
3166    default:
3167      return 0;
3168    }
3169}
3170/* Return 1 if OP is a valid comparison operator in valid mode.  */
3171int
3172ix86_comparison_operator (op, mode)
3173     register rtx op;
3174     enum machine_mode mode;
3175{
3176  enum machine_mode inmode;
3177  enum rtx_code code = GET_CODE (op);
3178  if (mode != VOIDmode && GET_MODE (op) != mode)
3179    return 0;
3180  if (GET_RTX_CLASS (code) != '<')
3181    return 0;
3182  inmode = GET_MODE (XEXP (op, 0));
3183
3184  if (inmode == CCFPmode || inmode == CCFPUmode)
3185    {
3186      enum rtx_code second_code, bypass_code;
3187      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3188      return (bypass_code == NIL && second_code == NIL);
3189    }
3190  switch (code)
3191    {
3192    case EQ: case NE:
3193      return 1;
3194    case LT: case GE:
3195      if (inmode == CCmode || inmode == CCGCmode
3196	  || inmode == CCGOCmode || inmode == CCNOmode)
3197	return 1;
3198      return 0;
3199    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3200      if (inmode == CCmode)
3201	return 1;
3202      return 0;
3203    case GT: case LE:
3204      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3205	return 1;
3206      return 0;
3207    default:
3208      return 0;
3209    }
3210}
3211
3212/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */
3213
3214int
3215fcmov_comparison_operator (op, mode)
3216    register rtx op;
3217    enum machine_mode mode;
3218{
3219  enum machine_mode inmode;
3220  enum rtx_code code = GET_CODE (op);
3221  if (mode != VOIDmode && GET_MODE (op) != mode)
3222    return 0;
3223  if (GET_RTX_CLASS (code) != '<')
3224    return 0;
3225  inmode = GET_MODE (XEXP (op, 0));
3226  if (inmode == CCFPmode || inmode == CCFPUmode)
3227    {
3228      enum rtx_code second_code, bypass_code;
3229      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3230      if (bypass_code != NIL || second_code != NIL)
3231	return 0;
3232      code = ix86_fp_compare_code_to_integer (code);
3233    }
3234  /* i387 supports just limited amount of conditional codes.  */
3235  switch (code)
3236    {
3237    case LTU: case GTU: case LEU: case GEU:
3238      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3239	return 1;
3240      return 0;
3241    case ORDERED: case UNORDERED:
3242    case EQ: case NE:
3243      return 1;
3244    default:
3245      return 0;
3246    }
3247}
3248
3249/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */
3250
3251int
3252promotable_binary_operator (op, mode)
3253     register rtx op;
3254     enum machine_mode mode ATTRIBUTE_UNUSED;
3255{
3256  switch (GET_CODE (op))
3257    {
3258    case MULT:
3259      /* Modern CPUs have same latency for HImode and SImode multiply,
3260         but 386 and 486 do HImode multiply faster.  */
3261      return ix86_cpu > PROCESSOR_I486;
3262    case PLUS:
3263    case AND:
3264    case IOR:
3265    case XOR:
3266    case ASHIFT:
3267      return 1;
3268    default:
3269      return 0;
3270    }
3271}
3272
3273/* Nearly general operand, but accept any const_double, since we wish
3274   to be able to drop them into memory rather than have them get pulled
3275   into registers.  */
3276
3277int
3278cmp_fp_expander_operand (op, mode)
3279     register rtx op;
3280     enum machine_mode mode;
3281{
3282  if (mode != VOIDmode && mode != GET_MODE (op))
3283    return 0;
3284  if (GET_CODE (op) == CONST_DOUBLE)
3285    return 1;
3286  return general_operand (op, mode);
3287}
3288
3289/* Match an SI or HImode register for a zero_extract.  */
3290
3291int
3292ext_register_operand (op, mode)
3293     register rtx op;
3294     enum machine_mode mode ATTRIBUTE_UNUSED;
3295{
3296  int regno;
3297  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3298      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3299    return 0;
3300
3301  if (!register_operand (op, VOIDmode))
3302    return 0;
3303
3304  /* Be curefull to accept only registers having upper parts.  */
3305  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3306  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3307}
3308
3309/* Return 1 if this is a valid binary floating-point operation.
3310   OP is the expression matched, and MODE is its mode.  */
3311
3312int
3313binary_fp_operator (op, mode)
3314    register rtx op;
3315    enum machine_mode mode;
3316{
3317  if (mode != VOIDmode && mode != GET_MODE (op))
3318    return 0;
3319
3320  switch (GET_CODE (op))
3321    {
3322    case PLUS:
3323    case MINUS:
3324    case MULT:
3325    case DIV:
3326      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3327
3328    default:
3329      return 0;
3330    }
3331}
3332
3333int
3334mult_operator (op, mode)
3335    register rtx op;
3336    enum machine_mode mode ATTRIBUTE_UNUSED;
3337{
3338  return GET_CODE (op) == MULT;
3339}
3340
3341int
3342div_operator (op, mode)
3343    register rtx op;
3344    enum machine_mode mode ATTRIBUTE_UNUSED;
3345{
3346  return GET_CODE (op) == DIV;
3347}
3348
3349int
3350arith_or_logical_operator (op, mode)
3351      rtx op;
3352      enum machine_mode mode;
3353{
3354  return ((mode == VOIDmode || GET_MODE (op) == mode)
3355          && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3356              || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3357}
3358
3359/* Returns 1 if OP is memory operand with a displacement.  */
3360
3361int
3362memory_displacement_operand (op, mode)
3363     register rtx op;
3364     enum machine_mode mode;
3365{
3366  struct ix86_address parts;
3367
3368  if (! memory_operand (op, mode))
3369    return 0;
3370
3371  if (! ix86_decompose_address (XEXP (op, 0), &parts))
3372    abort ();
3373
3374  return parts.disp != NULL_RTX;
3375}
3376
3377/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3378   re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3379
3380   ??? It seems likely that this will only work because cmpsi is an
3381   expander, and no actual insns use this.  */
3382
3383int
3384cmpsi_operand (op, mode)
3385      rtx op;
3386      enum machine_mode mode;
3387{
3388  if (nonimmediate_operand (op, mode))
3389    return 1;
3390
3391  if (GET_CODE (op) == AND
3392      && GET_MODE (op) == SImode
3393      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3394      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3395      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3396      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3397      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3398      && GET_CODE (XEXP (op, 1)) == CONST_INT)
3399    return 1;
3400
3401  return 0;
3402}
3403
3404/* Returns 1 if OP is memory operand that can not be represented by the
3405   modRM array.  */
3406
3407int
3408long_memory_operand (op, mode)
3409     register rtx op;
3410     enum machine_mode mode;
3411{
3412  if (! memory_operand (op, mode))
3413    return 0;
3414
3415  return memory_address_length (op) != 0;
3416}
3417
3418/* Return nonzero if the rtx is known aligned.  */
3419
3420int
3421aligned_operand (op, mode)
3422     rtx op;
3423     enum machine_mode mode;
3424{
3425  struct ix86_address parts;
3426
3427  if (!general_operand (op, mode))
3428    return 0;
3429
3430  /* Registers and immediate operands are always "aligned".  */
3431  if (GET_CODE (op) != MEM)
3432    return 1;
3433
3434  /* Don't even try to do any aligned optimizations with volatiles.  */
3435  if (MEM_VOLATILE_P (op))
3436    return 0;
3437
3438  op = XEXP (op, 0);
3439
3440  /* Pushes and pops are only valid on the stack pointer.  */
3441  if (GET_CODE (op) == PRE_DEC
3442      || GET_CODE (op) == POST_INC)
3443    return 1;
3444
3445  /* Decode the address.  */
3446  if (! ix86_decompose_address (op, &parts))
3447    abort ();
3448
3449  /* Look for some component that isn't known to be aligned.  */
3450  if (parts.index)
3451    {
3452      if (parts.scale < 4
3453	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3454	return 0;
3455    }
3456  if (parts.base)
3457    {
3458      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3459	return 0;
3460    }
3461  if (parts.disp)
3462    {
3463      if (GET_CODE (parts.disp) != CONST_INT
3464	  || (INTVAL (parts.disp) & 3) != 0)
3465	return 0;
3466    }
3467
3468  /* Didn't find one -- this must be an aligned address.  */
3469  return 1;
3470}
3471
3472/* Return true if the constant is something that can be loaded with
3473   a special instruction.  Only handle 0.0 and 1.0; others are less
3474   worthwhile.  */
3475
3476int
3477standard_80387_constant_p (x)
3478     rtx x;
3479{
3480  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3481    return -1;
3482  /* Note that on the 80387, other constants, such as pi, that we should support
3483     too.  On some machines, these are much slower to load as standard constant,
3484     than to load from doubles in memory.  */
3485  if (x == CONST0_RTX (GET_MODE (x)))
3486    return 1;
3487  if (x == CONST1_RTX (GET_MODE (x)))
3488    return 2;
3489  return 0;
3490}
3491
3492/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3493 */
3494int
3495standard_sse_constant_p (x)
3496     rtx x;
3497{
3498  if (GET_CODE (x) != CONST_DOUBLE)
3499    return -1;
3500  return (x == CONST0_RTX (GET_MODE (x)));
3501}
3502
3503/* Returns 1 if OP contains a symbol reference */
3504
3505int
3506symbolic_reference_mentioned_p (op)
3507     rtx op;
3508{
3509  register const char *fmt;
3510  register int i;
3511
3512  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3513    return 1;
3514
3515  fmt = GET_RTX_FORMAT (GET_CODE (op));
3516  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3517    {
3518      if (fmt[i] == 'E')
3519	{
3520	  register int j;
3521
3522	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3523	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3524	      return 1;
3525	}
3526
3527      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3528	return 1;
3529    }
3530
3531  return 0;
3532}
3533
3534/* Return 1 if it is appropriate to emit `ret' instructions in the
3535   body of a function.  Do this only if the epilogue is simple, needing a
3536   couple of insns.  Prior to reloading, we can't tell how many registers
3537   must be saved, so return 0 then.  Return 0 if there is no frame
3538   marker to de-allocate.
3539
3540   If NON_SAVING_SETJMP is defined and true, then it is not possible
3541   for the epilogue to be simple, so return 0.  This is a special case
3542   since NON_SAVING_SETJMP will not cause regs_ever_live to change
3543   until final, but jump_optimize may need to know sooner if a
3544   `return' is OK.  */
3545
3546int
3547ix86_can_use_return_insn_p ()
3548{
3549  struct ix86_frame frame;
3550
3551#ifdef NON_SAVING_SETJMP
3552  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3553    return 0;
3554#endif
3555
3556  if (! reload_completed || frame_pointer_needed)
3557    return 0;
3558
3559  /* Don't allow more than 32 pop, since that's all we can do
3560     with one instruction.  */
3561  if (current_function_pops_args
3562      && current_function_args_size >= 32768)
3563    return 0;
3564
3565  ix86_compute_frame_layout (&frame);
3566  return frame.to_allocate == 0 && frame.nregs == 0;
3567}
3568
3569/* Return 1 if VALUE can be stored in the sign extended immediate field.  */
3570int
3571x86_64_sign_extended_value (value)
3572     rtx value;
3573{
3574  switch (GET_CODE (value))
3575    {
3576      /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3577         to be at least 32 and this all acceptable constants are
3578	 represented as CONST_INT.  */
3579      case CONST_INT:
3580	if (HOST_BITS_PER_WIDE_INT == 32)
3581	  return 1;
3582	else
3583	  {
3584	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3585	    return trunc_int_for_mode (val, SImode) == val;
3586	  }
3587	break;
3588
3589      /* For certain code models, the symbolic references are known to fit.  */
3590      case SYMBOL_REF:
3591	return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3592
3593      /* For certain code models, the code is near as well.  */
3594      case LABEL_REF:
3595	return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3596
3597      /* We also may accept the offsetted memory references in certain special
3598         cases.  */
3599      case CONST:
3600	if (GET_CODE (XEXP (value, 0)) == UNSPEC
3601	    && XVECLEN (XEXP (value, 0), 0) == 1
3602	    && XINT (XEXP (value, 0), 1) ==  15)
3603	  return 1;
3604	else if (GET_CODE (XEXP (value, 0)) == PLUS)
3605	  {
3606	    rtx op1 = XEXP (XEXP (value, 0), 0);
3607	    rtx op2 = XEXP (XEXP (value, 0), 1);
3608	    HOST_WIDE_INT offset;
3609
3610	    if (ix86_cmodel == CM_LARGE)
3611	      return 0;
3612	    if (GET_CODE (op2) != CONST_INT)
3613	      return 0;
3614	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
3615	    switch (GET_CODE (op1))
3616	      {
3617		case SYMBOL_REF:
3618		  /* For CM_SMALL assume that latest object is 1MB before
3619		     end of 31bits boundary.  We may also accept pretty
3620		     large negative constants knowing that all objects are
3621		     in the positive half of address space.  */
3622		  if (ix86_cmodel == CM_SMALL
3623		      && offset < 1024*1024*1024
3624		      && trunc_int_for_mode (offset, SImode) == offset)
3625		    return 1;
3626		  /* For CM_KERNEL we know that all object resist in the
3627		     negative half of 32bits address space.  We may not
3628		     accept negative offsets, since they may be just off
3629		     and we may accept pretty large positive ones.  */
3630		  if (ix86_cmodel == CM_KERNEL
3631		      && offset > 0
3632		      && trunc_int_for_mode (offset, SImode) == offset)
3633		    return 1;
3634		  break;
3635		case LABEL_REF:
3636		  /* These conditions are similar to SYMBOL_REF ones, just the
3637		     constraints for code models differ.  */
3638		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3639		      && offset < 1024*1024*1024
3640		      && trunc_int_for_mode (offset, SImode) == offset)
3641		    return 1;
3642		  if (ix86_cmodel == CM_KERNEL
3643		      && offset > 0
3644		      && trunc_int_for_mode (offset, SImode) == offset)
3645		    return 1;
3646		  break;
3647		default:
3648		  return 0;
3649	      }
3650	  }
3651	return 0;
3652      default:
3653	return 0;
3654    }
3655}
3656
3657/* Return 1 if VALUE can be stored in the zero extended immediate field.  */
3658int
3659x86_64_zero_extended_value (value)
3660     rtx value;
3661{
3662  switch (GET_CODE (value))
3663    {
3664      case CONST_DOUBLE:
3665	if (HOST_BITS_PER_WIDE_INT == 32)
3666	  return  (GET_MODE (value) == VOIDmode
3667		   && !CONST_DOUBLE_HIGH (value));
3668	else
3669	  return 0;
3670      case CONST_INT:
3671	if (HOST_BITS_PER_WIDE_INT == 32)
3672	  return INTVAL (value) >= 0;
3673	else
3674	  return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3675	break;
3676
3677      /* For certain code models, the symbolic references are known to fit.  */
3678      case SYMBOL_REF:
3679	return ix86_cmodel == CM_SMALL;
3680
3681      /* For certain code models, the code is near as well.  */
3682      case LABEL_REF:
3683	return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3684
3685      /* We also may accept the offsetted memory references in certain special
3686         cases.  */
3687      case CONST:
3688	if (GET_CODE (XEXP (value, 0)) == PLUS)
3689	  {
3690	    rtx op1 = XEXP (XEXP (value, 0), 0);
3691	    rtx op2 = XEXP (XEXP (value, 0), 1);
3692
3693	    if (ix86_cmodel == CM_LARGE)
3694	      return 0;
3695	    switch (GET_CODE (op1))
3696	      {
3697		case SYMBOL_REF:
3698		    return 0;
3699		  /* For small code model we may accept pretty large positive
3700		     offsets, since one bit is available for free.  Negative
3701		     offsets are limited by the size of NULL pointer area
3702		     specified by the ABI.  */
3703		  if (ix86_cmodel == CM_SMALL
3704		      && GET_CODE (op2) == CONST_INT
3705		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3706		      && (trunc_int_for_mode (INTVAL (op2), SImode)
3707			  == INTVAL (op2)))
3708		    return 1;
3709	          /* ??? For the kernel, we may accept adjustment of
3710		     -0x10000000, since we know that it will just convert
3711		     negative address space to positive, but perhaps this
3712		     is not worthwhile.  */
3713		  break;
3714		case LABEL_REF:
3715		  /* These conditions are similar to SYMBOL_REF ones, just the
3716		     constraints for code models differ.  */
3717		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3718		      && GET_CODE (op2) == CONST_INT
3719		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3720		      && (trunc_int_for_mode (INTVAL (op2), SImode)
3721			  == INTVAL (op2)))
3722		    return 1;
3723		  break;
3724		default:
3725		  return 0;
3726	      }
3727	  }
3728	return 0;
3729      default:
3730	return 0;
3731    }
3732}
3733
3734/* Value should be nonzero if functions must have frame pointers.
3735   Zero means the frame pointer need not be set up (and parms may
3736   be accessed via the stack pointer) in functions that seem suitable.  */
3737
3738int
3739ix86_frame_pointer_required ()
3740{
3741  /* If we accessed previous frames, then the generated code expects
3742     to be able to access the saved ebp value in our frame.  */
3743  if (cfun->machine->accesses_prev_frame)
3744    return 1;
3745
3746  /* Several x86 os'es need a frame pointer for other reasons,
3747     usually pertaining to setjmp.  */
3748  if (SUBTARGET_FRAME_POINTER_REQUIRED)
3749    return 1;
3750
3751  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3752     the frame pointer by default.  Turn it back on now if we've not
3753     got a leaf function.  */
3754  if (TARGET_OMIT_LEAF_FRAME_POINTER && ! leaf_function_p ())
3755    return 1;
3756
3757  return 0;
3758}
3759
3760/* Record that the current function accesses previous call frames.  */
3761
3762void
3763ix86_setup_frame_addresses ()
3764{
3765  cfun->machine->accesses_prev_frame = 1;
3766}
3767
3768static char pic_label_name[32];
3769
3770/* This function generates code for -fpic that loads %ebx with
3771   the return address of the caller and then returns.  */
3772
3773void
3774ix86_asm_file_end (file)
3775     FILE *file;
3776{
3777  rtx xops[2];
3778
3779  if (! TARGET_DEEP_BRANCH_PREDICTION || pic_label_name[0] == 0)
3780    return;
3781
3782  /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3783     to updating relocations to a section being discarded such that this
3784     doesn't work.  Ought to detect this at configure time.  */
3785#if 0
3786  /* The trick here is to create a linkonce section containing the
3787     pic label thunk, but to refer to it with an internal label.
3788     Because the label is internal, we don't have inter-dso name
3789     binding issues on hosts that don't support ".hidden".
3790
3791     In order to use these macros, however, we must create a fake
3792     function decl.  */
3793  if (targetm.have_named_sections)
3794    {
3795      tree decl = build_decl (FUNCTION_DECL,
3796			      get_identifier ("i686.get_pc_thunk"),
3797			      error_mark_node);
3798      DECL_ONE_ONLY (decl) = 1;
3799      UNIQUE_SECTION (decl, 0);
3800      named_section (decl, NULL);
3801    }
3802  else
3803#else
3804    text_section ();
3805#endif
3806
3807  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3808     internal (non-global) label that's being emitted, it didn't make
3809     sense to have .type information for local labels.   This caused
3810     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3811     me debug info for a label that you're declaring non-global?) this
3812     was changed to call ASM_OUTPUT_LABEL() instead.  */
3813
3814  ASM_OUTPUT_LABEL (file, pic_label_name);
3815
3816  xops[0] = pic_offset_table_rtx;
3817  xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3818  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3819  output_asm_insn ("ret", xops);
3820}
3821
3822void
3823load_pic_register ()
3824{
3825  rtx gotsym, pclab;
3826
3827  if (TARGET_64BIT)
3828    abort ();
3829
3830  gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3831
3832  if (TARGET_DEEP_BRANCH_PREDICTION)
3833    {
3834      if (! pic_label_name[0])
3835	ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
3836      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
3837    }
3838  else
3839    {
3840      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
3841    }
3842
3843  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));
3844
3845  if (! TARGET_DEEP_BRANCH_PREDICTION)
3846    emit_insn (gen_popsi1 (pic_offset_table_rtx));
3847
3848  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
3849}
3850
3851/* Generate an "push" pattern for input ARG.  */
3852
3853static rtx
3854gen_push (arg)
3855     rtx arg;
3856{
3857  return gen_rtx_SET (VOIDmode,
3858		      gen_rtx_MEM (Pmode,
3859				   gen_rtx_PRE_DEC (Pmode,
3860						    stack_pointer_rtx)),
3861		      arg);
3862}
3863
3864/* Return 1 if we need to save REGNO.  */
3865static int
3866ix86_save_reg (regno, maybe_eh_return)
3867     int regno;
3868     int maybe_eh_return;
3869{
3870  if (flag_pic
3871      && ! TARGET_64BIT
3872      && regno == PIC_OFFSET_TABLE_REGNUM
3873      && (current_function_uses_pic_offset_table
3874	  || current_function_uses_const_pool
3875	  || current_function_calls_eh_return))
3876    return 1;
3877
3878  if (current_function_calls_eh_return && maybe_eh_return)
3879    {
3880      unsigned i;
3881      for (i = 0; ; i++)
3882	{
3883	  unsigned test = EH_RETURN_DATA_REGNO (i);
3884	  if (test == INVALID_REGNUM)
3885	    break;
3886	  if (test == (unsigned) regno)
3887	    return 1;
3888	}
3889    }
3890
3891  return (regs_ever_live[regno]
3892	  && !call_used_regs[regno]
3893	  && !fixed_regs[regno]
3894	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
3895}
3896
3897/* Return number of registers to be saved on the stack.  */
3898
3899static int
3900ix86_nsaved_regs ()
3901{
3902  int nregs = 0;
3903  int regno;
3904
3905  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
3906    if (ix86_save_reg (regno, true))
3907      nregs++;
3908  return nregs;
3909}
3910
3911/* Return the offset between two registers, one to be eliminated, and the other
3912   its replacement, at the start of a routine.  */
3913
3914HOST_WIDE_INT
3915ix86_initial_elimination_offset (from, to)
3916     int from;
3917     int to;
3918{
3919  struct ix86_frame frame;
3920  ix86_compute_frame_layout (&frame);
3921
3922  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3923    return frame.hard_frame_pointer_offset;
3924  else if (from == FRAME_POINTER_REGNUM
3925	   && to == HARD_FRAME_POINTER_REGNUM)
3926    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
3927  else
3928    {
3929      if (to != STACK_POINTER_REGNUM)
3930	abort ();
3931      else if (from == ARG_POINTER_REGNUM)
3932	return frame.stack_pointer_offset;
3933      else if (from != FRAME_POINTER_REGNUM)
3934	abort ();
3935      else
3936	return frame.stack_pointer_offset - frame.frame_pointer_offset;
3937    }
3938}
3939
3940/* Fill structure ix86_frame about frame of currently computed function.  */
3941
3942static void
3943ix86_compute_frame_layout (frame)
3944     struct ix86_frame *frame;
3945{
3946  HOST_WIDE_INT total_size;
3947  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
3948  int offset;
3949  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
3950  HOST_WIDE_INT size = get_frame_size ();
3951
3952  frame->nregs = ix86_nsaved_regs ();
3953  total_size = size;
3954
3955  /* Skip return value and save base pointer.  */
3956  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
3957
3958  frame->hard_frame_pointer_offset = offset;
3959
3960  /* Do some sanity checking of stack_alignment_needed and
3961     preferred_alignment, since i386 port is the only using those features
3962     that may break easily.  */
3963
3964  if (size && !stack_alignment_needed)
3965    abort ();
3966  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
3967    abort ();
3968  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3969    abort ();
3970  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
3971    abort ();
3972
3973  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
3974    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
3975
3976  /* Register save area */
3977  offset += frame->nregs * UNITS_PER_WORD;
3978
3979  /* Va-arg area */
3980  if (ix86_save_varrargs_registers)
3981    {
3982      offset += X86_64_VARARGS_SIZE;
3983      frame->va_arg_size = X86_64_VARARGS_SIZE;
3984    }
3985  else
3986    frame->va_arg_size = 0;
3987
3988  /* Align start of frame for local function.  */
3989  frame->padding1 = ((offset + stack_alignment_needed - 1)
3990		     & -stack_alignment_needed) - offset;
3991
3992  offset += frame->padding1;
3993
3994  /* Frame pointer points here.  */
3995  frame->frame_pointer_offset = offset;
3996
3997  offset += size;
3998
3999  /* Add outgoing arguments area.  */
4000  if (ACCUMULATE_OUTGOING_ARGS)
4001    {
4002      offset += current_function_outgoing_args_size;
4003      frame->outgoing_arguments_size = current_function_outgoing_args_size;
4004    }
4005  else
4006    frame->outgoing_arguments_size = 0;
4007
4008  /* Align stack boundary.  */
4009  frame->padding2 = ((offset + preferred_alignment - 1)
4010		     & -preferred_alignment) - offset;
4011
4012  offset += frame->padding2;
4013
4014  /* We've reached end of stack frame.  */
4015  frame->stack_pointer_offset = offset;
4016
4017  /* Size prologue needs to allocate.  */
4018  frame->to_allocate =
4019    (size + frame->padding1 + frame->padding2
4020     + frame->outgoing_arguments_size + frame->va_arg_size);
4021
4022  if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4023      && current_function_is_leaf)
4024    {
4025      frame->red_zone_size = frame->to_allocate;
4026      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4027	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4028    }
4029  else
4030    frame->red_zone_size = 0;
4031  frame->to_allocate -= frame->red_zone_size;
4032  frame->stack_pointer_offset -= frame->red_zone_size;
4033#if 0
4034  fprintf (stderr, "nregs: %i\n", frame->nregs);
4035  fprintf (stderr, "size: %i\n", size);
4036  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4037  fprintf (stderr, "padding1: %i\n", frame->padding1);
4038  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4039  fprintf (stderr, "padding2: %i\n", frame->padding2);
4040  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4041  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4042  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4043  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4044	   frame->hard_frame_pointer_offset);
4045  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4046#endif
4047}
4048
4049/* Emit code to save registers in the prologue.  */
4050
4051static void
4052ix86_emit_save_regs ()
4053{
4054  register int regno;
4055  rtx insn;
4056
4057  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4058    if (ix86_save_reg (regno, true))
4059      {
4060	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4061	RTX_FRAME_RELATED_P (insn) = 1;
4062      }
4063}
4064
4065/* Emit code to save registers using MOV insns.  First register
4066   is restored from POINTER + OFFSET.  */
4067static void
4068ix86_emit_save_regs_using_mov (pointer, offset)
4069     rtx pointer;
4070     HOST_WIDE_INT offset;
4071{
4072  int regno;
4073  rtx insn;
4074
4075  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4076    if (ix86_save_reg (regno, true))
4077      {
4078	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4079					       Pmode, offset),
4080			       gen_rtx_REG (Pmode, regno));
4081	RTX_FRAME_RELATED_P (insn) = 1;
4082	offset += UNITS_PER_WORD;
4083      }
4084}
4085
4086/* Expand the prologue into a bunch of separate insns.  */
4087
4088void
4089ix86_expand_prologue ()
4090{
4091  rtx insn;
4092  int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
4093				  || current_function_uses_const_pool)
4094		      && !TARGET_64BIT);
4095  struct ix86_frame frame;
4096  int use_mov = 0;
4097  HOST_WIDE_INT allocate;
4098
4099  if (!optimize_size)
4100    {
4101      use_fast_prologue_epilogue
4102	 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4103      if (TARGET_PROLOGUE_USING_MOVE)
4104        use_mov = use_fast_prologue_epilogue;
4105    }
4106  ix86_compute_frame_layout (&frame);
4107
4108  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
4109     slower on all targets.  Also sdb doesn't like it.  */
4110
4111  if (frame_pointer_needed)
4112    {
4113      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4114      RTX_FRAME_RELATED_P (insn) = 1;
4115
4116      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4117      RTX_FRAME_RELATED_P (insn) = 1;
4118    }
4119
4120  allocate = frame.to_allocate;
4121  /* In case we are dealing only with single register and empty frame,
4122     push is equivalent of the mov+add sequence.  */
4123  if (allocate == 0 && frame.nregs <= 1)
4124    use_mov = 0;
4125
4126  if (!use_mov)
4127    ix86_emit_save_regs ();
4128  else
4129    allocate += frame.nregs * UNITS_PER_WORD;
4130
4131  if (allocate == 0)
4132    ;
4133  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4134    {
4135      insn = emit_insn (gen_pro_epilogue_adjust_stack
4136			(stack_pointer_rtx, stack_pointer_rtx,
4137			 GEN_INT (-allocate)));
4138      RTX_FRAME_RELATED_P (insn) = 1;
4139    }
4140  else
4141    {
4142      /* ??? Is this only valid for Win32?  */
4143
4144      rtx arg0, sym;
4145
4146      if (TARGET_64BIT)
4147	abort ();
4148
4149      arg0 = gen_rtx_REG (SImode, 0);
4150      emit_move_insn (arg0, GEN_INT (allocate));
4151
4152      sym = gen_rtx_MEM (FUNCTION_MODE,
4153			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4154      insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4155
4156      CALL_INSN_FUNCTION_USAGE (insn)
4157	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4158			     CALL_INSN_FUNCTION_USAGE (insn));
4159    }
4160  if (use_mov)
4161    {
4162      if (!frame_pointer_needed || !frame.to_allocate)
4163        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4164      else
4165        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4166				       -frame.nregs * UNITS_PER_WORD);
4167    }
4168
4169#ifdef SUBTARGET_PROLOGUE
4170  SUBTARGET_PROLOGUE;
4171#endif
4172
4173  if (pic_reg_used)
4174    load_pic_register ();
4175
4176  /* If we are profiling, make sure no instructions are scheduled before
4177     the call to mcount.  However, if -fpic, the above call will have
4178     done that.  */
4179  if (current_function_profile && ! pic_reg_used)
4180    emit_insn (gen_blockage ());
4181}
4182
4183/* Emit code to restore saved registers using MOV insns.  First register
4184   is restored from POINTER + OFFSET.  */
4185static void
4186ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4187     rtx pointer;
4188     int offset;
4189     int maybe_eh_return;
4190{
4191  int regno;
4192
4193  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4194    if (ix86_save_reg (regno, maybe_eh_return))
4195      {
4196	emit_move_insn (gen_rtx_REG (Pmode, regno),
4197			adjust_address (gen_rtx_MEM (Pmode, pointer),
4198					Pmode, offset));
4199	offset += UNITS_PER_WORD;
4200      }
4201}
4202
4203/* Restore function stack, frame, and registers.  */
4204
4205void
4206ix86_expand_epilogue (style)
4207     int style;
4208{
4209  int regno;
4210  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4211  struct ix86_frame frame;
4212  HOST_WIDE_INT offset;
4213
4214  ix86_compute_frame_layout (&frame);
4215
4216  /* Calculate start of saved registers relative to ebp.  Special care
4217     must be taken for the normal return case of a function using
4218     eh_return: the eax and edx registers are marked as saved, but not
4219     restored along this path.  */
4220  offset = frame.nregs;
4221  if (current_function_calls_eh_return && style != 2)
4222    offset -= 2;
4223  offset *= -UNITS_PER_WORD;
4224
4225  /* If we're only restoring one register and sp is not valid then
4226     using a move instruction to restore the register since it's
4227     less work than reloading sp and popping the register.
4228
4229     The default code result in stack adjustment using add/lea instruction,
4230     while this code results in LEAVE instruction (or discrete equivalent),
4231     so it is profitable in some other cases as well.  Especially when there
4232     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
4233     and there is exactly one register to pop. This heruistic may need some
4234     tuning in future.  */
4235  if ((!sp_valid && frame.nregs <= 1)
4236      || (TARGET_EPILOGUE_USING_MOVE
4237	  && use_fast_prologue_epilogue
4238	  && (frame.nregs > 1 || frame.to_allocate))
4239      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4240      || (frame_pointer_needed && TARGET_USE_LEAVE
4241	  && use_fast_prologue_epilogue && frame.nregs == 1)
4242      || current_function_calls_eh_return)
4243    {
4244      /* Restore registers.  We can use ebp or esp to address the memory
4245	 locations.  If both are available, default to ebp, since offsets
4246	 are known to be small.  Only exception is esp pointing directly to the
4247	 end of block of saved registers, where we may simplify addressing
4248	 mode.  */
4249
4250      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4251	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4252					  frame.to_allocate, style == 2);
4253      else
4254	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4255					  offset, style == 2);
4256
4257      /* eh_return epilogues need %ecx added to the stack pointer.  */
4258      if (style == 2)
4259	{
4260	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4261
4262	  if (frame_pointer_needed)
4263	    {
4264	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4265	      tmp = plus_constant (tmp, UNITS_PER_WORD);
4266	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4267
4268	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4269	      emit_move_insn (hard_frame_pointer_rtx, tmp);
4270
4271	      emit_insn (gen_pro_epilogue_adjust_stack
4272			 (stack_pointer_rtx, sa, const0_rtx));
4273	    }
4274	  else
4275	    {
4276	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4277	      tmp = plus_constant (tmp, (frame.to_allocate
4278                                         + frame.nregs * UNITS_PER_WORD));
4279	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4280	    }
4281	}
4282      else if (!frame_pointer_needed)
4283	emit_insn (gen_pro_epilogue_adjust_stack
4284		   (stack_pointer_rtx, stack_pointer_rtx,
4285		    GEN_INT (frame.to_allocate
4286			     + frame.nregs * UNITS_PER_WORD)));
4287      /* If not an i386, mov & pop is faster than "leave".  */
4288      else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4289	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4290      else
4291	{
4292	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4293						    hard_frame_pointer_rtx,
4294						    const0_rtx));
4295	  if (TARGET_64BIT)
4296	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4297	  else
4298	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4299	}
4300    }
4301  else
4302    {
4303      /* First step is to deallocate the stack frame so that we can
4304	 pop the registers.  */
4305      if (!sp_valid)
4306	{
4307	  if (!frame_pointer_needed)
4308	    abort ();
4309          emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4310						    hard_frame_pointer_rtx,
4311						    GEN_INT (offset)));
4312	}
4313      else if (frame.to_allocate)
4314	emit_insn (gen_pro_epilogue_adjust_stack
4315		   (stack_pointer_rtx, stack_pointer_rtx,
4316		    GEN_INT (frame.to_allocate)));
4317
4318      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4319	if (ix86_save_reg (regno, false))
4320	  {
4321	    if (TARGET_64BIT)
4322	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4323	    else
4324	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4325	  }
4326      if (frame_pointer_needed)
4327	{
4328	  /* Leave results in shorter dependency chains on CPUs that are
4329	     able to grok it fast.  */
4330	  if (TARGET_USE_LEAVE)
4331	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4332	  else if (TARGET_64BIT)
4333	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4334	  else
4335	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4336	}
4337    }
4338
4339  /* Sibcall epilogues don't want a return instruction.  */
4340  if (style == 0)
4341    return;
4342
4343  if (current_function_pops_args && current_function_args_size)
4344    {
4345      rtx popc = GEN_INT (current_function_pops_args);
4346
4347      /* i386 can only pop 64K bytes.  If asked to pop more, pop
4348	 return address, do explicit add, and jump indirectly to the
4349	 caller.  */
4350
4351      if (current_function_pops_args >= 65536)
4352	{
4353	  rtx ecx = gen_rtx_REG (SImode, 2);
4354
4355	  /* There are is no "pascal" calling convention in 64bit ABI.  */
4356	  if (TARGET_64BIT)
4357	    abort ();
4358
4359	  emit_insn (gen_popsi1 (ecx));
4360	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4361	  emit_jump_insn (gen_return_indirect_internal (ecx));
4362	}
4363      else
4364	emit_jump_insn (gen_return_pop_internal (popc));
4365    }
4366  else
4367    emit_jump_insn (gen_return_internal ());
4368}
4369
4370/* Extract the parts of an RTL expression that is a valid memory address
4371   for an instruction.  Return 0 if the structure of the address is
4372   grossly off.  Return -1 if the address contains ASHIFT, so it is not
4373   strictly valid, but still used for computing length of lea instruction.
4374   */
4375
4376static int
4377ix86_decompose_address (addr, out)
4378     register rtx addr;
4379     struct ix86_address *out;
4380{
4381  rtx base = NULL_RTX;
4382  rtx index = NULL_RTX;
4383  rtx disp = NULL_RTX;
4384  HOST_WIDE_INT scale = 1;
4385  rtx scale_rtx = NULL_RTX;
4386  int retval = 1;
4387
4388  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
4389    base = addr;
4390  else if (GET_CODE (addr) == PLUS)
4391    {
4392      rtx op0 = XEXP (addr, 0);
4393      rtx op1 = XEXP (addr, 1);
4394      enum rtx_code code0 = GET_CODE (op0);
4395      enum rtx_code code1 = GET_CODE (op1);
4396
4397      if (code0 == REG || code0 == SUBREG)
4398	{
4399	  if (code1 == REG || code1 == SUBREG)
4400	    index = op0, base = op1;	/* index + base */
4401	  else
4402	    base = op0, disp = op1;	/* base + displacement */
4403	}
4404      else if (code0 == MULT)
4405	{
4406	  index = XEXP (op0, 0);
4407	  scale_rtx = XEXP (op0, 1);
4408	  if (code1 == REG || code1 == SUBREG)
4409	    base = op1;			/* index*scale + base */
4410	  else
4411	    disp = op1;			/* index*scale + disp */
4412	}
4413      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4414	{
4415	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
4416	  scale_rtx = XEXP (XEXP (op0, 0), 1);
4417	  base = XEXP (op0, 1);
4418	  disp = op1;
4419	}
4420      else if (code0 == PLUS)
4421	{
4422	  index = XEXP (op0, 0);	/* index + base + disp */
4423	  base = XEXP (op0, 1);
4424	  disp = op1;
4425	}
4426      else
4427	return 0;
4428    }
4429  else if (GET_CODE (addr) == MULT)
4430    {
4431      index = XEXP (addr, 0);		/* index*scale */
4432      scale_rtx = XEXP (addr, 1);
4433    }
4434  else if (GET_CODE (addr) == ASHIFT)
4435    {
4436      rtx tmp;
4437
4438      /* We're called for lea too, which implements ashift on occasion.  */
4439      index = XEXP (addr, 0);
4440      tmp = XEXP (addr, 1);
4441      if (GET_CODE (tmp) != CONST_INT)
4442	return 0;
4443      scale = INTVAL (tmp);
4444      if ((unsigned HOST_WIDE_INT) scale > 3)
4445	return 0;
4446      scale = 1 << scale;
4447      retval = -1;
4448    }
4449  else
4450    disp = addr;			/* displacement */
4451
4452  /* Extract the integral value of scale.  */
4453  if (scale_rtx)
4454    {
4455      if (GET_CODE (scale_rtx) != CONST_INT)
4456	return 0;
4457      scale = INTVAL (scale_rtx);
4458    }
4459
4460  /* Allow arg pointer and stack pointer as index if there is not scaling */
4461  if (base && index && scale == 1
4462      && (index == arg_pointer_rtx || index == frame_pointer_rtx
4463          || index == stack_pointer_rtx))
4464    {
4465      rtx tmp = base;
4466      base = index;
4467      index = tmp;
4468    }
4469
4470  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
4471  if ((base == hard_frame_pointer_rtx
4472       || base == frame_pointer_rtx
4473       || base == arg_pointer_rtx) && !disp)
4474    disp = const0_rtx;
4475
4476  /* Special case: on K6, [%esi] makes the instruction vector decoded.
4477     Avoid this by transforming to [%esi+0].  */
4478  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4479      && base && !index && !disp
4480      && REG_P (base)
4481      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4482    disp = const0_rtx;
4483
4484  /* Special case: encode reg+reg instead of reg*2.  */
4485  if (!base && index && scale && scale == 2)
4486    base = index, scale = 1;
4487
4488  /* Special case: scaling cannot be encoded without base or displacement.  */
4489  if (!base && !disp && index && scale != 1)
4490    disp = const0_rtx;
4491
4492  out->base = base;
4493  out->index = index;
4494  out->disp = disp;
4495  out->scale = scale;
4496
4497  return retval;
4498}
4499
4500/* Return cost of the memory address x.
4501   For i386, it is better to use a complex address than let gcc copy
4502   the address into a reg and make a new pseudo.  But not if the address
4503   requires to two regs - that would mean more pseudos with longer
4504   lifetimes.  */
4505int
4506ix86_address_cost (x)
4507     rtx x;
4508{
4509  struct ix86_address parts;
4510  int cost = 1;
4511
4512  if (!ix86_decompose_address (x, &parts))
4513    abort ();
4514
4515  /* More complex memory references are better.  */
4516  if (parts.disp && parts.disp != const0_rtx)
4517    cost--;
4518
4519  /* Attempt to minimize number of registers in the address.  */
4520  if ((parts.base
4521       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4522      || (parts.index
4523	  && (!REG_P (parts.index)
4524	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4525    cost++;
4526
4527  if (parts.base
4528      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4529      && parts.index
4530      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4531      && parts.base != parts.index)
4532    cost++;
4533
4534  /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4535     since it's predecode logic can't detect the length of instructions
4536     and it degenerates to vector decoded.  Increase cost of such
4537     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
4538     to split such addresses or even refuse such addresses at all.
4539
4540     Following addressing modes are affected:
4541      [base+scale*index]
4542      [scale*index+disp]
4543      [base+index]
4544
4545     The first and last case  may be avoidable by explicitly coding the zero in
4546     memory address, but I don't have AMD-K6 machine handy to check this
4547     theory.  */
4548
4549  if (TARGET_K6
4550      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4551	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4552	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4553    cost += 10;
4554
4555  return cost;
4556}
4557
4558/* If X is a machine specific address (i.e. a symbol or label being
4559   referenced as a displacement from the GOT implemented using an
4560   UNSPEC), then return the base term.  Otherwise return X.  */
4561
4562rtx
4563ix86_find_base_term (x)
4564     rtx x;
4565{
4566  rtx term;
4567
4568  if (TARGET_64BIT)
4569    {
4570      if (GET_CODE (x) != CONST)
4571	return x;
4572      term = XEXP (x, 0);
4573      if (GET_CODE (term) == PLUS
4574	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
4575	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4576	term = XEXP (term, 0);
4577      if (GET_CODE (term) != UNSPEC
4578	  || XVECLEN (term, 0) != 1
4579	  || XINT (term, 1) !=  15)
4580	return x;
4581
4582      term = XVECEXP (term, 0, 0);
4583
4584      if (GET_CODE (term) != SYMBOL_REF
4585	  && GET_CODE (term) != LABEL_REF)
4586	return x;
4587
4588      return term;
4589    }
4590
4591  if (GET_CODE (x) != PLUS
4592      || XEXP (x, 0) != pic_offset_table_rtx
4593      || GET_CODE (XEXP (x, 1)) != CONST)
4594    return x;
4595
4596  term = XEXP (XEXP (x, 1), 0);
4597
4598  if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4599    term = XEXP (term, 0);
4600
4601  if (GET_CODE (term) != UNSPEC
4602      || XVECLEN (term, 0) != 1
4603      || XINT (term, 1) !=  7)
4604    return x;
4605
4606  term = XVECEXP (term, 0, 0);
4607
4608  if (GET_CODE (term) != SYMBOL_REF
4609      && GET_CODE (term) != LABEL_REF)
4610    return x;
4611
4612  return term;
4613}
4614
4615/* Determine if a given CONST RTX is a valid memory displacement
4616   in PIC mode.  */
4617
4618int
4619legitimate_pic_address_disp_p (disp)
4620     register rtx disp;
4621{
4622  /* In 64bit mode we can allow direct addresses of symbols and labels
4623     when they are not dynamic symbols.  */
4624  if (TARGET_64BIT)
4625    {
4626      rtx x = disp;
4627      if (GET_CODE (disp) == CONST)
4628	x = XEXP (disp, 0);
4629      /* ??? Handle PIC code models */
4630      if (GET_CODE (x) == PLUS
4631	  && (GET_CODE (XEXP (x, 1)) == CONST_INT
4632	      && ix86_cmodel == CM_SMALL_PIC
4633	      && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4634	      && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4635	x = XEXP (x, 0);
4636      if (local_symbolic_operand (x, Pmode))
4637	return 1;
4638    }
4639  if (GET_CODE (disp) != CONST)
4640    return 0;
4641  disp = XEXP (disp, 0);
4642
4643  if (TARGET_64BIT)
4644    {
4645      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
4646         of GOT tables.  We should not need these anyway.  */
4647      if (GET_CODE (disp) != UNSPEC
4648	  || XVECLEN (disp, 0) != 1
4649	  || XINT (disp, 1) != 15)
4650	return 0;
4651
4652      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4653	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4654	return 0;
4655      return 1;
4656    }
4657
4658  if (GET_CODE (disp) == PLUS)
4659    {
4660      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4661	return 0;
4662      disp = XEXP (disp, 0);
4663    }
4664
4665  if (GET_CODE (disp) != UNSPEC
4666      || XVECLEN (disp, 0) != 1)
4667    return 0;
4668
4669  /* Must be @GOT or @GOTOFF.  */
4670  switch (XINT (disp, 1))
4671    {
4672    case 6: /* @GOT */
4673      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4674
4675    case 7: /* @GOTOFF */
4676      return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4677    }
4678
4679  return 0;
4680}
4681
4682/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4683   memory address for an instruction.  The MODE argument is the machine mode
4684   for the MEM expression that wants to use this address.
4685
4686   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
4687   convert common non-canonical forms to canonical form so that they will
4688   be recognized.  */
4689
4690int
4691legitimate_address_p (mode, addr, strict)
4692     enum machine_mode mode;
4693     register rtx addr;
4694     int strict;
4695{
4696  struct ix86_address parts;
4697  rtx base, index, disp;
4698  HOST_WIDE_INT scale;
4699  const char *reason = NULL;
4700  rtx reason_rtx = NULL_RTX;
4701
4702  if (TARGET_DEBUG_ADDR)
4703    {
4704      fprintf (stderr,
4705	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4706	       GET_MODE_NAME (mode), strict);
4707      debug_rtx (addr);
4708    }
4709
4710  if (ix86_decompose_address (addr, &parts) <= 0)
4711    {
4712      reason = "decomposition failed";
4713      goto report_error;
4714    }
4715
4716  base = parts.base;
4717  index = parts.index;
4718  disp = parts.disp;
4719  scale = parts.scale;
4720
4721  /* Validate base register.
4722
4723     Don't allow SUBREG's here, it can lead to spill failures when the base
4724     is one word out of a two word structure, which is represented internally
4725     as a DImode int.  */
4726
4727  if (base)
4728    {
4729      reason_rtx = base;
4730
4731      if (GET_CODE (base) != REG)
4732	{
4733	  reason = "base is not a register";
4734	  goto report_error;
4735	}
4736
4737      if (GET_MODE (base) != Pmode)
4738	{
4739	  reason = "base is not in Pmode";
4740	  goto report_error;
4741	}
4742
4743      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
4744	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
4745	{
4746	  reason = "base is not valid";
4747	  goto report_error;
4748	}
4749    }
4750
4751  /* Validate index register.
4752
4753     Don't allow SUBREG's here, it can lead to spill failures when the index
4754     is one word out of a two word structure, which is represented internally
4755     as a DImode int.  */
4756
4757  if (index)
4758    {
4759      reason_rtx = index;
4760
4761      if (GET_CODE (index) != REG)
4762	{
4763	  reason = "index is not a register";
4764	  goto report_error;
4765	}
4766
4767      if (GET_MODE (index) != Pmode)
4768	{
4769	  reason = "index is not in Pmode";
4770	  goto report_error;
4771	}
4772
4773      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
4774	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
4775	{
4776	  reason = "index is not valid";
4777	  goto report_error;
4778	}
4779    }
4780
4781  /* Validate scale factor.  */
4782  if (scale != 1)
4783    {
4784      reason_rtx = GEN_INT (scale);
4785      if (!index)
4786	{
4787	  reason = "scale without index";
4788	  goto report_error;
4789	}
4790
4791      if (scale != 2 && scale != 4 && scale != 8)
4792	{
4793	  reason = "scale is not a valid multiplier";
4794	  goto report_error;
4795	}
4796    }
4797
4798  /* Validate displacement.  */
4799  if (disp)
4800    {
4801      reason_rtx = disp;
4802
4803      if (!CONSTANT_ADDRESS_P (disp))
4804	{
4805	  reason = "displacement is not constant";
4806	  goto report_error;
4807	}
4808
4809      if (TARGET_64BIT)
4810	{
4811	  if (!x86_64_sign_extended_value (disp))
4812	    {
4813	      reason = "displacement is out of range";
4814	      goto report_error;
4815	    }
4816	}
4817      else
4818	{
4819	  if (GET_CODE (disp) == CONST_DOUBLE)
4820	    {
4821	      reason = "displacement is a const_double";
4822	      goto report_error;
4823	    }
4824	}
4825
4826      if (flag_pic && SYMBOLIC_CONST (disp))
4827	{
4828	  if (TARGET_64BIT && (index || base))
4829	    {
4830	      reason = "non-constant pic memory reference";
4831	      goto report_error;
4832	    }
4833	  if (! legitimate_pic_address_disp_p (disp))
4834	    {
4835	      reason = "displacement is an invalid pic construct";
4836	      goto report_error;
4837	    }
4838
4839          /* This code used to verify that a symbolic pic displacement
4840	     includes the pic_offset_table_rtx register.
4841
4842	     While this is good idea, unfortunately these constructs may
4843	     be created by "adds using lea" optimization for incorrect
4844	     code like:
4845
4846	     int a;
4847	     int foo(int i)
4848	       {
4849	         return *(&a+i);
4850	       }
4851
4852	     This code is nonsensical, but results in addressing
4853	     GOT table with pic_offset_table_rtx base.  We can't
4854	     just refuse it easily, since it gets matched by
4855	     "addsi3" pattern, that later gets split to lea in the
4856	     case output register differs from input.  While this
4857	     can be handled by separate addsi pattern for this case
4858	     that never results in lea, this seems to be easier and
4859	     correct fix for crash to disable this test.  */
4860	}
4861      else if (HALF_PIC_P ())
4862	{
4863	  if (! HALF_PIC_ADDRESS_P (disp)
4864	      || (base != NULL_RTX || index != NULL_RTX))
4865	    {
4866	      reason = "displacement is an invalid half-pic reference";
4867	      goto report_error;
4868	    }
4869	}
4870    }
4871
4872  /* Everything looks valid.  */
4873  if (TARGET_DEBUG_ADDR)
4874    fprintf (stderr, "Success.\n");
4875  return TRUE;
4876
4877report_error:
4878  if (TARGET_DEBUG_ADDR)
4879    {
4880      fprintf (stderr, "Error: %s\n", reason);
4881      debug_rtx (reason_rtx);
4882    }
4883  return FALSE;
4884}
4885
4886/* Return an unique alias set for the GOT.  */
4887
4888static HOST_WIDE_INT
4889ix86_GOT_alias_set ()
4890{
4891    static HOST_WIDE_INT set = -1;
4892    if (set == -1)
4893      set = new_alias_set ();
4894    return set;
4895}
4896
4897/* Return a legitimate reference for ORIG (an address) using the
4898   register REG.  If REG is 0, a new pseudo is generated.
4899
4900   There are two types of references that must be handled:
4901
4902   1. Global data references must load the address from the GOT, via
4903      the PIC reg.  An insn is emitted to do this load, and the reg is
4904      returned.
4905
4906   2. Static data references, constant pool addresses, and code labels
4907      compute the address as an offset from the GOT, whose base is in
4908      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
4909      differentiate them from global data objects.  The returned
4910      address is the PIC reg + an unspec constant.
4911
4912   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4913   reg also appears in the address.  */
4914
4915rtx
4916legitimize_pic_address (orig, reg)
4917     rtx orig;
4918     rtx reg;
4919{
4920  rtx addr = orig;
4921  rtx new = orig;
4922  rtx base;
4923
4924  if (local_symbolic_operand (addr, Pmode))
4925    {
4926      /* In 64bit mode we can address such objects directly.  */
4927      if (TARGET_64BIT)
4928	new = addr;
4929      else
4930	{
4931	  /* This symbol may be referenced via a displacement from the PIC
4932	     base address (@GOTOFF).  */
4933
4934	  current_function_uses_pic_offset_table = 1;
4935	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 7);
4936	  new = gen_rtx_CONST (Pmode, new);
4937	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4938
4939	  if (reg != 0)
4940	    {
4941	      emit_move_insn (reg, new);
4942	      new = reg;
4943	    }
4944      	}
4945    }
4946  else if (GET_CODE (addr) == SYMBOL_REF)
4947    {
4948      if (TARGET_64BIT)
4949	{
4950	  current_function_uses_pic_offset_table = 1;
4951	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 15);
4952	  new = gen_rtx_CONST (Pmode, new);
4953	  new = gen_rtx_MEM (Pmode, new);
4954	  RTX_UNCHANGING_P (new) = 1;
4955	  set_mem_alias_set (new, ix86_GOT_alias_set ());
4956
4957	  if (reg == 0)
4958	    reg = gen_reg_rtx (Pmode);
4959	  /* Use directly gen_movsi, otherwise the address is loaded
4960	     into register for CSE.  We don't want to CSE this addresses,
4961	     instead we CSE addresses from the GOT table, so skip this.  */
4962	  emit_insn (gen_movsi (reg, new));
4963	  new = reg;
4964	}
4965      else
4966	{
4967	  /* This symbol must be referenced via a load from the
4968	     Global Offset Table (@GOT).  */
4969
4970	  current_function_uses_pic_offset_table = 1;
4971	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 6);
4972	  new = gen_rtx_CONST (Pmode, new);
4973	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
4974	  new = gen_rtx_MEM (Pmode, new);
4975	  RTX_UNCHANGING_P (new) = 1;
4976	  set_mem_alias_set (new, ix86_GOT_alias_set ());
4977
4978	  if (reg == 0)
4979	    reg = gen_reg_rtx (Pmode);
4980	  emit_move_insn (reg, new);
4981	  new = reg;
4982	}
4983    }
4984  else
4985    {
4986      if (GET_CODE (addr) == CONST)
4987	{
4988	  addr = XEXP (addr, 0);
4989	  if (GET_CODE (addr) == UNSPEC)
4990	    {
4991	      /* Check that the unspec is one of the ones we generate?  */
4992	    }
4993	  else if (GET_CODE (addr) != PLUS)
4994	    abort ();
4995	}
4996      if (GET_CODE (addr) == PLUS)
4997	{
4998	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
4999
5000	  /* Check first to see if this is a constant offset from a @GOTOFF
5001	     symbol reference.  */
5002	  if (local_symbolic_operand (op0, Pmode)
5003	      && GET_CODE (op1) == CONST_INT)
5004	    {
5005	      if (!TARGET_64BIT)
5006		{
5007		  current_function_uses_pic_offset_table = 1;
5008		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 7);
5009		  new = gen_rtx_PLUS (Pmode, new, op1);
5010		  new = gen_rtx_CONST (Pmode, new);
5011		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5012
5013		  if (reg != 0)
5014		    {
5015		      emit_move_insn (reg, new);
5016		      new = reg;
5017		    }
5018		}
5019	      else
5020		{
5021		  /* ??? We need to limit offsets here.  */
5022		}
5023	    }
5024	  else
5025	    {
5026	      base = legitimize_pic_address (XEXP (addr, 0), reg);
5027	      new  = legitimize_pic_address (XEXP (addr, 1),
5028					     base == reg ? NULL_RTX : reg);
5029
5030	      if (GET_CODE (new) == CONST_INT)
5031		new = plus_constant (base, INTVAL (new));
5032	      else
5033		{
5034		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5035		    {
5036		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5037		      new = XEXP (new, 1);
5038		    }
5039		  new = gen_rtx_PLUS (Pmode, base, new);
5040		}
5041	    }
5042	}
5043    }
5044  return new;
5045}
5046
5047/* Try machine-dependent ways of modifying an illegitimate address
5048   to be legitimate.  If we find one, return the new, valid address.
5049   This macro is used in only one place: `memory_address' in explow.c.
5050
5051   OLDX is the address as it was before break_out_memory_refs was called.
5052   In some cases it is useful to look at this to decide what needs to be done.
5053
5054   MODE and WIN are passed so that this macro can use
5055   GO_IF_LEGITIMATE_ADDRESS.
5056
5057   It is always safe for this macro to do nothing.  It exists to recognize
5058   opportunities to optimize the output.
5059
5060   For the 80386, we handle X+REG by loading X into a register R and
5061   using R+REG.  R will go in a general reg and indexing will be used.
5062   However, if REG is a broken-out memory address or multiplication,
5063   nothing needs to be done because REG can certainly go in a general reg.
5064
5065   When -fpic is used, special handling is needed for symbolic references.
5066   See comments by legitimize_pic_address in i386.c for details.  */
5067
5068rtx
5069legitimize_address (x, oldx, mode)
5070     register rtx x;
5071     register rtx oldx ATTRIBUTE_UNUSED;
5072     enum machine_mode mode;
5073{
5074  int changed = 0;
5075  unsigned log;
5076
5077  if (TARGET_DEBUG_ADDR)
5078    {
5079      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5080	       GET_MODE_NAME (mode));
5081      debug_rtx (x);
5082    }
5083
5084  if (flag_pic && SYMBOLIC_CONST (x))
5085    return legitimize_pic_address (x, 0);
5086
5087  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5088  if (GET_CODE (x) == ASHIFT
5089      && GET_CODE (XEXP (x, 1)) == CONST_INT
5090      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5091    {
5092      changed = 1;
5093      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5094			GEN_INT (1 << log));
5095    }
5096
5097  if (GET_CODE (x) == PLUS)
5098    {
5099      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
5100
5101      if (GET_CODE (XEXP (x, 0)) == ASHIFT
5102	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5103	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5104	{
5105	  changed = 1;
5106	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
5107				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5108				      GEN_INT (1 << log));
5109	}
5110
5111      if (GET_CODE (XEXP (x, 1)) == ASHIFT
5112	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5113	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5114	{
5115	  changed = 1;
5116	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
5117				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5118				      GEN_INT (1 << log));
5119	}
5120
5121      /* Put multiply first if it isn't already.  */
5122      if (GET_CODE (XEXP (x, 1)) == MULT)
5123	{
5124	  rtx tmp = XEXP (x, 0);
5125	  XEXP (x, 0) = XEXP (x, 1);
5126	  XEXP (x, 1) = tmp;
5127	  changed = 1;
5128	}
5129
5130      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5131	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
5132	 created by virtual register instantiation, register elimination, and
5133	 similar optimizations.  */
5134      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5135	{
5136	  changed = 1;
5137	  x = gen_rtx_PLUS (Pmode,
5138			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
5139					  XEXP (XEXP (x, 1), 0)),
5140			    XEXP (XEXP (x, 1), 1));
5141	}
5142
5143      /* Canonicalize
5144	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5145	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
5146      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5147	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5148	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5149	       && CONSTANT_P (XEXP (x, 1)))
5150	{
5151	  rtx constant;
5152	  rtx other = NULL_RTX;
5153
5154	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5155	    {
5156	      constant = XEXP (x, 1);
5157	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5158	    }
5159	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5160	    {
5161	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5162	      other = XEXP (x, 1);
5163	    }
5164	  else
5165	    constant = 0;
5166
5167	  if (constant)
5168	    {
5169	      changed = 1;
5170	      x = gen_rtx_PLUS (Pmode,
5171				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5172					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
5173				plus_constant (other, INTVAL (constant)));
5174	    }
5175	}
5176
5177      if (changed && legitimate_address_p (mode, x, FALSE))
5178	return x;
5179
5180      if (GET_CODE (XEXP (x, 0)) == MULT)
5181	{
5182	  changed = 1;
5183	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5184	}
5185
5186      if (GET_CODE (XEXP (x, 1)) == MULT)
5187	{
5188	  changed = 1;
5189	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5190	}
5191
5192      if (changed
5193	  && GET_CODE (XEXP (x, 1)) == REG
5194	  && GET_CODE (XEXP (x, 0)) == REG)
5195	return x;
5196
5197      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5198	{
5199	  changed = 1;
5200	  x = legitimize_pic_address (x, 0);
5201	}
5202
5203      if (changed && legitimate_address_p (mode, x, FALSE))
5204	return x;
5205
5206      if (GET_CODE (XEXP (x, 0)) == REG)
5207	{
5208	  register rtx temp = gen_reg_rtx (Pmode);
5209	  register rtx val  = force_operand (XEXP (x, 1), temp);
5210	  if (val != temp)
5211	    emit_move_insn (temp, val);
5212
5213	  XEXP (x, 1) = temp;
5214	  return x;
5215	}
5216
5217      else if (GET_CODE (XEXP (x, 1)) == REG)
5218	{
5219	  register rtx temp = gen_reg_rtx (Pmode);
5220	  register rtx val  = force_operand (XEXP (x, 0), temp);
5221	  if (val != temp)
5222	    emit_move_insn (temp, val);
5223
5224	  XEXP (x, 0) = temp;
5225	  return x;
5226	}
5227    }
5228
5229  return x;
5230}
5231
5232/* Print an integer constant expression in assembler syntax.  Addition
5233   and subtraction are the only arithmetic that may appear in these
5234   expressions.  FILE is the stdio stream to write to, X is the rtx, and
5235   CODE is the operand print code from the output string.  */
5236
5237static void
5238output_pic_addr_const (file, x, code)
5239     FILE *file;
5240     rtx x;
5241     int code;
5242{
5243  char buf[256];
5244
5245  switch (GET_CODE (x))
5246    {
5247    case PC:
5248      if (flag_pic)
5249	putc ('.', file);
5250      else
5251	abort ();
5252      break;
5253
5254    case SYMBOL_REF:
5255      assemble_name (file, XSTR (x, 0));
5256      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5257	fputs ("@PLT", file);
5258      break;
5259
5260    case LABEL_REF:
5261      x = XEXP (x, 0);
5262      /* FALLTHRU */
5263    case CODE_LABEL:
5264      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5265      assemble_name (asm_out_file, buf);
5266      break;
5267
5268    case CONST_INT:
5269      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5270      break;
5271
5272    case CONST:
5273      /* This used to output parentheses around the expression,
5274	 but that does not work on the 386 (either ATT or BSD assembler).  */
5275      output_pic_addr_const (file, XEXP (x, 0), code);
5276      break;
5277
5278    case CONST_DOUBLE:
5279      if (GET_MODE (x) == VOIDmode)
5280	{
5281	  /* We can use %d if the number is <32 bits and positive.  */
5282	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5283	    fprintf (file, "0x%lx%08lx",
5284		     (unsigned long) CONST_DOUBLE_HIGH (x),
5285		     (unsigned long) CONST_DOUBLE_LOW (x));
5286	  else
5287	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5288	}
5289      else
5290	/* We can't handle floating point constants;
5291	   PRINT_OPERAND must handle them.  */
5292	output_operand_lossage ("floating constant misused");
5293      break;
5294
5295    case PLUS:
5296      /* Some assemblers need integer constants to appear first.  */
5297      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5298	{
5299	  output_pic_addr_const (file, XEXP (x, 0), code);
5300	  putc ('+', file);
5301	  output_pic_addr_const (file, XEXP (x, 1), code);
5302	}
5303      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5304	{
5305	  output_pic_addr_const (file, XEXP (x, 1), code);
5306	  putc ('+', file);
5307	  output_pic_addr_const (file, XEXP (x, 0), code);
5308	}
5309      else
5310	abort ();
5311      break;
5312
5313    case MINUS:
5314      putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5315      output_pic_addr_const (file, XEXP (x, 0), code);
5316      putc ('-', file);
5317      output_pic_addr_const (file, XEXP (x, 1), code);
5318      putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5319      break;
5320
5321     case UNSPEC:
5322       if (XVECLEN (x, 0) != 1)
5323	abort ();
5324       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5325       switch (XINT (x, 1))
5326	{
5327	case 6:
5328	  fputs ("@GOT", file);
5329	  break;
5330	case 7:
5331	  fputs ("@GOTOFF", file);
5332	  break;
5333	case 8:
5334	  fputs ("@PLT", file);
5335	  break;
5336	case 15:
5337	  fputs ("@GOTPCREL(%RIP)", file);
5338	  break;
5339	default:
5340	  output_operand_lossage ("invalid UNSPEC as operand");
5341	  break;
5342	}
5343       break;
5344
5345    default:
5346      output_operand_lossage ("invalid expression as operand");
5347    }
5348}
5349
5350/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5351   We need to handle our special PIC relocations.  */
5352
5353void
5354i386_dwarf_output_addr_const (file, x)
5355     FILE *file;
5356     rtx x;
5357{
5358#ifdef ASM_QUAD
5359  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5360#else
5361  if (TARGET_64BIT)
5362    abort ();
5363  fprintf (file, "%s", ASM_LONG);
5364#endif
5365  if (flag_pic)
5366    output_pic_addr_const (file, x, '\0');
5367  else
5368    output_addr_const (file, x);
5369  fputc ('\n', file);
5370}
5371
5372/* In the name of slightly smaller debug output, and to cater to
5373   general assembler losage, recognize PIC+GOTOFF and turn it back
5374   into a direct symbol reference.  */
5375
5376rtx
5377i386_simplify_dwarf_addr (orig_x)
5378     rtx orig_x;
5379{
5380  rtx x = orig_x;
5381
5382  if (TARGET_64BIT)
5383    {
5384      if (GET_CODE (x) != CONST
5385	  || GET_CODE (XEXP (x, 0)) != UNSPEC
5386	  || XINT (XEXP (x, 0), 1) != 15)
5387	return orig_x;
5388      return XVECEXP (XEXP (x, 0), 0, 0);
5389    }
5390
5391  if (GET_CODE (x) != PLUS
5392      || GET_CODE (XEXP (x, 0)) != REG
5393      || GET_CODE (XEXP (x, 1)) != CONST)
5394    return orig_x;
5395
5396  x = XEXP (XEXP (x, 1), 0);
5397  if (GET_CODE (x) == UNSPEC
5398      && (XINT (x, 1) == 6
5399	  || XINT (x, 1) == 7))
5400    return XVECEXP (x, 0, 0);
5401
5402  if (GET_CODE (x) == PLUS
5403      && GET_CODE (XEXP (x, 0)) == UNSPEC
5404      && GET_CODE (XEXP (x, 1)) == CONST_INT
5405      && (XINT (XEXP (x, 0), 1) == 6
5406	  || XINT (XEXP (x, 0), 1) == 7))
5407    return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5408
5409  return orig_x;
5410}
5411
5412static void
5413put_condition_code (code, mode, reverse, fp, file)
5414     enum rtx_code code;
5415     enum machine_mode mode;
5416     int reverse, fp;
5417     FILE *file;
5418{
5419  const char *suffix;
5420
5421  if (mode == CCFPmode || mode == CCFPUmode)
5422    {
5423      enum rtx_code second_code, bypass_code;
5424      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5425      if (bypass_code != NIL || second_code != NIL)
5426	abort ();
5427      code = ix86_fp_compare_code_to_integer (code);
5428      mode = CCmode;
5429    }
5430  if (reverse)
5431    code = reverse_condition (code);
5432
5433  switch (code)
5434    {
5435    case EQ:
5436      suffix = "e";
5437      break;
5438    case NE:
5439      suffix = "ne";
5440      break;
5441    case GT:
5442      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
5443	abort ();
5444      suffix = "g";
5445      break;
5446    case GTU:
5447      /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
5448	 Those same assemblers have the same but opposite losage on cmov.  */
5449      if (mode != CCmode)
5450	abort ();
5451      suffix = fp ? "nbe" : "a";
5452      break;
5453    case LT:
5454      if (mode == CCNOmode || mode == CCGOCmode)
5455	suffix = "s";
5456      else if (mode == CCmode || mode == CCGCmode)
5457	suffix = "l";
5458      else
5459	abort ();
5460      break;
5461    case LTU:
5462      if (mode != CCmode)
5463	abort ();
5464      suffix = "b";
5465      break;
5466    case GE:
5467      if (mode == CCNOmode || mode == CCGOCmode)
5468	suffix = "ns";
5469      else if (mode == CCmode || mode == CCGCmode)
5470	suffix = "ge";
5471      else
5472	abort ();
5473      break;
5474    case GEU:
5475      /* ??? As above.  */
5476      if (mode != CCmode)
5477	abort ();
5478      suffix = fp ? "nb" : "ae";
5479      break;
5480    case LE:
5481      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
5482	abort ();
5483      suffix = "le";
5484      break;
5485    case LEU:
5486      if (mode != CCmode)
5487	abort ();
5488      suffix = "be";
5489      break;
5490    case UNORDERED:
5491      suffix = fp ? "u" : "p";
5492      break;
5493    case ORDERED:
5494      suffix = fp ? "nu" : "np";
5495      break;
5496    default:
5497      abort ();
5498    }
5499  fputs (suffix, file);
5500}
5501
5502void
5503print_reg (x, code, file)
5504     rtx x;
5505     int code;
5506     FILE *file;
5507{
5508  if (REGNO (x) == ARG_POINTER_REGNUM
5509      || REGNO (x) == FRAME_POINTER_REGNUM
5510      || REGNO (x) == FLAGS_REG
5511      || REGNO (x) == FPSR_REG)
5512    abort ();
5513
5514  if (ASSEMBLER_DIALECT == ASM_ATT  || USER_LABEL_PREFIX[0] == 0)
5515    putc ('%', file);
5516
5517  if (code == 'w' || MMX_REG_P (x))
5518    code = 2;
5519  else if (code == 'b')
5520    code = 1;
5521  else if (code == 'k')
5522    code = 4;
5523  else if (code == 'q')
5524    code = 8;
5525  else if (code == 'y')
5526    code = 3;
5527  else if (code == 'h')
5528    code = 0;
5529  else
5530    code = GET_MODE_SIZE (GET_MODE (x));
5531
5532  /* Irritatingly, AMD extended registers use different naming convention
5533     from the normal registers.  */
5534  if (REX_INT_REG_P (x))
5535    {
5536      if (!TARGET_64BIT)
5537	abort ();
5538      switch (code)
5539	{
5540	  case 0:
5541	    error ("extended registers have no high halves");
5542	    break;
5543	  case 1:
5544	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
5545	    break;
5546	  case 2:
5547	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
5548	    break;
5549	  case 4:
5550	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
5551	    break;
5552	  case 8:
5553	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
5554	    break;
5555	  default:
5556	    error ("unsupported operand size for extended register");
5557	    break;
5558	}
5559      return;
5560    }
5561  switch (code)
5562    {
5563    case 3:
5564      if (STACK_TOP_P (x))
5565	{
5566	  fputs ("st(0)", file);
5567	  break;
5568	}
5569      /* FALLTHRU */
5570    case 8:
5571    case 4:
5572    case 12:
5573      if (! ANY_FP_REG_P (x))
5574	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
5575      /* FALLTHRU */
5576    case 16:
5577    case 2:
5578      fputs (hi_reg_name[REGNO (x)], file);
5579      break;
5580    case 1:
5581      fputs (qi_reg_name[REGNO (x)], file);
5582      break;
5583    case 0:
5584      fputs (qi_high_reg_name[REGNO (x)], file);
5585      break;
5586    default:
5587      abort ();
5588    }
5589}
5590
5591/* Meaning of CODE:
5592   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5593   C -- print opcode suffix for set/cmov insn.
5594   c -- like C, but print reversed condition
5595   F,f -- likewise, but for floating-point.
5596   R -- print the prefix for register names.
5597   z -- print the opcode suffix for the size of the current operand.
5598   * -- print a star (in certain assembler syntax)
5599   A -- print an absolute memory reference.
5600   w -- print the operand as if it's a "word" (HImode) even if it isn't.
5601   s -- print a shift double count, followed by the assemblers argument
5602	delimiter.
5603   b -- print the QImode name of the register for the indicated operand.
5604	%b0 would print %al if operands[0] is reg 0.
5605   w --  likewise, print the HImode name of the register.
5606   k --  likewise, print the SImode name of the register.
5607   q --  likewise, print the DImode name of the register.
5608   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5609   y -- print "st(0)" instead of "st" as a register.
5610   D -- print condition for SSE cmp instruction.
5611   P -- if PIC, print an @PLT suffix.
5612   X -- don't print any sort of PIC '@' suffix for a symbol.
5613 */
5614
5615void
5616print_operand (file, x, code)
5617     FILE *file;
5618     rtx x;
5619     int code;
5620{
5621  if (code)
5622    {
5623      switch (code)
5624	{
5625	case '*':
5626	  if (ASSEMBLER_DIALECT == ASM_ATT)
5627	    putc ('*', file);
5628	  return;
5629
5630	case 'A':
5631	  if (ASSEMBLER_DIALECT == ASM_ATT)
5632	    putc ('*', file);
5633	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
5634	    {
5635	      /* Intel syntax. For absolute addresses, registers should not
5636		 be surrounded by braces.  */
5637	      if (GET_CODE (x) != REG)
5638		{
5639		  putc ('[', file);
5640		  PRINT_OPERAND (file, x, 0);
5641		  putc (']', file);
5642		  return;
5643		}
5644	    }
5645	  else
5646	    abort ();
5647
5648	  PRINT_OPERAND (file, x, 0);
5649	  return;
5650
5651
5652	case 'L':
5653	  if (ASSEMBLER_DIALECT == ASM_ATT)
5654	    putc ('l', file);
5655	  return;
5656
5657	case 'W':
5658	  if (ASSEMBLER_DIALECT == ASM_ATT)
5659	    putc ('w', file);
5660	  return;
5661
5662	case 'B':
5663	  if (ASSEMBLER_DIALECT == ASM_ATT)
5664	    putc ('b', file);
5665	  return;
5666
5667	case 'Q':
5668	  if (ASSEMBLER_DIALECT == ASM_ATT)
5669	    putc ('l', file);
5670	  return;
5671
5672	case 'S':
5673	  if (ASSEMBLER_DIALECT == ASM_ATT)
5674	    putc ('s', file);
5675	  return;
5676
5677	case 'T':
5678	  if (ASSEMBLER_DIALECT == ASM_ATT)
5679	    putc ('t', file);
5680	  return;
5681
5682	case 'z':
5683	  /* 387 opcodes don't get size suffixes if the operands are
5684	     registers.  */
5685
5686	  if (STACK_REG_P (x))
5687	    return;
5688
5689	  /* this is the size of op from size of operand */
5690	  switch (GET_MODE_SIZE (GET_MODE (x)))
5691	    {
5692	    case 2:
5693#ifdef HAVE_GAS_FILDS_FISTS
5694	      putc ('s', file);
5695#endif
5696	      return;
5697
5698	    case 4:
5699	      if (GET_MODE (x) == SFmode)
5700		{
5701		  putc ('s', file);
5702		  return;
5703		}
5704	      else
5705		putc ('l', file);
5706	      return;
5707
5708	    case 12:
5709	    case 16:
5710	      putc ('t', file);
5711	      return;
5712
5713	    case 8:
5714	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5715		{
5716#ifdef GAS_MNEMONICS
5717		  putc ('q', file);
5718#else
5719		  putc ('l', file);
5720		  putc ('l', file);
5721#endif
5722		}
5723	      else
5724	        putc ('l', file);
5725	      return;
5726
5727	    default:
5728	      abort ();
5729	    }
5730
5731	case 'b':
5732	case 'w':
5733	case 'k':
5734	case 'q':
5735	case 'h':
5736	case 'y':
5737	case 'X':
5738	case 'P':
5739	  break;
5740
5741	case 's':
5742	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
5743	    {
5744	      PRINT_OPERAND (file, x, 0);
5745	      putc (',', file);
5746	    }
5747	  return;
5748
5749	case 'D':
5750	  /* Little bit of braindamage here.  The SSE compare instructions
5751	     does use completely different names for the comparisons that the
5752	     fp conditional moves.  */
5753	  switch (GET_CODE (x))
5754	    {
5755	    case EQ:
5756	    case UNEQ:
5757	      fputs ("eq", file);
5758	      break;
5759	    case LT:
5760	    case UNLT:
5761	      fputs ("lt", file);
5762	      break;
5763	    case LE:
5764	    case UNLE:
5765	      fputs ("le", file);
5766	      break;
5767	    case UNORDERED:
5768	      fputs ("unord", file);
5769	      break;
5770	    case NE:
5771	    case LTGT:
5772	      fputs ("neq", file);
5773	      break;
5774	    case UNGE:
5775	    case GE:
5776	      fputs ("nlt", file);
5777	      break;
5778	    case UNGT:
5779	    case GT:
5780	      fputs ("nle", file);
5781	      break;
5782	    case ORDERED:
5783	      fputs ("ord", file);
5784	      break;
5785	    default:
5786	      abort ();
5787	      break;
5788	    }
5789	  return;
5790	case 'C':
5791	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
5792	  return;
5793	case 'F':
5794	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
5795	  return;
5796
5797	  /* Like above, but reverse condition */
5798	case 'c':
5799	  /* Check to see if argument to %c is really a constant
5800	     and not a condition code which needs to be reversed.  */
5801	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
5802	  {
5803	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5804	     return;
5805	  }
5806	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
5807	  return;
5808	case 'f':
5809	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
5810	  return;
5811	case '+':
5812	  {
5813	    rtx x;
5814
5815	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
5816	      return;
5817
5818	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5819	    if (x)
5820	      {
5821		int pred_val = INTVAL (XEXP (x, 0));
5822
5823		if (pred_val < REG_BR_PROB_BASE * 45 / 100
5824		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
5825		  {
5826		    int taken = pred_val > REG_BR_PROB_BASE / 2;
5827		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
5828
5829		    /* Emit hints only in the case default branch prediction
5830		       heruistics would fail.  */
5831		    if (taken != cputaken)
5832		      {
5833			/* We use 3e (DS) prefix for taken branches and
5834			   2e (CS) prefix for not taken branches.  */
5835			if (taken)
5836			  fputs ("ds ; ", file);
5837			else
5838			  fputs ("cs ; ", file);
5839		      }
5840		  }
5841	      }
5842	    return;
5843	  }
5844	default:
5845	  {
5846	    char str[50];
5847	    sprintf (str, "invalid operand code `%c'", code);
5848	    output_operand_lossage (str);
5849	  }
5850	}
5851    }
5852
5853  if (GET_CODE (x) == REG)
5854    {
5855      PRINT_REG (x, code, file);
5856    }
5857
5858  else if (GET_CODE (x) == MEM)
5859    {
5860      /* No `byte ptr' prefix for call instructions.  */
5861      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
5862	{
5863	  const char * size;
5864	  switch (GET_MODE_SIZE (GET_MODE (x)))
5865	    {
5866	    case 1: size = "BYTE"; break;
5867	    case 2: size = "WORD"; break;
5868	    case 4: size = "DWORD"; break;
5869	    case 8: size = "QWORD"; break;
5870	    case 12: size = "XWORD"; break;
5871	    case 16: size = "XMMWORD"; break;
5872	    default:
5873	      abort ();
5874	    }
5875
5876	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
5877	  if (code == 'b')
5878	    size = "BYTE";
5879	  else if (code == 'w')
5880	    size = "WORD";
5881	  else if (code == 'k')
5882	    size = "DWORD";
5883
5884	  fputs (size, file);
5885	  fputs (" PTR ", file);
5886	}
5887
5888      x = XEXP (x, 0);
5889      if (flag_pic && CONSTANT_ADDRESS_P (x))
5890	output_pic_addr_const (file, x, code);
5891      /* Avoid (%rip) for call operands.  */
5892      else if (CONSTANT_ADDRESS_P (x) && code =='P'
5893	       && GET_CODE (x) != CONST_INT)
5894	output_addr_const (file, x);
5895      else
5896	output_address (x);
5897    }
5898
5899  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
5900    {
5901      REAL_VALUE_TYPE r;
5902      long l;
5903
5904      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5905      REAL_VALUE_TO_TARGET_SINGLE (r, l);
5906
5907      if (ASSEMBLER_DIALECT == ASM_ATT)
5908	putc ('$', file);
5909      fprintf (file, "0x%lx", l);
5910    }
5911
5912 /* These float cases don't actually occur as immediate operands.  */
5913 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
5914    {
5915      REAL_VALUE_TYPE r;
5916      char dstr[30];
5917
5918      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5919      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5920      fprintf (file, "%s", dstr);
5921    }
5922
5923  else if (GET_CODE (x) == CONST_DOUBLE
5924	   && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
5925    {
5926      REAL_VALUE_TYPE r;
5927      char dstr[30];
5928
5929      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5930      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
5931      fprintf (file, "%s", dstr);
5932    }
5933  else
5934    {
5935      if (code != 'P')
5936	{
5937	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
5938	    {
5939	      if (ASSEMBLER_DIALECT == ASM_ATT)
5940		putc ('$', file);
5941	    }
5942	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
5943		   || GET_CODE (x) == LABEL_REF)
5944	    {
5945	      if (ASSEMBLER_DIALECT == ASM_ATT)
5946		putc ('$', file);
5947	      else
5948		fputs ("OFFSET FLAT:", file);
5949	    }
5950	}
5951      if (GET_CODE (x) == CONST_INT)
5952	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5953      else if (flag_pic)
5954	output_pic_addr_const (file, x, code);
5955      else
5956	output_addr_const (file, x);
5957    }
5958}
5959
5960/* Print a memory operand whose address is ADDR.  */
5961
5962void
5963print_operand_address (file, addr)
5964     FILE *file;
5965     register rtx addr;
5966{
5967  struct ix86_address parts;
5968  rtx base, index, disp;
5969  int scale;
5970
5971  if (! ix86_decompose_address (addr, &parts))
5972    abort ();
5973
5974  base = parts.base;
5975  index = parts.index;
5976  disp = parts.disp;
5977  scale = parts.scale;
5978
5979  if (!base && !index)
5980    {
5981      /* Displacement only requires special attention.  */
5982
5983      if (GET_CODE (disp) == CONST_INT)
5984	{
5985	  if (ASSEMBLER_DIALECT == ASM_INTEL)
5986	    {
5987	      if (USER_LABEL_PREFIX[0] == 0)
5988		putc ('%', file);
5989	      fputs ("ds:", file);
5990	    }
5991	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
5992	}
5993      else if (flag_pic)
5994	output_pic_addr_const (file, addr, 0);
5995      else
5996	output_addr_const (file, addr);
5997
5998      /* Use one byte shorter RIP relative addressing for 64bit mode.  */
5999      if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
6000	fputs ("(%rip)", file);
6001    }
6002  else
6003    {
6004      if (ASSEMBLER_DIALECT == ASM_ATT)
6005	{
6006	  if (disp)
6007	    {
6008	      if (flag_pic)
6009		output_pic_addr_const (file, disp, 0);
6010	      else if (GET_CODE (disp) == LABEL_REF)
6011		output_asm_label (disp);
6012	      else
6013		output_addr_const (file, disp);
6014	    }
6015
6016	  putc ('(', file);
6017	  if (base)
6018	    PRINT_REG (base, 0, file);
6019	  if (index)
6020	    {
6021	      putc (',', file);
6022	      PRINT_REG (index, 0, file);
6023	      if (scale != 1)
6024		fprintf (file, ",%d", scale);
6025	    }
6026	  putc (')', file);
6027	}
6028      else
6029	{
6030	  rtx offset = NULL_RTX;
6031
6032	  if (disp)
6033	    {
6034	      /* Pull out the offset of a symbol; print any symbol itself.  */
6035	      if (GET_CODE (disp) == CONST
6036		  && GET_CODE (XEXP (disp, 0)) == PLUS
6037		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6038		{
6039		  offset = XEXP (XEXP (disp, 0), 1);
6040		  disp = gen_rtx_CONST (VOIDmode,
6041					XEXP (XEXP (disp, 0), 0));
6042		}
6043
6044	      if (flag_pic)
6045		output_pic_addr_const (file, disp, 0);
6046	      else if (GET_CODE (disp) == LABEL_REF)
6047		output_asm_label (disp);
6048	      else if (GET_CODE (disp) == CONST_INT)
6049		offset = disp;
6050	      else
6051		output_addr_const (file, disp);
6052	    }
6053
6054	  putc ('[', file);
6055	  if (base)
6056	    {
6057	      PRINT_REG (base, 0, file);
6058	      if (offset)
6059		{
6060		  if (INTVAL (offset) >= 0)
6061		    putc ('+', file);
6062		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6063		}
6064	    }
6065	  else if (offset)
6066	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6067	  else
6068	    putc ('0', file);
6069
6070	  if (index)
6071	    {
6072	      putc ('+', file);
6073	      PRINT_REG (index, 0, file);
6074	      if (scale != 1)
6075		fprintf (file, "*%d", scale);
6076	    }
6077	  putc (']', file);
6078	}
6079    }
6080}
6081
6082/* Split one or more DImode RTL references into pairs of SImode
6083   references.  The RTL can be REG, offsettable MEM, integer constant, or
6084   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
6085   split and "num" is its length.  lo_half and hi_half are output arrays
6086   that parallel "operands".  */
6087
6088void
6089split_di (operands, num, lo_half, hi_half)
6090     rtx operands[];
6091     int num;
6092     rtx lo_half[], hi_half[];
6093{
6094  while (num--)
6095    {
6096      rtx op = operands[num];
6097
6098      /* simplify_subreg refuse to split volatile memory addresses,
6099         but we still have to handle it.  */
6100      if (GET_CODE (op) == MEM)
6101	{
6102	  lo_half[num] = adjust_address (op, SImode, 0);
6103	  hi_half[num] = adjust_address (op, SImode, 4);
6104	}
6105      else
6106	{
6107	  lo_half[num] = simplify_gen_subreg (SImode, op,
6108					      GET_MODE (op) == VOIDmode
6109					      ? DImode : GET_MODE (op), 0);
6110	  hi_half[num] = simplify_gen_subreg (SImode, op,
6111					      GET_MODE (op) == VOIDmode
6112					      ? DImode : GET_MODE (op), 4);
6113	}
6114    }
6115}
6116/* Split one or more TImode RTL references into pairs of SImode
6117   references.  The RTL can be REG, offsettable MEM, integer constant, or
6118   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
6119   split and "num" is its length.  lo_half and hi_half are output arrays
6120   that parallel "operands".  */
6121
6122void
6123split_ti (operands, num, lo_half, hi_half)
6124     rtx operands[];
6125     int num;
6126     rtx lo_half[], hi_half[];
6127{
6128  while (num--)
6129    {
6130      rtx op = operands[num];
6131
6132      /* simplify_subreg refuse to split volatile memory addresses, but we
6133         still have to handle it.  */
6134      if (GET_CODE (op) == MEM)
6135	{
6136	  lo_half[num] = adjust_address (op, DImode, 0);
6137	  hi_half[num] = adjust_address (op, DImode, 8);
6138	}
6139      else
6140	{
6141	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6142	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6143	}
6144    }
6145}
6146
6147/* Output code to perform a 387 binary operation in INSN, one of PLUS,
6148   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
6149   is the expression of the binary operation.  The output may either be
6150   emitted here, or returned to the caller, like all output_* functions.
6151
6152   There is no guarantee that the operands are the same mode, as they
6153   might be within FLOAT or FLOAT_EXTEND expressions.  */
6154
6155#ifndef SYSV386_COMPAT
6156/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
6157   wants to fix the assemblers because that causes incompatibility
6158   with gcc.  No-one wants to fix gcc because that causes
6159   incompatibility with assemblers...  You can use the option of
6160   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
6161#define SYSV386_COMPAT 1
6162#endif
6163
6164const char *
6165output_387_binary_op (insn, operands)
6166     rtx insn;
6167     rtx *operands;
6168{
6169  static char buf[30];
6170  const char *p;
6171  const char *ssep;
6172  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6173
6174#ifdef ENABLE_CHECKING
6175  /* Even if we do not want to check the inputs, this documents input
6176     constraints.  Which helps in understanding the following code.  */
6177  if (STACK_REG_P (operands[0])
6178      && ((REG_P (operands[1])
6179	   && REGNO (operands[0]) == REGNO (operands[1])
6180	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6181	  || (REG_P (operands[2])
6182	      && REGNO (operands[0]) == REGNO (operands[2])
6183	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6184      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6185    ; /* ok */
6186  else if (!is_sse)
6187    abort ();
6188#endif
6189
6190  switch (GET_CODE (operands[3]))
6191    {
6192    case PLUS:
6193      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6194	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6195	p = "fiadd";
6196      else
6197	p = "fadd";
6198      ssep = "add";
6199      break;
6200
6201    case MINUS:
6202      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6203	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6204	p = "fisub";
6205      else
6206	p = "fsub";
6207      ssep = "sub";
6208      break;
6209
6210    case MULT:
6211      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6212	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6213	p = "fimul";
6214      else
6215	p = "fmul";
6216      ssep = "mul";
6217      break;
6218
6219    case DIV:
6220      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6221	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6222	p = "fidiv";
6223      else
6224	p = "fdiv";
6225      ssep = "div";
6226      break;
6227
6228    default:
6229      abort ();
6230    }
6231
6232  if (is_sse)
6233   {
6234      strcpy (buf, ssep);
6235      if (GET_MODE (operands[0]) == SFmode)
6236	strcat (buf, "ss\t{%2, %0|%0, %2}");
6237      else
6238	strcat (buf, "sd\t{%2, %0|%0, %2}");
6239      return buf;
6240   }
6241  strcpy (buf, p);
6242
6243  switch (GET_CODE (operands[3]))
6244    {
6245    case MULT:
6246    case PLUS:
6247      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6248	{
6249	  rtx temp = operands[2];
6250	  operands[2] = operands[1];
6251	  operands[1] = temp;
6252	}
6253
6254      /* know operands[0] == operands[1].  */
6255
6256      if (GET_CODE (operands[2]) == MEM)
6257	{
6258	  p = "%z2\t%2";
6259	  break;
6260	}
6261
6262      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6263	{
6264	  if (STACK_TOP_P (operands[0]))
6265	    /* How is it that we are storing to a dead operand[2]?
6266	       Well, presumably operands[1] is dead too.  We can't
6267	       store the result to st(0) as st(0) gets popped on this
6268	       instruction.  Instead store to operands[2] (which I
6269	       think has to be st(1)).  st(1) will be popped later.
6270	       gcc <= 2.8.1 didn't have this check and generated
6271	       assembly code that the Unixware assembler rejected.  */
6272	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
6273	  else
6274	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
6275	  break;
6276	}
6277
6278      if (STACK_TOP_P (operands[0]))
6279	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
6280      else
6281	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
6282      break;
6283
6284    case MINUS:
6285    case DIV:
6286      if (GET_CODE (operands[1]) == MEM)
6287	{
6288	  p = "r%z1\t%1";
6289	  break;
6290	}
6291
6292      if (GET_CODE (operands[2]) == MEM)
6293	{
6294	  p = "%z2\t%2";
6295	  break;
6296	}
6297
6298      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6299	{
6300#if SYSV386_COMPAT
6301	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6302	     derived assemblers, confusingly reverse the direction of
6303	     the operation for fsub{r} and fdiv{r} when the
6304	     destination register is not st(0).  The Intel assembler
6305	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
6306	     figure out what the hardware really does.  */
6307	  if (STACK_TOP_P (operands[0]))
6308	    p = "{p\t%0, %2|rp\t%2, %0}";
6309	  else
6310	    p = "{rp\t%2, %0|p\t%0, %2}";
6311#else
6312	  if (STACK_TOP_P (operands[0]))
6313	    /* As above for fmul/fadd, we can't store to st(0).  */
6314	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
6315	  else
6316	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
6317#endif
6318	  break;
6319	}
6320
6321      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6322	{
6323#if SYSV386_COMPAT
6324	  if (STACK_TOP_P (operands[0]))
6325	    p = "{rp\t%0, %1|p\t%1, %0}";
6326	  else
6327	    p = "{p\t%1, %0|rp\t%0, %1}";
6328#else
6329	  if (STACK_TOP_P (operands[0]))
6330	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
6331	  else
6332	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
6333#endif
6334	  break;
6335	}
6336
6337      if (STACK_TOP_P (operands[0]))
6338	{
6339	  if (STACK_TOP_P (operands[1]))
6340	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
6341	  else
6342	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
6343	  break;
6344	}
6345      else if (STACK_TOP_P (operands[1]))
6346	{
6347#if SYSV386_COMPAT
6348	  p = "{\t%1, %0|r\t%0, %1}";
6349#else
6350	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
6351#endif
6352	}
6353      else
6354	{
6355#if SYSV386_COMPAT
6356	  p = "{r\t%2, %0|\t%0, %2}";
6357#else
6358	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
6359#endif
6360	}
6361      break;
6362
6363    default:
6364      abort ();
6365    }
6366
6367  strcat (buf, p);
6368  return buf;
6369}
6370
6371/* Output code to initialize control word copies used by
6372   trunc?f?i patterns.  NORMAL is set to current control word, while ROUND_DOWN
6373   is set to control word rounding downwards.  */
6374void
6375emit_i387_cw_initialization (normal, round_down)
6376     rtx normal, round_down;
6377{
6378  rtx reg = gen_reg_rtx (HImode);
6379
6380  emit_insn (gen_x86_fnstcw_1 (normal));
6381  emit_move_insn (reg, normal);
6382  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
6383      && !TARGET_64BIT)
6384    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
6385  else
6386    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
6387  emit_move_insn (round_down, reg);
6388}
6389
6390/* Output code for INSN to convert a float to a signed int.  OPERANDS
6391   are the insn operands.  The output may be [HSD]Imode and the input
6392   operand may be [SDX]Fmode.  */
6393
6394const char *
6395output_fix_trunc (insn, operands)
6396     rtx insn;
6397     rtx *operands;
6398{
6399  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6400  int dimode_p = GET_MODE (operands[0]) == DImode;
6401
6402  /* Jump through a hoop or two for DImode, since the hardware has no
6403     non-popping instruction.  We used to do this a different way, but
6404     that was somewhat fragile and broke with post-reload splitters.  */
6405  if (dimode_p && !stack_top_dies)
6406    output_asm_insn ("fld\t%y1", operands);
6407
6408  if (!STACK_TOP_P (operands[1]))
6409    abort ();
6410
6411  if (GET_CODE (operands[0]) != MEM)
6412    abort ();
6413
6414  output_asm_insn ("fldcw\t%3", operands);
6415  if (stack_top_dies || dimode_p)
6416    output_asm_insn ("fistp%z0\t%0", operands);
6417  else
6418    output_asm_insn ("fist%z0\t%0", operands);
6419  output_asm_insn ("fldcw\t%2", operands);
6420
6421  return "";
6422}
6423
6424/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
6425   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
6426   when fucom should be used.  */
6427
6428const char *
6429output_fp_compare (insn, operands, eflags_p, unordered_p)
6430     rtx insn;
6431     rtx *operands;
6432     int eflags_p, unordered_p;
6433{
6434  int stack_top_dies;
6435  rtx cmp_op0 = operands[0];
6436  rtx cmp_op1 = operands[1];
6437  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
6438
6439  if (eflags_p == 2)
6440    {
6441      cmp_op0 = cmp_op1;
6442      cmp_op1 = operands[2];
6443    }
6444  if (is_sse)
6445    {
6446      if (GET_MODE (operands[0]) == SFmode)
6447	if (unordered_p)
6448	  return "ucomiss\t{%1, %0|%0, %1}";
6449	else
6450	  return "comiss\t{%1, %0|%0, %y}";
6451      else
6452	if (unordered_p)
6453	  return "ucomisd\t{%1, %0|%0, %1}";
6454	else
6455	  return "comisd\t{%1, %0|%0, %y}";
6456    }
6457
6458  if (! STACK_TOP_P (cmp_op0))
6459    abort ();
6460
6461  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
6462
6463  if (STACK_REG_P (cmp_op1)
6464      && stack_top_dies
6465      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
6466      && REGNO (cmp_op1) != FIRST_STACK_REG)
6467    {
6468      /* If both the top of the 387 stack dies, and the other operand
6469	 is also a stack register that dies, then this must be a
6470	 `fcompp' float compare */
6471
6472      if (eflags_p == 1)
6473	{
6474	  /* There is no double popping fcomi variant.  Fortunately,
6475	     eflags is immune from the fstp's cc clobbering.  */
6476	  if (unordered_p)
6477	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
6478	  else
6479	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
6480	  return "fstp\t%y0";
6481	}
6482      else
6483	{
6484	  if (eflags_p == 2)
6485	    {
6486	      if (unordered_p)
6487		return "fucompp\n\tfnstsw\t%0";
6488	      else
6489		return "fcompp\n\tfnstsw\t%0";
6490	    }
6491	  else
6492	    {
6493	      if (unordered_p)
6494		return "fucompp";
6495	      else
6496		return "fcompp";
6497	    }
6498	}
6499    }
6500  else
6501    {
6502      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
6503
6504      static const char * const alt[24] =
6505      {
6506	"fcom%z1\t%y1",
6507	"fcomp%z1\t%y1",
6508	"fucom%z1\t%y1",
6509	"fucomp%z1\t%y1",
6510
6511	"ficom%z1\t%y1",
6512	"ficomp%z1\t%y1",
6513	NULL,
6514	NULL,
6515
6516	"fcomi\t{%y1, %0|%0, %y1}",
6517	"fcomip\t{%y1, %0|%0, %y1}",
6518	"fucomi\t{%y1, %0|%0, %y1}",
6519	"fucomip\t{%y1, %0|%0, %y1}",
6520
6521	NULL,
6522	NULL,
6523	NULL,
6524	NULL,
6525
6526	"fcom%z2\t%y2\n\tfnstsw\t%0",
6527	"fcomp%z2\t%y2\n\tfnstsw\t%0",
6528	"fucom%z2\t%y2\n\tfnstsw\t%0",
6529	"fucomp%z2\t%y2\n\tfnstsw\t%0",
6530
6531	"ficom%z2\t%y2\n\tfnstsw\t%0",
6532	"ficomp%z2\t%y2\n\tfnstsw\t%0",
6533	NULL,
6534	NULL
6535      };
6536
6537      int mask;
6538      const char *ret;
6539
6540      mask  = eflags_p << 3;
6541      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
6542      mask |= unordered_p << 1;
6543      mask |= stack_top_dies;
6544
6545      if (mask >= 24)
6546	abort ();
6547      ret = alt[mask];
6548      if (ret == NULL)
6549	abort ();
6550
6551      return ret;
6552    }
6553}
6554
6555void
6556ix86_output_addr_vec_elt (file, value)
6557     FILE *file;
6558     int value;
6559{
6560  const char *directive = ASM_LONG;
6561
6562  if (TARGET_64BIT)
6563    {
6564#ifdef ASM_QUAD
6565      directive = ASM_QUAD;
6566#else
6567      abort ();
6568#endif
6569    }
6570
6571  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
6572}
6573
6574void
6575ix86_output_addr_diff_elt (file, value, rel)
6576     FILE *file;
6577     int value, rel;
6578{
6579  if (TARGET_64BIT)
6580    fprintf (file, "%s%s%d-.+4+(.-%s%d)\n",
6581	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
6582  else if (HAVE_AS_GOTOFF_IN_DATA)
6583    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
6584  else
6585    asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6586		 ASM_LONG, LPREFIX, value);
6587}
6588
6589/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6590   for the target.  */
6591
6592void
6593ix86_expand_clear (dest)
6594     rtx dest;
6595{
6596  rtx tmp;
6597
6598  /* We play register width games, which are only valid after reload.  */
6599  if (!reload_completed)
6600    abort ();
6601
6602  /* Avoid HImode and its attendant prefix byte.  */
6603  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
6604    dest = gen_rtx_REG (SImode, REGNO (dest));
6605
6606  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
6607
6608  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
6609  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
6610    {
6611      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
6612      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
6613    }
6614
6615  emit_insn (tmp);
6616}
6617
6618void
6619ix86_expand_move (mode, operands)
6620     enum machine_mode mode;
6621     rtx operands[];
6622{
6623  int strict = (reload_in_progress || reload_completed);
6624  rtx insn;
6625
6626  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
6627    {
6628      /* Emit insns to move operands[1] into operands[0].  */
6629
6630      if (GET_CODE (operands[0]) == MEM)
6631	operands[1] = force_reg (Pmode, operands[1]);
6632      else
6633	{
6634	  rtx temp = operands[0];
6635	  if (GET_CODE (temp) != REG)
6636	    temp = gen_reg_rtx (Pmode);
6637	  temp = legitimize_pic_address (operands[1], temp);
6638	  if (temp == operands[0])
6639	    return;
6640	  operands[1] = temp;
6641	}
6642    }
6643  else
6644    {
6645      if (GET_CODE (operands[0]) == MEM
6646	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
6647	      || !push_operand (operands[0], mode))
6648	  && GET_CODE (operands[1]) == MEM)
6649	operands[1] = force_reg (mode, operands[1]);
6650
6651      if (push_operand (operands[0], mode)
6652	  && ! general_no_elim_operand (operands[1], mode))
6653	operands[1] = copy_to_mode_reg (mode, operands[1]);
6654
6655      /* Force large constants in 64bit compilation into register
6656	 to get them CSEed.  */
6657      if (TARGET_64BIT && mode == DImode
6658	  && immediate_operand (operands[1], mode)
6659	  && !x86_64_zero_extended_value (operands[1])
6660	  && !register_operand (operands[0], mode)
6661	  && optimize && !reload_completed && !reload_in_progress)
6662	operands[1] = copy_to_mode_reg (mode, operands[1]);
6663
6664      if (FLOAT_MODE_P (mode))
6665	{
6666	  /* If we are loading a floating point constant to a register,
6667	     force the value to memory now, since we'll get better code
6668	     out the back end.  */
6669
6670	  if (strict)
6671	    ;
6672	  else if (GET_CODE (operands[1]) == CONST_DOUBLE
6673		   && register_operand (operands[0], mode))
6674	    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
6675	}
6676    }
6677
6678  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
6679
6680  emit_insn (insn);
6681}
6682
6683void
6684ix86_expand_vector_move (mode, operands)
6685     enum machine_mode mode;
6686     rtx operands[];
6687{
6688  /* Force constants other than zero into memory.  We do not know how
6689     the instructions used to build constants modify the upper 64 bits
6690     of the register, once we have that information we may be able
6691     to handle some of them more efficiently.  */
6692  if ((reload_in_progress | reload_completed) == 0
6693      && register_operand (operands[0], mode)
6694      && CONSTANT_P (operands[1]))
6695    {
6696      rtx addr = gen_reg_rtx (Pmode);
6697      emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
6698      operands[1] = gen_rtx_MEM (mode, addr);
6699    }
6700
6701  /* Make operand1 a register if it isn't already.  */
6702  if ((reload_in_progress | reload_completed) == 0
6703      && !register_operand (operands[0], mode)
6704      && !register_operand (operands[1], mode)
6705      && operands[1] != CONST0_RTX (mode))
6706    {
6707      rtx temp = force_reg (TImode, operands[1]);
6708      emit_move_insn (operands[0], temp);
6709      return;
6710    }
6711
6712  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
6713}
6714
6715/* Attempt to expand a binary operator.  Make the expansion closer to the
6716   actual machine, then just general_operand, which will allow 3 separate
6717   memory references (one output, two input) in a single insn.  */
6718
6719void
6720ix86_expand_binary_operator (code, mode, operands)
6721     enum rtx_code code;
6722     enum machine_mode mode;
6723     rtx operands[];
6724{
6725  int matching_memory;
6726  rtx src1, src2, dst, op, clob;
6727
6728  dst = operands[0];
6729  src1 = operands[1];
6730  src2 = operands[2];
6731
6732  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6733  if (GET_RTX_CLASS (code) == 'c'
6734      && (rtx_equal_p (dst, src2)
6735	  || immediate_operand (src1, mode)))
6736    {
6737      rtx temp = src1;
6738      src1 = src2;
6739      src2 = temp;
6740    }
6741
6742  /* If the destination is memory, and we do not have matching source
6743     operands, do things in registers.  */
6744  matching_memory = 0;
6745  if (GET_CODE (dst) == MEM)
6746    {
6747      if (rtx_equal_p (dst, src1))
6748	matching_memory = 1;
6749      else if (GET_RTX_CLASS (code) == 'c'
6750	       && rtx_equal_p (dst, src2))
6751	matching_memory = 2;
6752      else
6753	dst = gen_reg_rtx (mode);
6754    }
6755
6756  /* Both source operands cannot be in memory.  */
6757  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
6758    {
6759      if (matching_memory != 2)
6760	src2 = force_reg (mode, src2);
6761      else
6762	src1 = force_reg (mode, src1);
6763    }
6764
6765  /* If the operation is not commutable, source 1 cannot be a constant
6766     or non-matching memory.  */
6767  if ((CONSTANT_P (src1)
6768       || (!matching_memory && GET_CODE (src1) == MEM))
6769      && GET_RTX_CLASS (code) != 'c')
6770    src1 = force_reg (mode, src1);
6771
6772  /* If optimizing, copy to regs to improve CSE */
6773  if (optimize && ! no_new_pseudos)
6774    {
6775      if (GET_CODE (dst) == MEM)
6776	dst = gen_reg_rtx (mode);
6777      if (GET_CODE (src1) == MEM)
6778	src1 = force_reg (mode, src1);
6779      if (GET_CODE (src2) == MEM)
6780	src2 = force_reg (mode, src2);
6781    }
6782
6783  /* Emit the instruction.  */
6784
6785  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
6786  if (reload_in_progress)
6787    {
6788      /* Reload doesn't know about the flags register, and doesn't know that
6789         it doesn't want to clobber it.  We can only do this with PLUS.  */
6790      if (code != PLUS)
6791	abort ();
6792      emit_insn (op);
6793    }
6794  else
6795    {
6796      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6797      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6798    }
6799
6800  /* Fix up the destination if needed.  */
6801  if (dst != operands[0])
6802    emit_move_insn (operands[0], dst);
6803}
6804
6805/* Return TRUE or FALSE depending on whether the binary operator meets the
6806   appropriate constraints.  */
6807
6808int
6809ix86_binary_operator_ok (code, mode, operands)
6810     enum rtx_code code;
6811     enum machine_mode mode ATTRIBUTE_UNUSED;
6812     rtx operands[3];
6813{
6814  /* Both source operands cannot be in memory.  */
6815  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
6816    return 0;
6817  /* If the operation is not commutable, source 1 cannot be a constant.  */
6818  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
6819    return 0;
6820  /* If the destination is memory, we must have a matching source operand.  */
6821  if (GET_CODE (operands[0]) == MEM
6822      && ! (rtx_equal_p (operands[0], operands[1])
6823	    || (GET_RTX_CLASS (code) == 'c'
6824		&& rtx_equal_p (operands[0], operands[2]))))
6825    return 0;
6826  /* If the operation is not commutable and the source 1 is memory, we must
6827     have a matching destination.  */
6828  if (GET_CODE (operands[1]) == MEM
6829      && GET_RTX_CLASS (code) != 'c'
6830      && ! rtx_equal_p (operands[0], operands[1]))
6831    return 0;
6832  return 1;
6833}
6834
6835/* Attempt to expand a unary operator.  Make the expansion closer to the
6836   actual machine, then just general_operand, which will allow 2 separate
6837   memory references (one output, one input) in a single insn.  */
6838
6839void
6840ix86_expand_unary_operator (code, mode, operands)
6841     enum rtx_code code;
6842     enum machine_mode mode;
6843     rtx operands[];
6844{
6845  int matching_memory;
6846  rtx src, dst, op, clob;
6847
6848  dst = operands[0];
6849  src = operands[1];
6850
6851  /* If the destination is memory, and we do not have matching source
6852     operands, do things in registers.  */
6853  matching_memory = 0;
6854  if (GET_CODE (dst) == MEM)
6855    {
6856      if (rtx_equal_p (dst, src))
6857	matching_memory = 1;
6858      else
6859	dst = gen_reg_rtx (mode);
6860    }
6861
6862  /* When source operand is memory, destination must match.  */
6863  if (!matching_memory && GET_CODE (src) == MEM)
6864    src = force_reg (mode, src);
6865
6866  /* If optimizing, copy to regs to improve CSE */
6867  if (optimize && ! no_new_pseudos)
6868    {
6869      if (GET_CODE (dst) == MEM)
6870	dst = gen_reg_rtx (mode);
6871      if (GET_CODE (src) == MEM)
6872	src = force_reg (mode, src);
6873    }
6874
6875  /* Emit the instruction.  */
6876
6877  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
6878  if (reload_in_progress || code == NOT)
6879    {
6880      /* Reload doesn't know about the flags register, and doesn't know that
6881         it doesn't want to clobber it.  */
6882      if (code != NOT)
6883        abort ();
6884      emit_insn (op);
6885    }
6886  else
6887    {
6888      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
6889      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
6890    }
6891
6892  /* Fix up the destination if needed.  */
6893  if (dst != operands[0])
6894    emit_move_insn (operands[0], dst);
6895}
6896
6897/* Return TRUE or FALSE depending on whether the unary operator meets the
6898   appropriate constraints.  */
6899
6900int
6901ix86_unary_operator_ok (code, mode, operands)
6902     enum rtx_code code ATTRIBUTE_UNUSED;
6903     enum machine_mode mode ATTRIBUTE_UNUSED;
6904     rtx operands[2] ATTRIBUTE_UNUSED;
6905{
6906  /* If one of operands is memory, source and destination must match.  */
6907  if ((GET_CODE (operands[0]) == MEM
6908       || GET_CODE (operands[1]) == MEM)
6909      && ! rtx_equal_p (operands[0], operands[1]))
6910    return FALSE;
6911  return TRUE;
6912}
6913
6914/* Return TRUE or FALSE depending on whether the first SET in INSN
6915   has source and destination with matching CC modes, and that the
6916   CC mode is at least as constrained as REQ_MODE.  */
6917
6918int
6919ix86_match_ccmode (insn, req_mode)
6920     rtx insn;
6921     enum machine_mode req_mode;
6922{
6923  rtx set;
6924  enum machine_mode set_mode;
6925
6926  set = PATTERN (insn);
6927  if (GET_CODE (set) == PARALLEL)
6928    set = XVECEXP (set, 0, 0);
6929  if (GET_CODE (set) != SET)
6930    abort ();
6931  if (GET_CODE (SET_SRC (set)) != COMPARE)
6932    abort ();
6933
6934  set_mode = GET_MODE (SET_DEST (set));
6935  switch (set_mode)
6936    {
6937    case CCNOmode:
6938      if (req_mode != CCNOmode
6939	  && (req_mode != CCmode
6940	      || XEXP (SET_SRC (set), 1) != const0_rtx))
6941	return 0;
6942      break;
6943    case CCmode:
6944      if (req_mode == CCGCmode)
6945	return 0;
6946      /* FALLTHRU */
6947    case CCGCmode:
6948      if (req_mode == CCGOCmode || req_mode == CCNOmode)
6949	return 0;
6950      /* FALLTHRU */
6951    case CCGOCmode:
6952      if (req_mode == CCZmode)
6953	return 0;
6954      /* FALLTHRU */
6955    case CCZmode:
6956      break;
6957
6958    default:
6959      abort ();
6960    }
6961
6962  return (GET_MODE (SET_SRC (set)) == set_mode);
6963}
6964
6965/* Generate insn patterns to do an integer compare of OPERANDS.  */
6966
6967static rtx
6968ix86_expand_int_compare (code, op0, op1)
6969     enum rtx_code code;
6970     rtx op0, op1;
6971{
6972  enum machine_mode cmpmode;
6973  rtx tmp, flags;
6974
6975  cmpmode = SELECT_CC_MODE (code, op0, op1);
6976  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
6977
6978  /* This is very simple, but making the interface the same as in the
6979     FP case makes the rest of the code easier.  */
6980  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
6981  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
6982
6983  /* Return the test that should be put into the flags user, i.e.
6984     the bcc, scc, or cmov instruction.  */
6985  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
6986}
6987
6988/* Figure out whether to use ordered or unordered fp comparisons.
6989   Return the appropriate mode to use.  */
6990
6991enum machine_mode
6992ix86_fp_compare_mode (code)
6993     enum rtx_code code ATTRIBUTE_UNUSED;
6994{
6995  /* ??? In order to make all comparisons reversible, we do all comparisons
6996     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
6997     all forms trapping and nontrapping comparisons, we can make inequality
6998     comparisons trapping again, since it results in better code when using
6999     FCOM based compares.  */
7000  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7001}
7002
7003enum machine_mode
7004ix86_cc_mode (code, op0, op1)
7005     enum rtx_code code;
7006     rtx op0, op1;
7007{
7008  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7009    return ix86_fp_compare_mode (code);
7010  switch (code)
7011    {
7012      /* Only zero flag is needed.  */
7013    case EQ:			/* ZF=0 */
7014    case NE:			/* ZF!=0 */
7015      return CCZmode;
7016      /* Codes needing carry flag.  */
7017    case GEU:			/* CF=0 */
7018    case GTU:			/* CF=0 & ZF=0 */
7019    case LTU:			/* CF=1 */
7020    case LEU:			/* CF=1 | ZF=1 */
7021      return CCmode;
7022      /* Codes possibly doable only with sign flag when
7023         comparing against zero.  */
7024    case GE:			/* SF=OF   or   SF=0 */
7025    case LT:			/* SF<>OF  or   SF=1 */
7026      if (op1 == const0_rtx)
7027	return CCGOCmode;
7028      else
7029	/* For other cases Carry flag is not required.  */
7030	return CCGCmode;
7031      /* Codes doable only with sign flag when comparing
7032         against zero, but we miss jump instruction for it
7033         so we need to use relational tests agains overflow
7034         that thus needs to be zero.  */
7035    case GT:			/* ZF=0 & SF=OF */
7036    case LE:			/* ZF=1 | SF<>OF */
7037      if (op1 == const0_rtx)
7038	return CCNOmode;
7039      else
7040	return CCGCmode;
7041      /* strcmp pattern do (use flags) and combine may ask us for proper
7042	 mode.  */
7043    case USE:
7044      return CCmode;
7045    default:
7046      abort ();
7047    }
7048}
7049
7050/* Return true if we should use an FCOMI instruction for this fp comparison.  */
7051
7052int
7053ix86_use_fcomi_compare (code)
7054     enum rtx_code code ATTRIBUTE_UNUSED;
7055{
7056  enum rtx_code swapped_code = swap_condition (code);
7057  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7058	  || (ix86_fp_comparison_cost (swapped_code)
7059	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
7060}
7061
7062/* Swap, force into registers, or otherwise massage the two operands
7063   to a fp comparison.  The operands are updated in place; the new
7064   comparsion code is returned.  */
7065
7066static enum rtx_code
7067ix86_prepare_fp_compare_args (code, pop0, pop1)
7068     enum rtx_code code;
7069     rtx *pop0, *pop1;
7070{
7071  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7072  rtx op0 = *pop0, op1 = *pop1;
7073  enum machine_mode op_mode = GET_MODE (op0);
7074  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7075
7076  /* All of the unordered compare instructions only work on registers.
7077     The same is true of the XFmode compare instructions.  The same is
7078     true of the fcomi compare instructions.  */
7079
7080  if (!is_sse
7081      && (fpcmp_mode == CCFPUmode
7082	  || op_mode == XFmode
7083	  || op_mode == TFmode
7084	  || ix86_use_fcomi_compare (code)))
7085    {
7086      op0 = force_reg (op_mode, op0);
7087      op1 = force_reg (op_mode, op1);
7088    }
7089  else
7090    {
7091      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
7092	 things around if they appear profitable, otherwise force op0
7093	 into a register.  */
7094
7095      if (standard_80387_constant_p (op0) == 0
7096	  || (GET_CODE (op0) == MEM
7097	      && ! (standard_80387_constant_p (op1) == 0
7098		    || GET_CODE (op1) == MEM)))
7099	{
7100	  rtx tmp;
7101	  tmp = op0, op0 = op1, op1 = tmp;
7102	  code = swap_condition (code);
7103	}
7104
7105      if (GET_CODE (op0) != REG)
7106	op0 = force_reg (op_mode, op0);
7107
7108      if (CONSTANT_P (op1))
7109	{
7110	  if (standard_80387_constant_p (op1))
7111	    op1 = force_reg (op_mode, op1);
7112	  else
7113	    op1 = validize_mem (force_const_mem (op_mode, op1));
7114	}
7115    }
7116
7117  /* Try to rearrange the comparison to make it cheaper.  */
7118  if (ix86_fp_comparison_cost (code)
7119      > ix86_fp_comparison_cost (swap_condition (code))
7120      && (GET_CODE (op0) == REG || !reload_completed))
7121    {
7122      rtx tmp;
7123      tmp = op0, op0 = op1, op1 = tmp;
7124      code = swap_condition (code);
7125      if (GET_CODE (op0) != REG)
7126	op0 = force_reg (op_mode, op0);
7127    }
7128
7129  *pop0 = op0;
7130  *pop1 = op1;
7131  return code;
7132}
7133
7134/* Convert comparison codes we use to represent FP comparison to integer
7135   code that will result in proper branch.  Return UNKNOWN if no such code
7136   is available.  */
7137static enum rtx_code
7138ix86_fp_compare_code_to_integer (code)
7139     enum rtx_code code;
7140{
7141  switch (code)
7142    {
7143    case GT:
7144      return GTU;
7145    case GE:
7146      return GEU;
7147    case ORDERED:
7148    case UNORDERED:
7149      return code;
7150      break;
7151    case UNEQ:
7152      return EQ;
7153      break;
7154    case UNLT:
7155      return LTU;
7156      break;
7157    case UNLE:
7158      return LEU;
7159      break;
7160    case LTGT:
7161      return NE;
7162      break;
7163    default:
7164      return UNKNOWN;
7165    }
7166}
7167
7168/* Split comparison code CODE into comparisons we can do using branch
7169   instructions.  BYPASS_CODE is comparison code for branch that will
7170   branch around FIRST_CODE and SECOND_CODE.  If some of branches
7171   is not required, set value to NIL.
7172   We never require more than two branches.  */
7173static void
7174ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7175     enum rtx_code code, *bypass_code, *first_code, *second_code;
7176{
7177  *first_code = code;
7178  *bypass_code = NIL;
7179  *second_code = NIL;
7180
7181  /* The fcomi comparison sets flags as follows:
7182
7183     cmp    ZF PF CF
7184     >      0  0  0
7185     <      0  0  1
7186     =      1  0  0
7187     un     1  1  1 */
7188
7189  switch (code)
7190    {
7191    case GT:			/* GTU - CF=0 & ZF=0 */
7192    case GE:			/* GEU - CF=0 */
7193    case ORDERED:		/* PF=0 */
7194    case UNORDERED:		/* PF=1 */
7195    case UNEQ:			/* EQ - ZF=1 */
7196    case UNLT:			/* LTU - CF=1 */
7197    case UNLE:			/* LEU - CF=1 | ZF=1 */
7198    case LTGT:			/* EQ - ZF=0 */
7199      break;
7200    case LT:			/* LTU - CF=1 - fails on unordered */
7201      *first_code = UNLT;
7202      *bypass_code = UNORDERED;
7203      break;
7204    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
7205      *first_code = UNLE;
7206      *bypass_code = UNORDERED;
7207      break;
7208    case EQ:			/* EQ - ZF=1 - fails on unordered */
7209      *first_code = UNEQ;
7210      *bypass_code = UNORDERED;
7211      break;
7212    case NE:			/* NE - ZF=0 - fails on unordered */
7213      *first_code = LTGT;
7214      *second_code = UNORDERED;
7215      break;
7216    case UNGE:			/* GEU - CF=0 - fails on unordered */
7217      *first_code = GE;
7218      *second_code = UNORDERED;
7219      break;
7220    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
7221      *first_code = GT;
7222      *second_code = UNORDERED;
7223      break;
7224    default:
7225      abort ();
7226    }
7227  if (!TARGET_IEEE_FP)
7228    {
7229      *second_code = NIL;
7230      *bypass_code = NIL;
7231    }
7232}
7233
7234/* Return cost of comparison done fcom + arithmetics operations on AX.
7235   All following functions do use number of instructions as an cost metrics.
7236   In future this should be tweaked to compute bytes for optimize_size and
7237   take into account performance of various instructions on various CPUs.  */
7238static int
7239ix86_fp_comparison_arithmetics_cost (code)
7240     enum rtx_code code;
7241{
7242  if (!TARGET_IEEE_FP)
7243    return 4;
7244  /* The cost of code output by ix86_expand_fp_compare.  */
7245  switch (code)
7246    {
7247    case UNLE:
7248    case UNLT:
7249    case LTGT:
7250    case GT:
7251    case GE:
7252    case UNORDERED:
7253    case ORDERED:
7254    case UNEQ:
7255      return 4;
7256      break;
7257    case LT:
7258    case NE:
7259    case EQ:
7260    case UNGE:
7261      return 5;
7262      break;
7263    case LE:
7264    case UNGT:
7265      return 6;
7266      break;
7267    default:
7268      abort ();
7269    }
7270}
7271
7272/* Return cost of comparison done using fcomi operation.
7273   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
7274static int
7275ix86_fp_comparison_fcomi_cost (code)
7276     enum rtx_code code;
7277{
7278  enum rtx_code bypass_code, first_code, second_code;
7279  /* Return arbitarily high cost when instruction is not supported - this
7280     prevents gcc from using it.  */
7281  if (!TARGET_CMOVE)
7282    return 1024;
7283  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7284  return (bypass_code != NIL || second_code != NIL) + 2;
7285}
7286
7287/* Return cost of comparison done using sahf operation.
7288   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
7289static int
7290ix86_fp_comparison_sahf_cost (code)
7291     enum rtx_code code;
7292{
7293  enum rtx_code bypass_code, first_code, second_code;
7294  /* Return arbitarily high cost when instruction is not preferred - this
7295     avoids gcc from using it.  */
7296  if (!TARGET_USE_SAHF && !optimize_size)
7297    return 1024;
7298  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7299  return (bypass_code != NIL || second_code != NIL) + 3;
7300}
7301
7302/* Compute cost of the comparison done using any method.
7303   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
7304static int
7305ix86_fp_comparison_cost (code)
7306     enum rtx_code code;
7307{
7308  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
7309  int min;
7310
7311  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
7312  sahf_cost = ix86_fp_comparison_sahf_cost (code);
7313
7314  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
7315  if (min > sahf_cost)
7316    min = sahf_cost;
7317  if (min > fcomi_cost)
7318    min = fcomi_cost;
7319  return min;
7320}
7321
7322/* Generate insn patterns to do a floating point compare of OPERANDS.  */
7323
7324static rtx
7325ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
7326     enum rtx_code code;
7327     rtx op0, op1, scratch;
7328     rtx *second_test;
7329     rtx *bypass_test;
7330{
7331  enum machine_mode fpcmp_mode, intcmp_mode;
7332  rtx tmp, tmp2;
7333  int cost = ix86_fp_comparison_cost (code);
7334  enum rtx_code bypass_code, first_code, second_code;
7335
7336  fpcmp_mode = ix86_fp_compare_mode (code);
7337  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
7338
7339  if (second_test)
7340    *second_test = NULL_RTX;
7341  if (bypass_test)
7342    *bypass_test = NULL_RTX;
7343
7344  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7345
7346  /* Do fcomi/sahf based test when profitable.  */
7347  if ((bypass_code == NIL || bypass_test)
7348      && (second_code == NIL || second_test)
7349      && ix86_fp_comparison_arithmetics_cost (code) > cost)
7350    {
7351      if (TARGET_CMOVE)
7352	{
7353	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7354	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
7355			     tmp);
7356	  emit_insn (tmp);
7357	}
7358      else
7359	{
7360	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7361	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7362	  if (!scratch)
7363	    scratch = gen_reg_rtx (HImode);
7364	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7365	  emit_insn (gen_x86_sahf_1 (scratch));
7366	}
7367
7368      /* The FP codes work out to act like unsigned.  */
7369      intcmp_mode = fpcmp_mode;
7370      code = first_code;
7371      if (bypass_code != NIL)
7372	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
7373				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
7374				       const0_rtx);
7375      if (second_code != NIL)
7376	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
7377				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
7378				       const0_rtx);
7379    }
7380  else
7381    {
7382      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
7383      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
7384      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
7385      if (!scratch)
7386	scratch = gen_reg_rtx (HImode);
7387      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
7388
7389      /* In the unordered case, we have to check C2 for NaN's, which
7390	 doesn't happen to work out to anything nice combination-wise.
7391	 So do some bit twiddling on the value we've got in AH to come
7392	 up with an appropriate set of condition codes.  */
7393
7394      intcmp_mode = CCNOmode;
7395      switch (code)
7396	{
7397	case GT:
7398	case UNGT:
7399	  if (code == GT || !TARGET_IEEE_FP)
7400	    {
7401	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7402	      code = EQ;
7403	    }
7404	  else
7405	    {
7406	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7407	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7408	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
7409	      intcmp_mode = CCmode;
7410	      code = GEU;
7411	    }
7412	  break;
7413	case LT:
7414	case UNLT:
7415	  if (code == LT && TARGET_IEEE_FP)
7416	    {
7417	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7418	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
7419	      intcmp_mode = CCmode;
7420	      code = EQ;
7421	    }
7422	  else
7423	    {
7424	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
7425	      code = NE;
7426	    }
7427	  break;
7428	case GE:
7429	case UNGE:
7430	  if (code == GE || !TARGET_IEEE_FP)
7431	    {
7432	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
7433	      code = EQ;
7434	    }
7435	  else
7436	    {
7437	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7438	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7439					     GEN_INT (0x01)));
7440	      code = NE;
7441	    }
7442	  break;
7443	case LE:
7444	case UNLE:
7445	  if (code == LE && TARGET_IEEE_FP)
7446	    {
7447	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7448	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
7449	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7450	      intcmp_mode = CCmode;
7451	      code = LTU;
7452	    }
7453	  else
7454	    {
7455	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
7456	      code = NE;
7457	    }
7458	  break;
7459	case EQ:
7460	case UNEQ:
7461	  if (code == EQ && TARGET_IEEE_FP)
7462	    {
7463	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7464	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
7465	      intcmp_mode = CCmode;
7466	      code = EQ;
7467	    }
7468	  else
7469	    {
7470	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7471	      code = NE;
7472	      break;
7473	    }
7474	  break;
7475	case NE:
7476	case LTGT:
7477	  if (code == NE && TARGET_IEEE_FP)
7478	    {
7479	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
7480	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
7481					     GEN_INT (0x40)));
7482	      code = NE;
7483	    }
7484	  else
7485	    {
7486	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
7487	      code = EQ;
7488	    }
7489	  break;
7490
7491	case UNORDERED:
7492	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7493	  code = NE;
7494	  break;
7495	case ORDERED:
7496	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
7497	  code = EQ;
7498	  break;
7499
7500	default:
7501	  abort ();
7502	}
7503    }
7504
7505  /* Return the test that should be put into the flags user, i.e.
7506     the bcc, scc, or cmov instruction.  */
7507  return gen_rtx_fmt_ee (code, VOIDmode,
7508			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
7509			 const0_rtx);
7510}
7511
7512rtx
7513ix86_expand_compare (code, second_test, bypass_test)
7514     enum rtx_code code;
7515     rtx *second_test, *bypass_test;
7516{
7517  rtx op0, op1, ret;
7518  op0 = ix86_compare_op0;
7519  op1 = ix86_compare_op1;
7520
7521  if (second_test)
7522    *second_test = NULL_RTX;
7523  if (bypass_test)
7524    *bypass_test = NULL_RTX;
7525
7526  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7527    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
7528				  second_test, bypass_test);
7529  else
7530    ret = ix86_expand_int_compare (code, op0, op1);
7531
7532  return ret;
7533}
7534
7535/* Return true if the CODE will result in nontrivial jump sequence.  */
7536bool
7537ix86_fp_jump_nontrivial_p (code)
7538    enum rtx_code code;
7539{
7540  enum rtx_code bypass_code, first_code, second_code;
7541  if (!TARGET_CMOVE)
7542    return true;
7543  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7544  return bypass_code != NIL || second_code != NIL;
7545}
7546
7547void
7548ix86_expand_branch (code, label)
7549     enum rtx_code code;
7550     rtx label;
7551{
7552  rtx tmp;
7553
7554  switch (GET_MODE (ix86_compare_op0))
7555    {
7556    case QImode:
7557    case HImode:
7558    case SImode:
7559      simple:
7560      tmp = ix86_expand_compare (code, NULL, NULL);
7561      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7562				  gen_rtx_LABEL_REF (VOIDmode, label),
7563				  pc_rtx);
7564      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
7565      return;
7566
7567    case SFmode:
7568    case DFmode:
7569    case XFmode:
7570    case TFmode:
7571      {
7572	rtvec vec;
7573	int use_fcomi;
7574	enum rtx_code bypass_code, first_code, second_code;
7575
7576	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
7577					     &ix86_compare_op1);
7578
7579	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
7580
7581	/* Check whether we will use the natural sequence with one jump.  If
7582	   so, we can expand jump early.  Otherwise delay expansion by
7583	   creating compound insn to not confuse optimizers.  */
7584	if (bypass_code == NIL && second_code == NIL
7585	    && TARGET_CMOVE)
7586	  {
7587	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
7588				  gen_rtx_LABEL_REF (VOIDmode, label),
7589				  pc_rtx, NULL_RTX);
7590	  }
7591	else
7592	  {
7593	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
7594				  ix86_compare_op0, ix86_compare_op1);
7595	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
7596					gen_rtx_LABEL_REF (VOIDmode, label),
7597					pc_rtx);
7598	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
7599
7600	    use_fcomi = ix86_use_fcomi_compare (code);
7601	    vec = rtvec_alloc (3 + !use_fcomi);
7602	    RTVEC_ELT (vec, 0) = tmp;
7603	    RTVEC_ELT (vec, 1)
7604	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
7605	    RTVEC_ELT (vec, 2)
7606	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
7607	    if (! use_fcomi)
7608	      RTVEC_ELT (vec, 3)
7609		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
7610
7611	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
7612	  }
7613	return;
7614      }
7615
7616    case DImode:
7617      if (TARGET_64BIT)
7618	goto simple;
7619      /* Expand DImode branch into multiple compare+branch.  */
7620      {
7621	rtx lo[2], hi[2], label2;
7622	enum rtx_code code1, code2, code3;
7623
7624	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
7625	  {
7626	    tmp = ix86_compare_op0;
7627	    ix86_compare_op0 = ix86_compare_op1;
7628	    ix86_compare_op1 = tmp;
7629	    code = swap_condition (code);
7630	  }
7631	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
7632	split_di (&ix86_compare_op1, 1, lo+1, hi+1);
7633
7634	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7635	   avoid two branches.  This costs one extra insn, so disable when
7636	   optimizing for size.  */
7637
7638	if ((code == EQ || code == NE)
7639	    && (!optimize_size
7640	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
7641	  {
7642	    rtx xor0, xor1;
7643
7644	    xor1 = hi[0];
7645	    if (hi[1] != const0_rtx)
7646	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
7647				   NULL_RTX, 0, OPTAB_WIDEN);
7648
7649	    xor0 = lo[0];
7650	    if (lo[1] != const0_rtx)
7651	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
7652				   NULL_RTX, 0, OPTAB_WIDEN);
7653
7654	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
7655				NULL_RTX, 0, OPTAB_WIDEN);
7656
7657	    ix86_compare_op0 = tmp;
7658	    ix86_compare_op1 = const0_rtx;
7659	    ix86_expand_branch (code, label);
7660	    return;
7661	  }
7662
7663	/* Otherwise, if we are doing less-than or greater-or-equal-than,
7664	   op1 is a constant and the low word is zero, then we can just
7665	   examine the high word.  */
7666
7667	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
7668	  switch (code)
7669	    {
7670	    case LT: case LTU: case GE: case GEU:
7671	      ix86_compare_op0 = hi[0];
7672	      ix86_compare_op1 = hi[1];
7673	      ix86_expand_branch (code, label);
7674	      return;
7675	    default:
7676	      break;
7677	    }
7678
7679	/* Otherwise, we need two or three jumps.  */
7680
7681	label2 = gen_label_rtx ();
7682
7683	code1 = code;
7684	code2 = swap_condition (code);
7685	code3 = unsigned_condition (code);
7686
7687	switch (code)
7688	  {
7689	  case LT: case GT: case LTU: case GTU:
7690	    break;
7691
7692	  case LE:   code1 = LT;  code2 = GT;  break;
7693	  case GE:   code1 = GT;  code2 = LT;  break;
7694	  case LEU:  code1 = LTU; code2 = GTU; break;
7695	  case GEU:  code1 = GTU; code2 = LTU; break;
7696
7697	  case EQ:   code1 = NIL; code2 = NE;  break;
7698	  case NE:   code2 = NIL; break;
7699
7700	  default:
7701	    abort ();
7702	  }
7703
7704	/*
7705	 * a < b =>
7706	 *    if (hi(a) < hi(b)) goto true;
7707	 *    if (hi(a) > hi(b)) goto false;
7708	 *    if (lo(a) < lo(b)) goto true;
7709	 *  false:
7710	 */
7711
7712	ix86_compare_op0 = hi[0];
7713	ix86_compare_op1 = hi[1];
7714
7715	if (code1 != NIL)
7716	  ix86_expand_branch (code1, label);
7717	if (code2 != NIL)
7718	  ix86_expand_branch (code2, label2);
7719
7720	ix86_compare_op0 = lo[0];
7721	ix86_compare_op1 = lo[1];
7722	ix86_expand_branch (code3, label);
7723
7724	if (code2 != NIL)
7725	  emit_label (label2);
7726	return;
7727      }
7728
7729    default:
7730      abort ();
7731    }
7732}
7733
7734/* Split branch based on floating point condition.  */
7735void
7736ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
7737     enum rtx_code code;
7738     rtx op1, op2, target1, target2, tmp;
7739{
7740  rtx second, bypass;
7741  rtx label = NULL_RTX;
7742  rtx condition;
7743  int bypass_probability = -1, second_probability = -1, probability = -1;
7744  rtx i;
7745
7746  if (target2 != pc_rtx)
7747    {
7748      rtx tmp = target2;
7749      code = reverse_condition_maybe_unordered (code);
7750      target2 = target1;
7751      target1 = tmp;
7752    }
7753
7754  condition = ix86_expand_fp_compare (code, op1, op2,
7755				      tmp, &second, &bypass);
7756
7757  if (split_branch_probability >= 0)
7758    {
7759      /* Distribute the probabilities across the jumps.
7760	 Assume the BYPASS and SECOND to be always test
7761	 for UNORDERED.  */
7762      probability = split_branch_probability;
7763
7764      /* Value of 1 is low enough to make no need for probability
7765	 to be updated.  Later we may run some experiments and see
7766	 if unordered values are more frequent in practice.  */
7767      if (bypass)
7768	bypass_probability = 1;
7769      if (second)
7770	second_probability = 1;
7771    }
7772  if (bypass != NULL_RTX)
7773    {
7774      label = gen_label_rtx ();
7775      i = emit_jump_insn (gen_rtx_SET
7776			  (VOIDmode, pc_rtx,
7777			   gen_rtx_IF_THEN_ELSE (VOIDmode,
7778						 bypass,
7779						 gen_rtx_LABEL_REF (VOIDmode,
7780								    label),
7781						 pc_rtx)));
7782      if (bypass_probability >= 0)
7783	REG_NOTES (i)
7784	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
7785			       GEN_INT (bypass_probability),
7786			       REG_NOTES (i));
7787    }
7788  i = emit_jump_insn (gen_rtx_SET
7789		      (VOIDmode, pc_rtx,
7790		       gen_rtx_IF_THEN_ELSE (VOIDmode,
7791					     condition, target1, target2)));
7792  if (probability >= 0)
7793    REG_NOTES (i)
7794      = gen_rtx_EXPR_LIST (REG_BR_PROB,
7795			   GEN_INT (probability),
7796			   REG_NOTES (i));
7797  if (second != NULL_RTX)
7798    {
7799      i = emit_jump_insn (gen_rtx_SET
7800			  (VOIDmode, pc_rtx,
7801			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
7802						 target2)));
7803      if (second_probability >= 0)
7804	REG_NOTES (i)
7805	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
7806			       GEN_INT (second_probability),
7807			       REG_NOTES (i));
7808    }
7809  if (label != NULL_RTX)
7810    emit_label (label);
7811}
7812
7813int
7814ix86_expand_setcc (code, dest)
7815     enum rtx_code code;
7816     rtx dest;
7817{
7818  rtx ret, tmp, tmpreg;
7819  rtx second_test, bypass_test;
7820
7821  if (GET_MODE (ix86_compare_op0) == DImode
7822      && !TARGET_64BIT)
7823    return 0; /* FAIL */
7824
7825  if (GET_MODE (dest) != QImode)
7826    abort ();
7827
7828  ret = ix86_expand_compare (code, &second_test, &bypass_test);
7829  PUT_MODE (ret, QImode);
7830
7831  tmp = dest;
7832  tmpreg = dest;
7833
7834  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
7835  if (bypass_test || second_test)
7836    {
7837      rtx test = second_test;
7838      int bypass = 0;
7839      rtx tmp2 = gen_reg_rtx (QImode);
7840      if (bypass_test)
7841	{
7842	  if (second_test)
7843	    abort ();
7844	  test = bypass_test;
7845	  bypass = 1;
7846	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
7847	}
7848      PUT_MODE (test, QImode);
7849      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
7850
7851      if (bypass)
7852	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
7853      else
7854	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
7855    }
7856
7857  return 1; /* DONE */
7858}
7859
7860int
7861ix86_expand_int_movcc (operands)
7862     rtx operands[];
7863{
7864  enum rtx_code code = GET_CODE (operands[1]), compare_code;
7865  rtx compare_seq, compare_op;
7866  rtx second_test, bypass_test;
7867  enum machine_mode mode = GET_MODE (operands[0]);
7868
7869  /* When the compare code is not LTU or GEU, we can not use sbbl case.
7870     In case comparsion is done with immediate, we can convert it to LTU or
7871     GEU by altering the integer.  */
7872
7873  if ((code == LEU || code == GTU)
7874      && GET_CODE (ix86_compare_op1) == CONST_INT
7875      && mode != HImode
7876      && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
7877      && GET_CODE (operands[2]) == CONST_INT
7878      && GET_CODE (operands[3]) == CONST_INT)
7879    {
7880      if (code == LEU)
7881	code = LTU;
7882      else
7883	code = GEU;
7884      ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
7885    }
7886
7887  start_sequence ();
7888  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
7889  compare_seq = gen_sequence ();
7890  end_sequence ();
7891
7892  compare_code = GET_CODE (compare_op);
7893
7894  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
7895     HImode insns, we'd be swallowed in word prefix ops.  */
7896
7897  if (mode != HImode
7898      && (mode != DImode || TARGET_64BIT)
7899      && GET_CODE (operands[2]) == CONST_INT
7900      && GET_CODE (operands[3]) == CONST_INT)
7901    {
7902      rtx out = operands[0];
7903      HOST_WIDE_INT ct = INTVAL (operands[2]);
7904      HOST_WIDE_INT cf = INTVAL (operands[3]);
7905      HOST_WIDE_INT diff;
7906
7907      if ((compare_code == LTU || compare_code == GEU)
7908	  && !second_test && !bypass_test)
7909	{
7910
7911	  /* Detect overlap between destination and compare sources.  */
7912	  rtx tmp = out;
7913
7914	  /* To simplify rest of code, restrict to the GEU case.  */
7915	  if (compare_code == LTU)
7916	    {
7917	      int tmp = ct;
7918	      ct = cf;
7919	      cf = tmp;
7920	      compare_code = reverse_condition (compare_code);
7921	      code = reverse_condition (code);
7922	    }
7923	  diff = ct - cf;
7924
7925	  if (reg_overlap_mentioned_p (out, ix86_compare_op0)
7926	      || reg_overlap_mentioned_p (out, ix86_compare_op1))
7927	    tmp = gen_reg_rtx (mode);
7928
7929	  emit_insn (compare_seq);
7930	  if (mode == DImode)
7931	    emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
7932	  else
7933	    emit_insn (gen_x86_movsicc_0_m1 (tmp));
7934
7935	  if (diff == 1)
7936	    {
7937	      /*
7938	       * cmpl op0,op1
7939	       * sbbl dest,dest
7940	       * [addl dest, ct]
7941	       *
7942	       * Size 5 - 8.
7943	       */
7944	      if (ct)
7945	       	tmp = expand_simple_binop (mode, PLUS,
7946					   tmp, GEN_INT (ct),
7947					   tmp, 1, OPTAB_DIRECT);
7948	    }
7949	  else if (cf == -1)
7950	    {
7951	      /*
7952	       * cmpl op0,op1
7953	       * sbbl dest,dest
7954	       * orl $ct, dest
7955	       *
7956	       * Size 8.
7957	       */
7958	      tmp = expand_simple_binop (mode, IOR,
7959					 tmp, GEN_INT (ct),
7960					 tmp, 1, OPTAB_DIRECT);
7961	    }
7962	  else if (diff == -1 && ct)
7963	    {
7964	      /*
7965	       * cmpl op0,op1
7966	       * sbbl dest,dest
7967	       * xorl $-1, dest
7968	       * [addl dest, cf]
7969	       *
7970	       * Size 8 - 11.
7971	       */
7972	      tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
7973	      if (cf)
7974	       	tmp = expand_simple_binop (mode, PLUS,
7975					   tmp, GEN_INT (cf),
7976					   tmp, 1, OPTAB_DIRECT);
7977	    }
7978	  else
7979	    {
7980	      /*
7981	       * cmpl op0,op1
7982	       * sbbl dest,dest
7983	       * andl cf - ct, dest
7984	       * [addl dest, ct]
7985	       *
7986	       * Size 8 - 11.
7987	       */
7988	      tmp = expand_simple_binop (mode, AND,
7989					 tmp,
7990					 GEN_INT (trunc_int_for_mode
7991						  (cf - ct, mode)),
7992					 tmp, 1, OPTAB_DIRECT);
7993	      if (ct)
7994	       	tmp = expand_simple_binop (mode, PLUS,
7995					   tmp, GEN_INT (ct),
7996					   tmp, 1, OPTAB_DIRECT);
7997	    }
7998
7999	  if (tmp != out)
8000	    emit_move_insn (out, tmp);
8001
8002	  return 1; /* DONE */
8003	}
8004
8005      diff = ct - cf;
8006      if (diff < 0)
8007	{
8008	  HOST_WIDE_INT tmp;
8009	  tmp = ct, ct = cf, cf = tmp;
8010	  diff = -diff;
8011	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8012	    {
8013	      /* We may be reversing unordered compare to normal compare, that
8014		 is not valid in general (we may convert non-trapping condition
8015		 to trapping one), however on i386 we currently emit all
8016		 comparisons unordered.  */
8017	      compare_code = reverse_condition_maybe_unordered (compare_code);
8018	      code = reverse_condition_maybe_unordered (code);
8019	    }
8020	  else
8021	    {
8022	      compare_code = reverse_condition (compare_code);
8023	      code = reverse_condition (code);
8024	    }
8025	}
8026      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8027	   || diff == 3 || diff == 5 || diff == 9)
8028	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8029	{
8030	  /*
8031	   * xorl dest,dest
8032	   * cmpl op1,op2
8033	   * setcc dest
8034	   * lea cf(dest*(ct-cf)),dest
8035	   *
8036	   * Size 14.
8037	   *
8038	   * This also catches the degenerate setcc-only case.
8039	   */
8040
8041	  rtx tmp;
8042	  int nops;
8043
8044	  out = emit_store_flag (out, code, ix86_compare_op0,
8045				 ix86_compare_op1, VOIDmode, 0, 1);
8046
8047	  nops = 0;
8048	  /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
8049	     done in proper mode to match.  */
8050	  if (diff == 1)
8051	    tmp = out;
8052	  else
8053	    {
8054	      rtx out1;
8055	      out1 = out;
8056	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8057	      nops++;
8058	      if (diff & 1)
8059		{
8060		  tmp = gen_rtx_PLUS (mode, tmp, out1);
8061		  nops++;
8062		}
8063	    }
8064	  if (cf != 0)
8065	    {
8066	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8067	      nops++;
8068	    }
8069	  if (tmp != out
8070	      && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8071	    {
8072	      if (nops == 1)
8073		{
8074		  rtx clob;
8075
8076		  clob = gen_rtx_REG (CCmode, FLAGS_REG);
8077		  clob = gen_rtx_CLOBBER (VOIDmode, clob);
8078
8079		  tmp = gen_rtx_SET (VOIDmode, out, tmp);
8080		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8081		  emit_insn (tmp);
8082		}
8083	      else
8084		emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8085	    }
8086	  if (out != operands[0])
8087	    emit_move_insn (operands[0], out);
8088
8089	  return 1; /* DONE */
8090	}
8091
8092      /*
8093       * General case:			Jumpful:
8094       *   xorl dest,dest		cmpl op1, op2
8095       *   cmpl op1, op2		movl ct, dest
8096       *   setcc dest			jcc 1f
8097       *   decl dest			movl cf, dest
8098       *   andl (cf-ct),dest		1:
8099       *   addl ct,dest
8100       *
8101       * Size 20.			Size 14.
8102       *
8103       * This is reasonably steep, but branch mispredict costs are
8104       * high on modern cpus, so consider failing only if optimizing
8105       * for space.
8106       *
8107       * %%% Parameterize branch_cost on the tuning architecture, then
8108       * use that.  The 80386 couldn't care less about mispredicts.
8109       */
8110
8111      if (!optimize_size && !TARGET_CMOVE)
8112	{
8113	  if (ct == 0)
8114	    {
8115	      ct = cf;
8116	      cf = 0;
8117	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8118		{
8119		  /* We may be reversing unordered compare to normal compare,
8120		     that is not valid in general (we may convert non-trapping
8121		     condition to trapping one), however on i386 we currently
8122		     emit all comparisons unordered.  */
8123		  compare_code = reverse_condition_maybe_unordered (compare_code);
8124		  code = reverse_condition_maybe_unordered (code);
8125		}
8126	      else
8127		{
8128		  compare_code = reverse_condition (compare_code);
8129		  code = reverse_condition (code);
8130		}
8131	    }
8132
8133	  out = emit_store_flag (out, code, ix86_compare_op0,
8134				 ix86_compare_op1, VOIDmode, 0, 1);
8135
8136	  out = expand_simple_binop (mode, PLUS,
8137				     out, constm1_rtx,
8138				     out, 1, OPTAB_DIRECT);
8139	  out = expand_simple_binop (mode, AND,
8140				     out,
8141				     GEN_INT (trunc_int_for_mode
8142					      (cf - ct, mode)),
8143				     out, 1, OPTAB_DIRECT);
8144	  out = expand_simple_binop (mode, PLUS,
8145				     out, GEN_INT (ct),
8146				     out, 1, OPTAB_DIRECT);
8147	  if (out != operands[0])
8148	    emit_move_insn (operands[0], out);
8149
8150	  return 1; /* DONE */
8151	}
8152    }
8153
8154  if (!TARGET_CMOVE)
8155    {
8156      /* Try a few things more with specific constants and a variable.  */
8157
8158      optab op;
8159      rtx var, orig_out, out, tmp;
8160
8161      if (optimize_size)
8162	return 0; /* FAIL */
8163
8164      /* If one of the two operands is an interesting constant, load a
8165	 constant with the above and mask it in with a logical operation.  */
8166
8167      if (GET_CODE (operands[2]) == CONST_INT)
8168	{
8169	  var = operands[3];
8170	  if (INTVAL (operands[2]) == 0)
8171	    operands[3] = constm1_rtx, op = and_optab;
8172	  else if (INTVAL (operands[2]) == -1)
8173	    operands[3] = const0_rtx, op = ior_optab;
8174	  else
8175	    return 0; /* FAIL */
8176	}
8177      else if (GET_CODE (operands[3]) == CONST_INT)
8178	{
8179	  var = operands[2];
8180	  if (INTVAL (operands[3]) == 0)
8181	    operands[2] = constm1_rtx, op = and_optab;
8182	  else if (INTVAL (operands[3]) == -1)
8183	    operands[2] = const0_rtx, op = ior_optab;
8184	  else
8185	    return 0; /* FAIL */
8186	}
8187      else
8188        return 0; /* FAIL */
8189
8190      orig_out = operands[0];
8191      tmp = gen_reg_rtx (mode);
8192      operands[0] = tmp;
8193
8194      /* Recurse to get the constant loaded.  */
8195      if (ix86_expand_int_movcc (operands) == 0)
8196        return 0; /* FAIL */
8197
8198      /* Mask in the interesting variable.  */
8199      out = expand_binop (mode, op, var, tmp, orig_out, 0,
8200			  OPTAB_WIDEN);
8201      if (out != orig_out)
8202	emit_move_insn (orig_out, out);
8203
8204      return 1; /* DONE */
8205    }
8206
8207  /*
8208   * For comparison with above,
8209   *
8210   * movl cf,dest
8211   * movl ct,tmp
8212   * cmpl op1,op2
8213   * cmovcc tmp,dest
8214   *
8215   * Size 15.
8216   */
8217
8218  if (! nonimmediate_operand (operands[2], mode))
8219    operands[2] = force_reg (mode, operands[2]);
8220  if (! nonimmediate_operand (operands[3], mode))
8221    operands[3] = force_reg (mode, operands[3]);
8222
8223  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8224    {
8225      rtx tmp = gen_reg_rtx (mode);
8226      emit_move_insn (tmp, operands[3]);
8227      operands[3] = tmp;
8228    }
8229  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8230    {
8231      rtx tmp = gen_reg_rtx (mode);
8232      emit_move_insn (tmp, operands[2]);
8233      operands[2] = tmp;
8234    }
8235  if (! register_operand (operands[2], VOIDmode)
8236      && ! register_operand (operands[3], VOIDmode))
8237    operands[2] = force_reg (mode, operands[2]);
8238
8239  emit_insn (compare_seq);
8240  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8241			  gen_rtx_IF_THEN_ELSE (mode,
8242						compare_op, operands[2],
8243						operands[3])));
8244  if (bypass_test)
8245    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8246			    gen_rtx_IF_THEN_ELSE (mode,
8247				  bypass_test,
8248				  operands[3],
8249				  operands[0])));
8250  if (second_test)
8251    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8252			    gen_rtx_IF_THEN_ELSE (mode,
8253				  second_test,
8254				  operands[2],
8255				  operands[0])));
8256
8257  return 1; /* DONE */
8258}
8259
8260int
8261ix86_expand_fp_movcc (operands)
8262     rtx operands[];
8263{
8264  enum rtx_code code;
8265  rtx tmp;
8266  rtx compare_op, second_test, bypass_test;
8267
8268  /* For SF/DFmode conditional moves based on comparisons
8269     in same mode, we may want to use SSE min/max instructions.  */
8270  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
8271       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
8272      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
8273      /* The SSE comparisons does not support the LTGT/UNEQ pair.  */
8274      && (!TARGET_IEEE_FP
8275	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
8276      /* We may be called from the post-reload splitter.  */
8277      && (!REG_P (operands[0])
8278	  || SSE_REG_P (operands[0])
8279	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
8280    {
8281      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
8282      code = GET_CODE (operands[1]);
8283
8284      /* See if we have (cross) match between comparison operands and
8285         conditional move operands.  */
8286      if (rtx_equal_p (operands[2], op1))
8287	{
8288	  rtx tmp = op0;
8289	  op0 = op1;
8290	  op1 = tmp;
8291	  code = reverse_condition_maybe_unordered (code);
8292	}
8293      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
8294	{
8295	  /* Check for min operation.  */
8296	  if (code == LT)
8297	    {
8298	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8299	       if (memory_operand (op0, VOIDmode))
8300		 op0 = force_reg (GET_MODE (operands[0]), op0);
8301	       if (GET_MODE (operands[0]) == SFmode)
8302		 emit_insn (gen_minsf3 (operands[0], op0, op1));
8303	       else
8304		 emit_insn (gen_mindf3 (operands[0], op0, op1));
8305	       return 1;
8306	    }
8307	  /* Check for max operation.  */
8308	  if (code == GT)
8309	    {
8310	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
8311	       if (memory_operand (op0, VOIDmode))
8312		 op0 = force_reg (GET_MODE (operands[0]), op0);
8313	       if (GET_MODE (operands[0]) == SFmode)
8314		 emit_insn (gen_maxsf3 (operands[0], op0, op1));
8315	       else
8316		 emit_insn (gen_maxdf3 (operands[0], op0, op1));
8317	       return 1;
8318	    }
8319	}
8320      /* Manage condition to be sse_comparison_operator.  In case we are
8321	 in non-ieee mode, try to canonicalize the destination operand
8322	 to be first in the comparison - this helps reload to avoid extra
8323	 moves.  */
8324      if (!sse_comparison_operator (operands[1], VOIDmode)
8325	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
8326	{
8327	  rtx tmp = ix86_compare_op0;
8328	  ix86_compare_op0 = ix86_compare_op1;
8329	  ix86_compare_op1 = tmp;
8330	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
8331					VOIDmode, ix86_compare_op0,
8332					ix86_compare_op1);
8333	}
8334      /* Similary try to manage result to be first operand of conditional
8335	 move. We also don't support the NE comparison on SSE, so try to
8336	 avoid it.  */
8337      if ((rtx_equal_p (operands[0], operands[3])
8338	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
8339	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
8340	{
8341	  rtx tmp = operands[2];
8342	  operands[2] = operands[3];
8343	  operands[3] = tmp;
8344	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8345					  (GET_CODE (operands[1])),
8346					VOIDmode, ix86_compare_op0,
8347					ix86_compare_op1);
8348	}
8349      if (GET_MODE (operands[0]) == SFmode)
8350	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
8351				    operands[2], operands[3],
8352				    ix86_compare_op0, ix86_compare_op1));
8353      else
8354	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
8355				    operands[2], operands[3],
8356				    ix86_compare_op0, ix86_compare_op1));
8357      return 1;
8358    }
8359
8360  /* The floating point conditional move instructions don't directly
8361     support conditions resulting from a signed integer comparison.  */
8362
8363  code = GET_CODE (operands[1]);
8364  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8365
8366  /* The floating point conditional move instructions don't directly
8367     support signed integer comparisons.  */
8368
8369  if (!fcmov_comparison_operator (compare_op, VOIDmode))
8370    {
8371      if (second_test != NULL || bypass_test != NULL)
8372	abort ();
8373      tmp = gen_reg_rtx (QImode);
8374      ix86_expand_setcc (code, tmp);
8375      code = NE;
8376      ix86_compare_op0 = tmp;
8377      ix86_compare_op1 = const0_rtx;
8378      compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
8379    }
8380  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
8381    {
8382      tmp = gen_reg_rtx (GET_MODE (operands[0]));
8383      emit_move_insn (tmp, operands[3]);
8384      operands[3] = tmp;
8385    }
8386  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
8387    {
8388      tmp = gen_reg_rtx (GET_MODE (operands[0]));
8389      emit_move_insn (tmp, operands[2]);
8390      operands[2] = tmp;
8391    }
8392
8393  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8394			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8395				compare_op,
8396				operands[2],
8397				operands[3])));
8398  if (bypass_test)
8399    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8400			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8401				  bypass_test,
8402				  operands[3],
8403				  operands[0])));
8404  if (second_test)
8405    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8406			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
8407				  second_test,
8408				  operands[2],
8409				  operands[0])));
8410
8411  return 1;
8412}
8413
8414/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
8415   works for floating pointer parameters and nonoffsetable memories.
8416   For pushes, it returns just stack offsets; the values will be saved
8417   in the right order.  Maximally three parts are generated.  */
8418
8419static int
8420ix86_split_to_parts (operand, parts, mode)
8421     rtx operand;
8422     rtx *parts;
8423     enum machine_mode mode;
8424{
8425  int size;
8426
8427  if (!TARGET_64BIT)
8428    size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
8429  else
8430    size = (GET_MODE_SIZE (mode) + 4) / 8;
8431
8432  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
8433    abort ();
8434  if (size < 2 || size > 3)
8435    abort ();
8436
8437  /* Optimize constant pool reference to immediates.  This is used by fp moves,
8438     that force all constants to memory to allow combining.  */
8439
8440  if (GET_CODE (operand) == MEM
8441      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
8442      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
8443    operand = get_pool_constant (XEXP (operand, 0));
8444
8445  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
8446    {
8447      /* The only non-offsetable memories we handle are pushes.  */
8448      if (! push_operand (operand, VOIDmode))
8449	abort ();
8450
8451      operand = copy_rtx (operand);
8452      PUT_MODE (operand, Pmode);
8453      parts[0] = parts[1] = parts[2] = operand;
8454    }
8455  else if (!TARGET_64BIT)
8456    {
8457      if (mode == DImode)
8458	split_di (&operand, 1, &parts[0], &parts[1]);
8459      else
8460	{
8461	  if (REG_P (operand))
8462	    {
8463	      if (!reload_completed)
8464		abort ();
8465	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
8466	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8467	      if (size == 3)
8468		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
8469	    }
8470	  else if (offsettable_memref_p (operand))
8471	    {
8472	      operand = adjust_address (operand, SImode, 0);
8473	      parts[0] = operand;
8474	      parts[1] = adjust_address (operand, SImode, 4);
8475	      if (size == 3)
8476		parts[2] = adjust_address (operand, SImode, 8);
8477	    }
8478	  else if (GET_CODE (operand) == CONST_DOUBLE)
8479	    {
8480	      REAL_VALUE_TYPE r;
8481	      long l[4];
8482
8483	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8484	      switch (mode)
8485		{
8486		case XFmode:
8487		case TFmode:
8488		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8489		  parts[2] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8490		  break;
8491		case DFmode:
8492		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
8493		  break;
8494		default:
8495		  abort ();
8496		}
8497	      parts[1] = GEN_INT (trunc_int_for_mode (l[1], SImode));
8498	      parts[0] = GEN_INT (trunc_int_for_mode (l[0], SImode));
8499	    }
8500	  else
8501	    abort ();
8502	}
8503    }
8504  else
8505    {
8506      if (mode == TImode)
8507	split_ti (&operand, 1, &parts[0], &parts[1]);
8508      if (mode == XFmode || mode == TFmode)
8509	{
8510	  if (REG_P (operand))
8511	    {
8512	      if (!reload_completed)
8513		abort ();
8514	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
8515	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
8516	    }
8517	  else if (offsettable_memref_p (operand))
8518	    {
8519	      operand = adjust_address (operand, DImode, 0);
8520	      parts[0] = operand;
8521	      parts[1] = adjust_address (operand, SImode, 8);
8522	    }
8523	  else if (GET_CODE (operand) == CONST_DOUBLE)
8524	    {
8525	      REAL_VALUE_TYPE r;
8526	      long l[3];
8527
8528	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
8529	      REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
8530	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
8531	      if (HOST_BITS_PER_WIDE_INT >= 64)
8532	        parts[0]
8533		  = GEN_INT (trunc_int_for_mode
8534		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
8535		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
8536		       DImode));
8537	      else
8538	        parts[0] = immed_double_const (l[0], l[1], DImode);
8539	      parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode));
8540	    }
8541	  else
8542	    abort ();
8543	}
8544    }
8545
8546  return size;
8547}
8548
8549/* Emit insns to perform a move or push of DI, DF, and XF values.
8550   Return false when normal moves are needed; true when all required
8551   insns have been emitted.  Operands 2-4 contain the input values
8552   int the correct order; operands 5-7 contain the output values.  */
8553
8554void
8555ix86_split_long_move (operands)
8556     rtx operands[];
8557{
8558  rtx part[2][3];
8559  int nparts;
8560  int push = 0;
8561  int collisions = 0;
8562  enum machine_mode mode = GET_MODE (operands[0]);
8563
8564  /* The DFmode expanders may ask us to move double.
8565     For 64bit target this is single move.  By hiding the fact
8566     here we simplify i386.md splitters.  */
8567  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
8568    {
8569      /* Optimize constant pool reference to immediates.  This is used by
8570	 fp moves, that force all constants to memory to allow combining.  */
8571
8572      if (GET_CODE (operands[1]) == MEM
8573	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8574	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
8575	operands[1] = get_pool_constant (XEXP (operands[1], 0));
8576      if (push_operand (operands[0], VOIDmode))
8577	{
8578	  operands[0] = copy_rtx (operands[0]);
8579	  PUT_MODE (operands[0], Pmode);
8580	}
8581      else
8582        operands[0] = gen_lowpart (DImode, operands[0]);
8583      operands[1] = gen_lowpart (DImode, operands[1]);
8584      emit_move_insn (operands[0], operands[1]);
8585      return;
8586    }
8587
8588  /* The only non-offsettable memory we handle is push.  */
8589  if (push_operand (operands[0], VOIDmode))
8590    push = 1;
8591  else if (GET_CODE (operands[0]) == MEM
8592	   && ! offsettable_memref_p (operands[0]))
8593    abort ();
8594
8595  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
8596  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
8597
8598  /* When emitting push, take care for source operands on the stack.  */
8599  if (push && GET_CODE (operands[1]) == MEM
8600      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
8601    {
8602      if (nparts == 3)
8603	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
8604				     XEXP (part[1][2], 0));
8605      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
8606				   XEXP (part[1][1], 0));
8607    }
8608
8609  /* We need to do copy in the right order in case an address register
8610     of the source overlaps the destination.  */
8611  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
8612    {
8613      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
8614	collisions++;
8615      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8616	collisions++;
8617      if (nparts == 3
8618	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
8619	collisions++;
8620
8621      /* Collision in the middle part can be handled by reordering.  */
8622      if (collisions == 1 && nparts == 3
8623	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
8624	{
8625	  rtx tmp;
8626	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
8627	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
8628	}
8629
8630      /* If there are more collisions, we can't handle it by reordering.
8631	 Do an lea to the last part and use only one colliding move.  */
8632      else if (collisions > 1)
8633	{
8634	  collisions = 1;
8635	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
8636				  XEXP (part[1][0], 0)));
8637	  part[1][0] = change_address (part[1][0],
8638				       TARGET_64BIT ? DImode : SImode,
8639				       part[0][nparts - 1]);
8640	  part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
8641	  if (nparts == 3)
8642	    part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
8643	}
8644    }
8645
8646  if (push)
8647    {
8648      if (!TARGET_64BIT)
8649	{
8650	  if (nparts == 3)
8651	    {
8652	      /* We use only first 12 bytes of TFmode value, but for pushing we
8653		 are required to adjust stack as if we were pushing real 16byte
8654		 value.  */
8655	      if (mode == TFmode && !TARGET_64BIT)
8656		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
8657				       GEN_INT (-4)));
8658	      emit_move_insn (part[0][2], part[1][2]);
8659	    }
8660	}
8661      else
8662	{
8663	  /* In 64bit mode we don't have 32bit push available.  In case this is
8664	     register, it is OK - we will just use larger counterpart.  We also
8665	     retype memory - these comes from attempt to avoid REX prefix on
8666	     moving of second half of TFmode value.  */
8667	  if (GET_MODE (part[1][1]) == SImode)
8668	    {
8669	      if (GET_CODE (part[1][1]) == MEM)
8670		part[1][1] = adjust_address (part[1][1], DImode, 0);
8671	      else if (REG_P (part[1][1]))
8672		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
8673	      else
8674		abort ();
8675	      if (GET_MODE (part[1][0]) == SImode)
8676		part[1][0] = part[1][1];
8677	    }
8678	}
8679      emit_move_insn (part[0][1], part[1][1]);
8680      emit_move_insn (part[0][0], part[1][0]);
8681      return;
8682    }
8683
8684  /* Choose correct order to not overwrite the source before it is copied.  */
8685  if ((REG_P (part[0][0])
8686       && REG_P (part[1][1])
8687       && (REGNO (part[0][0]) == REGNO (part[1][1])
8688	   || (nparts == 3
8689	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
8690      || (collisions > 0
8691	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
8692    {
8693      if (nparts == 3)
8694	{
8695	  operands[2] = part[0][2];
8696	  operands[3] = part[0][1];
8697	  operands[4] = part[0][0];
8698	  operands[5] = part[1][2];
8699	  operands[6] = part[1][1];
8700	  operands[7] = part[1][0];
8701	}
8702      else
8703	{
8704	  operands[2] = part[0][1];
8705	  operands[3] = part[0][0];
8706	  operands[5] = part[1][1];
8707	  operands[6] = part[1][0];
8708	}
8709    }
8710  else
8711    {
8712      if (nparts == 3)
8713	{
8714	  operands[2] = part[0][0];
8715	  operands[3] = part[0][1];
8716	  operands[4] = part[0][2];
8717	  operands[5] = part[1][0];
8718	  operands[6] = part[1][1];
8719	  operands[7] = part[1][2];
8720	}
8721      else
8722	{
8723	  operands[2] = part[0][0];
8724	  operands[3] = part[0][1];
8725	  operands[5] = part[1][0];
8726	  operands[6] = part[1][1];
8727	}
8728    }
8729  emit_move_insn (operands[2], operands[5]);
8730  emit_move_insn (operands[3], operands[6]);
8731  if (nparts == 3)
8732    emit_move_insn (operands[4], operands[7]);
8733
8734  return;
8735}
8736
8737void
8738ix86_split_ashldi (operands, scratch)
8739     rtx *operands, scratch;
8740{
8741  rtx low[2], high[2];
8742  int count;
8743
8744  if (GET_CODE (operands[2]) == CONST_INT)
8745    {
8746      split_di (operands, 2, low, high);
8747      count = INTVAL (operands[2]) & 63;
8748
8749      if (count >= 32)
8750	{
8751	  emit_move_insn (high[0], low[1]);
8752	  emit_move_insn (low[0], const0_rtx);
8753
8754	  if (count > 32)
8755	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
8756	}
8757      else
8758	{
8759	  if (!rtx_equal_p (operands[0], operands[1]))
8760	    emit_move_insn (operands[0], operands[1]);
8761	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
8762	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
8763	}
8764    }
8765  else
8766    {
8767      if (!rtx_equal_p (operands[0], operands[1]))
8768	emit_move_insn (operands[0], operands[1]);
8769
8770      split_di (operands, 1, low, high);
8771
8772      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
8773      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
8774
8775      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8776	{
8777	  if (! no_new_pseudos)
8778	    scratch = force_reg (SImode, const0_rtx);
8779	  else
8780	    emit_move_insn (scratch, const0_rtx);
8781
8782	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
8783					  scratch));
8784	}
8785      else
8786	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
8787    }
8788}
8789
8790void
8791ix86_split_ashrdi (operands, scratch)
8792     rtx *operands, scratch;
8793{
8794  rtx low[2], high[2];
8795  int count;
8796
8797  if (GET_CODE (operands[2]) == CONST_INT)
8798    {
8799      split_di (operands, 2, low, high);
8800      count = INTVAL (operands[2]) & 63;
8801
8802      if (count >= 32)
8803	{
8804	  emit_move_insn (low[0], high[1]);
8805
8806	  if (! reload_completed)
8807	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
8808	  else
8809	    {
8810	      emit_move_insn (high[0], low[0]);
8811	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
8812	    }
8813
8814	  if (count > 32)
8815	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
8816	}
8817      else
8818	{
8819	  if (!rtx_equal_p (operands[0], operands[1]))
8820	    emit_move_insn (operands[0], operands[1]);
8821	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8822	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
8823	}
8824    }
8825  else
8826    {
8827      if (!rtx_equal_p (operands[0], operands[1]))
8828	emit_move_insn (operands[0], operands[1]);
8829
8830      split_di (operands, 1, low, high);
8831
8832      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8833      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
8834
8835      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8836	{
8837	  if (! no_new_pseudos)
8838	    scratch = gen_reg_rtx (SImode);
8839	  emit_move_insn (scratch, high[0]);
8840	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
8841	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8842					  scratch));
8843	}
8844      else
8845	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
8846    }
8847}
8848
8849void
8850ix86_split_lshrdi (operands, scratch)
8851     rtx *operands, scratch;
8852{
8853  rtx low[2], high[2];
8854  int count;
8855
8856  if (GET_CODE (operands[2]) == CONST_INT)
8857    {
8858      split_di (operands, 2, low, high);
8859      count = INTVAL (operands[2]) & 63;
8860
8861      if (count >= 32)
8862	{
8863	  emit_move_insn (low[0], high[1]);
8864	  emit_move_insn (high[0], const0_rtx);
8865
8866	  if (count > 32)
8867	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
8868	}
8869      else
8870	{
8871	  if (!rtx_equal_p (operands[0], operands[1]))
8872	    emit_move_insn (operands[0], operands[1]);
8873	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
8874	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
8875	}
8876    }
8877  else
8878    {
8879      if (!rtx_equal_p (operands[0], operands[1]))
8880	emit_move_insn (operands[0], operands[1]);
8881
8882      split_di (operands, 1, low, high);
8883
8884      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
8885      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
8886
8887      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
8888      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
8889	{
8890	  if (! no_new_pseudos)
8891	    scratch = force_reg (SImode, const0_rtx);
8892	  else
8893	    emit_move_insn (scratch, const0_rtx);
8894
8895	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
8896					  scratch));
8897	}
8898      else
8899	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
8900    }
8901}
8902
8903/* Helper function for the string operations below.  Dest VARIABLE whether
8904   it is aligned to VALUE bytes.  If true, jump to the label.  */
8905static rtx
8906ix86_expand_aligntest (variable, value)
8907     rtx variable;
8908     int value;
8909{
8910  rtx label = gen_label_rtx ();
8911  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
8912  if (GET_MODE (variable) == DImode)
8913    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
8914  else
8915    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
8916  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
8917			   1, label);
8918  return label;
8919}
8920
8921/* Adjust COUNTER by the VALUE.  */
8922static void
8923ix86_adjust_counter (countreg, value)
8924     rtx countreg;
8925     HOST_WIDE_INT value;
8926{
8927  if (GET_MODE (countreg) == DImode)
8928    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
8929  else
8930    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
8931}
8932
8933/* Zero extend possibly SImode EXP to Pmode register.  */
8934rtx
8935ix86_zero_extend_to_Pmode (exp)
8936   rtx exp;
8937{
8938  rtx r;
8939  if (GET_MODE (exp) == VOIDmode)
8940    return force_reg (Pmode, exp);
8941  if (GET_MODE (exp) == Pmode)
8942    return copy_to_mode_reg (Pmode, exp);
8943  r = gen_reg_rtx (Pmode);
8944  emit_insn (gen_zero_extendsidi2 (r, exp));
8945  return r;
8946}
8947
8948/* Expand string move (memcpy) operation.  Use i386 string operations when
8949   profitable.  expand_clrstr contains similar code.  */
8950int
8951ix86_expand_movstr (dst, src, count_exp, align_exp)
8952     rtx dst, src, count_exp, align_exp;
8953{
8954  rtx srcreg, destreg, countreg;
8955  enum machine_mode counter_mode;
8956  HOST_WIDE_INT align = 0;
8957  unsigned HOST_WIDE_INT count = 0;
8958  rtx insns;
8959
8960  start_sequence ();
8961
8962  if (GET_CODE (align_exp) == CONST_INT)
8963    align = INTVAL (align_exp);
8964
8965  /* This simple hack avoids all inlining code and simplifies code below.  */
8966  if (!TARGET_ALIGN_STRINGOPS)
8967    align = 64;
8968
8969  if (GET_CODE (count_exp) == CONST_INT)
8970    count = INTVAL (count_exp);
8971
8972  /* Figure out proper mode for counter.  For 32bits it is always SImode,
8973     for 64bits use SImode when possible, otherwise DImode.
8974     Set count to number of bytes copied when known at compile time.  */
8975  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
8976      || x86_64_zero_extended_value (count_exp))
8977    counter_mode = SImode;
8978  else
8979    counter_mode = DImode;
8980
8981  if (counter_mode != SImode && counter_mode != DImode)
8982    abort ();
8983
8984  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
8985  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
8986
8987  emit_insn (gen_cld ());
8988
8989  /* When optimizing for size emit simple rep ; movsb instruction for
8990     counts not divisible by 4.  */
8991
8992  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
8993    {
8994      countreg = ix86_zero_extend_to_Pmode (count_exp);
8995      if (TARGET_64BIT)
8996	emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
8997				        destreg, srcreg, countreg));
8998      else
8999	emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9000				  destreg, srcreg, countreg));
9001    }
9002
9003  /* For constant aligned (or small unaligned) copies use rep movsl
9004     followed by code copying the rest.  For PentiumPro ensure 8 byte
9005     alignment to allow rep movsl acceleration.  */
9006
9007  else if (count != 0
9008	   && (align >= 8
9009	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9010	       || optimize_size || count < (unsigned int) 64))
9011    {
9012      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9013      if (count & ~(size - 1))
9014	{
9015	  countreg = copy_to_mode_reg (counter_mode,
9016				       GEN_INT ((count >> (size == 4 ? 2 : 3))
9017						& (TARGET_64BIT ? -1 : 0x3fffffff)));
9018	  countreg = ix86_zero_extend_to_Pmode (countreg);
9019	  if (size == 4)
9020	    {
9021	      if (TARGET_64BIT)
9022		emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9023					        destreg, srcreg, countreg));
9024	      else
9025		emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9026					  destreg, srcreg, countreg));
9027	    }
9028	  else
9029	    emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9030					    destreg, srcreg, countreg));
9031	}
9032      if (size == 8 && (count & 0x04))
9033	emit_insn (gen_strmovsi (destreg, srcreg));
9034      if (count & 0x02)
9035	emit_insn (gen_strmovhi (destreg, srcreg));
9036      if (count & 0x01)
9037	emit_insn (gen_strmovqi (destreg, srcreg));
9038    }
9039  /* The generic code based on the glibc implementation:
9040     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9041     allowing accelerated copying there)
9042     - copy the data using rep movsl
9043     - copy the rest.  */
9044  else
9045    {
9046      rtx countreg2;
9047      rtx label = NULL;
9048
9049      /* In case we don't know anything about the alignment, default to
9050         library version, since it is usually equally fast and result in
9051         shorter code.  */
9052      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9053	{
9054	  end_sequence ();
9055	  return 0;
9056	}
9057
9058      if (TARGET_SINGLE_STRINGOP)
9059	emit_insn (gen_cld ());
9060
9061      countreg2 = gen_reg_rtx (Pmode);
9062      countreg = copy_to_mode_reg (counter_mode, count_exp);
9063
9064      /* We don't use loops to align destination and to copy parts smaller
9065         than 4 bytes, because gcc is able to optimize such code better (in
9066         the case the destination or the count really is aligned, gcc is often
9067         able to predict the branches) and also it is friendlier to the
9068         hardware branch prediction.
9069
9070         Using loops is benefical for generic case, because we can
9071         handle small counts using the loops.  Many CPUs (such as Athlon)
9072         have large REP prefix setup costs.
9073
9074         This is quite costy.  Maybe we can revisit this decision later or
9075         add some customizability to this code.  */
9076
9077      if (count == 0
9078	  && align < (TARGET_PENTIUMPRO && (count == 0
9079					    || count >= (unsigned int) 260)
9080		      ? 8 : UNITS_PER_WORD))
9081	{
9082	  label = gen_label_rtx ();
9083	  emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9084				   LEU, 0, counter_mode, 1, label);
9085	}
9086      if (align <= 1)
9087	{
9088	  rtx label = ix86_expand_aligntest (destreg, 1);
9089	  emit_insn (gen_strmovqi (destreg, srcreg));
9090	  ix86_adjust_counter (countreg, 1);
9091	  emit_label (label);
9092	  LABEL_NUSES (label) = 1;
9093	}
9094      if (align <= 2)
9095	{
9096	  rtx label = ix86_expand_aligntest (destreg, 2);
9097	  emit_insn (gen_strmovhi (destreg, srcreg));
9098	  ix86_adjust_counter (countreg, 2);
9099	  emit_label (label);
9100	  LABEL_NUSES (label) = 1;
9101	}
9102      if (align <= 4
9103	  && ((TARGET_PENTIUMPRO && (count == 0
9104				     || count >= (unsigned int) 260))
9105	      || TARGET_64BIT))
9106	{
9107	  rtx label = ix86_expand_aligntest (destreg, 4);
9108	  emit_insn (gen_strmovsi (destreg, srcreg));
9109	  ix86_adjust_counter (countreg, 4);
9110	  emit_label (label);
9111	  LABEL_NUSES (label) = 1;
9112	}
9113
9114      if (!TARGET_SINGLE_STRINGOP)
9115	emit_insn (gen_cld ());
9116      if (TARGET_64BIT)
9117	{
9118	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9119				  GEN_INT (3)));
9120	  emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9121					  destreg, srcreg, countreg2));
9122	}
9123      else
9124	{
9125	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9126	  emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9127				    destreg, srcreg, countreg2));
9128	}
9129
9130      if (label)
9131	{
9132	  emit_label (label);
9133	  LABEL_NUSES (label) = 1;
9134	}
9135      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9136	emit_insn (gen_strmovsi (destreg, srcreg));
9137      if ((align <= 4 || count == 0) && TARGET_64BIT)
9138	{
9139	  rtx label = ix86_expand_aligntest (countreg, 4);
9140	  emit_insn (gen_strmovsi (destreg, srcreg));
9141	  emit_label (label);
9142	  LABEL_NUSES (label) = 1;
9143	}
9144      if (align > 2 && count != 0 && (count & 2))
9145	emit_insn (gen_strmovhi (destreg, srcreg));
9146      if (align <= 2 || count == 0)
9147	{
9148	  rtx label = ix86_expand_aligntest (countreg, 2);
9149	  emit_insn (gen_strmovhi (destreg, srcreg));
9150	  emit_label (label);
9151	  LABEL_NUSES (label) = 1;
9152	}
9153      if (align > 1 && count != 0 && (count & 1))
9154	emit_insn (gen_strmovqi (destreg, srcreg));
9155      if (align <= 1 || count == 0)
9156	{
9157	  rtx label = ix86_expand_aligntest (countreg, 1);
9158	  emit_insn (gen_strmovqi (destreg, srcreg));
9159	  emit_label (label);
9160	  LABEL_NUSES (label) = 1;
9161	}
9162    }
9163
9164  insns = get_insns ();
9165  end_sequence ();
9166
9167  ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
9168  emit_insns (insns);
9169  return 1;
9170}
9171
9172/* Expand string clear operation (bzero).  Use i386 string operations when
9173   profitable.  expand_movstr contains similar code.  */
9174int
9175ix86_expand_clrstr (src, count_exp, align_exp)
9176     rtx src, count_exp, align_exp;
9177{
9178  rtx destreg, zeroreg, countreg;
9179  enum machine_mode counter_mode;
9180  HOST_WIDE_INT align = 0;
9181  unsigned HOST_WIDE_INT count = 0;
9182
9183  if (GET_CODE (align_exp) == CONST_INT)
9184    align = INTVAL (align_exp);
9185
9186  /* This simple hack avoids all inlining code and simplifies code below.  */
9187  if (!TARGET_ALIGN_STRINGOPS)
9188    align = 32;
9189
9190  if (GET_CODE (count_exp) == CONST_INT)
9191    count = INTVAL (count_exp);
9192  /* Figure out proper mode for counter.  For 32bits it is always SImode,
9193     for 64bits use SImode when possible, otherwise DImode.
9194     Set count to number of bytes copied when known at compile time.  */
9195  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9196      || x86_64_zero_extended_value (count_exp))
9197    counter_mode = SImode;
9198  else
9199    counter_mode = DImode;
9200
9201  destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9202
9203  emit_insn (gen_cld ());
9204
9205  /* When optimizing for size emit simple rep ; movsb instruction for
9206     counts not divisible by 4.  */
9207
9208  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9209    {
9210      countreg = ix86_zero_extend_to_Pmode (count_exp);
9211      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
9212      if (TARGET_64BIT)
9213	emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
9214				         destreg, countreg));
9215      else
9216	emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
9217				   destreg, countreg));
9218    }
9219  else if (count != 0
9220	   && (align >= 8
9221	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9222	       || optimize_size || count < (unsigned int) 64))
9223    {
9224      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9225      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
9226      if (count & ~(size - 1))
9227	{
9228	  countreg = copy_to_mode_reg (counter_mode,
9229				       GEN_INT ((count >> (size == 4 ? 2 : 3))
9230						& (TARGET_64BIT ? -1 : 0x3fffffff)));
9231	  countreg = ix86_zero_extend_to_Pmode (countreg);
9232	  if (size == 4)
9233	    {
9234	      if (TARGET_64BIT)
9235		emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
9236					         destreg, countreg));
9237	      else
9238		emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
9239					   destreg, countreg));
9240	    }
9241	  else
9242	    emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
9243					     destreg, countreg));
9244	}
9245      if (size == 8 && (count & 0x04))
9246	emit_insn (gen_strsetsi (destreg,
9247				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9248      if (count & 0x02)
9249	emit_insn (gen_strsethi (destreg,
9250				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9251      if (count & 0x01)
9252	emit_insn (gen_strsetqi (destreg,
9253				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9254    }
9255  else
9256    {
9257      rtx countreg2;
9258      rtx label = NULL;
9259
9260      /* In case we don't know anything about the alignment, default to
9261         library version, since it is usually equally fast and result in
9262         shorter code.  */
9263      if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9264	return 0;
9265
9266      if (TARGET_SINGLE_STRINGOP)
9267	emit_insn (gen_cld ());
9268
9269      countreg2 = gen_reg_rtx (Pmode);
9270      countreg = copy_to_mode_reg (counter_mode, count_exp);
9271      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
9272
9273      if (count == 0
9274	  && align < (TARGET_PENTIUMPRO && (count == 0
9275					    || count >= (unsigned int) 260)
9276		      ? 8 : UNITS_PER_WORD))
9277	{
9278	  label = gen_label_rtx ();
9279	  emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
9280				   LEU, 0, counter_mode, 1, label);
9281	}
9282      if (align <= 1)
9283	{
9284	  rtx label = ix86_expand_aligntest (destreg, 1);
9285	  emit_insn (gen_strsetqi (destreg,
9286				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
9287	  ix86_adjust_counter (countreg, 1);
9288	  emit_label (label);
9289	  LABEL_NUSES (label) = 1;
9290	}
9291      if (align <= 2)
9292	{
9293	  rtx label = ix86_expand_aligntest (destreg, 2);
9294	  emit_insn (gen_strsethi (destreg,
9295				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
9296	  ix86_adjust_counter (countreg, 2);
9297	  emit_label (label);
9298	  LABEL_NUSES (label) = 1;
9299	}
9300      if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
9301					      || count >= (unsigned int) 260))
9302	{
9303	  rtx label = ix86_expand_aligntest (destreg, 4);
9304	  emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
9305					     ? gen_rtx_SUBREG (SImode, zeroreg, 0)
9306					     : zeroreg)));
9307	  ix86_adjust_counter (countreg, 4);
9308	  emit_label (label);
9309	  LABEL_NUSES (label) = 1;
9310	}
9311
9312      if (!TARGET_SINGLE_STRINGOP)
9313	emit_insn (gen_cld ());
9314      if (TARGET_64BIT)
9315	{
9316	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9317				  GEN_INT (3)));
9318	  emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
9319					   destreg, countreg2));
9320	}
9321      else
9322	{
9323	  emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9324	  emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
9325				     destreg, countreg2));
9326	}
9327
9328      if (label)
9329	{
9330	  emit_label (label);
9331	  LABEL_NUSES (label) = 1;
9332	}
9333      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9334	emit_insn (gen_strsetsi (destreg,
9335				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
9336      if (TARGET_64BIT && (align <= 4 || count == 0))
9337	{
9338	  rtx label = ix86_expand_aligntest (destreg, 2);
9339	  emit_insn (gen_strsetsi (destreg,
9340				   gen_rtx_SUBREG (SImode, zeroreg, 0)));
9341	  emit_label (label);
9342	  LABEL_NUSES (label) = 1;
9343	}
9344      if (align > 2 && count != 0 && (count & 2))
9345	emit_insn (gen_strsethi (destreg,
9346				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
9347      if (align <= 2 || count == 0)
9348	{
9349	  rtx label = ix86_expand_aligntest (destreg, 2);
9350	  emit_insn (gen_strsethi (destreg,
9351				   gen_rtx_SUBREG (HImode, zeroreg, 0)));
9352	  emit_label (label);
9353	  LABEL_NUSES (label) = 1;
9354	}
9355      if (align > 1 && count != 0 && (count & 1))
9356	emit_insn (gen_strsetqi (destreg,
9357				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
9358      if (align <= 1 || count == 0)
9359	{
9360	  rtx label = ix86_expand_aligntest (destreg, 1);
9361	  emit_insn (gen_strsetqi (destreg,
9362				   gen_rtx_SUBREG (QImode, zeroreg, 0)));
9363	  emit_label (label);
9364	  LABEL_NUSES (label) = 1;
9365	}
9366    }
9367  return 1;
9368}
9369/* Expand strlen.  */
9370int
9371ix86_expand_strlen (out, src, eoschar, align)
9372     rtx out, src, eoschar, align;
9373{
9374  rtx addr, scratch1, scratch2, scratch3, scratch4;
9375
9376  /* The generic case of strlen expander is long.  Avoid it's
9377     expanding unless TARGET_INLINE_ALL_STRINGOPS.  */
9378
9379  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9380      && !TARGET_INLINE_ALL_STRINGOPS
9381      && !optimize_size
9382      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
9383    return 0;
9384
9385  addr = force_reg (Pmode, XEXP (src, 0));
9386  scratch1 = gen_reg_rtx (Pmode);
9387
9388  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
9389      && !optimize_size)
9390    {
9391      /* Well it seems that some optimizer does not combine a call like
9392         foo(strlen(bar), strlen(bar));
9393         when the move and the subtraction is done here.  It does calculate
9394         the length just once when these instructions are done inside of
9395         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
9396         often used and I use one fewer register for the lifetime of
9397         output_strlen_unroll() this is better.  */
9398
9399      emit_move_insn (out, addr);
9400
9401      ix86_expand_strlensi_unroll_1 (out, align);
9402
9403      /* strlensi_unroll_1 returns the address of the zero at the end of
9404         the string, like memchr(), so compute the length by subtracting
9405         the start address.  */
9406      if (TARGET_64BIT)
9407	emit_insn (gen_subdi3 (out, out, addr));
9408      else
9409	emit_insn (gen_subsi3 (out, out, addr));
9410    }
9411  else
9412    {
9413      scratch2 = gen_reg_rtx (Pmode);
9414      scratch3 = gen_reg_rtx (Pmode);
9415      scratch4 = force_reg (Pmode, constm1_rtx);
9416
9417      emit_move_insn (scratch3, addr);
9418      eoschar = force_reg (QImode, eoschar);
9419
9420      emit_insn (gen_cld ());
9421      if (TARGET_64BIT)
9422	{
9423	  emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
9424					 align, scratch4, scratch3));
9425	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
9426	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
9427	}
9428      else
9429	{
9430	  emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
9431				     align, scratch4, scratch3));
9432	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
9433	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
9434	}
9435    }
9436  return 1;
9437}
9438
9439/* Expand the appropriate insns for doing strlen if not just doing
9440   repnz; scasb
9441
9442   out = result, initialized with the start address
9443   align_rtx = alignment of the address.
9444   scratch = scratch register, initialized with the startaddress when
9445	not aligned, otherwise undefined
9446
9447   This is just the body. It needs the initialisations mentioned above and
9448   some address computing at the end.  These things are done in i386.md.  */
9449
9450static void
9451ix86_expand_strlensi_unroll_1 (out, align_rtx)
9452     rtx out, align_rtx;
9453{
9454  int align;
9455  rtx tmp;
9456  rtx align_2_label = NULL_RTX;
9457  rtx align_3_label = NULL_RTX;
9458  rtx align_4_label = gen_label_rtx ();
9459  rtx end_0_label = gen_label_rtx ();
9460  rtx mem;
9461  rtx tmpreg = gen_reg_rtx (SImode);
9462  rtx scratch = gen_reg_rtx (SImode);
9463
9464  align = 0;
9465  if (GET_CODE (align_rtx) == CONST_INT)
9466    align = INTVAL (align_rtx);
9467
9468  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
9469
9470  /* Is there a known alignment and is it less than 4?  */
9471  if (align < 4)
9472    {
9473      rtx scratch1 = gen_reg_rtx (Pmode);
9474      emit_move_insn (scratch1, out);
9475      /* Is there a known alignment and is it not 2? */
9476      if (align != 2)
9477	{
9478	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
9479	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
9480
9481	  /* Leave just the 3 lower bits.  */
9482	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
9483				    NULL_RTX, 0, OPTAB_WIDEN);
9484
9485	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9486				   Pmode, 1, align_4_label);
9487	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
9488				   Pmode, 1, align_2_label);
9489	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
9490				   Pmode, 1, align_3_label);
9491	}
9492      else
9493        {
9494	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
9495	     check if is aligned to 4 - byte.  */
9496
9497	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
9498				    NULL_RTX, 0, OPTAB_WIDEN);
9499
9500	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
9501				   Pmode, 1, align_4_label);
9502        }
9503
9504      mem = gen_rtx_MEM (QImode, out);
9505
9506      /* Now compare the bytes.  */
9507
9508      /* Compare the first n unaligned byte on a byte per byte basis.  */
9509      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
9510			       QImode, 1, end_0_label);
9511
9512      /* Increment the address.  */
9513      if (TARGET_64BIT)
9514	emit_insn (gen_adddi3 (out, out, const1_rtx));
9515      else
9516	emit_insn (gen_addsi3 (out, out, const1_rtx));
9517
9518      /* Not needed with an alignment of 2 */
9519      if (align != 2)
9520	{
9521	  emit_label (align_2_label);
9522
9523	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9524				   end_0_label);
9525
9526	  if (TARGET_64BIT)
9527	    emit_insn (gen_adddi3 (out, out, const1_rtx));
9528	  else
9529	    emit_insn (gen_addsi3 (out, out, const1_rtx));
9530
9531	  emit_label (align_3_label);
9532	}
9533
9534      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
9535			       end_0_label);
9536
9537      if (TARGET_64BIT)
9538	emit_insn (gen_adddi3 (out, out, const1_rtx));
9539      else
9540	emit_insn (gen_addsi3 (out, out, const1_rtx));
9541    }
9542
9543  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
9544     align this loop.  It gives only huge programs, but does not help to
9545     speed up.  */
9546  emit_label (align_4_label);
9547
9548  mem = gen_rtx_MEM (SImode, out);
9549  emit_move_insn (scratch, mem);
9550  if (TARGET_64BIT)
9551    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
9552  else
9553    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
9554
9555  /* This formula yields a nonzero result iff one of the bytes is zero.
9556     This saves three branches inside loop and many cycles.  */
9557
9558  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
9559  emit_insn (gen_one_cmplsi2 (scratch, scratch));
9560  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
9561  emit_insn (gen_andsi3 (tmpreg, tmpreg,
9562			 GEN_INT (trunc_int_for_mode
9563				  (0x80808080, SImode))));
9564  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
9565			   align_4_label);
9566
9567  if (TARGET_CMOVE)
9568    {
9569       rtx reg = gen_reg_rtx (SImode);
9570       rtx reg2 = gen_reg_rtx (Pmode);
9571       emit_move_insn (reg, tmpreg);
9572       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
9573
9574       /* If zero is not in the first two bytes, move two bytes forward.  */
9575       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9576       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9577       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9578       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
9579			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
9580						     reg,
9581						     tmpreg)));
9582       /* Emit lea manually to avoid clobbering of flags.  */
9583       emit_insn (gen_rtx_SET (SImode, reg2,
9584			       gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
9585
9586       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9587       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
9588       emit_insn (gen_rtx_SET (VOIDmode, out,
9589			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
9590						     reg2,
9591						     out)));
9592
9593    }
9594  else
9595    {
9596       rtx end_2_label = gen_label_rtx ();
9597       /* Is zero in the first two bytes? */
9598
9599       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
9600       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
9601       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
9602       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9603                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
9604                            pc_rtx);
9605       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9606       JUMP_LABEL (tmp) = end_2_label;
9607
9608       /* Not in the first two.  Move two bytes forward.  */
9609       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
9610       if (TARGET_64BIT)
9611	 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
9612       else
9613	 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
9614
9615       emit_label (end_2_label);
9616
9617    }
9618
9619  /* Avoid branch in fixing the byte.  */
9620  tmpreg = gen_lowpart (QImode, tmpreg);
9621  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
9622  if (TARGET_64BIT)
9623    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
9624  else
9625    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
9626
9627  emit_label (end_0_label);
9628}
9629
9630/* Clear stack slot assignments remembered from previous functions.
9631   This is called from INIT_EXPANDERS once before RTL is emitted for each
9632   function.  */
9633
9634static void
9635ix86_init_machine_status (p)
9636     struct function *p;
9637{
9638  p->machine = (struct machine_function *)
9639    xcalloc (1, sizeof (struct machine_function));
9640}
9641
9642/* Mark machine specific bits of P for GC.  */
9643static void
9644ix86_mark_machine_status (p)
9645     struct function *p;
9646{
9647  struct machine_function *machine = p->machine;
9648  enum machine_mode mode;
9649  int n;
9650
9651  if (! machine)
9652    return;
9653
9654  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
9655       mode = (enum machine_mode) ((int) mode + 1))
9656    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
9657      ggc_mark_rtx (machine->stack_locals[(int) mode][n]);
9658}
9659
9660static void
9661ix86_free_machine_status (p)
9662     struct function *p;
9663{
9664  free (p->machine);
9665  p->machine = NULL;
9666}
9667
9668/* Return a MEM corresponding to a stack slot with mode MODE.
9669   Allocate a new slot if necessary.
9670
9671   The RTL for a function can have several slots available: N is
9672   which slot to use.  */
9673
9674rtx
9675assign_386_stack_local (mode, n)
9676     enum machine_mode mode;
9677     int n;
9678{
9679  if (n < 0 || n >= MAX_386_STACK_LOCALS)
9680    abort ();
9681
9682  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
9683    ix86_stack_locals[(int) mode][n]
9684      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
9685
9686  return ix86_stack_locals[(int) mode][n];
9687}
9688
9689/* Calculate the length of the memory address in the instruction
9690   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
9691
9692static int
9693memory_address_length (addr)
9694     rtx addr;
9695{
9696  struct ix86_address parts;
9697  rtx base, index, disp;
9698  int len;
9699
9700  if (GET_CODE (addr) == PRE_DEC
9701      || GET_CODE (addr) == POST_INC
9702      || GET_CODE (addr) == PRE_MODIFY
9703      || GET_CODE (addr) == POST_MODIFY)
9704    return 0;
9705
9706  if (! ix86_decompose_address (addr, &parts))
9707    abort ();
9708
9709  base = parts.base;
9710  index = parts.index;
9711  disp = parts.disp;
9712  len = 0;
9713
9714  /* Register Indirect.  */
9715  if (base && !index && !disp)
9716    {
9717      /* Special cases: ebp and esp need the two-byte modrm form.  */
9718      if (addr == stack_pointer_rtx
9719	  || addr == arg_pointer_rtx
9720	  || addr == frame_pointer_rtx
9721	  || addr == hard_frame_pointer_rtx)
9722	len = 1;
9723    }
9724
9725  /* Direct Addressing.  */
9726  else if (disp && !base && !index)
9727    len = 4;
9728
9729  else
9730    {
9731      /* Find the length of the displacement constant.  */
9732      if (disp)
9733	{
9734	  if (GET_CODE (disp) == CONST_INT
9735	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
9736	    len = 1;
9737	  else
9738	    len = 4;
9739	}
9740
9741      /* An index requires the two-byte modrm form.  */
9742      if (index)
9743	len += 1;
9744    }
9745
9746  return len;
9747}
9748
9749/* Compute default value for "length_immediate" attribute.  When SHORTFORM is set
9750   expect that insn have 8bit immediate alternative.  */
9751int
9752ix86_attr_length_immediate_default (insn, shortform)
9753     rtx insn;
9754     int shortform;
9755{
9756  int len = 0;
9757  int i;
9758  extract_insn_cached (insn);
9759  for (i = recog_data.n_operands - 1; i >= 0; --i)
9760    if (CONSTANT_P (recog_data.operand[i]))
9761      {
9762	if (len)
9763	  abort ();
9764	if (shortform
9765	    && GET_CODE (recog_data.operand[i]) == CONST_INT
9766	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
9767	  len = 1;
9768	else
9769	  {
9770	    switch (get_attr_mode (insn))
9771	      {
9772		case MODE_QI:
9773		  len+=1;
9774		  break;
9775		case MODE_HI:
9776		  len+=2;
9777		  break;
9778		case MODE_SI:
9779		  len+=4;
9780		  break;
9781		/* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
9782		case MODE_DI:
9783		  len+=4;
9784		  break;
9785		default:
9786		  fatal_insn ("unknown insn mode", insn);
9787	      }
9788	  }
9789      }
9790  return len;
9791}
9792/* Compute default value for "length_address" attribute.  */
9793int
9794ix86_attr_length_address_default (insn)
9795     rtx insn;
9796{
9797  int i;
9798  extract_insn_cached (insn);
9799  for (i = recog_data.n_operands - 1; i >= 0; --i)
9800    if (GET_CODE (recog_data.operand[i]) == MEM)
9801      {
9802	return memory_address_length (XEXP (recog_data.operand[i], 0));
9803	break;
9804      }
9805  return 0;
9806}
9807
9808/* Return the maximum number of instructions a cpu can issue.  */
9809
9810static int
9811ix86_issue_rate ()
9812{
9813  switch (ix86_cpu)
9814    {
9815    case PROCESSOR_PENTIUM:
9816    case PROCESSOR_K6:
9817      return 2;
9818
9819    case PROCESSOR_PENTIUMPRO:
9820    case PROCESSOR_PENTIUM4:
9821    case PROCESSOR_ATHLON:
9822      return 3;
9823
9824    default:
9825      return 1;
9826    }
9827}
9828
9829/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
9830   by DEP_INSN and nothing set by DEP_INSN.  */
9831
9832static int
9833ix86_flags_dependant (insn, dep_insn, insn_type)
9834     rtx insn, dep_insn;
9835     enum attr_type insn_type;
9836{
9837  rtx set, set2;
9838
9839  /* Simplify the test for uninteresting insns.  */
9840  if (insn_type != TYPE_SETCC
9841      && insn_type != TYPE_ICMOV
9842      && insn_type != TYPE_FCMOV
9843      && insn_type != TYPE_IBR)
9844    return 0;
9845
9846  if ((set = single_set (dep_insn)) != 0)
9847    {
9848      set = SET_DEST (set);
9849      set2 = NULL_RTX;
9850    }
9851  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
9852	   && XVECLEN (PATTERN (dep_insn), 0) == 2
9853	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
9854	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
9855    {
9856      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9857      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
9858    }
9859  else
9860    return 0;
9861
9862  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
9863    return 0;
9864
9865  /* This test is true if the dependent insn reads the flags but
9866     not any other potentially set register.  */
9867  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
9868    return 0;
9869
9870  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
9871    return 0;
9872
9873  return 1;
9874}
9875
9876/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
9877   address with operands set by DEP_INSN.  */
9878
9879static int
9880ix86_agi_dependant (insn, dep_insn, insn_type)
9881     rtx insn, dep_insn;
9882     enum attr_type insn_type;
9883{
9884  rtx addr;
9885
9886  if (insn_type == TYPE_LEA
9887      && TARGET_PENTIUM)
9888    {
9889      addr = PATTERN (insn);
9890      if (GET_CODE (addr) == SET)
9891	;
9892      else if (GET_CODE (addr) == PARALLEL
9893	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
9894	addr = XVECEXP (addr, 0, 0);
9895      else
9896	abort ();
9897      addr = SET_SRC (addr);
9898    }
9899  else
9900    {
9901      int i;
9902      extract_insn_cached (insn);
9903      for (i = recog_data.n_operands - 1; i >= 0; --i)
9904	if (GET_CODE (recog_data.operand[i]) == MEM)
9905	  {
9906	    addr = XEXP (recog_data.operand[i], 0);
9907	    goto found;
9908	  }
9909      return 0;
9910    found:;
9911    }
9912
9913  return modified_in_p (addr, dep_insn);
9914}
9915
9916static int
9917ix86_adjust_cost (insn, link, dep_insn, cost)
9918     rtx insn, link, dep_insn;
9919     int cost;
9920{
9921  enum attr_type insn_type, dep_insn_type;
9922  enum attr_memory memory, dep_memory;
9923  rtx set, set2;
9924  int dep_insn_code_number;
9925
9926  /* Anti and output depenancies have zero cost on all CPUs.  */
9927  if (REG_NOTE_KIND (link) != 0)
9928    return 0;
9929
9930  dep_insn_code_number = recog_memoized (dep_insn);
9931
9932  /* If we can't recognize the insns, we can't really do anything.  */
9933  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
9934    return cost;
9935
9936  insn_type = get_attr_type (insn);
9937  dep_insn_type = get_attr_type (dep_insn);
9938
9939  switch (ix86_cpu)
9940    {
9941    case PROCESSOR_PENTIUM:
9942      /* Address Generation Interlock adds a cycle of latency.  */
9943      if (ix86_agi_dependant (insn, dep_insn, insn_type))
9944	cost += 1;
9945
9946      /* ??? Compares pair with jump/setcc.  */
9947      if (ix86_flags_dependant (insn, dep_insn, insn_type))
9948	cost = 0;
9949
9950      /* Floating point stores require value to be ready one cycle ealier.  */
9951      if (insn_type == TYPE_FMOV
9952	  && get_attr_memory (insn) == MEMORY_STORE
9953	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
9954	cost += 1;
9955      break;
9956
9957    case PROCESSOR_PENTIUMPRO:
9958      memory = get_attr_memory (insn);
9959      dep_memory = get_attr_memory (dep_insn);
9960
9961      /* Since we can't represent delayed latencies of load+operation,
9962	 increase the cost here for non-imov insns.  */
9963      if (dep_insn_type != TYPE_IMOV
9964          && dep_insn_type != TYPE_FMOV
9965          && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
9966	cost += 1;
9967
9968      /* INT->FP conversion is expensive.  */
9969      if (get_attr_fp_int_src (dep_insn))
9970	cost += 5;
9971
9972      /* There is one cycle extra latency between an FP op and a store.  */
9973      if (insn_type == TYPE_FMOV
9974	  && (set = single_set (dep_insn)) != NULL_RTX
9975	  && (set2 = single_set (insn)) != NULL_RTX
9976	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
9977	  && GET_CODE (SET_DEST (set2)) == MEM)
9978	cost += 1;
9979
9980      /* Show ability of reorder buffer to hide latency of load by executing
9981	 in parallel with previous instruction in case
9982	 previous instruction is not needed to compute the address.  */
9983      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
9984	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
9985 	{
9986	  /* Claim moves to take one cycle, as core can issue one load
9987	     at time and the next load can start cycle later.  */
9988	  if (dep_insn_type == TYPE_IMOV
9989	      || dep_insn_type == TYPE_FMOV)
9990	    cost = 1;
9991	  else if (cost > 1)
9992	    cost--;
9993	}
9994      break;
9995
9996    case PROCESSOR_K6:
9997      memory = get_attr_memory (insn);
9998      dep_memory = get_attr_memory (dep_insn);
9999      /* The esp dependency is resolved before the instruction is really
10000         finished.  */
10001      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10002	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10003	return 1;
10004
10005      /* Since we can't represent delayed latencies of load+operation,
10006	 increase the cost here for non-imov insns.  */
10007      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10008	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10009
10010      /* INT->FP conversion is expensive.  */
10011      if (get_attr_fp_int_src (dep_insn))
10012	cost += 5;
10013
10014      /* Show ability of reorder buffer to hide latency of load by executing
10015	 in parallel with previous instruction in case
10016	 previous instruction is not needed to compute the address.  */
10017      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10018	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
10019 	{
10020	  /* Claim moves to take one cycle, as core can issue one load
10021	     at time and the next load can start cycle later.  */
10022	  if (dep_insn_type == TYPE_IMOV
10023	      || dep_insn_type == TYPE_FMOV)
10024	    cost = 1;
10025	  else if (cost > 2)
10026	    cost -= 2;
10027	  else
10028	    cost = 1;
10029	}
10030      break;
10031
10032    case PROCESSOR_ATHLON:
10033      memory = get_attr_memory (insn);
10034      dep_memory = get_attr_memory (dep_insn);
10035
10036      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10037	{
10038	  if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10039	    cost += 2;
10040	  else
10041	    cost += 3;
10042        }
10043      /* Show ability of reorder buffer to hide latency of load by executing
10044	 in parallel with previous instruction in case
10045	 previous instruction is not needed to compute the address.  */
10046      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10047	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
10048 	{
10049	  /* Claim moves to take one cycle, as core can issue one load
10050	     at time and the next load can start cycle later.  */
10051	  if (dep_insn_type == TYPE_IMOV
10052	      || dep_insn_type == TYPE_FMOV)
10053	    cost = 0;
10054	  else if (cost >= 3)
10055	    cost -= 3;
10056	  else
10057	    cost = 0;
10058	}
10059
10060    default:
10061      break;
10062    }
10063
10064  return cost;
10065}
10066
10067static union
10068{
10069  struct ppro_sched_data
10070  {
10071    rtx decode[3];
10072    int issued_this_cycle;
10073  } ppro;
10074} ix86_sched_data;
10075
10076static int
10077ix86_safe_length (insn)
10078     rtx insn;
10079{
10080  if (recog_memoized (insn) >= 0)
10081    return get_attr_length (insn);
10082  else
10083    return 128;
10084}
10085
10086static int
10087ix86_safe_length_prefix (insn)
10088     rtx insn;
10089{
10090  if (recog_memoized (insn) >= 0)
10091    return get_attr_length (insn);
10092  else
10093    return 0;
10094}
10095
10096static enum attr_memory
10097ix86_safe_memory (insn)
10098     rtx insn;
10099{
10100  if (recog_memoized (insn) >= 0)
10101    return get_attr_memory (insn);
10102  else
10103    return MEMORY_UNKNOWN;
10104}
10105
10106static enum attr_pent_pair
10107ix86_safe_pent_pair (insn)
10108     rtx insn;
10109{
10110  if (recog_memoized (insn) >= 0)
10111    return get_attr_pent_pair (insn);
10112  else
10113    return PENT_PAIR_NP;
10114}
10115
10116static enum attr_ppro_uops
10117ix86_safe_ppro_uops (insn)
10118     rtx insn;
10119{
10120  if (recog_memoized (insn) >= 0)
10121    return get_attr_ppro_uops (insn);
10122  else
10123    return PPRO_UOPS_MANY;
10124}
10125
10126static void
10127ix86_dump_ppro_packet (dump)
10128     FILE *dump;
10129{
10130  if (ix86_sched_data.ppro.decode[0])
10131    {
10132      fprintf (dump, "PPRO packet: %d",
10133	       INSN_UID (ix86_sched_data.ppro.decode[0]));
10134      if (ix86_sched_data.ppro.decode[1])
10135	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10136      if (ix86_sched_data.ppro.decode[2])
10137	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10138      fputc ('\n', dump);
10139    }
10140}
10141
10142/* We're beginning a new block.  Initialize data structures as necessary.  */
10143
10144static void
10145ix86_sched_init (dump, sched_verbose, veclen)
10146     FILE *dump ATTRIBUTE_UNUSED;
10147     int sched_verbose ATTRIBUTE_UNUSED;
10148     int veclen ATTRIBUTE_UNUSED;
10149{
10150  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
10151}
10152
10153/* Shift INSN to SLOT, and shift everything else down.  */
10154
10155static void
10156ix86_reorder_insn (insnp, slot)
10157     rtx *insnp, *slot;
10158{
10159  if (insnp != slot)
10160    {
10161      rtx insn = *insnp;
10162      do
10163	insnp[0] = insnp[1];
10164      while (++insnp != slot);
10165      *insnp = insn;
10166    }
10167}
10168
10169/* Find an instruction with given pairability and minimal amount of cycles
10170   lost by the fact that the CPU waits for both pipelines to finish before
10171   reading next instructions.  Also take care that both instructions together
10172   can not exceed 7 bytes.  */
10173
10174static rtx *
10175ix86_pent_find_pair (e_ready, ready, type, first)
10176     rtx *e_ready;
10177     rtx *ready;
10178     enum attr_pent_pair type;
10179     rtx first;
10180{
10181  int mincycles, cycles;
10182  enum attr_pent_pair tmp;
10183  enum attr_memory memory;
10184  rtx *insnp, *bestinsnp = NULL;
10185
10186  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
10187    return NULL;
10188
10189  memory = ix86_safe_memory (first);
10190  cycles = result_ready_cost (first);
10191  mincycles = INT_MAX;
10192
10193  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
10194    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
10195	&& ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
10196      {
10197	enum attr_memory second_memory;
10198	int secondcycles, currentcycles;
10199
10200	second_memory = ix86_safe_memory (*insnp);
10201	secondcycles = result_ready_cost (*insnp);
10202	currentcycles = abs (cycles - secondcycles);
10203
10204	if (secondcycles >= 1 && cycles >= 1)
10205	  {
10206	    /* Two read/modify/write instructions together takes two
10207	       cycles longer.  */
10208	    if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
10209	      currentcycles += 2;
10210
10211	    /* Read modify/write instruction followed by read/modify
10212	       takes one cycle longer.  */
10213	    if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
10214	        && tmp != PENT_PAIR_UV
10215	        && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
10216	      currentcycles += 1;
10217	  }
10218	if (currentcycles < mincycles)
10219	  bestinsnp = insnp, mincycles = currentcycles;
10220      }
10221
10222  return bestinsnp;
10223}
10224
10225/* Subroutines of ix86_sched_reorder.  */
10226
10227static void
10228ix86_sched_reorder_pentium (ready, e_ready)
10229     rtx *ready;
10230     rtx *e_ready;
10231{
10232  enum attr_pent_pair pair1, pair2;
10233  rtx *insnp;
10234
10235  /* This wouldn't be necessary if Haifa knew that static insn ordering
10236     is important to which pipe an insn is issued to.  So we have to make
10237     some minor rearrangements.  */
10238
10239  pair1 = ix86_safe_pent_pair (*e_ready);
10240
10241  /* If the first insn is non-pairable, let it be.  */
10242  if (pair1 == PENT_PAIR_NP)
10243    return;
10244
10245  pair2 = PENT_PAIR_NP;
10246  insnp = 0;
10247
10248  /* If the first insn is UV or PV pairable, search for a PU
10249     insn to go with.  */
10250  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
10251    {
10252      insnp = ix86_pent_find_pair (e_ready-1, ready,
10253				   PENT_PAIR_PU, *e_ready);
10254      if (insnp)
10255	pair2 = PENT_PAIR_PU;
10256    }
10257
10258  /* If the first insn is PU or UV pairable, search for a PV
10259     insn to go with.  */
10260  if (pair2 == PENT_PAIR_NP
10261      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
10262    {
10263      insnp = ix86_pent_find_pair (e_ready-1, ready,
10264				   PENT_PAIR_PV, *e_ready);
10265      if (insnp)
10266	pair2 = PENT_PAIR_PV;
10267    }
10268
10269  /* If the first insn is pairable, search for a UV
10270     insn to go with.  */
10271  if (pair2 == PENT_PAIR_NP)
10272    {
10273      insnp = ix86_pent_find_pair (e_ready-1, ready,
10274				   PENT_PAIR_UV, *e_ready);
10275      if (insnp)
10276	pair2 = PENT_PAIR_UV;
10277    }
10278
10279  if (pair2 == PENT_PAIR_NP)
10280    return;
10281
10282  /* Found something!  Decide if we need to swap the order.  */
10283  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
10284      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
10285	  && ix86_safe_memory (*e_ready) == MEMORY_BOTH
10286	  && ix86_safe_memory (*insnp) == MEMORY_LOAD))
10287    ix86_reorder_insn (insnp, e_ready);
10288  else
10289    ix86_reorder_insn (insnp, e_ready - 1);
10290}
10291
10292static void
10293ix86_sched_reorder_ppro (ready, e_ready)
10294     rtx *ready;
10295     rtx *e_ready;
10296{
10297  rtx decode[3];
10298  enum attr_ppro_uops cur_uops;
10299  int issued_this_cycle;
10300  rtx *insnp;
10301  int i;
10302
10303  /* At this point .ppro.decode contains the state of the three
10304     decoders from last "cycle".  That is, those insns that were
10305     actually independent.  But here we're scheduling for the
10306     decoder, and we may find things that are decodable in the
10307     same cycle.  */
10308
10309  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
10310  issued_this_cycle = 0;
10311
10312  insnp = e_ready;
10313  cur_uops = ix86_safe_ppro_uops (*insnp);
10314
10315  /* If the decoders are empty, and we've a complex insn at the
10316     head of the priority queue, let it issue without complaint.  */
10317  if (decode[0] == NULL)
10318    {
10319      if (cur_uops == PPRO_UOPS_MANY)
10320	{
10321	  decode[0] = *insnp;
10322	  goto ppro_done;
10323	}
10324
10325      /* Otherwise, search for a 2-4 uop unsn to issue.  */
10326      while (cur_uops != PPRO_UOPS_FEW)
10327	{
10328	  if (insnp == ready)
10329	    break;
10330	  cur_uops = ix86_safe_ppro_uops (*--insnp);
10331	}
10332
10333      /* If so, move it to the head of the line.  */
10334      if (cur_uops == PPRO_UOPS_FEW)
10335	ix86_reorder_insn (insnp, e_ready);
10336
10337      /* Issue the head of the queue.  */
10338      issued_this_cycle = 1;
10339      decode[0] = *e_ready--;
10340    }
10341
10342  /* Look for simple insns to fill in the other two slots.  */
10343  for (i = 1; i < 3; ++i)
10344    if (decode[i] == NULL)
10345      {
10346	if (ready >= e_ready)
10347	  goto ppro_done;
10348
10349	insnp = e_ready;
10350	cur_uops = ix86_safe_ppro_uops (*insnp);
10351	while (cur_uops != PPRO_UOPS_ONE)
10352	  {
10353	    if (insnp == ready)
10354	      break;
10355	    cur_uops = ix86_safe_ppro_uops (*--insnp);
10356	  }
10357
10358	/* Found one.  Move it to the head of the queue and issue it.  */
10359	if (cur_uops == PPRO_UOPS_ONE)
10360	  {
10361	    ix86_reorder_insn (insnp, e_ready);
10362	    decode[i] = *e_ready--;
10363	    issued_this_cycle++;
10364	    continue;
10365	  }
10366
10367	/* ??? Didn't find one.  Ideally, here we would do a lazy split
10368	   of 2-uop insns, issue one and queue the other.  */
10369      }
10370
10371 ppro_done:
10372  if (issued_this_cycle == 0)
10373    issued_this_cycle = 1;
10374  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
10375}
10376
10377/* We are about to being issuing insns for this clock cycle.
10378   Override the default sort algorithm to better slot instructions.  */
10379static int
10380ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
10381     FILE *dump ATTRIBUTE_UNUSED;
10382     int sched_verbose ATTRIBUTE_UNUSED;
10383     rtx *ready;
10384     int *n_readyp;
10385     int clock_var ATTRIBUTE_UNUSED;
10386{
10387  int n_ready = *n_readyp;
10388  rtx *e_ready = ready + n_ready - 1;
10389
10390  if (n_ready < 2)
10391    goto out;
10392
10393  switch (ix86_cpu)
10394    {
10395    default:
10396      break;
10397
10398    case PROCESSOR_PENTIUM:
10399      ix86_sched_reorder_pentium (ready, e_ready);
10400      break;
10401
10402    case PROCESSOR_PENTIUMPRO:
10403      ix86_sched_reorder_ppro (ready, e_ready);
10404      break;
10405    }
10406
10407out:
10408  return ix86_issue_rate ();
10409}
10410
10411/* We are about to issue INSN.  Return the number of insns left on the
10412   ready queue that can be issued this cycle.  */
10413
10414static int
10415ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
10416     FILE *dump;
10417     int sched_verbose;
10418     rtx insn;
10419     int can_issue_more;
10420{
10421  int i;
10422  switch (ix86_cpu)
10423    {
10424    default:
10425      return can_issue_more - 1;
10426
10427    case PROCESSOR_PENTIUMPRO:
10428      {
10429	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
10430
10431	if (uops == PPRO_UOPS_MANY)
10432	  {
10433	    if (sched_verbose)
10434	      ix86_dump_ppro_packet (dump);
10435	    ix86_sched_data.ppro.decode[0] = insn;
10436	    ix86_sched_data.ppro.decode[1] = NULL;
10437	    ix86_sched_data.ppro.decode[2] = NULL;
10438	    if (sched_verbose)
10439	      ix86_dump_ppro_packet (dump);
10440	    ix86_sched_data.ppro.decode[0] = NULL;
10441	  }
10442	else if (uops == PPRO_UOPS_FEW)
10443	  {
10444	    if (sched_verbose)
10445	      ix86_dump_ppro_packet (dump);
10446	    ix86_sched_data.ppro.decode[0] = insn;
10447	    ix86_sched_data.ppro.decode[1] = NULL;
10448	    ix86_sched_data.ppro.decode[2] = NULL;
10449	  }
10450	else
10451	  {
10452	    for (i = 0; i < 3; ++i)
10453	      if (ix86_sched_data.ppro.decode[i] == NULL)
10454		{
10455		  ix86_sched_data.ppro.decode[i] = insn;
10456		  break;
10457		}
10458	    if (i == 3)
10459	      abort ();
10460	    if (i == 2)
10461	      {
10462	        if (sched_verbose)
10463	          ix86_dump_ppro_packet (dump);
10464		ix86_sched_data.ppro.decode[0] = NULL;
10465		ix86_sched_data.ppro.decode[1] = NULL;
10466		ix86_sched_data.ppro.decode[2] = NULL;
10467	      }
10468	  }
10469      }
10470      return --ix86_sched_data.ppro.issued_this_cycle;
10471    }
10472}
10473
10474/* Walk through INSNS and look for MEM references whose address is DSTREG or
10475   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
10476   appropriate.  */
10477
10478void
10479ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
10480     rtx insns;
10481     rtx dstref, srcref, dstreg, srcreg;
10482{
10483  rtx insn;
10484
10485  for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
10486    if (INSN_P (insn))
10487      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
10488				 dstreg, srcreg);
10489}
10490
10491/* Subroutine of above to actually do the updating by recursively walking
10492   the rtx.  */
10493
10494static void
10495ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
10496     rtx x;
10497     rtx dstref, srcref, dstreg, srcreg;
10498{
10499  enum rtx_code code = GET_CODE (x);
10500  const char *format_ptr = GET_RTX_FORMAT (code);
10501  int i, j;
10502
10503  if (code == MEM && XEXP (x, 0) == dstreg)
10504    MEM_COPY_ATTRIBUTES (x, dstref);
10505  else if (code == MEM && XEXP (x, 0) == srcreg)
10506    MEM_COPY_ATTRIBUTES (x, srcref);
10507
10508  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
10509    {
10510      if (*format_ptr == 'e')
10511	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
10512				   dstreg, srcreg);
10513      else if (*format_ptr == 'E')
10514	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10515	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
10516				     dstreg, srcreg);
10517    }
10518}
10519
10520/* Compute the alignment given to a constant that is being placed in memory.
10521   EXP is the constant and ALIGN is the alignment that the object would
10522   ordinarily have.
10523   The value of this function is used instead of that alignment to align
10524   the object.  */
10525
10526int
10527ix86_constant_alignment (exp, align)
10528     tree exp;
10529     int align;
10530{
10531  if (TREE_CODE (exp) == REAL_CST)
10532    {
10533      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
10534	return 64;
10535      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
10536	return 128;
10537    }
10538  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
10539	   && align < 256)
10540    return 256;
10541
10542  return align;
10543}
10544
10545/* Compute the alignment for a static variable.
10546   TYPE is the data type, and ALIGN is the alignment that
10547   the object would ordinarily have.  The value of this function is used
10548   instead of that alignment to align the object.  */
10549
10550int
10551ix86_data_alignment (type, align)
10552     tree type;
10553     int align;
10554{
10555  if (AGGREGATE_TYPE_P (type)
10556       && TYPE_SIZE (type)
10557       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10558       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
10559	   || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
10560    return 256;
10561
10562  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10563     to 16byte boundary.  */
10564  if (TARGET_64BIT)
10565    {
10566      if (AGGREGATE_TYPE_P (type)
10567	   && TYPE_SIZE (type)
10568	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10569	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
10570	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10571	return 128;
10572    }
10573
10574  if (TREE_CODE (type) == ARRAY_TYPE)
10575    {
10576      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10577	return 64;
10578      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10579	return 128;
10580    }
10581  else if (TREE_CODE (type) == COMPLEX_TYPE)
10582    {
10583
10584      if (TYPE_MODE (type) == DCmode && align < 64)
10585	return 64;
10586      if (TYPE_MODE (type) == XCmode && align < 128)
10587	return 128;
10588    }
10589  else if ((TREE_CODE (type) == RECORD_TYPE
10590	    || TREE_CODE (type) == UNION_TYPE
10591	    || TREE_CODE (type) == QUAL_UNION_TYPE)
10592	   && TYPE_FIELDS (type))
10593    {
10594      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10595	return 64;
10596      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10597	return 128;
10598    }
10599  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10600	   || TREE_CODE (type) == INTEGER_TYPE)
10601    {
10602      if (TYPE_MODE (type) == DFmode && align < 64)
10603	return 64;
10604      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10605	return 128;
10606    }
10607
10608  return align;
10609}
10610
10611/* Compute the alignment for a local variable.
10612   TYPE is the data type, and ALIGN is the alignment that
10613   the object would ordinarily have.  The value of this macro is used
10614   instead of that alignment to align the object.  */
10615
10616int
10617ix86_local_alignment (type, align)
10618     tree type;
10619     int align;
10620{
10621  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10622     to 16byte boundary.  */
10623  if (TARGET_64BIT)
10624    {
10625      if (AGGREGATE_TYPE_P (type)
10626	   && TYPE_SIZE (type)
10627	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
10628	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
10629	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
10630	return 128;
10631    }
10632  if (TREE_CODE (type) == ARRAY_TYPE)
10633    {
10634      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
10635	return 64;
10636      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
10637	return 128;
10638    }
10639  else if (TREE_CODE (type) == COMPLEX_TYPE)
10640    {
10641      if (TYPE_MODE (type) == DCmode && align < 64)
10642	return 64;
10643      if (TYPE_MODE (type) == XCmode && align < 128)
10644	return 128;
10645    }
10646  else if ((TREE_CODE (type) == RECORD_TYPE
10647	    || TREE_CODE (type) == UNION_TYPE
10648	    || TREE_CODE (type) == QUAL_UNION_TYPE)
10649	   && TYPE_FIELDS (type))
10650    {
10651      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
10652	return 64;
10653      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
10654	return 128;
10655    }
10656  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
10657	   || TREE_CODE (type) == INTEGER_TYPE)
10658    {
10659
10660      if (TYPE_MODE (type) == DFmode && align < 64)
10661	return 64;
10662      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
10663	return 128;
10664    }
10665  return align;
10666}
10667
10668/* Emit RTL insns to initialize the variable parts of a trampoline.
10669   FNADDR is an RTX for the address of the function's pure code.
10670   CXT is an RTX for the static chain value for the function.  */
10671void
10672x86_initialize_trampoline (tramp, fnaddr, cxt)
10673     rtx tramp, fnaddr, cxt;
10674{
10675  if (!TARGET_64BIT)
10676    {
10677      /* Compute offset from the end of the jmp to the target function.  */
10678      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
10679			       plus_constant (tramp, 10),
10680			       NULL_RTX, 1, OPTAB_DIRECT);
10681      emit_move_insn (gen_rtx_MEM (QImode, tramp),
10682		      GEN_INT (trunc_int_for_mode (0xb9, QImode)));
10683      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
10684      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
10685		      GEN_INT (trunc_int_for_mode (0xe9, QImode)));
10686      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
10687    }
10688  else
10689    {
10690      int offset = 0;
10691      /* Try to load address using shorter movl instead of movabs.
10692         We may want to support movq for kernel mode, but kernel does not use
10693         trampolines at the moment.  */
10694      if (x86_64_zero_extended_value (fnaddr))
10695	{
10696	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
10697	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10698			  GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
10699	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
10700			  gen_lowpart (SImode, fnaddr));
10701	  offset += 6;
10702	}
10703      else
10704	{
10705	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10706			  GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
10707	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10708			  fnaddr);
10709	  offset += 10;
10710	}
10711      /* Load static chain using movabs to r10.  */
10712      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10713		      GEN_INT (trunc_int_for_mode (0xba49, HImode)));
10714      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
10715		      cxt);
10716      offset += 10;
10717      /* Jump to the r11 */
10718      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
10719		      GEN_INT (trunc_int_for_mode (0xff49, HImode)));
10720      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
10721		      GEN_INT (trunc_int_for_mode (0xe3, QImode)));
10722      offset += 3;
10723      if (offset > TRAMPOLINE_SIZE)
10724	abort ();
10725    }
10726}
10727
10728#define def_builtin(MASK, NAME, TYPE, CODE)				\
10729do {									\
10730  if ((MASK) & target_flags)						\
10731    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL);	\
10732} while (0)
10733
10734struct builtin_description
10735{
10736  const unsigned int mask;
10737  const enum insn_code icode;
10738  const char *const name;
10739  const enum ix86_builtins code;
10740  const enum rtx_code comparison;
10741  const unsigned int flag;
10742};
10743
10744static const struct builtin_description bdesc_comi[] =
10745{
10746  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
10747  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
10748  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
10749  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
10750  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
10751  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
10752  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
10753  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
10754  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
10755  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
10756  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
10757  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }
10758};
10759
10760static const struct builtin_description bdesc_2arg[] =
10761{
10762  /* SSE */
10763  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
10764  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
10765  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
10766  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
10767  { MASK_SSE, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
10768  { MASK_SSE, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
10769  { MASK_SSE, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
10770  { MASK_SSE, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
10771
10772  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
10773  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
10774  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
10775  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
10776  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
10777  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
10778  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
10779  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
10780  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
10781  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
10782  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
10783  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
10784  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
10785  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
10786  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
10787  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
10788  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
10789  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
10790  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
10791  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
10792  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
10793  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
10794  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
10795  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
10796
10797  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
10798  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
10799  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
10800  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
10801
10802  { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
10803  { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
10804  { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
10805  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
10806  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
10807
10808  /* MMX */
10809  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
10810  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
10811  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
10812  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
10813  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
10814  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
10815
10816  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
10817  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
10818  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
10819  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
10820  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
10821  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
10822  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
10823  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
10824
10825  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
10826  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
10827  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
10828
10829  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
10830  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
10831  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
10832  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
10833
10834  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
10835  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
10836
10837  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
10838  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
10839  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
10840  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
10841  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
10842  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
10843
10844  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
10845  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
10846  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
10847  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
10848
10849  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
10850  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
10851  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
10852  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
10853  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
10854  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
10855
10856  /* Special.  */
10857  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
10858  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
10859  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
10860
10861  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
10862  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
10863
10864  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
10865  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
10866  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
10867  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
10868  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
10869  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
10870
10871  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
10872  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
10873  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
10874  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
10875  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
10876  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
10877
10878  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
10879  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
10880  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
10881  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
10882
10883  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
10884  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }
10885
10886};
10887
10888static const struct builtin_description bdesc_1arg[] =
10889{
10890  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
10891  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
10892
10893  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
10894  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
10895  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
10896
10897  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
10898  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
10899  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
10900  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 }
10901
10902};
10903
10904void
10905ix86_init_builtins ()
10906{
10907  if (TARGET_MMX)
10908    ix86_init_mmx_sse_builtins ();
10909}
10910
10911/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
10912   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
10913   builtins.  */
10914static void
10915ix86_init_mmx_sse_builtins ()
10916{
10917  const struct builtin_description * d;
10918  size_t i;
10919  tree endlink = void_list_node;
10920
10921  tree pchar_type_node = build_pointer_type (char_type_node);
10922  tree pfloat_type_node = build_pointer_type (float_type_node);
10923  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
10924  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
10925
10926  /* Comparisons.  */
10927  tree int_ftype_v4sf_v4sf
10928    = build_function_type (integer_type_node,
10929			   tree_cons (NULL_TREE, V4SF_type_node,
10930				      tree_cons (NULL_TREE,
10931						 V4SF_type_node,
10932						 endlink)));
10933  tree v4si_ftype_v4sf_v4sf
10934    = build_function_type (V4SI_type_node,
10935			   tree_cons (NULL_TREE, V4SF_type_node,
10936				      tree_cons (NULL_TREE,
10937						 V4SF_type_node,
10938						 endlink)));
10939  /* MMX/SSE/integer conversions.  */
10940  tree int_ftype_v4sf
10941    = build_function_type (integer_type_node,
10942			   tree_cons (NULL_TREE, V4SF_type_node,
10943				      endlink));
10944  tree int_ftype_v8qi
10945    = build_function_type (integer_type_node,
10946			   tree_cons (NULL_TREE, V8QI_type_node,
10947				      endlink));
10948  tree v4sf_ftype_v4sf_int
10949    = build_function_type (V4SF_type_node,
10950			   tree_cons (NULL_TREE, V4SF_type_node,
10951				      tree_cons (NULL_TREE, integer_type_node,
10952						 endlink)));
10953  tree v4sf_ftype_v4sf_v2si
10954    = build_function_type (V4SF_type_node,
10955			   tree_cons (NULL_TREE, V4SF_type_node,
10956				      tree_cons (NULL_TREE, V2SI_type_node,
10957						 endlink)));
10958  tree int_ftype_v4hi_int
10959    = build_function_type (integer_type_node,
10960			   tree_cons (NULL_TREE, V4HI_type_node,
10961				      tree_cons (NULL_TREE, integer_type_node,
10962						 endlink)));
10963  tree v4hi_ftype_v4hi_int_int
10964    = build_function_type (V4HI_type_node,
10965			   tree_cons (NULL_TREE, V4HI_type_node,
10966				      tree_cons (NULL_TREE, integer_type_node,
10967						 tree_cons (NULL_TREE,
10968							    integer_type_node,
10969							    endlink))));
10970  /* Miscellaneous.  */
10971  tree v8qi_ftype_v4hi_v4hi
10972    = build_function_type (V8QI_type_node,
10973			   tree_cons (NULL_TREE, V4HI_type_node,
10974				      tree_cons (NULL_TREE, V4HI_type_node,
10975						 endlink)));
10976  tree v4hi_ftype_v2si_v2si
10977    = build_function_type (V4HI_type_node,
10978			   tree_cons (NULL_TREE, V2SI_type_node,
10979				      tree_cons (NULL_TREE, V2SI_type_node,
10980						 endlink)));
10981  tree v4sf_ftype_v4sf_v4sf_int
10982    = build_function_type (V4SF_type_node,
10983			   tree_cons (NULL_TREE, V4SF_type_node,
10984				      tree_cons (NULL_TREE, V4SF_type_node,
10985						 tree_cons (NULL_TREE,
10986							    integer_type_node,
10987							    endlink))));
10988  tree v4hi_ftype_v8qi_v8qi
10989    = build_function_type (V4HI_type_node,
10990			   tree_cons (NULL_TREE, V8QI_type_node,
10991				      tree_cons (NULL_TREE, V8QI_type_node,
10992						 endlink)));
10993  tree v2si_ftype_v4hi_v4hi
10994    = build_function_type (V2SI_type_node,
10995			   tree_cons (NULL_TREE, V4HI_type_node,
10996				      tree_cons (NULL_TREE, V4HI_type_node,
10997						 endlink)));
10998  tree v4hi_ftype_v4hi_int
10999    = build_function_type (V4HI_type_node,
11000			   tree_cons (NULL_TREE, V4HI_type_node,
11001				      tree_cons (NULL_TREE, integer_type_node,
11002						 endlink)));
11003  tree v4hi_ftype_v4hi_di
11004    = build_function_type (V4HI_type_node,
11005			   tree_cons (NULL_TREE, V4HI_type_node,
11006				      tree_cons (NULL_TREE,
11007						 long_long_integer_type_node,
11008						 endlink)));
11009  tree v2si_ftype_v2si_di
11010    = build_function_type (V2SI_type_node,
11011			   tree_cons (NULL_TREE, V2SI_type_node,
11012				      tree_cons (NULL_TREE,
11013						 long_long_integer_type_node,
11014						 endlink)));
11015  tree void_ftype_void
11016    = build_function_type (void_type_node, endlink);
11017  tree void_ftype_unsigned
11018    = build_function_type (void_type_node,
11019			   tree_cons (NULL_TREE, unsigned_type_node,
11020				      endlink));
11021  tree unsigned_ftype_void
11022    = build_function_type (unsigned_type_node, endlink);
11023  tree di_ftype_void
11024    = build_function_type (long_long_unsigned_type_node, endlink);
11025  tree v4sf_ftype_void
11026    = build_function_type (V4SF_type_node, endlink);
11027  tree v2si_ftype_v4sf
11028    = build_function_type (V2SI_type_node,
11029			   tree_cons (NULL_TREE, V4SF_type_node,
11030				      endlink));
11031  /* Loads/stores.  */
11032  tree maskmovq_args = tree_cons (NULL_TREE, V8QI_type_node,
11033				  tree_cons (NULL_TREE, V8QI_type_node,
11034					     tree_cons (NULL_TREE,
11035							pchar_type_node,
11036							endlink)));
11037  tree void_ftype_v8qi_v8qi_pchar
11038    = build_function_type (void_type_node, maskmovq_args);
11039  tree v4sf_ftype_pfloat
11040    = build_function_type (V4SF_type_node,
11041			   tree_cons (NULL_TREE, pfloat_type_node,
11042				      endlink));
11043  /* @@@ the type is bogus */
11044  tree v4sf_ftype_v4sf_pv2si
11045    = build_function_type (V4SF_type_node,
11046			   tree_cons (NULL_TREE, V4SF_type_node,
11047				      tree_cons (NULL_TREE, pv2si_type_node,
11048						 endlink)));
11049  tree void_ftype_pv2si_v4sf
11050    = build_function_type (void_type_node,
11051			   tree_cons (NULL_TREE, pv2si_type_node,
11052				      tree_cons (NULL_TREE, V4SF_type_node,
11053						 endlink)));
11054  tree void_ftype_pfloat_v4sf
11055    = build_function_type (void_type_node,
11056			   tree_cons (NULL_TREE, pfloat_type_node,
11057				      tree_cons (NULL_TREE, V4SF_type_node,
11058						 endlink)));
11059  tree void_ftype_pdi_di
11060    = build_function_type (void_type_node,
11061			   tree_cons (NULL_TREE, pdi_type_node,
11062				      tree_cons (NULL_TREE,
11063						 long_long_unsigned_type_node,
11064						 endlink)));
11065  /* Normal vector unops.  */
11066  tree v4sf_ftype_v4sf
11067    = build_function_type (V4SF_type_node,
11068			   tree_cons (NULL_TREE, V4SF_type_node,
11069				      endlink));
11070
11071  /* Normal vector binops.  */
11072  tree v4sf_ftype_v4sf_v4sf
11073    = build_function_type (V4SF_type_node,
11074			   tree_cons (NULL_TREE, V4SF_type_node,
11075				      tree_cons (NULL_TREE, V4SF_type_node,
11076						 endlink)));
11077  tree v8qi_ftype_v8qi_v8qi
11078    = build_function_type (V8QI_type_node,
11079			   tree_cons (NULL_TREE, V8QI_type_node,
11080				      tree_cons (NULL_TREE, V8QI_type_node,
11081						 endlink)));
11082  tree v4hi_ftype_v4hi_v4hi
11083    = build_function_type (V4HI_type_node,
11084			   tree_cons (NULL_TREE, V4HI_type_node,
11085				      tree_cons (NULL_TREE, V4HI_type_node,
11086						 endlink)));
11087  tree v2si_ftype_v2si_v2si
11088    = build_function_type (V2SI_type_node,
11089			   tree_cons (NULL_TREE, V2SI_type_node,
11090				      tree_cons (NULL_TREE, V2SI_type_node,
11091						 endlink)));
11092  tree di_ftype_di_di
11093    = build_function_type (long_long_unsigned_type_node,
11094			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
11095				      tree_cons (NULL_TREE,
11096						 long_long_unsigned_type_node,
11097						 endlink)));
11098
11099  tree v2si_ftype_v2sf
11100    = build_function_type (V2SI_type_node,
11101                           tree_cons (NULL_TREE, V2SF_type_node,
11102                                      endlink));
11103  tree v2sf_ftype_v2si
11104    = build_function_type (V2SF_type_node,
11105                           tree_cons (NULL_TREE, V2SI_type_node,
11106                                      endlink));
11107  tree v2si_ftype_v2si
11108    = build_function_type (V2SI_type_node,
11109                           tree_cons (NULL_TREE, V2SI_type_node,
11110                                      endlink));
11111  tree v2sf_ftype_v2sf
11112    = build_function_type (V2SF_type_node,
11113                           tree_cons (NULL_TREE, V2SF_type_node,
11114                                      endlink));
11115  tree v2sf_ftype_v2sf_v2sf
11116    = build_function_type (V2SF_type_node,
11117                           tree_cons (NULL_TREE, V2SF_type_node,
11118                                      tree_cons (NULL_TREE,
11119                                                 V2SF_type_node,
11120                                                 endlink)));
11121  tree v2si_ftype_v2sf_v2sf
11122    = build_function_type (V2SI_type_node,
11123                           tree_cons (NULL_TREE, V2SF_type_node,
11124                                      tree_cons (NULL_TREE,
11125                                                 V2SF_type_node,
11126                                                 endlink)));
11127
11128  /* Add all builtins that are more or less simple operations on two
11129     operands.  */
11130  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11131    {
11132      /* Use one of the operands; the target can have a different mode for
11133	 mask-generating compares.  */
11134      enum machine_mode mode;
11135      tree type;
11136
11137      if (d->name == 0)
11138	continue;
11139      mode = insn_data[d->icode].operand[1].mode;
11140
11141      switch (mode)
11142	{
11143	case V4SFmode:
11144	  type = v4sf_ftype_v4sf_v4sf;
11145	  break;
11146	case V8QImode:
11147	  type = v8qi_ftype_v8qi_v8qi;
11148	  break;
11149	case V4HImode:
11150	  type = v4hi_ftype_v4hi_v4hi;
11151	  break;
11152	case V2SImode:
11153	  type = v2si_ftype_v2si_v2si;
11154	  break;
11155	case DImode:
11156	  type = di_ftype_di_di;
11157	  break;
11158
11159	default:
11160	  abort ();
11161	}
11162
11163      /* Override for comparisons.  */
11164      if (d->icode == CODE_FOR_maskcmpv4sf3
11165	  || d->icode == CODE_FOR_maskncmpv4sf3
11166	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
11167	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11168	type = v4si_ftype_v4sf_v4sf;
11169
11170      def_builtin (d->mask, d->name, type, d->code);
11171    }
11172
11173  /* Add the remaining MMX insns with somewhat more complicated types.  */
11174  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
11175  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
11176  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
11177  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
11178  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
11179  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
11180  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
11181
11182  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
11183  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
11184  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
11185
11186  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
11187  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
11188
11189  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
11190  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
11191
11192  /* comi/ucomi insns.  */
11193  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11194    def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
11195
11196  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
11197  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
11198  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
11199
11200  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
11201  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
11202  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
11203  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
11204  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
11205  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
11206
11207  def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
11208  def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
11209  def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
11210  def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
11211
11212  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
11213  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
11214
11215  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
11216
11217  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
11218  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
11219  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
11220  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
11221  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
11222  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
11223
11224  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
11225  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
11226  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
11227  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
11228
11229  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
11230  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
11231  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
11232  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
11233
11234  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
11235
11236  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
11237
11238  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
11239  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
11240  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
11241  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
11242  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
11243  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
11244
11245  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
11246
11247  /* Original 3DNow!  */
11248  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
11249  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
11250  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
11251  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
11252  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
11253  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
11254  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
11255  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
11256  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
11257  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
11258  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
11259  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
11260  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
11261  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
11262  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
11263  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
11264  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
11265  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
11266  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
11267  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
11268
11269  /* 3DNow! extension as used in the Athlon CPU.  */
11270  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
11271  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
11272  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
11273  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
11274  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
11275  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
11276
11277  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
11278}
11279
11280/* Errors in the source file can cause expand_expr to return const0_rtx
11281   where we expect a vector.  To avoid crashing, use one of the vector
11282   clear instructions.  */
11283static rtx
11284safe_vector_operand (x, mode)
11285     rtx x;
11286     enum machine_mode mode;
11287{
11288  if (x != const0_rtx)
11289    return x;
11290  x = gen_reg_rtx (mode);
11291
11292  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
11293    emit_insn (gen_mmx_clrdi (mode == DImode ? x
11294			      : gen_rtx_SUBREG (DImode, x, 0)));
11295  else
11296    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
11297				: gen_rtx_SUBREG (V4SFmode, x, 0)));
11298  return x;
11299}
11300
11301/* Subroutine of ix86_expand_builtin to take care of binop insns.  */
11302
11303static rtx
11304ix86_expand_binop_builtin (icode, arglist, target)
11305     enum insn_code icode;
11306     tree arglist;
11307     rtx target;
11308{
11309  rtx pat;
11310  tree arg0 = TREE_VALUE (arglist);
11311  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11312  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11313  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11314  enum machine_mode tmode = insn_data[icode].operand[0].mode;
11315  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11316  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11317
11318  if (VECTOR_MODE_P (mode0))
11319    op0 = safe_vector_operand (op0, mode0);
11320  if (VECTOR_MODE_P (mode1))
11321    op1 = safe_vector_operand (op1, mode1);
11322
11323  if (! target
11324      || GET_MODE (target) != tmode
11325      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11326    target = gen_reg_rtx (tmode);
11327
11328  /* In case the insn wants input operands in modes different from
11329     the result, abort.  */
11330  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
11331    abort ();
11332
11333  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11334    op0 = copy_to_mode_reg (mode0, op0);
11335  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11336    op1 = copy_to_mode_reg (mode1, op1);
11337
11338  pat = GEN_FCN (icode) (target, op0, op1);
11339  if (! pat)
11340    return 0;
11341  emit_insn (pat);
11342  return target;
11343}
11344
11345/* In type_for_mode we restrict the ability to create TImode types
11346   to hosts with 64-bit H_W_I.  So we've defined the SSE logicals
11347   to have a V4SFmode signature.  Convert them in-place to TImode.  */
11348
11349static rtx
11350ix86_expand_timode_binop_builtin (icode, arglist, target)
11351     enum insn_code icode;
11352     tree arglist;
11353     rtx target;
11354{
11355  rtx pat;
11356  tree arg0 = TREE_VALUE (arglist);
11357  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11358  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11359  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11360
11361  op0 = gen_lowpart (TImode, op0);
11362  op1 = gen_lowpart (TImode, op1);
11363  target = gen_reg_rtx (TImode);
11364
11365  if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
11366    op0 = copy_to_mode_reg (TImode, op0);
11367  if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
11368    op1 = copy_to_mode_reg (TImode, op1);
11369
11370  pat = GEN_FCN (icode) (target, op0, op1);
11371  if (! pat)
11372    return 0;
11373  emit_insn (pat);
11374
11375  return gen_lowpart (V4SFmode, target);
11376}
11377
11378/* Subroutine of ix86_expand_builtin to take care of stores.  */
11379
11380static rtx
11381ix86_expand_store_builtin (icode, arglist)
11382     enum insn_code icode;
11383     tree arglist;
11384{
11385  rtx pat;
11386  tree arg0 = TREE_VALUE (arglist);
11387  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11388  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11389  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11390  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11391  enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11392
11393  if (VECTOR_MODE_P (mode1))
11394    op1 = safe_vector_operand (op1, mode1);
11395
11396  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11397  pat = GEN_FCN (icode) (op0, op1);
11398  if (pat)
11399    emit_insn (pat);
11400  return 0;
11401}
11402
11403/* Subroutine of ix86_expand_builtin to take care of unop insns.  */
11404
11405static rtx
11406ix86_expand_unop_builtin (icode, arglist, target, do_load)
11407     enum insn_code icode;
11408     tree arglist;
11409     rtx target;
11410     int do_load;
11411{
11412  rtx pat;
11413  tree arg0 = TREE_VALUE (arglist);
11414  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11415  enum machine_mode tmode = insn_data[icode].operand[0].mode;
11416  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11417
11418  if (! target
11419      || GET_MODE (target) != tmode
11420      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11421    target = gen_reg_rtx (tmode);
11422  if (do_load)
11423    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11424  else
11425    {
11426      if (VECTOR_MODE_P (mode0))
11427	op0 = safe_vector_operand (op0, mode0);
11428
11429      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11430	op0 = copy_to_mode_reg (mode0, op0);
11431    }
11432
11433  pat = GEN_FCN (icode) (target, op0);
11434  if (! pat)
11435    return 0;
11436  emit_insn (pat);
11437  return target;
11438}
11439
11440/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
11441   sqrtss, rsqrtss, rcpss.  */
11442
11443static rtx
11444ix86_expand_unop1_builtin (icode, arglist, target)
11445     enum insn_code icode;
11446     tree arglist;
11447     rtx target;
11448{
11449  rtx pat;
11450  tree arg0 = TREE_VALUE (arglist);
11451  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11452  enum machine_mode tmode = insn_data[icode].operand[0].mode;
11453  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11454
11455  if (! target
11456      || GET_MODE (target) != tmode
11457      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11458    target = gen_reg_rtx (tmode);
11459
11460  if (VECTOR_MODE_P (mode0))
11461    op0 = safe_vector_operand (op0, mode0);
11462
11463  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11464    op0 = copy_to_mode_reg (mode0, op0);
11465
11466  pat = GEN_FCN (icode) (target, op0, op0);
11467  if (! pat)
11468    return 0;
11469  emit_insn (pat);
11470  return target;
11471}
11472
11473/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
11474
11475static rtx
11476ix86_expand_sse_compare (d, arglist, target)
11477     const struct builtin_description *d;
11478     tree arglist;
11479     rtx target;
11480{
11481  rtx pat;
11482  tree arg0 = TREE_VALUE (arglist);
11483  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11484  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11485  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11486  rtx op2;
11487  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
11488  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
11489  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
11490  enum rtx_code comparison = d->comparison;
11491
11492  if (VECTOR_MODE_P (mode0))
11493    op0 = safe_vector_operand (op0, mode0);
11494  if (VECTOR_MODE_P (mode1))
11495    op1 = safe_vector_operand (op1, mode1);
11496
11497  /* Swap operands if we have a comparison that isn't available in
11498     hardware.  */
11499  if (d->flag)
11500    {
11501      rtx tmp = gen_reg_rtx (mode1);
11502      emit_move_insn (tmp, op1);
11503      op1 = op0;
11504      op0 = tmp;
11505    }
11506
11507  if (! target
11508      || GET_MODE (target) != tmode
11509      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
11510    target = gen_reg_rtx (tmode);
11511
11512  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
11513    op0 = copy_to_mode_reg (mode0, op0);
11514  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
11515    op1 = copy_to_mode_reg (mode1, op1);
11516
11517  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11518  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
11519  if (! pat)
11520    return 0;
11521  emit_insn (pat);
11522  return target;
11523}
11524
11525/* Subroutine of ix86_expand_builtin to take care of comi insns.  */
11526
11527static rtx
11528ix86_expand_sse_comi (d, arglist, target)
11529     const struct builtin_description *d;
11530     tree arglist;
11531     rtx target;
11532{
11533  rtx pat;
11534  tree arg0 = TREE_VALUE (arglist);
11535  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11536  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11537  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11538  rtx op2;
11539  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
11540  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
11541  enum rtx_code comparison = d->comparison;
11542
11543  if (VECTOR_MODE_P (mode0))
11544    op0 = safe_vector_operand (op0, mode0);
11545  if (VECTOR_MODE_P (mode1))
11546    op1 = safe_vector_operand (op1, mode1);
11547
11548  /* Swap operands if we have a comparison that isn't available in
11549     hardware.  */
11550  if (d->flag)
11551    {
11552      rtx tmp = op1;
11553      op1 = op0;
11554      op0 = tmp;
11555    }
11556
11557  target = gen_reg_rtx (SImode);
11558  emit_move_insn (target, const0_rtx);
11559  target = gen_rtx_SUBREG (QImode, target, 0);
11560
11561  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11562    op0 = copy_to_mode_reg (mode0, op0);
11563  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11564    op1 = copy_to_mode_reg (mode1, op1);
11565
11566  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
11567  pat = GEN_FCN (d->icode) (op0, op1, op2);
11568  if (! pat)
11569    return 0;
11570  emit_insn (pat);
11571  emit_insn (gen_rtx_SET (VOIDmode,
11572			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
11573			  gen_rtx_fmt_ee (comparison, QImode,
11574					  gen_rtx_REG (CCmode, FLAGS_REG),
11575					  const0_rtx)));
11576
11577  return SUBREG_REG (target);
11578}
11579
11580/* Expand an expression EXP that calls a built-in function,
11581   with result going to TARGET if that's convenient
11582   (and in mode MODE if that's convenient).
11583   SUBTARGET may be used as the target for computing one of EXP's operands.
11584   IGNORE is nonzero if the value is to be ignored.  */
11585
11586rtx
11587ix86_expand_builtin (exp, target, subtarget, mode, ignore)
11588     tree exp;
11589     rtx target;
11590     rtx subtarget ATTRIBUTE_UNUSED;
11591     enum machine_mode mode ATTRIBUTE_UNUSED;
11592     int ignore ATTRIBUTE_UNUSED;
11593{
11594  const struct builtin_description *d;
11595  size_t i;
11596  enum insn_code icode;
11597  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
11598  tree arglist = TREE_OPERAND (exp, 1);
11599  tree arg0, arg1, arg2;
11600  rtx op0, op1, op2, pat;
11601  enum machine_mode tmode, mode0, mode1, mode2;
11602  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11603
11604  switch (fcode)
11605    {
11606    case IX86_BUILTIN_EMMS:
11607      emit_insn (gen_emms ());
11608      return 0;
11609
11610    case IX86_BUILTIN_SFENCE:
11611      emit_insn (gen_sfence ());
11612      return 0;
11613
11614    case IX86_BUILTIN_PEXTRW:
11615      icode = CODE_FOR_mmx_pextrw;
11616      arg0 = TREE_VALUE (arglist);
11617      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11618      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11619      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11620      tmode = insn_data[icode].operand[0].mode;
11621      mode0 = insn_data[icode].operand[1].mode;
11622      mode1 = insn_data[icode].operand[2].mode;
11623
11624      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11625	op0 = copy_to_mode_reg (mode0, op0);
11626      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11627	{
11628	  /* @@@ better error message */
11629	  error ("selector must be an immediate");
11630	  return gen_reg_rtx (tmode);
11631	}
11632      if (target == 0
11633	  || GET_MODE (target) != tmode
11634	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11635	target = gen_reg_rtx (tmode);
11636      pat = GEN_FCN (icode) (target, op0, op1);
11637      if (! pat)
11638	return 0;
11639      emit_insn (pat);
11640      return target;
11641
11642    case IX86_BUILTIN_PINSRW:
11643      icode = CODE_FOR_mmx_pinsrw;
11644      arg0 = TREE_VALUE (arglist);
11645      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11646      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11647      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11648      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11649      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11650      tmode = insn_data[icode].operand[0].mode;
11651      mode0 = insn_data[icode].operand[1].mode;
11652      mode1 = insn_data[icode].operand[2].mode;
11653      mode2 = insn_data[icode].operand[3].mode;
11654
11655      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11656	op0 = copy_to_mode_reg (mode0, op0);
11657      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11658	op1 = copy_to_mode_reg (mode1, op1);
11659      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11660	{
11661	  /* @@@ better error message */
11662	  error ("selector must be an immediate");
11663	  return const0_rtx;
11664	}
11665      if (target == 0
11666	  || GET_MODE (target) != tmode
11667	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11668	target = gen_reg_rtx (tmode);
11669      pat = GEN_FCN (icode) (target, op0, op1, op2);
11670      if (! pat)
11671	return 0;
11672      emit_insn (pat);
11673      return target;
11674
11675    case IX86_BUILTIN_MASKMOVQ:
11676      icode = CODE_FOR_mmx_maskmovq;
11677      /* Note the arg order is different from the operand order.  */
11678      arg1 = TREE_VALUE (arglist);
11679      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
11680      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11681      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11682      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11683      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11684      mode0 = insn_data[icode].operand[0].mode;
11685      mode1 = insn_data[icode].operand[1].mode;
11686      mode2 = insn_data[icode].operand[2].mode;
11687
11688      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11689	op0 = copy_to_mode_reg (mode0, op0);
11690      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11691	op1 = copy_to_mode_reg (mode1, op1);
11692      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
11693	op2 = copy_to_mode_reg (mode2, op2);
11694      pat = GEN_FCN (icode) (op0, op1, op2);
11695      if (! pat)
11696	return 0;
11697      emit_insn (pat);
11698      return 0;
11699
11700    case IX86_BUILTIN_SQRTSS:
11701      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
11702    case IX86_BUILTIN_RSQRTSS:
11703      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
11704    case IX86_BUILTIN_RCPSS:
11705      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
11706
11707    case IX86_BUILTIN_ANDPS:
11708      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
11709					       arglist, target);
11710    case IX86_BUILTIN_ANDNPS:
11711      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
11712					       arglist, target);
11713    case IX86_BUILTIN_ORPS:
11714      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
11715					       arglist, target);
11716    case IX86_BUILTIN_XORPS:
11717      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
11718					       arglist, target);
11719
11720    case IX86_BUILTIN_LOADAPS:
11721      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
11722
11723    case IX86_BUILTIN_LOADUPS:
11724      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
11725
11726    case IX86_BUILTIN_STOREAPS:
11727      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
11728    case IX86_BUILTIN_STOREUPS:
11729      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
11730
11731    case IX86_BUILTIN_LOADSS:
11732      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
11733
11734    case IX86_BUILTIN_STORESS:
11735      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
11736
11737    case IX86_BUILTIN_LOADHPS:
11738    case IX86_BUILTIN_LOADLPS:
11739      icode = (fcode == IX86_BUILTIN_LOADHPS
11740	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11741      arg0 = TREE_VALUE (arglist);
11742      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11743      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11744      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11745      tmode = insn_data[icode].operand[0].mode;
11746      mode0 = insn_data[icode].operand[1].mode;
11747      mode1 = insn_data[icode].operand[2].mode;
11748
11749      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11750	op0 = copy_to_mode_reg (mode0, op0);
11751      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
11752      if (target == 0
11753	  || GET_MODE (target) != tmode
11754	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11755	target = gen_reg_rtx (tmode);
11756      pat = GEN_FCN (icode) (target, op0, op1);
11757      if (! pat)
11758	return 0;
11759      emit_insn (pat);
11760      return target;
11761
11762    case IX86_BUILTIN_STOREHPS:
11763    case IX86_BUILTIN_STORELPS:
11764      icode = (fcode == IX86_BUILTIN_STOREHPS
11765	       ? CODE_FOR_sse_movhps : CODE_FOR_sse_movlps);
11766      arg0 = TREE_VALUE (arglist);
11767      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11768      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11769      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11770      mode0 = insn_data[icode].operand[1].mode;
11771      mode1 = insn_data[icode].operand[2].mode;
11772
11773      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11774      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11775	op1 = copy_to_mode_reg (mode1, op1);
11776
11777      pat = GEN_FCN (icode) (op0, op0, op1);
11778      if (! pat)
11779	return 0;
11780      emit_insn (pat);
11781      return 0;
11782
11783    case IX86_BUILTIN_MOVNTPS:
11784      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
11785    case IX86_BUILTIN_MOVNTQ:
11786      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
11787
11788    case IX86_BUILTIN_LDMXCSR:
11789      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
11790      target = assign_386_stack_local (SImode, 0);
11791      emit_move_insn (target, op0);
11792      emit_insn (gen_ldmxcsr (target));
11793      return 0;
11794
11795    case IX86_BUILTIN_STMXCSR:
11796      target = assign_386_stack_local (SImode, 0);
11797      emit_insn (gen_stmxcsr (target));
11798      return copy_to_mode_reg (SImode, target);
11799
11800    case IX86_BUILTIN_SHUFPS:
11801      icode = CODE_FOR_sse_shufps;
11802      arg0 = TREE_VALUE (arglist);
11803      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11804      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
11805      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11806      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11807      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
11808      tmode = insn_data[icode].operand[0].mode;
11809      mode0 = insn_data[icode].operand[1].mode;
11810      mode1 = insn_data[icode].operand[2].mode;
11811      mode2 = insn_data[icode].operand[3].mode;
11812
11813      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11814	op0 = copy_to_mode_reg (mode0, op0);
11815      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11816	op1 = copy_to_mode_reg (mode1, op1);
11817      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11818	{
11819	  /* @@@ better error message */
11820	  error ("mask must be an immediate");
11821	  return gen_reg_rtx (tmode);
11822	}
11823      if (target == 0
11824	  || GET_MODE (target) != tmode
11825	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11826	target = gen_reg_rtx (tmode);
11827      pat = GEN_FCN (icode) (target, op0, op1, op2);
11828      if (! pat)
11829	return 0;
11830      emit_insn (pat);
11831      return target;
11832
11833    case IX86_BUILTIN_PSHUFW:
11834      icode = CODE_FOR_mmx_pshufw;
11835      arg0 = TREE_VALUE (arglist);
11836      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
11837      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
11838      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
11839      tmode = insn_data[icode].operand[0].mode;
11840      mode1 = insn_data[icode].operand[1].mode;
11841      mode2 = insn_data[icode].operand[2].mode;
11842
11843      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
11844	op0 = copy_to_mode_reg (mode1, op0);
11845      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
11846	{
11847	  /* @@@ better error message */
11848	  error ("mask must be an immediate");
11849	  return const0_rtx;
11850	}
11851      if (target == 0
11852	  || GET_MODE (target) != tmode
11853	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11854	target = gen_reg_rtx (tmode);
11855      pat = GEN_FCN (icode) (target, op0, op1);
11856      if (! pat)
11857	return 0;
11858      emit_insn (pat);
11859      return target;
11860
11861    case IX86_BUILTIN_FEMMS:
11862      emit_insn (gen_femms ());
11863      return NULL_RTX;
11864
11865    case IX86_BUILTIN_PAVGUSB:
11866      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
11867
11868    case IX86_BUILTIN_PF2ID:
11869      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
11870
11871    case IX86_BUILTIN_PFACC:
11872      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
11873
11874    case IX86_BUILTIN_PFADD:
11875     return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
11876
11877    case IX86_BUILTIN_PFCMPEQ:
11878      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
11879
11880    case IX86_BUILTIN_PFCMPGE:
11881      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
11882
11883    case IX86_BUILTIN_PFCMPGT:
11884      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
11885
11886    case IX86_BUILTIN_PFMAX:
11887      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
11888
11889    case IX86_BUILTIN_PFMIN:
11890      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
11891
11892    case IX86_BUILTIN_PFMUL:
11893      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
11894
11895    case IX86_BUILTIN_PFRCP:
11896      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
11897
11898    case IX86_BUILTIN_PFRCPIT1:
11899      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
11900
11901    case IX86_BUILTIN_PFRCPIT2:
11902      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
11903
11904    case IX86_BUILTIN_PFRSQIT1:
11905      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
11906
11907    case IX86_BUILTIN_PFRSQRT:
11908      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
11909
11910    case IX86_BUILTIN_PFSUB:
11911      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
11912
11913    case IX86_BUILTIN_PFSUBR:
11914      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
11915
11916    case IX86_BUILTIN_PI2FD:
11917      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
11918
11919    case IX86_BUILTIN_PMULHRW:
11920      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
11921
11922    case IX86_BUILTIN_PF2IW:
11923      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
11924
11925    case IX86_BUILTIN_PFNACC:
11926      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
11927
11928    case IX86_BUILTIN_PFPNACC:
11929      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
11930
11931    case IX86_BUILTIN_PI2FW:
11932      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
11933
11934    case IX86_BUILTIN_PSWAPDSI:
11935      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
11936
11937    case IX86_BUILTIN_PSWAPDSF:
11938      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
11939
11940    case IX86_BUILTIN_SSE_ZERO:
11941      target = gen_reg_rtx (V4SFmode);
11942      emit_insn (gen_sse_clrv4sf (target));
11943      return target;
11944
11945    case IX86_BUILTIN_MMX_ZERO:
11946      target = gen_reg_rtx (DImode);
11947      emit_insn (gen_mmx_clrdi (target));
11948      return target;
11949
11950    default:
11951      break;
11952    }
11953
11954  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
11955    if (d->code == fcode)
11956      {
11957	/* Compares are treated specially.  */
11958	if (d->icode == CODE_FOR_maskcmpv4sf3
11959	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
11960	    || d->icode == CODE_FOR_maskncmpv4sf3
11961	    || d->icode == CODE_FOR_vmmaskncmpv4sf3)
11962	  return ix86_expand_sse_compare (d, arglist, target);
11963
11964	return ix86_expand_binop_builtin (d->icode, arglist, target);
11965      }
11966
11967  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
11968    if (d->code == fcode)
11969      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
11970
11971  for (i = 0, d = bdesc_comi; i < sizeof (bdesc_comi) / sizeof *d; i++, d++)
11972    if (d->code == fcode)
11973      return ix86_expand_sse_comi (d, arglist, target);
11974
11975  /* @@@ Should really do something sensible here.  */
11976  return 0;
11977}
11978
11979/* Store OPERAND to the memory after reload is completed.  This means
11980   that we can't easily use assign_stack_local.  */
11981rtx
11982ix86_force_to_memory (mode, operand)
11983     enum machine_mode mode;
11984     rtx operand;
11985{
11986  rtx result;
11987  if (!reload_completed)
11988    abort ();
11989  if (TARGET_64BIT && TARGET_RED_ZONE)
11990    {
11991      result = gen_rtx_MEM (mode,
11992			    gen_rtx_PLUS (Pmode,
11993					  stack_pointer_rtx,
11994					  GEN_INT (-RED_ZONE_SIZE)));
11995      emit_move_insn (result, operand);
11996    }
11997  else if (TARGET_64BIT && !TARGET_RED_ZONE)
11998    {
11999      switch (mode)
12000	{
12001	case HImode:
12002	case SImode:
12003	  operand = gen_lowpart (DImode, operand);
12004	  /* FALLTHRU */
12005	case DImode:
12006	  emit_insn (
12007		      gen_rtx_SET (VOIDmode,
12008				   gen_rtx_MEM (DImode,
12009						gen_rtx_PRE_DEC (DImode,
12010							stack_pointer_rtx)),
12011				   operand));
12012	  break;
12013	default:
12014	  abort ();
12015	}
12016      result = gen_rtx_MEM (mode, stack_pointer_rtx);
12017    }
12018  else
12019    {
12020      switch (mode)
12021	{
12022	case DImode:
12023	  {
12024	    rtx operands[2];
12025	    split_di (&operand, 1, operands, operands + 1);
12026	    emit_insn (
12027			gen_rtx_SET (VOIDmode,
12028				     gen_rtx_MEM (SImode,
12029						  gen_rtx_PRE_DEC (Pmode,
12030							stack_pointer_rtx)),
12031				     operands[1]));
12032	    emit_insn (
12033			gen_rtx_SET (VOIDmode,
12034				     gen_rtx_MEM (SImode,
12035						  gen_rtx_PRE_DEC (Pmode,
12036							stack_pointer_rtx)),
12037				     operands[0]));
12038	  }
12039	  break;
12040	case HImode:
12041	  /* It is better to store HImodes as SImodes.  */
12042	  if (!TARGET_PARTIAL_REG_STALL)
12043	    operand = gen_lowpart (SImode, operand);
12044	  /* FALLTHRU */
12045	case SImode:
12046	  emit_insn (
12047		      gen_rtx_SET (VOIDmode,
12048				   gen_rtx_MEM (GET_MODE (operand),
12049						gen_rtx_PRE_DEC (SImode,
12050							stack_pointer_rtx)),
12051				   operand));
12052	  break;
12053	default:
12054	  abort ();
12055	}
12056      result = gen_rtx_MEM (mode, stack_pointer_rtx);
12057    }
12058  return result;
12059}
12060
12061/* Free operand from the memory.  */
12062void
12063ix86_free_from_memory (mode)
12064     enum machine_mode mode;
12065{
12066  if (!TARGET_64BIT || !TARGET_RED_ZONE)
12067    {
12068      int size;
12069
12070      if (mode == DImode || TARGET_64BIT)
12071	size = 8;
12072      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
12073	size = 2;
12074      else
12075	size = 4;
12076      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
12077         to pop or add instruction if registers are available.  */
12078      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12079			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12080					    GEN_INT (size))));
12081    }
12082}
12083
12084/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12085   QImode must go into class Q_REGS.
12086   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
12087   movdf to do mem-to-mem moves through integer regs.  */
12088enum reg_class
12089ix86_preferred_reload_class (x, class)
12090     rtx x;
12091     enum reg_class class;
12092{
12093  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
12094    {
12095      /* SSE can't load any constant directly yet.  */
12096      if (SSE_CLASS_P (class))
12097	return NO_REGS;
12098      /* Floats can load 0 and 1.  */
12099      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
12100	{
12101	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
12102	  if (MAYBE_SSE_CLASS_P (class))
12103	    return (reg_class_subset_p (class, GENERAL_REGS)
12104		    ? GENERAL_REGS : FLOAT_REGS);
12105	  else
12106	    return class;
12107	}
12108      /* General regs can load everything.  */
12109      if (reg_class_subset_p (class, GENERAL_REGS))
12110	return GENERAL_REGS;
12111      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
12112      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
12113	return NO_REGS;
12114    }
12115  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
12116    return NO_REGS;
12117  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
12118    return Q_REGS;
12119  return class;
12120}
12121
12122/* If we are copying between general and FP registers, we need a memory
12123   location. The same is true for SSE and MMX registers.
12124
12125   The macro can't work reliably when one of the CLASSES is class containing
12126   registers from multiple units (SSE, MMX, integer).  We avoid this by never
12127   combining those units in single alternative in the machine description.
12128   Ensure that this constraint holds to avoid unexpected surprises.
12129
12130   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12131   enforce these sanity checks.  */
12132int
12133ix86_secondary_memory_needed (class1, class2, mode, strict)
12134     enum reg_class class1, class2;
12135     enum machine_mode mode;
12136     int strict;
12137{
12138  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
12139      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
12140      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
12141      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
12142      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
12143      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
12144    {
12145      if (strict)
12146	abort ();
12147      else
12148	return 1;
12149    }
12150  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
12151	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
12152	      && (mode) != SImode)
12153	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12154	      && (mode) != SImode));
12155}
12156/* Return the cost of moving data from a register in class CLASS1 to
12157   one in class CLASS2.
12158
12159   It is not required that the cost always equal 2 when FROM is the same as TO;
12160   on some machines it is expensive to move between registers if they are not
12161   general registers.  */
12162int
12163ix86_register_move_cost (mode, class1, class2)
12164     enum machine_mode mode;
12165     enum reg_class class1, class2;
12166{
12167  /* In case we require secondary memory, compute cost of the store followed
12168     by load.  In case of copying from general_purpose_register we may emit
12169     multiple stores followed by single load causing memory size mismatch
12170     stall.  Count this as arbitarily high cost of 20.  */
12171  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
12172    {
12173      int add_cost = 0;
12174      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
12175	  add_cost = 20;
12176      return (MEMORY_MOVE_COST (mode, class1, 0)
12177	      + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
12178    }
12179  /* Moves between SSE/MMX and integer unit are expensive.  */
12180  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
12181      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
12182    return ix86_cost->mmxsse_to_integer;
12183  if (MAYBE_FLOAT_CLASS_P (class1))
12184    return ix86_cost->fp_move;
12185  if (MAYBE_SSE_CLASS_P (class1))
12186    return ix86_cost->sse_move;
12187  if (MAYBE_MMX_CLASS_P (class1))
12188    return ix86_cost->mmx_move;
12189  return 2;
12190}
12191
12192/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
12193int
12194ix86_hard_regno_mode_ok (regno, mode)
12195     int regno;
12196     enum machine_mode mode;
12197{
12198  /* Flags and only flags can only hold CCmode values.  */
12199  if (CC_REGNO_P (regno))
12200    return GET_MODE_CLASS (mode) == MODE_CC;
12201  if (GET_MODE_CLASS (mode) == MODE_CC
12202      || GET_MODE_CLASS (mode) == MODE_RANDOM
12203      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
12204    return 0;
12205  if (FP_REGNO_P (regno))
12206    return VALID_FP_MODE_P (mode);
12207  if (SSE_REGNO_P (regno))
12208    return VALID_SSE_REG_MODE (mode);
12209  if (MMX_REGNO_P (regno))
12210    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
12211  /* We handle both integer and floats in the general purpose registers.
12212     In future we should be able to handle vector modes as well.  */
12213  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
12214    return 0;
12215  /* Take care for QImode values - they can be in non-QI regs, but then
12216     they do cause partial register stalls.  */
12217  if (regno < 4 || mode != QImode || TARGET_64BIT)
12218    return 1;
12219  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
12220}
12221
12222/* Return the cost of moving data of mode M between a
12223   register and memory.  A value of 2 is the default; this cost is
12224   relative to those in `REGISTER_MOVE_COST'.
12225
12226   If moving between registers and memory is more expensive than
12227   between two registers, you should define this macro to express the
12228   relative cost.
12229
12230   Model also increased moving costs of QImode registers in non
12231   Q_REGS classes.
12232 */
12233int
12234ix86_memory_move_cost (mode, class, in)
12235     enum machine_mode mode;
12236     enum reg_class class;
12237     int in;
12238{
12239  if (FLOAT_CLASS_P (class))
12240    {
12241      int index;
12242      switch (mode)
12243	{
12244	  case SFmode:
12245	    index = 0;
12246	    break;
12247	  case DFmode:
12248	    index = 1;
12249	    break;
12250	  case XFmode:
12251	  case TFmode:
12252	    index = 2;
12253	    break;
12254	  default:
12255	    return 100;
12256	}
12257      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
12258    }
12259  if (SSE_CLASS_P (class))
12260    {
12261      int index;
12262      switch (GET_MODE_SIZE (mode))
12263	{
12264	  case 4:
12265	    index = 0;
12266	    break;
12267	  case 8:
12268	    index = 1;
12269	    break;
12270	  case 16:
12271	    index = 2;
12272	    break;
12273	  default:
12274	    return 100;
12275	}
12276      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
12277    }
12278  if (MMX_CLASS_P (class))
12279    {
12280      int index;
12281      switch (GET_MODE_SIZE (mode))
12282	{
12283	  case 4:
12284	    index = 0;
12285	    break;
12286	  case 8:
12287	    index = 1;
12288	    break;
12289	  default:
12290	    return 100;
12291	}
12292      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
12293    }
12294  switch (GET_MODE_SIZE (mode))
12295    {
12296      case 1:
12297	if (in)
12298	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
12299		  : ix86_cost->movzbl_load);
12300	else
12301	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
12302		  : ix86_cost->int_store[0] + 4);
12303	break;
12304      case 2:
12305	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
12306      default:
12307	/* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
12308	if (mode == TFmode)
12309	  mode = XFmode;
12310	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
12311		* (int) GET_MODE_SIZE (mode) / 4);
12312    }
12313}
12314
12315#ifdef DO_GLOBAL_CTORS_BODY
12316static void
12317ix86_svr3_asm_out_constructor (symbol, priority)
12318     rtx symbol;
12319     int priority ATTRIBUTE_UNUSED;
12320{
12321  init_section ();
12322  fputs ("\tpushl $", asm_out_file);
12323  assemble_name (asm_out_file, XSTR (symbol, 0));
12324  fputc ('\n', asm_out_file);
12325}
12326#endif
12327