i386.c revision 218895
1/* Subroutines used for code generation on IA-32.
2   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING.  If not, write to
19the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA.  */
21
22/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 218895 2011-02-20 22:25:23Z mm $ */
23
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tree.h"
30#include "tm_p.h"
31#include "regs.h"
32#include "hard-reg-set.h"
33#include "real.h"
34#include "insn-config.h"
35#include "conditions.h"
36#include "output.h"
37#include "insn-codes.h"
38#include "insn-attr.h"
39#include "flags.h"
40#include "except.h"
41#include "function.h"
42#include "recog.h"
43#include "expr.h"
44#include "optabs.h"
45#include "toplev.h"
46#include "basic-block.h"
47#include "ggc.h"
48#include "target.h"
49#include "target-def.h"
50#include "langhooks.h"
51#include "cgraph.h"
52#include "tree-gimple.h"
53#include "dwarf2.h"
54#include "tm-constrs.h"
55
56#ifndef CHECK_STACK_LIMIT
57#define CHECK_STACK_LIMIT (-1)
58#endif
59
60/* Return index of given mode in mult and division cost tables.  */
61#define MODE_INDEX(mode)					\
62  ((mode) == QImode ? 0						\
63   : (mode) == HImode ? 1					\
64   : (mode) == SImode ? 2					\
65   : (mode) == DImode ? 3					\
66   : 4)
67
68/* Processor costs (relative to an add) */
69/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
70#define COSTS_N_BYTES(N) ((N) * 2)
71
72static const
73struct processor_costs size_cost = {	/* costs for tuning for size */
74  COSTS_N_BYTES (2),			/* cost of an add instruction */
75  COSTS_N_BYTES (3),			/* cost of a lea instruction */
76  COSTS_N_BYTES (2),			/* variable shift costs */
77  COSTS_N_BYTES (3),			/* constant shift costs */
78  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
79   COSTS_N_BYTES (3),			/*                               HI */
80   COSTS_N_BYTES (3),			/*                               SI */
81   COSTS_N_BYTES (3),			/*                               DI */
82   COSTS_N_BYTES (5)},			/*                            other */
83  0,					/* cost of multiply per each bit set */
84  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
85   COSTS_N_BYTES (3),			/*                          HI */
86   COSTS_N_BYTES (3),			/*                          SI */
87   COSTS_N_BYTES (3),			/*                          DI */
88   COSTS_N_BYTES (5)},			/*                       other */
89  COSTS_N_BYTES (3),			/* cost of movsx */
90  COSTS_N_BYTES (3),			/* cost of movzx */
91  0,					/* "large" insn */
92  2,					/* MOVE_RATIO */
93  2,					/* cost for loading QImode using movzbl */
94  {2, 2, 2},				/* cost of loading integer registers
95					   in QImode, HImode and SImode.
96					   Relative to reg-reg move (2).  */
97  {2, 2, 2},				/* cost of storing integer registers */
98  2,					/* cost of reg,reg fld/fst */
99  {2, 2, 2},				/* cost of loading fp registers
100					   in SFmode, DFmode and XFmode */
101  {2, 2, 2},				/* cost of storing fp registers
102					   in SFmode, DFmode and XFmode */
103  3,					/* cost of moving MMX register */
104  {3, 3},				/* cost of loading MMX registers
105					   in SImode and DImode */
106  {3, 3},				/* cost of storing MMX registers
107					   in SImode and DImode */
108  3,					/* cost of moving SSE register */
109  {3, 3, 3},				/* cost of loading SSE registers
110					   in SImode, DImode and TImode */
111  {3, 3, 3},				/* cost of storing SSE registers
112					   in SImode, DImode and TImode */
113  3,					/* MMX or SSE register to integer */
114  0,					/* size of prefetch block */
115  0,					/* number of parallel prefetches */
116  2,					/* Branch cost */
117  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
118  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
119  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
120  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
121  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
122  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
123};
124
125/* Processor costs (relative to an add) */
126static const
127struct processor_costs i386_cost = {	/* 386 specific costs */
128  COSTS_N_INSNS (1),			/* cost of an add instruction */
129  COSTS_N_INSNS (1),			/* cost of a lea instruction */
130  COSTS_N_INSNS (3),			/* variable shift costs */
131  COSTS_N_INSNS (2),			/* constant shift costs */
132  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
133   COSTS_N_INSNS (6),			/*                               HI */
134   COSTS_N_INSNS (6),			/*                               SI */
135   COSTS_N_INSNS (6),			/*                               DI */
136   COSTS_N_INSNS (6)},			/*                               other */
137  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
138  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
139   COSTS_N_INSNS (23),			/*                          HI */
140   COSTS_N_INSNS (23),			/*                          SI */
141   COSTS_N_INSNS (23),			/*                          DI */
142   COSTS_N_INSNS (23)},			/*                          other */
143  COSTS_N_INSNS (3),			/* cost of movsx */
144  COSTS_N_INSNS (2),			/* cost of movzx */
145  15,					/* "large" insn */
146  3,					/* MOVE_RATIO */
147  4,					/* cost for loading QImode using movzbl */
148  {2, 4, 2},				/* cost of loading integer registers
149					   in QImode, HImode and SImode.
150					   Relative to reg-reg move (2).  */
151  {2, 4, 2},				/* cost of storing integer registers */
152  2,					/* cost of reg,reg fld/fst */
153  {8, 8, 8},				/* cost of loading fp registers
154					   in SFmode, DFmode and XFmode */
155  {8, 8, 8},				/* cost of storing fp registers
156					   in SFmode, DFmode and XFmode */
157  2,					/* cost of moving MMX register */
158  {4, 8},				/* cost of loading MMX registers
159					   in SImode and DImode */
160  {4, 8},				/* cost of storing MMX registers
161					   in SImode and DImode */
162  2,					/* cost of moving SSE register */
163  {4, 8, 16},				/* cost of loading SSE registers
164					   in SImode, DImode and TImode */
165  {4, 8, 16},				/* cost of storing SSE registers
166					   in SImode, DImode and TImode */
167  3,					/* MMX or SSE register to integer */
168  0,					/* size of prefetch block */
169  0,					/* number of parallel prefetches */
170  1,					/* Branch cost */
171  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
172  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
173  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
174  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
175  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
176  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
177};
178
179static const
180struct processor_costs i486_cost = {	/* 486 specific costs */
181  COSTS_N_INSNS (1),			/* cost of an add instruction */
182  COSTS_N_INSNS (1),			/* cost of a lea instruction */
183  COSTS_N_INSNS (3),			/* variable shift costs */
184  COSTS_N_INSNS (2),			/* constant shift costs */
185  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
186   COSTS_N_INSNS (12),			/*                               HI */
187   COSTS_N_INSNS (12),			/*                               SI */
188   COSTS_N_INSNS (12),			/*                               DI */
189   COSTS_N_INSNS (12)},			/*                               other */
190  1,					/* cost of multiply per each bit set */
191  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
192   COSTS_N_INSNS (40),			/*                          HI */
193   COSTS_N_INSNS (40),			/*                          SI */
194   COSTS_N_INSNS (40),			/*                          DI */
195   COSTS_N_INSNS (40)},			/*                          other */
196  COSTS_N_INSNS (3),			/* cost of movsx */
197  COSTS_N_INSNS (2),			/* cost of movzx */
198  15,					/* "large" insn */
199  3,					/* MOVE_RATIO */
200  4,					/* cost for loading QImode using movzbl */
201  {2, 4, 2},				/* cost of loading integer registers
202					   in QImode, HImode and SImode.
203					   Relative to reg-reg move (2).  */
204  {2, 4, 2},				/* cost of storing integer registers */
205  2,					/* cost of reg,reg fld/fst */
206  {8, 8, 8},				/* cost of loading fp registers
207					   in SFmode, DFmode and XFmode */
208  {8, 8, 8},				/* cost of storing fp registers
209					   in SFmode, DFmode and XFmode */
210  2,					/* cost of moving MMX register */
211  {4, 8},				/* cost of loading MMX registers
212					   in SImode and DImode */
213  {4, 8},				/* cost of storing MMX registers
214					   in SImode and DImode */
215  2,					/* cost of moving SSE register */
216  {4, 8, 16},				/* cost of loading SSE registers
217					   in SImode, DImode and TImode */
218  {4, 8, 16},				/* cost of storing SSE registers
219					   in SImode, DImode and TImode */
220  3,					/* MMX or SSE register to integer */
221  0,					/* size of prefetch block */
222  0,					/* number of parallel prefetches */
223  1,					/* Branch cost */
224  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
225  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
226  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
227  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
228  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
229  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
230};
231
232static const
233struct processor_costs pentium_cost = {
234  COSTS_N_INSNS (1),			/* cost of an add instruction */
235  COSTS_N_INSNS (1),			/* cost of a lea instruction */
236  COSTS_N_INSNS (4),			/* variable shift costs */
237  COSTS_N_INSNS (1),			/* constant shift costs */
238  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
239   COSTS_N_INSNS (11),			/*                               HI */
240   COSTS_N_INSNS (11),			/*                               SI */
241   COSTS_N_INSNS (11),			/*                               DI */
242   COSTS_N_INSNS (11)},			/*                               other */
243  0,					/* cost of multiply per each bit set */
244  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
245   COSTS_N_INSNS (25),			/*                          HI */
246   COSTS_N_INSNS (25),			/*                          SI */
247   COSTS_N_INSNS (25),			/*                          DI */
248   COSTS_N_INSNS (25)},			/*                          other */
249  COSTS_N_INSNS (3),			/* cost of movsx */
250  COSTS_N_INSNS (2),			/* cost of movzx */
251  8,					/* "large" insn */
252  6,					/* MOVE_RATIO */
253  6,					/* cost for loading QImode using movzbl */
254  {2, 4, 2},				/* cost of loading integer registers
255					   in QImode, HImode and SImode.
256					   Relative to reg-reg move (2).  */
257  {2, 4, 2},				/* cost of storing integer registers */
258  2,					/* cost of reg,reg fld/fst */
259  {2, 2, 6},				/* cost of loading fp registers
260					   in SFmode, DFmode and XFmode */
261  {4, 4, 6},				/* cost of storing fp registers
262					   in SFmode, DFmode and XFmode */
263  8,					/* cost of moving MMX register */
264  {8, 8},				/* cost of loading MMX registers
265					   in SImode and DImode */
266  {8, 8},				/* cost of storing MMX registers
267					   in SImode and DImode */
268  2,					/* cost of moving SSE register */
269  {4, 8, 16},				/* cost of loading SSE registers
270					   in SImode, DImode and TImode */
271  {4, 8, 16},				/* cost of storing SSE registers
272					   in SImode, DImode and TImode */
273  3,					/* MMX or SSE register to integer */
274  0,					/* size of prefetch block */
275  0,					/* number of parallel prefetches */
276  2,					/* Branch cost */
277  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
278  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
279  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
280  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
281  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
282  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
283};
284
285static const
286struct processor_costs pentiumpro_cost = {
287  COSTS_N_INSNS (1),			/* cost of an add instruction */
288  COSTS_N_INSNS (1),			/* cost of a lea instruction */
289  COSTS_N_INSNS (1),			/* variable shift costs */
290  COSTS_N_INSNS (1),			/* constant shift costs */
291  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
292   COSTS_N_INSNS (4),			/*                               HI */
293   COSTS_N_INSNS (4),			/*                               SI */
294   COSTS_N_INSNS (4),			/*                               DI */
295   COSTS_N_INSNS (4)},			/*                               other */
296  0,					/* cost of multiply per each bit set */
297  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
298   COSTS_N_INSNS (17),			/*                          HI */
299   COSTS_N_INSNS (17),			/*                          SI */
300   COSTS_N_INSNS (17),			/*                          DI */
301   COSTS_N_INSNS (17)},			/*                          other */
302  COSTS_N_INSNS (1),			/* cost of movsx */
303  COSTS_N_INSNS (1),			/* cost of movzx */
304  8,					/* "large" insn */
305  6,					/* MOVE_RATIO */
306  2,					/* cost for loading QImode using movzbl */
307  {4, 4, 4},				/* cost of loading integer registers
308					   in QImode, HImode and SImode.
309					   Relative to reg-reg move (2).  */
310  {2, 2, 2},				/* cost of storing integer registers */
311  2,					/* cost of reg,reg fld/fst */
312  {2, 2, 6},				/* cost of loading fp registers
313					   in SFmode, DFmode and XFmode */
314  {4, 4, 6},				/* cost of storing fp registers
315					   in SFmode, DFmode and XFmode */
316  2,					/* cost of moving MMX register */
317  {2, 2},				/* cost of loading MMX registers
318					   in SImode and DImode */
319  {2, 2},				/* cost of storing MMX registers
320					   in SImode and DImode */
321  2,					/* cost of moving SSE register */
322  {2, 2, 8},				/* cost of loading SSE registers
323					   in SImode, DImode and TImode */
324  {2, 2, 8},				/* cost of storing SSE registers
325					   in SImode, DImode and TImode */
326  3,					/* MMX or SSE register to integer */
327  32,					/* size of prefetch block */
328  6,					/* number of parallel prefetches */
329  2,					/* Branch cost */
330  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
331  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
332  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
333  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
334  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
335  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
336};
337
338static const
339struct processor_costs k6_cost = {
340  COSTS_N_INSNS (1),			/* cost of an add instruction */
341  COSTS_N_INSNS (2),			/* cost of a lea instruction */
342  COSTS_N_INSNS (1),			/* variable shift costs */
343  COSTS_N_INSNS (1),			/* constant shift costs */
344  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
345   COSTS_N_INSNS (3),			/*                               HI */
346   COSTS_N_INSNS (3),			/*                               SI */
347   COSTS_N_INSNS (3),			/*                               DI */
348   COSTS_N_INSNS (3)},			/*                               other */
349  0,					/* cost of multiply per each bit set */
350  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
351   COSTS_N_INSNS (18),			/*                          HI */
352   COSTS_N_INSNS (18),			/*                          SI */
353   COSTS_N_INSNS (18),			/*                          DI */
354   COSTS_N_INSNS (18)},			/*                          other */
355  COSTS_N_INSNS (2),			/* cost of movsx */
356  COSTS_N_INSNS (2),			/* cost of movzx */
357  8,					/* "large" insn */
358  4,					/* MOVE_RATIO */
359  3,					/* cost for loading QImode using movzbl */
360  {4, 5, 4},				/* cost of loading integer registers
361					   in QImode, HImode and SImode.
362					   Relative to reg-reg move (2).  */
363  {2, 3, 2},				/* cost of storing integer registers */
364  4,					/* cost of reg,reg fld/fst */
365  {6, 6, 6},				/* cost of loading fp registers
366					   in SFmode, DFmode and XFmode */
367  {4, 4, 4},				/* cost of storing fp registers
368					   in SFmode, DFmode and XFmode */
369  2,					/* cost of moving MMX register */
370  {2, 2},				/* cost of loading MMX registers
371					   in SImode and DImode */
372  {2, 2},				/* cost of storing MMX registers
373					   in SImode and DImode */
374  2,					/* cost of moving SSE register */
375  {2, 2, 8},				/* cost of loading SSE registers
376					   in SImode, DImode and TImode */
377  {2, 2, 8},				/* cost of storing SSE registers
378					   in SImode, DImode and TImode */
379  6,					/* MMX or SSE register to integer */
380  32,					/* size of prefetch block */
381  1,					/* number of parallel prefetches */
382  1,					/* Branch cost */
383  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
384  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
385  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
386  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
387  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
388  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
389};
390
391static const
392struct processor_costs athlon_cost = {
393  COSTS_N_INSNS (1),			/* cost of an add instruction */
394  COSTS_N_INSNS (2),			/* cost of a lea instruction */
395  COSTS_N_INSNS (1),			/* variable shift costs */
396  COSTS_N_INSNS (1),			/* constant shift costs */
397  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
398   COSTS_N_INSNS (5),			/*                               HI */
399   COSTS_N_INSNS (5),			/*                               SI */
400   COSTS_N_INSNS (5),			/*                               DI */
401   COSTS_N_INSNS (5)},			/*                               other */
402  0,					/* cost of multiply per each bit set */
403  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
404   COSTS_N_INSNS (26),			/*                          HI */
405   COSTS_N_INSNS (42),			/*                          SI */
406   COSTS_N_INSNS (74),			/*                          DI */
407   COSTS_N_INSNS (74)},			/*                          other */
408  COSTS_N_INSNS (1),			/* cost of movsx */
409  COSTS_N_INSNS (1),			/* cost of movzx */
410  8,					/* "large" insn */
411  9,					/* MOVE_RATIO */
412  4,					/* cost for loading QImode using movzbl */
413  {3, 4, 3},				/* cost of loading integer registers
414					   in QImode, HImode and SImode.
415					   Relative to reg-reg move (2).  */
416  {3, 4, 3},				/* cost of storing integer registers */
417  4,					/* cost of reg,reg fld/fst */
418  {4, 4, 12},				/* cost of loading fp registers
419					   in SFmode, DFmode and XFmode */
420  {6, 6, 8},				/* cost of storing fp registers
421					   in SFmode, DFmode and XFmode */
422  2,					/* cost of moving MMX register */
423  {4, 4},				/* cost of loading MMX registers
424					   in SImode and DImode */
425  {4, 4},				/* cost of storing MMX registers
426					   in SImode and DImode */
427  2,					/* cost of moving SSE register */
428  {4, 4, 6},				/* cost of loading SSE registers
429					   in SImode, DImode and TImode */
430  {4, 4, 5},				/* cost of storing SSE registers
431					   in SImode, DImode and TImode */
432  5,					/* MMX or SSE register to integer */
433  64,					/* size of prefetch block */
434  6,					/* number of parallel prefetches */
435  5,					/* Branch cost */
436  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
437  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
438  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
439  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
440  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
441  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
442};
443
444static const
445struct processor_costs k8_cost = {
446  COSTS_N_INSNS (1),			/* cost of an add instruction */
447  COSTS_N_INSNS (2),			/* cost of a lea instruction */
448  COSTS_N_INSNS (1),			/* variable shift costs */
449  COSTS_N_INSNS (1),			/* constant shift costs */
450  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
451   COSTS_N_INSNS (4),			/*                               HI */
452   COSTS_N_INSNS (3),			/*                               SI */
453   COSTS_N_INSNS (4),			/*                               DI */
454   COSTS_N_INSNS (5)},			/*                               other */
455  0,					/* cost of multiply per each bit set */
456  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
457   COSTS_N_INSNS (26),			/*                          HI */
458   COSTS_N_INSNS (42),			/*                          SI */
459   COSTS_N_INSNS (74),			/*                          DI */
460   COSTS_N_INSNS (74)},			/*                          other */
461  COSTS_N_INSNS (1),			/* cost of movsx */
462  COSTS_N_INSNS (1),			/* cost of movzx */
463  8,					/* "large" insn */
464  9,					/* MOVE_RATIO */
465  4,					/* cost for loading QImode using movzbl */
466  {3, 4, 3},				/* cost of loading integer registers
467					   in QImode, HImode and SImode.
468					   Relative to reg-reg move (2).  */
469  {3, 4, 3},				/* cost of storing integer registers */
470  4,					/* cost of reg,reg fld/fst */
471  {4, 4, 12},				/* cost of loading fp registers
472					   in SFmode, DFmode and XFmode */
473  {6, 6, 8},				/* cost of storing fp registers
474					   in SFmode, DFmode and XFmode */
475  2,					/* cost of moving MMX register */
476  {3, 3},				/* cost of loading MMX registers
477					   in SImode and DImode */
478  {4, 4},				/* cost of storing MMX registers
479					   in SImode and DImode */
480  2,					/* cost of moving SSE register */
481  {4, 3, 6},				/* cost of loading SSE registers
482					   in SImode, DImode and TImode */
483  {4, 4, 5},				/* cost of storing SSE registers
484					   in SImode, DImode and TImode */
485  5,					/* MMX or SSE register to integer */
486  64,					/* size of prefetch block */
487  6,					/* number of parallel prefetches */
488  5,					/* Branch cost */
489  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
490  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
491  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
492  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
493  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
494  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
495};
496
497static const
498struct processor_costs pentium4_cost = {
499  COSTS_N_INSNS (1),			/* cost of an add instruction */
500  COSTS_N_INSNS (3),			/* cost of a lea instruction */
501  COSTS_N_INSNS (4),			/* variable shift costs */
502  COSTS_N_INSNS (4),			/* constant shift costs */
503  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
504   COSTS_N_INSNS (15),			/*                               HI */
505   COSTS_N_INSNS (15),			/*                               SI */
506   COSTS_N_INSNS (15),			/*                               DI */
507   COSTS_N_INSNS (15)},			/*                               other */
508  0,					/* cost of multiply per each bit set */
509  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
510   COSTS_N_INSNS (56),			/*                          HI */
511   COSTS_N_INSNS (56),			/*                          SI */
512   COSTS_N_INSNS (56),			/*                          DI */
513   COSTS_N_INSNS (56)},			/*                          other */
514  COSTS_N_INSNS (1),			/* cost of movsx */
515  COSTS_N_INSNS (1),			/* cost of movzx */
516  16,					/* "large" insn */
517  6,					/* MOVE_RATIO */
518  2,					/* cost for loading QImode using movzbl */
519  {4, 5, 4},				/* cost of loading integer registers
520					   in QImode, HImode and SImode.
521					   Relative to reg-reg move (2).  */
522  {2, 3, 2},				/* cost of storing integer registers */
523  2,					/* cost of reg,reg fld/fst */
524  {2, 2, 6},				/* cost of loading fp registers
525					   in SFmode, DFmode and XFmode */
526  {4, 4, 6},				/* cost of storing fp registers
527					   in SFmode, DFmode and XFmode */
528  2,					/* cost of moving MMX register */
529  {2, 2},				/* cost of loading MMX registers
530					   in SImode and DImode */
531  {2, 2},				/* cost of storing MMX registers
532					   in SImode and DImode */
533  12,					/* cost of moving SSE register */
534  {12, 12, 12},				/* cost of loading SSE registers
535					   in SImode, DImode and TImode */
536  {2, 2, 8},				/* cost of storing SSE registers
537					   in SImode, DImode and TImode */
538  10,					/* MMX or SSE register to integer */
539  64,					/* size of prefetch block */
540  6,					/* number of parallel prefetches */
541  2,					/* Branch cost */
542  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
543  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
544  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
545  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
546  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
547  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
548};
549
550static const
551struct processor_costs nocona_cost = {
552  COSTS_N_INSNS (1),			/* cost of an add instruction */
553  COSTS_N_INSNS (1),			/* cost of a lea instruction */
554  COSTS_N_INSNS (1),			/* variable shift costs */
555  COSTS_N_INSNS (1),			/* constant shift costs */
556  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
557   COSTS_N_INSNS (10),			/*                               HI */
558   COSTS_N_INSNS (10),			/*                               SI */
559   COSTS_N_INSNS (10),			/*                               DI */
560   COSTS_N_INSNS (10)},			/*                               other */
561  0,					/* cost of multiply per each bit set */
562  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
563   COSTS_N_INSNS (66),			/*                          HI */
564   COSTS_N_INSNS (66),			/*                          SI */
565   COSTS_N_INSNS (66),			/*                          DI */
566   COSTS_N_INSNS (66)},			/*                          other */
567  COSTS_N_INSNS (1),			/* cost of movsx */
568  COSTS_N_INSNS (1),			/* cost of movzx */
569  16,					/* "large" insn */
570  17,					/* MOVE_RATIO */
571  4,					/* cost for loading QImode using movzbl */
572  {4, 4, 4},				/* cost of loading integer registers
573					   in QImode, HImode and SImode.
574					   Relative to reg-reg move (2).  */
575  {4, 4, 4},				/* cost of storing integer registers */
576  3,					/* cost of reg,reg fld/fst */
577  {12, 12, 12},				/* cost of loading fp registers
578					   in SFmode, DFmode and XFmode */
579  {4, 4, 4},				/* cost of storing fp registers
580					   in SFmode, DFmode and XFmode */
581  6,					/* cost of moving MMX register */
582  {12, 12},				/* cost of loading MMX registers
583					   in SImode and DImode */
584  {12, 12},				/* cost of storing MMX registers
585					   in SImode and DImode */
586  6,					/* cost of moving SSE register */
587  {12, 12, 12},				/* cost of loading SSE registers
588					   in SImode, DImode and TImode */
589  {12, 12, 12},				/* cost of storing SSE registers
590					   in SImode, DImode and TImode */
591  8,					/* MMX or SSE register to integer */
592  128,					/* size of prefetch block */
593  8,					/* number of parallel prefetches */
594  1,					/* Branch cost */
595  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
596  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
597  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
598  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
599  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
600  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
601};
602
603/* Generic64 should produce code tuned for Nocona and K8.  */
604static const
605struct processor_costs generic64_cost = {
606  COSTS_N_INSNS (1),			/* cost of an add instruction */
607  /* On all chips taken into consideration lea is 2 cycles and more.  With
608     this cost however our current implementation of synth_mult results in
609     use of unnecessary temporary registers causing regression on several
610     SPECfp benchmarks.  */
611  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
612  COSTS_N_INSNS (1),			/* variable shift costs */
613  COSTS_N_INSNS (1),			/* constant shift costs */
614  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
615   COSTS_N_INSNS (4),			/*                               HI */
616   COSTS_N_INSNS (3),			/*                               SI */
617   COSTS_N_INSNS (4),			/*                               DI */
618   COSTS_N_INSNS (2)},			/*                               other */
619  0,					/* cost of multiply per each bit set */
620  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
621   COSTS_N_INSNS (26),			/*                          HI */
622   COSTS_N_INSNS (42),			/*                          SI */
623   COSTS_N_INSNS (74),			/*                          DI */
624   COSTS_N_INSNS (74)},			/*                          other */
625  COSTS_N_INSNS (1),			/* cost of movsx */
626  COSTS_N_INSNS (1),			/* cost of movzx */
627  8,					/* "large" insn */
628  17,					/* MOVE_RATIO */
629  4,					/* cost for loading QImode using movzbl */
630  {4, 4, 4},				/* cost of loading integer registers
631					   in QImode, HImode and SImode.
632					   Relative to reg-reg move (2).  */
633  {4, 4, 4},				/* cost of storing integer registers */
634  4,					/* cost of reg,reg fld/fst */
635  {12, 12, 12},				/* cost of loading fp registers
636					   in SFmode, DFmode and XFmode */
637  {6, 6, 8},				/* cost of storing fp registers
638					   in SFmode, DFmode and XFmode */
639  2,					/* cost of moving MMX register */
640  {8, 8},				/* cost of loading MMX registers
641					   in SImode and DImode */
642  {8, 8},				/* cost of storing MMX registers
643					   in SImode and DImode */
644  2,					/* cost of moving SSE register */
645  {8, 8, 8},				/* cost of loading SSE registers
646					   in SImode, DImode and TImode */
647  {8, 8, 8},				/* cost of storing SSE registers
648					   in SImode, DImode and TImode */
649  5,					/* MMX or SSE register to integer */
650  64,					/* size of prefetch block */
651  6,					/* number of parallel prefetches */
652  /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
653     is increased to perhaps more appropriate value of 5.  */
654  3,					/* Branch cost */
655  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
656  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
657  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
658  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
659  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
660  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
661};
662
663/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8.  */
664static const
665struct processor_costs generic32_cost = {
666  COSTS_N_INSNS (1),			/* cost of an add instruction */
667  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
668  COSTS_N_INSNS (1),			/* variable shift costs */
669  COSTS_N_INSNS (1),			/* constant shift costs */
670  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
671   COSTS_N_INSNS (4),			/*                               HI */
672   COSTS_N_INSNS (3),			/*                               SI */
673   COSTS_N_INSNS (4),			/*                               DI */
674   COSTS_N_INSNS (2)},			/*                               other */
675  0,					/* cost of multiply per each bit set */
676  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
677   COSTS_N_INSNS (26),			/*                          HI */
678   COSTS_N_INSNS (42),			/*                          SI */
679   COSTS_N_INSNS (74),			/*                          DI */
680   COSTS_N_INSNS (74)},			/*                          other */
681  COSTS_N_INSNS (1),			/* cost of movsx */
682  COSTS_N_INSNS (1),			/* cost of movzx */
683  8,					/* "large" insn */
684  17,					/* MOVE_RATIO */
685  4,					/* cost for loading QImode using movzbl */
686  {4, 4, 4},				/* cost of loading integer registers
687					   in QImode, HImode and SImode.
688					   Relative to reg-reg move (2).  */
689  {4, 4, 4},				/* cost of storing integer registers */
690  4,					/* cost of reg,reg fld/fst */
691  {12, 12, 12},				/* cost of loading fp registers
692					   in SFmode, DFmode and XFmode */
693  {6, 6, 8},				/* cost of storing fp registers
694					   in SFmode, DFmode and XFmode */
695  2,					/* cost of moving MMX register */
696  {8, 8},				/* cost of loading MMX registers
697					   in SImode and DImode */
698  {8, 8},				/* cost of storing MMX registers
699					   in SImode and DImode */
700  2,					/* cost of moving SSE register */
701  {8, 8, 8},				/* cost of loading SSE registers
702					   in SImode, DImode and TImode */
703  {8, 8, 8},				/* cost of storing SSE registers
704					   in SImode, DImode and TImode */
705  5,					/* MMX or SSE register to integer */
706  64,					/* size of prefetch block */
707  6,					/* number of parallel prefetches */
708  3,					/* Branch cost */
709  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
710  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
711  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
712  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
713  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
714  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
715};
716
717const struct processor_costs *ix86_cost = &pentium_cost;
718
719/* Processor feature/optimization bitmasks.  */
720#define m_386 (1<<PROCESSOR_I386)
721#define m_486 (1<<PROCESSOR_I486)
722#define m_PENT (1<<PROCESSOR_PENTIUM)
723#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
724#define m_K6  (1<<PROCESSOR_K6)
725#define m_ATHLON  (1<<PROCESSOR_ATHLON)
726#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
727#define m_K8  (1<<PROCESSOR_K8)
728#define m_ATHLON_K8  (m_K8 | m_ATHLON)
729#define m_NOCONA  (1<<PROCESSOR_NOCONA)
730#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
731#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
732#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
733
734/* Generic instruction choice should be common subset of supported CPUs
735   (PPro/PENT4/NOCONA/Athlon/K8).  */
736
737/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
738   Generic64 seems like good code size tradeoff.  We can't enable it for 32bit
739   generic because it is not working well with PPro base chips.  */
740const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
741const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
742const int x86_zero_extend_with_and = m_486 | m_PENT;
743const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
744const int x86_double_with_add = ~m_386;
745const int x86_use_bit_test = m_386;
746const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
747const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
748const int x86_3dnow_a = m_ATHLON_K8;
749const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
750/* Branch hints were put in P4 based on simulation result. But
751   after P4 was made, no performance benefit was observed with
752   branch hints. It also increases the code size. As the result,
753   icc never generates branch hints.  */
754const int x86_branch_hints = 0;
755const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
756/* We probably ought to watch for partial register stalls on Generic32
757   compilation setting as well.  However in current implementation the
758   partial register stalls are not eliminated very well - they can
759   be introduced via subregs synthesized by combine and can happen
760   in caller/callee saving sequences.
761   Because this option pays back little on PPro based chips and is in conflict
762   with partial reg. dependencies used by Athlon/P4 based chips, it is better
763   to leave it off for generic32 for now.  */
764const int x86_partial_reg_stall = m_PPRO;
765const int x86_partial_flag_reg_stall = m_GENERIC;
766const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
767const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
768const int x86_use_mov0 = m_K6;
769const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
770const int x86_read_modify_write = ~m_PENT;
771const int x86_read_modify = ~(m_PENT | m_PPRO);
772const int x86_split_long_moves = m_PPRO;
773const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
774const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
775const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
776const int x86_qimode_math = ~(0);
777const int x86_promote_qi_regs = 0;
778/* On PPro this flag is meant to avoid partial register stalls.  Just like
779   the x86_partial_reg_stall this option might be considered for Generic32
780   if our scheme for avoiding partial stalls was more effective.  */
781const int x86_himode_math = ~(m_PPRO);
782const int x86_promote_hi_regs = m_PPRO;
783const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
784const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
785const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
786const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
787const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
788const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
789const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
790const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
791const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
792const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
793const int x86_shift1 = ~m_486;
794const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
795/* In Generic model we have an conflict here in between PPro/Pentium4 based chips
796   that thread 128bit SSE registers as single units versus K8 based chips that
797   divide SSE registers to two 64bit halves.
798   x86_sse_partial_reg_dependency promote all store destinations to be 128bit
799   to allow register renaming on 128bit SSE units, but usually results in one
800   extra microop on 64bit SSE units.  Experimental results shows that disabling
801   this option on P4 brings over 20% SPECfp regression, while enabling it on
802   K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
803   of moves.  */
804const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
805/* Set for machines where the type and dependencies are resolved on SSE
806   register parts instead of whole registers, so we may maintain just
807   lower part of scalar values in proper format leaving the upper part
808   undefined.  */
809const int x86_sse_split_regs = m_ATHLON_K8;
810const int x86_sse_typeless_stores = m_ATHLON_K8;
811const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
812const int x86_use_ffreep = m_ATHLON_K8;
813const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
814const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
815
816/* ??? Allowing interunit moves makes it all too easy for the compiler to put
817   integer data in xmm registers.  Which results in pretty abysmal code.  */
818const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
819
820const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
821/* Some CPU cores are not able to predict more than 4 branch instructions in
822   the 16 byte window.  */
823const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
824const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
825const int x86_use_bt = m_ATHLON_K8;
826/* Compare and exchange was added for 80486.  */
827const int x86_cmpxchg = ~m_386;
828/* Compare and exchange 8 bytes was added for pentium.  */
829const int x86_cmpxchg8b = ~(m_386 | m_486);
830/* Compare and exchange 16 bytes was added for nocona.  */
831const int x86_cmpxchg16b = m_NOCONA;
832/* Exchange and add was added for 80486.  */
833const int x86_xadd = ~m_386;
834const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
835
836/* In case the average insn count for single function invocation is
837   lower than this constant, emit fast (but longer) prologue and
838   epilogue code.  */
839#define FAST_PROLOGUE_INSN_COUNT 20
840
841/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
842static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
843static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
844static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
845
846/* Array of the smallest class containing reg number REGNO, indexed by
847   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
848
849enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
850{
851  /* ax, dx, cx, bx */
852  AREG, DREG, CREG, BREG,
853  /* si, di, bp, sp */
854  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
855  /* FP registers */
856  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
857  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
858  /* arg pointer */
859  NON_Q_REGS,
860  /* flags, fpsr, dirflag, frame */
861  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
862  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
863  SSE_REGS, SSE_REGS,
864  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
865  MMX_REGS, MMX_REGS,
866  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
867  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
868  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
869  SSE_REGS, SSE_REGS,
870};
871
872/* The "default" register map used in 32bit mode.  */
873
874int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
875{
876  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
877  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
878  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
879  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
880  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
881  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
882  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
883};
884
885static int const x86_64_int_parameter_registers[6] =
886{
887  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
888  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
889};
890
891static int const x86_64_int_return_registers[4] =
892{
893  0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
894};
895
896/* The "default" register map used in 64bit mode.  */
897int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
898{
899  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
900  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
901  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
902  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
903  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
904  8,9,10,11,12,13,14,15,		/* extended integer registers */
905  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
906};
907
908/* Define the register numbers to be used in Dwarf debugging information.
909   The SVR4 reference port C compiler uses the following register numbers
910   in its Dwarf output code:
911	0 for %eax (gcc regno = 0)
912	1 for %ecx (gcc regno = 2)
913	2 for %edx (gcc regno = 1)
914	3 for %ebx (gcc regno = 3)
915	4 for %esp (gcc regno = 7)
916	5 for %ebp (gcc regno = 6)
917	6 for %esi (gcc regno = 4)
918	7 for %edi (gcc regno = 5)
919   The following three DWARF register numbers are never generated by
920   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
921   believes these numbers have these meanings.
922	8  for %eip    (no gcc equivalent)
923	9  for %eflags (gcc regno = 17)
924	10 for %trapno (no gcc equivalent)
925   It is not at all clear how we should number the FP stack registers
926   for the x86 architecture.  If the version of SDB on x86/svr4 were
927   a bit less brain dead with respect to floating-point then we would
928   have a precedent to follow with respect to DWARF register numbers
929   for x86 FP registers, but the SDB on x86/svr4 is so completely
930   broken with respect to FP registers that it is hardly worth thinking
931   of it as something to strive for compatibility with.
932   The version of x86/svr4 SDB I have at the moment does (partially)
933   seem to believe that DWARF register number 11 is associated with
934   the x86 register %st(0), but that's about all.  Higher DWARF
935   register numbers don't seem to be associated with anything in
936   particular, and even for DWARF regno 11, SDB only seems to under-
937   stand that it should say that a variable lives in %st(0) (when
938   asked via an `=' command) if we said it was in DWARF regno 11,
939   but SDB still prints garbage when asked for the value of the
940   variable in question (via a `/' command).
941   (Also note that the labels SDB prints for various FP stack regs
942   when doing an `x' command are all wrong.)
943   Note that these problems generally don't affect the native SVR4
944   C compiler because it doesn't allow the use of -O with -g and
945   because when it is *not* optimizing, it allocates a memory
946   location for each floating-point variable, and the memory
947   location is what gets described in the DWARF AT_location
948   attribute for the variable in question.
949   Regardless of the severe mental illness of the x86/svr4 SDB, we
950   do something sensible here and we use the following DWARF
951   register numbers.  Note that these are all stack-top-relative
952   numbers.
953	11 for %st(0) (gcc regno = 8)
954	12 for %st(1) (gcc regno = 9)
955	13 for %st(2) (gcc regno = 10)
956	14 for %st(3) (gcc regno = 11)
957	15 for %st(4) (gcc regno = 12)
958	16 for %st(5) (gcc regno = 13)
959	17 for %st(6) (gcc regno = 14)
960	18 for %st(7) (gcc regno = 15)
961*/
962int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
963{
964  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
965  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
966  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
967  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
968  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
969  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
970  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
971};
972
973/* Test and compare insns in i386.md store the information needed to
974   generate branch and scc insns here.  */
975
976rtx ix86_compare_op0 = NULL_RTX;
977rtx ix86_compare_op1 = NULL_RTX;
978rtx ix86_compare_emitted = NULL_RTX;
979
980/* Size of the register save area.  */
981#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
982
983/* Define the structure for the machine field in struct function.  */
984
985struct stack_local_entry GTY(())
986{
987  unsigned short mode;
988  unsigned short n;
989  rtx rtl;
990  struct stack_local_entry *next;
991};
992
993/* Structure describing stack frame layout.
994   Stack grows downward:
995
996   [arguments]
997					      <- ARG_POINTER
998   saved pc
999
1000   saved frame pointer if frame_pointer_needed
1001					      <- HARD_FRAME_POINTER
1002   [saved regs]
1003
1004   [padding1]          \
1005		        )
1006   [va_arg registers]  (
1007		        > to_allocate	      <- FRAME_POINTER
1008   [frame]	       (
1009		        )
1010   [padding2]	       /
1011  */
1012struct ix86_frame
1013{
1014  int nregs;
1015  int padding1;
1016  int va_arg_size;
1017  HOST_WIDE_INT frame;
1018  int padding2;
1019  int outgoing_arguments_size;
1020  int red_zone_size;
1021
1022  HOST_WIDE_INT to_allocate;
1023  /* The offsets relative to ARG_POINTER.  */
1024  HOST_WIDE_INT frame_pointer_offset;
1025  HOST_WIDE_INT hard_frame_pointer_offset;
1026  HOST_WIDE_INT stack_pointer_offset;
1027
1028  /* When save_regs_using_mov is set, emit prologue using
1029     move instead of push instructions.  */
1030  bool save_regs_using_mov;
1031};
1032
1033/* Code model option.  */
1034enum cmodel ix86_cmodel;
1035/* Asm dialect.  */
1036enum asm_dialect ix86_asm_dialect = ASM_ATT;
1037/* TLS dialects.  */
1038enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1039
1040/* Which unit we are generating floating point math for.  */
1041enum fpmath_unit ix86_fpmath;
1042
1043/* Which cpu are we scheduling for.  */
1044enum processor_type ix86_tune;
1045/* Which instruction set architecture to use.  */
1046enum processor_type ix86_arch;
1047
1048/* true if sse prefetch instruction is not NOOP.  */
1049int x86_prefetch_sse;
1050
1051/* ix86_regparm_string as a number */
1052static int ix86_regparm;
1053
1054/* -mstackrealign option */
1055extern int ix86_force_align_arg_pointer;
1056static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1057
1058/* Preferred alignment for stack boundary in bits.  */
1059unsigned int ix86_preferred_stack_boundary;
1060
1061/* Values 1-5: see jump.c */
1062int ix86_branch_cost;
1063
1064/* Variables which are this size or smaller are put in the data/bss
1065   or ldata/lbss sections.  */
1066
1067int ix86_section_threshold = 65536;
1068
1069/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
1070char internal_label_prefix[16];
1071int internal_label_prefix_len;
1072
1073static bool ix86_handle_option (size_t, const char *, int);
1074static void output_pic_addr_const (FILE *, rtx, int);
1075static void put_condition_code (enum rtx_code, enum machine_mode,
1076				int, int, FILE *);
1077static const char *get_some_local_dynamic_name (void);
1078static int get_some_local_dynamic_name_1 (rtx *, void *);
1079static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1080static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1081						   rtx *);
1082static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1083static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1084						   enum machine_mode);
1085static rtx get_thread_pointer (int);
1086static rtx legitimize_tls_address (rtx, enum tls_model, int);
1087static void get_pc_thunk_name (char [32], unsigned int);
1088static rtx gen_push (rtx);
1089static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1090static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1091static struct machine_function * ix86_init_machine_status (void);
1092static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1093static int ix86_nsaved_regs (void);
1094static void ix86_emit_save_regs (void);
1095static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1096static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1097static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1098static HOST_WIDE_INT ix86_GOT_alias_set (void);
1099static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1100static rtx ix86_expand_aligntest (rtx, int);
1101static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1102static int ix86_issue_rate (void);
1103static int ix86_adjust_cost (rtx, rtx, rtx, int);
1104static int ia32_multipass_dfa_lookahead (void);
1105static void ix86_init_mmx_sse_builtins (void);
1106static rtx x86_this_parameter (tree);
1107static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1108				 HOST_WIDE_INT, tree);
1109static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1110static void x86_file_start (void);
1111static void ix86_reorg (void);
1112static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1113static tree ix86_build_builtin_va_list (void);
1114static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1115					 tree, int *, int);
1116static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1117static bool ix86_scalar_mode_supported_p (enum machine_mode);
1118static bool ix86_vector_mode_supported_p (enum machine_mode);
1119
1120static int ix86_address_cost (rtx);
1121static bool ix86_cannot_force_const_mem (rtx);
1122static rtx ix86_delegitimize_address (rtx);
1123
1124static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1125
1126struct builtin_description;
1127static rtx ix86_expand_sse_comi (const struct builtin_description *,
1128				 tree, rtx);
1129static rtx ix86_expand_sse_compare (const struct builtin_description *,
1130				    tree, rtx);
1131static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1132static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1133static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1134static rtx ix86_expand_store_builtin (enum insn_code, tree);
1135static rtx safe_vector_operand (rtx, enum machine_mode);
1136static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1137static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1138static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1139static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1140static int ix86_fp_comparison_cost (enum rtx_code code);
1141static unsigned int ix86_select_alt_pic_regnum (void);
1142static int ix86_save_reg (unsigned int, int);
1143static void ix86_compute_frame_layout (struct ix86_frame *);
1144static int ix86_comp_type_attributes (tree, tree);
1145static int ix86_function_regparm (tree, tree);
1146const struct attribute_spec ix86_attribute_table[];
1147static bool ix86_function_ok_for_sibcall (tree, tree);
1148static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1149static int ix86_value_regno (enum machine_mode, tree, tree);
1150static bool contains_128bit_aligned_vector_p (tree);
1151static rtx ix86_struct_value_rtx (tree, int);
1152static bool ix86_ms_bitfield_layout_p (tree);
1153static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1154static int extended_reg_mentioned_1 (rtx *, void *);
1155static bool ix86_rtx_costs (rtx, int, int, int *);
1156static int min_insn_size (rtx);
1157static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1158static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1159static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1160				    tree, bool);
1161static void ix86_init_builtins (void);
1162static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1163static const char *ix86_mangle_fundamental_type (tree);
1164static tree ix86_stack_protect_fail (void);
1165static rtx ix86_internal_arg_pointer (void);
1166static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1167
1168/* This function is only used on Solaris.  */
1169static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1170  ATTRIBUTE_UNUSED;
1171
1172/* Register class used for passing given 64bit part of the argument.
1173   These represent classes as documented by the PS ABI, with the exception
1174   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1175   use SF or DFmode move instead of DImode to avoid reformatting penalties.
1176
1177   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1178   whenever possible (upper half does contain padding).
1179 */
1180enum x86_64_reg_class
1181  {
1182    X86_64_NO_CLASS,
1183    X86_64_INTEGER_CLASS,
1184    X86_64_INTEGERSI_CLASS,
1185    X86_64_SSE_CLASS,
1186    X86_64_SSESF_CLASS,
1187    X86_64_SSEDF_CLASS,
1188    X86_64_SSEUP_CLASS,
1189    X86_64_X87_CLASS,
1190    X86_64_X87UP_CLASS,
1191    X86_64_COMPLEX_X87_CLASS,
1192    X86_64_MEMORY_CLASS
1193  };
1194static const char * const x86_64_reg_class_name[] = {
1195  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1196  "sseup", "x87", "x87up", "cplx87", "no"
1197};
1198
1199#define MAX_CLASSES 4
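/* Classification example (illustrative only): the psABI splits an argument
   into eightbytes and assigns each one a class from the enumeration above,
   e.g. for

       struct example { double d; long l; };

   eightbyte 0 classifies as SSEDF and eightbyte 1 as INTEGER, so the struct
   is passed in one SSE register and one integer register.  MAX_CLASSES bounds
   how many eightbytes (and hence classes) a register-passed argument may
   occupy.  */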
1200
1201/* Table of constants used by fldpi, fldln2, etc....  */
1202static REAL_VALUE_TYPE ext_80387_constants_table [5];
1203static bool ext_80387_constants_init = 0;
1204static void init_ext_80387_constants (void);
1205static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1206static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1207static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1208static section *x86_64_elf_select_section (tree decl, int reloc,
1209					   unsigned HOST_WIDE_INT align)
1210					     ATTRIBUTE_UNUSED;
1211
1212/* Initialize the GCC target structure.  */
1213#undef TARGET_ATTRIBUTE_TABLE
1214#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1215#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1216#  undef TARGET_MERGE_DECL_ATTRIBUTES
1217#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1218#endif
1219
1220#undef TARGET_COMP_TYPE_ATTRIBUTES
1221#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1222
1223#undef TARGET_INIT_BUILTINS
1224#define TARGET_INIT_BUILTINS ix86_init_builtins
1225#undef TARGET_EXPAND_BUILTIN
1226#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1227
1228#undef TARGET_ASM_FUNCTION_EPILOGUE
1229#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1230
1231#undef TARGET_ENCODE_SECTION_INFO
1232#ifndef SUBTARGET_ENCODE_SECTION_INFO
1233#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1234#else
1235#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1236#endif
1237
1238#undef TARGET_ASM_OPEN_PAREN
1239#define TARGET_ASM_OPEN_PAREN ""
1240#undef TARGET_ASM_CLOSE_PAREN
1241#define TARGET_ASM_CLOSE_PAREN ""
1242
1243#undef TARGET_ASM_ALIGNED_HI_OP
1244#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1245#undef TARGET_ASM_ALIGNED_SI_OP
1246#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1247#ifdef ASM_QUAD
1248#undef TARGET_ASM_ALIGNED_DI_OP
1249#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1250#endif
1251
1252#undef TARGET_ASM_UNALIGNED_HI_OP
1253#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1254#undef TARGET_ASM_UNALIGNED_SI_OP
1255#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1256#undef TARGET_ASM_UNALIGNED_DI_OP
1257#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1258
1259#undef TARGET_SCHED_ADJUST_COST
1260#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1261#undef TARGET_SCHED_ISSUE_RATE
1262#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1263#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1264#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1265  ia32_multipass_dfa_lookahead
1266
1267#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1268#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1269
1270#ifdef HAVE_AS_TLS
1271#undef TARGET_HAVE_TLS
1272#define TARGET_HAVE_TLS true
1273#endif
1274#undef TARGET_CANNOT_FORCE_CONST_MEM
1275#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1276#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1277#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1278
1279#undef TARGET_DELEGITIMIZE_ADDRESS
1280#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1281
1282#undef TARGET_MS_BITFIELD_LAYOUT_P
1283#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1284
1285#if TARGET_MACHO
1286#undef TARGET_BINDS_LOCAL_P
1287#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1288#endif
1289
1290#undef TARGET_ASM_OUTPUT_MI_THUNK
1291#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1292#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1293#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1294
1295#undef TARGET_ASM_FILE_START
1296#define TARGET_ASM_FILE_START x86_file_start
1297
1298#undef TARGET_DEFAULT_TARGET_FLAGS
1299#define TARGET_DEFAULT_TARGET_FLAGS	\
1300  (TARGET_DEFAULT			\
1301   | TARGET_64BIT_DEFAULT		\
1302   | TARGET_SUBTARGET_DEFAULT		\
1303   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1304
1305#undef TARGET_HANDLE_OPTION
1306#define TARGET_HANDLE_OPTION ix86_handle_option
1307
1308#undef TARGET_RTX_COSTS
1309#define TARGET_RTX_COSTS ix86_rtx_costs
1310#undef TARGET_ADDRESS_COST
1311#define TARGET_ADDRESS_COST ix86_address_cost
1312
1313#undef TARGET_FIXED_CONDITION_CODE_REGS
1314#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1315#undef TARGET_CC_MODES_COMPATIBLE
1316#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1317
1318#undef TARGET_MACHINE_DEPENDENT_REORG
1319#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1320
1321#undef TARGET_BUILD_BUILTIN_VA_LIST
1322#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1323
1324#undef TARGET_MD_ASM_CLOBBERS
1325#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1326
1327#undef TARGET_PROMOTE_PROTOTYPES
1328#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1329#undef TARGET_STRUCT_VALUE_RTX
1330#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1331#undef TARGET_SETUP_INCOMING_VARARGS
1332#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1333#undef TARGET_MUST_PASS_IN_STACK
1334#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1335#undef TARGET_PASS_BY_REFERENCE
1336#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1337#undef TARGET_INTERNAL_ARG_POINTER
1338#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1339#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1340#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1341
1342#undef TARGET_GIMPLIFY_VA_ARG_EXPR
1343#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1344
1345#undef TARGET_SCALAR_MODE_SUPPORTED_P
1346#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1347
1348#undef TARGET_VECTOR_MODE_SUPPORTED_P
1349#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1350
1351#ifdef HAVE_AS_TLS
1352#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1353#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1354#endif
1355
1356#ifdef SUBTARGET_INSERT_ATTRIBUTES
1357#undef TARGET_INSERT_ATTRIBUTES
1358#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1359#endif
1360
1361#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1362#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1363
1364#undef TARGET_STACK_PROTECT_FAIL
1365#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1366
1367#undef TARGET_FUNCTION_VALUE
1368#define TARGET_FUNCTION_VALUE ix86_function_value
1369
1370struct gcc_target targetm = TARGET_INITIALIZER;
1371
1372
1373/* The svr4 ABI for the i386 says that records and unions are returned
1374   in memory.  */
1375#ifndef DEFAULT_PCC_STRUCT_RETURN
1376#define DEFAULT_PCC_STRUCT_RETURN 1
1377#endif
1378
1379/* Implement TARGET_HANDLE_OPTION.  */
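/* Note that turning an ISA extension off here also clears the extensions
   that depend on it; e.g. "-msse2 -mno-sse" really disables SSE2, because
   OPT_msse with a zero value clears MASK_SSE2 and MASK_SSE3 and records them
   in target_flags_explicit, so override_options will not re-enable them from
   the -march defaults.  */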
1380
1381static bool
1382ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1383{
1384  switch (code)
1385    {
1386    case OPT_m3dnow:
1387      if (!value)
1388	{
1389	  target_flags &= ~MASK_3DNOW_A;
1390	  target_flags_explicit |= MASK_3DNOW_A;
1391	}
1392      return true;
1393
1394    case OPT_mmmx:
1395      if (!value)
1396	{
1397	  target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1398	  target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1399	}
1400      return true;
1401
1402    case OPT_msse:
1403      if (!value)
1404	{
1405	  target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1406	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1407	}
1408      return true;
1409
1410    case OPT_msse2:
1411      if (!value)
1412	{
1413	  target_flags &= ~MASK_SSE3;
1414	  target_flags_explicit |= MASK_SSE3;
1415	}
1416      return true;
1417
1418    default:
1419      return true;
1420    }
1421}
1422
1423/* Sometimes certain combinations of command options do not make
1424   sense on a particular target machine.  You can define a macro
1425   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
1426   defined, is executed once just after all the command options have
1427   been parsed.
1428
1429   Don't use this macro to turn on various extra optimizations for
1430   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
1431
1432void
1433override_options (void)
1434{
1435  int i;
1436  int ix86_tune_defaulted = 0;
1437
1438  /* Comes from final.c -- no real reason to change it.  */
1439#define MAX_CODE_ALIGN 16
1440
1441  static struct ptt
1442    {
1443      const struct processor_costs *cost;	/* Processor costs */
1444      const int target_enable;			/* Target flags to enable.  */
1445      const int target_disable;			/* Target flags to disable.  */
1446      const int align_loop;			/* Default alignments.  */
1447      const int align_loop_max_skip;
1448      const int align_jump;
1449      const int align_jump_max_skip;
1450      const int align_func;
1451    }
1452  const processor_target_table[PROCESSOR_max] =
1453    {
1454      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1455      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1456      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1457      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1458      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1459      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1460      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1461      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1462      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1463      {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1464      {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1465    };
1466
1467  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1468  static struct pta
1469    {
1470      const char *const name;		/* processor name or nickname.  */
1471      const enum processor_type processor;
1472      const enum pta_flags
1473	{
1474	  PTA_SSE = 1,
1475	  PTA_SSE2 = 2,
1476	  PTA_SSE3 = 4,
1477	  PTA_MMX = 8,
1478	  PTA_PREFETCH_SSE = 16,
1479	  PTA_3DNOW = 32,
1480	  PTA_3DNOW_A = 64,
1481	  PTA_64BIT = 128
1482	} flags;
1483    }
1484  const processor_alias_table[] =
1485    {
1486      {"i386", PROCESSOR_I386, 0},
1487      {"i486", PROCESSOR_I486, 0},
1488      {"i586", PROCESSOR_PENTIUM, 0},
1489      {"pentium", PROCESSOR_PENTIUM, 0},
1490      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1491      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1492      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1493      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1494      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1495      {"i686", PROCESSOR_PENTIUMPRO, 0},
1496      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1497      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1498      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1499      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1500      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1501      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1502				       | PTA_MMX | PTA_PREFETCH_SSE},
1503      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1504				        | PTA_MMX | PTA_PREFETCH_SSE},
1505      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1506				        | PTA_MMX | PTA_PREFETCH_SSE},
1507      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1508				        | PTA_MMX | PTA_PREFETCH_SSE},
1509      {"k6", PROCESSOR_K6, PTA_MMX},
1510      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1511      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1512      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1513				   | PTA_3DNOW_A},
1514      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1515					 | PTA_3DNOW | PTA_3DNOW_A},
1516      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1517				    | PTA_3DNOW_A | PTA_SSE},
1518      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1519				      | PTA_3DNOW_A | PTA_SSE},
1520      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1521				      | PTA_3DNOW_A | PTA_SSE},
1522      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1523			       | PTA_SSE | PTA_SSE2 },
1524      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1525				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1526      {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1527				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1528				      | PTA_SSE3 },
1529      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1530				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1531      {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1532				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1533				      | PTA_SSE3 },
1534      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1535				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1536      {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1537				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1538				      | PTA_SSE3 },
1539      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1540				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1541      {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
1542      {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
1543    };
1544
1545  int const pta_size = ARRAY_SIZE (processor_alias_table);
1546
1547#ifdef SUBTARGET_OVERRIDE_OPTIONS
1548  SUBTARGET_OVERRIDE_OPTIONS;
1549#endif
1550
1551#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1552  SUBSUBTARGET_OVERRIDE_OPTIONS;
1553#endif
1554
1555  /* Darwin x86-64 requires PIC, so -fPIC is the default there.  */
1556  if (TARGET_MACHO && TARGET_64BIT)
1557    flag_pic = 2;
1558
1559  /* Set the default values for switches whose default depends on TARGET_64BIT
1560     in case they weren't overridden by command line options.  */
1561  if (TARGET_64BIT)
1562    {
1563      /* Mach-O doesn't support omitting the frame pointer for now.  */
1564      if (flag_omit_frame_pointer == 2)
1565	flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1566      if (flag_asynchronous_unwind_tables == 2)
1567	flag_asynchronous_unwind_tables = 1;
1568      if (flag_pcc_struct_return == 2)
1569	flag_pcc_struct_return = 0;
1570    }
1571  else
1572    {
1573      if (flag_omit_frame_pointer == 2)
1574	flag_omit_frame_pointer = 0;
1575      if (flag_asynchronous_unwind_tables == 2)
1576	flag_asynchronous_unwind_tables = 0;
1577      if (flag_pcc_struct_return == 2)
1578	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1579    }
1580
1581  /* Need to check -mtune=generic first.  */
1582  if (ix86_tune_string)
1583    {
1584      if (!strcmp (ix86_tune_string, "generic")
1585	  || !strcmp (ix86_tune_string, "i686")
1586	  /* As special support for cross compilers we read -mtune=native
1587	     as -mtune=generic.  With native compilers we won't see the
1588	     -mtune=native, as it was changed by the driver.  */
1589	  || !strcmp (ix86_tune_string, "native"))
1590	{
1591	  if (TARGET_64BIT)
1592	    ix86_tune_string = "generic64";
1593	  else
1594	    ix86_tune_string = "generic32";
1595	}
1596      else if (!strncmp (ix86_tune_string, "generic", 7))
1597	error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1598    }
1599  else
1600    {
1601      if (ix86_arch_string)
1602	ix86_tune_string = ix86_arch_string;
1603      if (!ix86_tune_string)
1604	{
1605	  ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1606	  ix86_tune_defaulted = 1;
1607	}
1608
1609      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
1610	 need to use a sensible tune option.  */
1611      if (!strcmp (ix86_tune_string, "generic")
1612	  || !strcmp (ix86_tune_string, "x86-64")
1613	  || !strcmp (ix86_tune_string, "i686"))
1614	{
1615	  if (TARGET_64BIT)
1616	    ix86_tune_string = "generic64";
1617	  else
1618	    ix86_tune_string = "generic32";
1619	}
1620    }
1621  if (!strcmp (ix86_tune_string, "x86-64"))
1622    warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated.  Use -mtune=k8 or "
1623	     "-mtune=generic instead as appropriate.");
1624
1625  if (!ix86_arch_string)
1626    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1627  if (!strcmp (ix86_arch_string, "generic"))
1628    error ("generic CPU can be used only for -mtune= switch");
1629  if (!strncmp (ix86_arch_string, "generic", 7))
1630    error ("bad value (%s) for -march= switch", ix86_arch_string);
1631
1632  if (ix86_cmodel_string != 0)
1633    {
1634      if (!strcmp (ix86_cmodel_string, "small"))
1635	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1636      else if (!strcmp (ix86_cmodel_string, "medium"))
1637	ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1638      else if (flag_pic)
1639	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1640      else if (!strcmp (ix86_cmodel_string, "32"))
1641	ix86_cmodel = CM_32;
1642      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1643	ix86_cmodel = CM_KERNEL;
1644      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1645	ix86_cmodel = CM_LARGE;
1646      else
1647	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1648    }
1649  else
1650    {
1651      ix86_cmodel = CM_32;
1652      if (TARGET_64BIT)
1653	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1654    }
1655  if (ix86_asm_string != 0)
1656    {
1657      if (! TARGET_MACHO
1658	  && !strcmp (ix86_asm_string, "intel"))
1659	ix86_asm_dialect = ASM_INTEL;
1660      else if (!strcmp (ix86_asm_string, "att"))
1661	ix86_asm_dialect = ASM_ATT;
1662      else
1663	error ("bad value (%s) for -masm= switch", ix86_asm_string);
1664    }
1665  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1666    error ("code model %qs not supported in the %s bit mode",
1667	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1668  if (ix86_cmodel == CM_LARGE)
1669    sorry ("code model %<large%> not supported yet");
1670  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1671    sorry ("%i-bit mode not compiled in",
1672	   (target_flags & MASK_64BIT) ? 64 : 32);
1673
1674  for (i = 0; i < pta_size; i++)
1675    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1676      {
1677	ix86_arch = processor_alias_table[i].processor;
1678	/* Default cpu tuning to the architecture.  */
1679	ix86_tune = ix86_arch;
1680	if (processor_alias_table[i].flags & PTA_MMX
1681	    && !(target_flags_explicit & MASK_MMX))
1682	  target_flags |= MASK_MMX;
1683	if (processor_alias_table[i].flags & PTA_3DNOW
1684	    && !(target_flags_explicit & MASK_3DNOW))
1685	  target_flags |= MASK_3DNOW;
1686	if (processor_alias_table[i].flags & PTA_3DNOW_A
1687	    && !(target_flags_explicit & MASK_3DNOW_A))
1688	  target_flags |= MASK_3DNOW_A;
1689	if (processor_alias_table[i].flags & PTA_SSE
1690	    && !(target_flags_explicit & MASK_SSE))
1691	  target_flags |= MASK_SSE;
1692	if (processor_alias_table[i].flags & PTA_SSE2
1693	    && !(target_flags_explicit & MASK_SSE2))
1694	  target_flags |= MASK_SSE2;
1695	if (processor_alias_table[i].flags & PTA_SSE3
1696	    && !(target_flags_explicit & MASK_SSE3))
1697	  target_flags |= MASK_SSE3;
1698	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1699	  x86_prefetch_sse = true;
1700	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1701	  error ("CPU you selected does not support x86-64 "
1702		 "instruction set");
1703	break;
1704      }
1705
1706  if (i == pta_size)
1707    error ("bad value (%s) for -march= switch", ix86_arch_string);
1708
1709  for (i = 0; i < pta_size; i++)
1710    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1711      {
1712	ix86_tune = processor_alias_table[i].processor;
1713	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1714	  {
1715	    if (ix86_tune_defaulted)
1716	      {
1717		ix86_tune_string = "x86-64";
1718		for (i = 0; i < pta_size; i++)
1719		  if (! strcmp (ix86_tune_string,
1720				processor_alias_table[i].name))
1721		    break;
1722		ix86_tune = processor_alias_table[i].processor;
1723	      }
1724	    else
1725	      error ("CPU you selected does not support x86-64 "
1726		     "instruction set");
1727	  }
1728        /* Intel CPUs have always interpreted SSE prefetch instructions as
1729	   NOPs; so, we can enable SSE prefetch instructions even when
1730	   -mtune (rather than -march) points us to a processor that has them.
1731	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1732	   higher processors.  */
1733	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1734	  x86_prefetch_sse = true;
1735	break;
1736      }
1737  if (i == pta_size)
1738    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1739
1740  if (optimize_size)
1741    ix86_cost = &size_cost;
1742  else
1743    ix86_cost = processor_target_table[ix86_tune].cost;
1744  target_flags |= processor_target_table[ix86_tune].target_enable;
1745  target_flags &= ~processor_target_table[ix86_tune].target_disable;
1746
1747  /* Arrange to set up i386_stack_locals for all functions.  */
1748  init_machine_status = ix86_init_machine_status;
1749
1750  /* Validate -mregparm= value.  */
1751  if (ix86_regparm_string)
1752    {
1753      i = atoi (ix86_regparm_string);
1754      if (i < 0 || i > REGPARM_MAX)
1755	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1756      else
1757	ix86_regparm = i;
1758    }
1759  else
1760   if (TARGET_64BIT)
1761     ix86_regparm = REGPARM_MAX;
1762
1763  /* If the user has provided any of the -malign-* options,
1764     warn and use that value only if -falign-* is not set.
1765     Remove this code in GCC 3.2 or later.  */
1766  if (ix86_align_loops_string)
1767    {
1768      warning (0, "-malign-loops is obsolete, use -falign-loops");
1769      if (align_loops == 0)
1770	{
1771	  i = atoi (ix86_align_loops_string);
1772	  if (i < 0 || i > MAX_CODE_ALIGN)
1773	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1774	  else
1775	    align_loops = 1 << i;
1776	}
1777    }
1778
1779  if (ix86_align_jumps_string)
1780    {
1781      warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1782      if (align_jumps == 0)
1783	{
1784	  i = atoi (ix86_align_jumps_string);
1785	  if (i < 0 || i > MAX_CODE_ALIGN)
1786	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1787	  else
1788	    align_jumps = 1 << i;
1789	}
1790    }
1791
1792  if (ix86_align_funcs_string)
1793    {
1794      warning (0, "-malign-functions is obsolete, use -falign-functions");
1795      if (align_functions == 0)
1796	{
1797	  i = atoi (ix86_align_funcs_string);
1798	  if (i < 0 || i > MAX_CODE_ALIGN)
1799	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1800	  else
1801	    align_functions = 1 << i;
1802	}
1803    }
1804
1805  /* Default align_* from the processor table.  */
1806  if (align_loops == 0)
1807    {
1808      align_loops = processor_target_table[ix86_tune].align_loop;
1809      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1810    }
1811  if (align_jumps == 0)
1812    {
1813      align_jumps = processor_target_table[ix86_tune].align_jump;
1814      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1815    }
1816  if (align_functions == 0)
1817    {
1818      align_functions = processor_target_table[ix86_tune].align_func;
1819    }
1820
1821  /* Validate -mbranch-cost= value, or provide default.  */
1822  ix86_branch_cost = ix86_cost->branch_cost;
1823  if (ix86_branch_cost_string)
1824    {
1825      i = atoi (ix86_branch_cost_string);
1826      if (i < 0 || i > 5)
1827	error ("-mbranch-cost=%d is not between 0 and 5", i);
1828      else
1829	ix86_branch_cost = i;
1830    }
1831  if (ix86_section_threshold_string)
1832    {
1833      i = atoi (ix86_section_threshold_string);
1834      if (i < 0)
1835	error ("-mlarge-data-threshold=%d is negative", i);
1836      else
1837	ix86_section_threshold = i;
1838    }
1839
1840  if (ix86_tls_dialect_string)
1841    {
1842      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1843	ix86_tls_dialect = TLS_DIALECT_GNU;
1844      else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1845	ix86_tls_dialect = TLS_DIALECT_GNU2;
1846      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1847	ix86_tls_dialect = TLS_DIALECT_SUN;
1848      else
1849	error ("bad value (%s) for -mtls-dialect= switch",
1850	       ix86_tls_dialect_string);
1851    }
1852
1853  /* Keep nonleaf frame pointers.  */
1854  if (flag_omit_frame_pointer)
1855    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1856  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1857    flag_omit_frame_pointer = 1;
1858
1859  /* If we're doing fast math, we don't care about comparison order
1860     wrt NaNs.  This lets us use a shorter comparison sequence.  */
1861  if (flag_finite_math_only)
1862    target_flags &= ~MASK_IEEE_FP;
1863
1864  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1865     since the insns won't need emulation.  */
1866  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1867    target_flags &= ~MASK_NO_FANCY_MATH_387;
1868
1869  /* Likewise, if the target doesn't have a 387, or we've specified
1870     software floating point, don't use 387 inline intrinsics.  */
1871  if (!TARGET_80387)
1872    target_flags |= MASK_NO_FANCY_MATH_387;
1873
1874  /* Turn on SSE2 builtins for -msse3.  */
1875  if (TARGET_SSE3)
1876    target_flags |= MASK_SSE2;
1877
1878  /* Turn on SSE builtins for -msse2.  */
1879  if (TARGET_SSE2)
1880    target_flags |= MASK_SSE;
1881
1882  /* Turn on MMX builtins for -msse.  */
1883  if (TARGET_SSE)
1884    {
1885      target_flags |= MASK_MMX & ~target_flags_explicit;
1886      x86_prefetch_sse = true;
1887    }
1888
1889  /* Turn on MMX builtins for 3Dnow.  */
1890  if (TARGET_3DNOW)
1891    target_flags |= MASK_MMX;
1892
1893  if (TARGET_64BIT)
1894    {
1895      if (TARGET_ALIGN_DOUBLE)
1896	error ("-malign-double makes no sense in the 64bit mode");
1897      if (TARGET_RTD)
1898	error ("-mrtd calling convention not supported in the 64bit mode");
1899
1900      /* Enable by default the SSE and MMX builtins.  Do allow the user to
1901	 explicitly disable any of these.  In particular, disabling SSE and
1902	 MMX for kernel code is extremely useful.  */
1903      target_flags
1904	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1905	    & ~target_flags_explicit);
1906     }
1907  else
1908    {
1909      /* The i386 ABI does not specify a red zone.  It still makes sense to use
1910         one when the programmer takes care to keep the stack from being destroyed.  */
1911      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1912        target_flags |= MASK_NO_RED_ZONE;
1913    }
1914
1915  /* Validate -mpreferred-stack-boundary= value, or provide default.
1916     The default of 128 bits is for Pentium III's SSE __m128.  We can't
1917     lower it for optimize_size, because then we could not mix object
1918     files compiled with -Os and -On.  */
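  /* A worked example of the conversion below: -mpreferred-stack-boundary=4
     yields (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. the 16-byte alignment
     that __m128 stack slots require.  */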
1919  ix86_preferred_stack_boundary = 128;
1920  if (ix86_preferred_stack_boundary_string)
1921    {
1922      i = atoi (ix86_preferred_stack_boundary_string);
1923      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1924	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1925	       TARGET_64BIT ? 4 : 2);
1926      else
1927	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1928    }
1929
1930  /* Accept -msseregparm only if at least SSE support is enabled.  */
1931  if (TARGET_SSEREGPARM
1932      && ! TARGET_SSE)
1933    error ("-msseregparm used without SSE enabled");
1934
1935  ix86_fpmath = TARGET_FPMATH_DEFAULT;
1936
1937  if (ix86_fpmath_string != 0)
1938    {
1939      if (! strcmp (ix86_fpmath_string, "387"))
1940	ix86_fpmath = FPMATH_387;
1941      else if (! strcmp (ix86_fpmath_string, "sse"))
1942	{
1943	  if (!TARGET_SSE)
1944	    {
1945	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
1946	      ix86_fpmath = FPMATH_387;
1947	    }
1948	  else
1949	    ix86_fpmath = FPMATH_SSE;
1950	}
1951      else if (! strcmp (ix86_fpmath_string, "387,sse")
1952	       || ! strcmp (ix86_fpmath_string, "sse,387"))
1953	{
1954	  if (!TARGET_SSE)
1955	    {
1956	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
1957	      ix86_fpmath = FPMATH_387;
1958	    }
1959	  else if (!TARGET_80387)
1960	    {
1961	      warning (0, "387 instruction set disabled, using SSE arithmetics");
1962	      ix86_fpmath = FPMATH_SSE;
1963	    }
1964	  else
1965	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
1966	}
1967      else
1968	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1969    }
1970
1971  /* If the i387 is disabled, then do not return values in it. */
1972  if (!TARGET_80387)
1973    target_flags &= ~MASK_FLOAT_RETURNS;
1974
1975  if ((x86_accumulate_outgoing_args & TUNEMASK)
1976      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1977      && !optimize_size)
1978    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1979
1980  /* ??? Unwind info is not correct around the CFG unless either a frame
1981     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
1982     unwind info generation to be aware of the CFG and propagating states
1983     around edges.  */
1984  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1985       || flag_exceptions || flag_non_call_exceptions)
1986      && flag_omit_frame_pointer
1987      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1988    {
1989      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1990	warning (0, "unwind tables currently require either a frame pointer "
1991		 "or -maccumulate-outgoing-args for correctness");
1992      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1993    }
1994
1995  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
1996  {
1997    char *p;
1998    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1999    p = strchr (internal_label_prefix, 'X');
2000    internal_label_prefix_len = p - internal_label_prefix;
2001    *p = '\0';
2002  }
2003
2004  /* When the scheduling description is not available, disable the scheduler
2005     pass so it won't slow down compilation and make x87 code slower.  */
2006  if (!TARGET_SCHEDULE)
2007    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2008}
2009
2010/* Switch to the appropriate section for output of DECL.
2011   DECL is either a `VAR_DECL' node or a constant of some sort.
2012   RELOC indicates whether forming the initial value of DECL requires
2013   link-time relocations.  */
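/* For instance, with -mcmodel=medium a writable global whose size exceeds
   ix86_section_threshold is treated as large data and is placed in .ldata
   (or .lbss / .lrodata for uninitialized and read-only data) rather than the
   default sections; everything else falls through to
   default_elf_select_section.  */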
2014
2015static section *
2016x86_64_elf_select_section (tree decl, int reloc,
2017			   unsigned HOST_WIDE_INT align)
2018{
2019  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2020      && ix86_in_large_data_p (decl))
2021    {
2022      const char *sname = NULL;
2023      unsigned int flags = SECTION_WRITE;
2024      switch (categorize_decl_for_section (decl, reloc))
2025	{
2026	case SECCAT_DATA:
2027	  sname = ".ldata";
2028	  break;
2029	case SECCAT_DATA_REL:
2030	  sname = ".ldata.rel";
2031	  break;
2032	case SECCAT_DATA_REL_LOCAL:
2033	  sname = ".ldata.rel.local";
2034	  break;
2035	case SECCAT_DATA_REL_RO:
2036	  sname = ".ldata.rel.ro";
2037	  break;
2038	case SECCAT_DATA_REL_RO_LOCAL:
2039	  sname = ".ldata.rel.ro.local";
2040	  break;
2041	case SECCAT_BSS:
2042	  sname = ".lbss";
2043	  flags |= SECTION_BSS;
2044	  break;
2045	case SECCAT_RODATA:
2046	case SECCAT_RODATA_MERGE_STR:
2047	case SECCAT_RODATA_MERGE_STR_INIT:
2048	case SECCAT_RODATA_MERGE_CONST:
2049	  sname = ".lrodata";
2050	  flags = 0;
2051	  break;
2052	case SECCAT_SRODATA:
2053	case SECCAT_SDATA:
2054	case SECCAT_SBSS:
2055	  gcc_unreachable ();
2056	case SECCAT_TEXT:
2057	case SECCAT_TDATA:
2058	case SECCAT_TBSS:
2059	  /* We don't split these for the medium model.  Place them into
2060	     default sections and hope for the best.  */
2061	  break;
2062	}
2063      if (sname)
2064	{
2065	  /* We might get called with string constants, but get_named_section
2066	     doesn't like them as they are not DECLs.  Also, we need to set
2067	     flags in that case.  */
2068	  if (!DECL_P (decl))
2069	    return get_section (sname, flags, NULL);
2070	  return get_named_section (decl, sname, reloc);
2071	}
2072    }
2073  return default_elf_select_section (decl, reloc, align);
2074}
2075
2076/* Build up a unique section name, expressed as a
2077   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2078   RELOC indicates whether the initial value of EXP requires
2079   link-time relocations.  */
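/* For example, a large-model variable "counter" categorized as SECCAT_DATA
   gets the section name ".ldata.counter", or ".gnu.linkonce.ld.counter" when
   it must be one-only and COMDAT groups are not available.  */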
2080
2081static void
2082x86_64_elf_unique_section (tree decl, int reloc)
2083{
2084  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2085      && ix86_in_large_data_p (decl))
2086    {
2087      const char *prefix = NULL;
2088      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
2089      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2090
2091      switch (categorize_decl_for_section (decl, reloc))
2092	{
2093	case SECCAT_DATA:
2094	case SECCAT_DATA_REL:
2095	case SECCAT_DATA_REL_LOCAL:
2096	case SECCAT_DATA_REL_RO:
2097	case SECCAT_DATA_REL_RO_LOCAL:
2098          prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2099	  break;
2100	case SECCAT_BSS:
2101          prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2102	  break;
2103	case SECCAT_RODATA:
2104	case SECCAT_RODATA_MERGE_STR:
2105	case SECCAT_RODATA_MERGE_STR_INIT:
2106	case SECCAT_RODATA_MERGE_CONST:
2107          prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2108	  break;
2109	case SECCAT_SRODATA:
2110	case SECCAT_SDATA:
2111	case SECCAT_SBSS:
2112	  gcc_unreachable ();
2113	case SECCAT_TEXT:
2114	case SECCAT_TDATA:
2115	case SECCAT_TBSS:
2116	  /* We don't split these for the medium model.  Place them into
2117	     default sections and hope for the best.  */
2118	  break;
2119	}
2120      if (prefix)
2121	{
2122	  const char *name;
2123	  size_t nlen, plen;
2124	  char *string;
2125	  plen = strlen (prefix);
2126
2127	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2128	  name = targetm.strip_name_encoding (name);
2129	  nlen = strlen (name);
2130
2131	  string = alloca (nlen + plen + 1);
2132	  memcpy (string, prefix, plen);
2133	  memcpy (string + plen, name, nlen + 1);
2134
2135	  DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2136	  return;
2137	}
2138    }
2139  default_unique_section (decl, reloc);
2140}
2141
2142#ifdef COMMON_ASM_OP
2143/* This says how to output assembler code to declare an
2144   uninitialized external linkage data object.
2145
2146   For medium-model x86-64 we need to use the .largecomm directive for
2147   large objects.  */
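/* E.g. a sufficiently large common symbol "buf" is emitted as
       .largecomm	buf,<size>,<alignment in bytes>
   while smaller objects keep using COMMON_ASM_OP (".comm").  */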
2148void
2149x86_elf_aligned_common (FILE *file,
2150			const char *name, unsigned HOST_WIDE_INT size,
2151			int align)
2152{
2153  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2154      && size > (unsigned int)ix86_section_threshold)
2155    fprintf (file, ".largecomm\t");
2156  else
2157    fprintf (file, "%s", COMMON_ASM_OP);
2158  assemble_name (file, name);
2159  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2160	   size, align / BITS_PER_UNIT);
2161}
2162
2163/* Utility function for targets to use in implementing
2164   ASM_OUTPUT_ALIGNED_BSS.  */
2165
2166void
2167x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2168			const char *name, unsigned HOST_WIDE_INT size,
2169			int align)
2170{
2171  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2172      && size > (unsigned int)ix86_section_threshold)
2173    switch_to_section (get_named_section (decl, ".lbss", 0));
2174  else
2175    switch_to_section (bss_section);
2176  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2177#ifdef ASM_DECLARE_OBJECT_NAME
2178  last_assemble_variable_decl = decl;
2179  ASM_DECLARE_OBJECT_NAME (file, name, decl);
2180#else
2181  /* Standard thing is just output label for the object.  */
2182  ASM_OUTPUT_LABEL (file, name);
2183#endif /* ASM_DECLARE_OBJECT_NAME */
2184  ASM_OUTPUT_SKIP (file, size ? size : 1);
2185}
2186#endif
2187
2188void
2189optimization_options (int level, int size ATTRIBUTE_UNUSED)
2190{
2191  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
2192     make the register shortage problem even worse.  */
2193#ifdef INSN_SCHEDULING
2194  if (level > 1)
2195    flag_schedule_insns = 0;
2196#endif
2197
2198  if (TARGET_MACHO)
2199    /* The Darwin libraries never set errno, so we might as well
2200       avoid calling them when that's the only reason we would.  */
2201    flag_errno_math = 0;
2202
2203  /* The default values of these switches depend on TARGET_64BIT,
2204     which is not known at this moment.  Mark these values with 2 and
2205     let the user override them.  If no command line option
2206     specifies them, we will set the defaults in override_options.  */
2207  if (optimize >= 1)
2208    flag_omit_frame_pointer = 2;
2209  flag_pcc_struct_return = 2;
2210  flag_asynchronous_unwind_tables = 2;
2211#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2212  SUBTARGET_OPTIMIZATION_OPTIONS;
2213#endif
2214}
2215
2216/* Table of valid machine attributes.  */
2217const struct attribute_spec ix86_attribute_table[] =
2218{
2219  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2220  /* Stdcall attribute says callee is responsible for popping arguments
2221     if they are not variable.  */
2222  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2223  /* Fastcall attribute says callee is responsible for popping arguments
2224     if they are not variable.  */
2225  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2226  /* Cdecl attribute says the callee is a normal C declaration.  */
2227  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2228  /* Regparm attribute specifies how many integer arguments are to be
2229     passed in registers.  */
2230  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
2231  /* Sseregparm attribute says we are using x86_64 calling conventions
2232     for FP arguments.  */
2233  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2234  /* force_align_arg_pointer says this function realigns the stack at entry.  */
2235  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2236    false, true,  true, ix86_handle_cconv_attribute },
2237#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2238  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2239  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2240  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
2241#endif
2242  { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
2243  { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
2244#ifdef SUBTARGET_ATTRIBUTE_TABLE
2245  SUBTARGET_ATTRIBUTE_TABLE,
2246#endif
2247  { NULL,        0, 0, false, false, false, NULL }
2248};
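/* Typical uses of the attributes above (illustrative declarations only):

     int __attribute__((stdcall))  f (int a, int b);          // callee pops 8 bytes
     int __attribute__((fastcall)) g (int a, int b);          // a in %ecx, b in %edx
     int __attribute__((regparm(3))) h (int a, int b, int c); // %eax, %edx, %ecx
*/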
2249
2250/* Decide whether we can make a sibling call to a function.  DECL is the
2251   declaration of the function being targeted by the call and EXP is the
2252   CALL_EXPR representing the call.  */
2253
2254static bool
2255ix86_function_ok_for_sibcall (tree decl, tree exp)
2256{
2257  tree func;
2258  rtx a, b;
2259
2260  /* If we are generating position-independent code, we cannot sibcall
2261     optimize any indirect call, or a direct call to a global function,
2262     as the PLT requires %ebx be live.  */
2263  if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2264    return false;
2265
2266  if (decl)
2267    func = decl;
2268  else
2269    {
2270      func = TREE_TYPE (TREE_OPERAND (exp, 0));
2271      if (POINTER_TYPE_P (func))
2272        func = TREE_TYPE (func);
2273    }
2274
2275  /* Check that the return value locations are the same.  For example,
2276     if we are returning floats on the 80387 register stack, we cannot
2277     make a sibcall from a function that doesn't return a float to a
2278     function that does or, conversely, from a function that does return
2279     a float to a function that doesn't; the necessary stack adjustment
2280     would not be executed.  This is also the place we notice
2281     differences in the return value ABI.  Note that it is ok for one
2282     of the functions to have void return type as long as the return
2283     value of the other is passed in a register.  */
2284  a = ix86_function_value (TREE_TYPE (exp), func, false);
2285  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2286			   cfun->decl, false);
2287  if (STACK_REG_P (a) || STACK_REG_P (b))
2288    {
2289      if (!rtx_equal_p (a, b))
2290	return false;
2291    }
2292  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2293    ;
2294  else if (!rtx_equal_p (a, b))
2295    return false;
2296
2297  /* If this call is indirect, we'll need to be able to use a call-clobbered
2298     register for the address of the target function.  Make sure that none
2299     of those registers is used for passing parameters.  */
2300  if (!decl && !TARGET_64BIT)
2301    {
2302      tree type;
2303
2304      /* We're looking at the CALL_EXPR, we need the type of the function.  */
2305      type = TREE_OPERAND (exp, 0);		/* pointer expression */
2306      type = TREE_TYPE (type);			/* pointer type */
2307      type = TREE_TYPE (type);			/* function type */
2308
2309      if (ix86_function_regparm (type, NULL) >= 3)
2310	{
2311	  /* ??? Need to count the actual number of registers to be used,
2312	     not the possible number of registers.  Fix later.  */
2313	  return false;
2314	}
2315    }
2316
2317#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2318  /* Dllimport'd functions are also called indirectly.  */
2319  if (decl && DECL_DLLIMPORT_P (decl)
2320      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2321    return false;
2322#endif
2323
2324  /* If we force-aligned the stack, then sibcalling would unalign the
2325     stack, which may break the called function.  */
2326  if (cfun->machine->force_align_arg_pointer)
2327    return false;
2328
2329  /* Otherwise okay.  That also includes certain types of indirect calls.  */
2330  return true;
2331}
2332
2333/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2334   calling convention attributes;
2335   arguments as in struct attribute_spec.handler.  */
2336
2337static tree
2338ix86_handle_cconv_attribute (tree *node, tree name,
2339				   tree args,
2340				   int flags ATTRIBUTE_UNUSED,
2341				   bool *no_add_attrs)
2342{
2343  if (TREE_CODE (*node) != FUNCTION_TYPE
2344      && TREE_CODE (*node) != METHOD_TYPE
2345      && TREE_CODE (*node) != FIELD_DECL
2346      && TREE_CODE (*node) != TYPE_DECL)
2347    {
2348      warning (OPT_Wattributes, "%qs attribute only applies to functions",
2349	       IDENTIFIER_POINTER (name));
2350      *no_add_attrs = true;
2351      return NULL_TREE;
2352    }
2353
2354  /* Can combine regparm with all attributes but fastcall.  */
2355  if (is_attribute_p ("regparm", name))
2356    {
2357      tree cst;
2358
2359      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2360        {
2361	  error ("fastcall and regparm attributes are not compatible");
2362	}
2363
2364      cst = TREE_VALUE (args);
2365      if (TREE_CODE (cst) != INTEGER_CST)
2366	{
2367	  warning (OPT_Wattributes,
2368		   "%qs attribute requires an integer constant argument",
2369		   IDENTIFIER_POINTER (name));
2370	  *no_add_attrs = true;
2371	}
2372      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2373	{
2374	  warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2375		   IDENTIFIER_POINTER (name), REGPARM_MAX);
2376	  *no_add_attrs = true;
2377	}
2378
2379      if (!TARGET_64BIT
2380	  && lookup_attribute (ix86_force_align_arg_pointer_string,
2381			       TYPE_ATTRIBUTES (*node))
2382	  && compare_tree_int (cst, REGPARM_MAX-1) > 0)
2383	{
2384	  error ("%s functions limited to %d register parameters",
2385		 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2386	}
2387
2388      return NULL_TREE;
2389    }
2390
2391  if (TARGET_64BIT)
2392    {
2393      warning (OPT_Wattributes, "%qs attribute ignored",
2394	       IDENTIFIER_POINTER (name));
2395      *no_add_attrs = true;
2396      return NULL_TREE;
2397    }
2398
2399  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
2400  if (is_attribute_p ("fastcall", name))
2401    {
2402      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2403        {
2404	  error ("fastcall and cdecl attributes are not compatible");
2405	}
2406      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2407        {
2408	  error ("fastcall and stdcall attributes are not compatible");
2409	}
2410      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2411        {
2412	  error ("fastcall and regparm attributes are not compatible");
2413	}
2414    }
2415
2416  /* Can combine stdcall with fastcall (redundant), regparm and
2417     sseregparm.  */
2418  else if (is_attribute_p ("stdcall", name))
2419    {
2420      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2421        {
2422	  error ("stdcall and cdecl attributes are not compatible");
2423	}
2424      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2425        {
2426	  error ("stdcall and fastcall attributes are not compatible");
2427	}
2428    }
2429
2430  /* Can combine cdecl with regparm and sseregparm.  */
2431  else if (is_attribute_p ("cdecl", name))
2432    {
2433      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2434        {
2435	  error ("stdcall and cdecl attributes are not compatible");
2436	}
2437      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2438        {
2439	  error ("fastcall and cdecl attributes are not compatible");
2440	}
2441    }
2442
2443  /* Can combine sseregparm with all attributes.  */
2444
2445  return NULL_TREE;
2446}
2447
2448/* Return 0 if the attributes for two types are incompatible, 1 if they
2449   are compatible, and 2 if they are nearly compatible (which causes a
2450   warning to be generated).  */
2451
2452static int
2453ix86_comp_type_attributes (tree type1, tree type2)
2454{
2455  /* Check for mismatch of non-default calling convention.  */
2456  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2457
2458  if (TREE_CODE (type1) != FUNCTION_TYPE)
2459    return 1;
2460
2461  /* Check for mismatched fastcall/regparm types.  */
2462  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2463       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2464      || (ix86_function_regparm (type1, NULL)
2465	  != ix86_function_regparm (type2, NULL)))
2466    return 0;
2467
2468  /* Check for mismatched sseregparm types.  */
2469  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2470      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2471    return 0;
2472
2473  /* Check for mismatched return types (cdecl vs stdcall).  */
2474  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2475      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2476    return 0;
2477
2478  return 1;
2479}
2480
2481/* Return the regparm value for a function with the indicated TYPE and DECL.
2482   DECL may be NULL when calling a function indirectly
2483   or considering a libcall.  */
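/* For example, with -mregparm=3 or __attribute__((regparm(3))) this returns 3
   and the first three integer arguments travel in %eax, %edx and %ecx; for a
   fastcall type it returns 2.  Local functions may additionally be promoted
   to a register convention automatically, as handled below.  */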
2484
2485static int
2486ix86_function_regparm (tree type, tree decl)
2487{
2488  tree attr;
2489  int regparm = ix86_regparm;
2490  bool user_convention = false;
2491
2492  if (!TARGET_64BIT)
2493    {
2494      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2495      if (attr)
2496	{
2497	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2498	  user_convention = true;
2499	}
2500
2501      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2502	{
2503	  regparm = 2;
2504	  user_convention = true;
2505	}
2506
2507      /* Use register calling convention for local functions when possible.  */
2508      if (!TARGET_64BIT && !user_convention && decl
2509	  && flag_unit_at_a_time && !profile_flag)
2510	{
2511	  struct cgraph_local_info *i = cgraph_local_info (decl);
2512	  if (i && i->local)
2513	    {
2514	      int local_regparm, globals = 0, regno;
2515
2516	      /* Make sure no regparm register is taken by a global register
2517		 variable.  */
2518	      for (local_regparm = 0; local_regparm < 3; local_regparm++)
2519		if (global_regs[local_regparm])
2520		  break;
2521	      /* We can't use regparm(3) for nested functions as these use
2522		 the static chain pointer in the third argument.  */
2523	      if (local_regparm == 3
2524		  && decl_function_context (decl)
2525		  && !DECL_NO_STATIC_CHAIN (decl))
2526		local_regparm = 2;
2527	      /* If the function realigns its stack pointer, the
2528		 prologue will clobber %ecx.  If we've already
2529		 generated code for the callee, the callee
2530		 DECL_STRUCT_FUNCTION is gone, so we fall back to
2531		 scanning the attributes for the self-realigning
2532		 property.  */
2533	      if ((DECL_STRUCT_FUNCTION (decl)
2534		   && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2535		  || (!DECL_STRUCT_FUNCTION (decl)
2536		      && lookup_attribute (ix86_force_align_arg_pointer_string,
2537					   TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2538		local_regparm = 2;
2539	      /* Each global register variable increases register pressure,
2540		 so the more global reg vars there are, the less the regparm
2541		 optimization can be used, unless the user requests it explicitly.  */
2542	      for (regno = 0; regno < 6; regno++)
2543		if (global_regs[regno])
2544		  globals++;
2545	      local_regparm
2546		= globals < local_regparm ? local_regparm - globals : 0;
2547
2548	      if (local_regparm > regparm)
2549		regparm = local_regparm;
2550	    }
2551	}
2552    }
2553  return regparm;
2554}
2555
2556/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2557   DFmode (2) arguments in SSE registers for a function with the
2558   indicated TYPE and DECL.  DECL may be NULL when calling a function
2559   indirectly or considering a libcall.  Otherwise return 0.  */
2560
2561static int
2562ix86_function_sseregparm (tree type, tree decl)
2563{
2564  /* Use SSE registers to pass SFmode and DFmode arguments if requested
2565     by the sseregparm attribute.  */
2566  if (TARGET_SSEREGPARM
2567      || (type
2568	  && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2569    {
2570      if (!TARGET_SSE)
2571	{
2572	  if (decl)
2573	    error ("Calling %qD with attribute sseregparm without "
2574		   "SSE/SSE2 enabled", decl);
2575	  else
2576	    error ("Calling %qT with attribute sseregparm without "
2577		   "SSE/SSE2 enabled", type);
2578	  return 0;
2579	}
2580
2581      return 2;
2582    }
2583
2584  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2585     (and DFmode for SSE2) arguments in SSE registers,
2586     even for 32-bit targets.  */
2587  if (!TARGET_64BIT && decl
2588      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2589    {
2590      struct cgraph_local_info *i = cgraph_local_info (decl);
2591      if (i && i->local)
2592	return TARGET_SSE2 ? 2 : 1;
2593    }
2594
2595  return 0;
2596}
2597
2598/* Return true if EAX is live at the start of the function.  Used by
2599   ix86_expand_prologue to determine if we need special help before
2600   calling allocate_stack_worker.  */
2601
2602static bool
2603ix86_eax_live_at_start_p (void)
2604{
2605  /* Cheat.  Don't bother working forward from ix86_function_regparm
2606     to the function type to determine whether an actual argument is located
2607     in eax.  Instead just look at the cfg info, which is still close enough
2608     to correct at this point.  This gives false positives for broken
2609     functions that might use uninitialized data that happens to be
2610     allocated in eax, but who cares?  */
2611  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2612}
2613
2614/* Value is the number of bytes of arguments automatically
2615   popped when returning from a subroutine call.
2616   FUNDECL is the declaration node of the function (as a tree),
2617   FUNTYPE is the data type of the function (as a tree),
2618   or for a library call it is an identifier node for the subroutine name.
2619   SIZE is the number of bytes of arguments passed on the stack.
2620
2621   On the 80386, the RTD insn may be used to pop them if the number
2622     of args is fixed, but if the number is variable then the caller
2623     must pop them all.  RTD can't be used for library calls now
2624     because the library is compiled with the Unix compiler.
2625   Use of RTD is a selectable option, since it is incompatible with
2626   standard Unix calling sequences.  If the option is not selected,
2627   the caller must always pop the args.
2628
2629   The attribute stdcall is equivalent to RTD on a per module basis.  */
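/* As a concrete illustration: a stdcall function taking two ints returns
   with "ret $8", so this reports 8; a cdecl or varargs function normally
   reports 0 and the caller pops its own arguments.  */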
2630
2631int
2632ix86_return_pops_args (tree fundecl, tree funtype, int size)
2633{
2634  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2635
2636  /* Cdecl functions override -mrtd, and never pop the stack.  */
2637  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2638
2639    /* Stdcall and fastcall functions will pop the stack if not
2640       variable args.  */
2641    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2642        || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2643      rtd = 1;
2644
2645    if (rtd
2646        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2647	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2648		== void_type_node)))
2649      return size;
2650  }
2651
2652  /* Lose any fake structure return argument if it is passed on the stack.  */
2653  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2654      && !TARGET_64BIT
2655      && !KEEP_AGGREGATE_RETURN_POINTER)
2656    {
2657      int nregs = ix86_function_regparm (funtype, fundecl);
2658
2659      if (!nregs)
2660	return GET_MODE_SIZE (Pmode);
2661    }
2662
2663  return 0;
2664}
2665
2666/* Argument support functions.  */
2667
2668/* Return true when register may be used to pass function parameters.  */
2669bool
2670ix86_function_arg_regno_p (int regno)
2671{
2672  int i;
2673  if (!TARGET_64BIT)
2674    {
2675      if (TARGET_MACHO)
2676        return (regno < REGPARM_MAX
2677                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2678      else
2679        return (regno < REGPARM_MAX
2680	        || (TARGET_MMX && MMX_REGNO_P (regno)
2681	  	    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2682	        || (TARGET_SSE && SSE_REGNO_P (regno)
2683		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2684    }
2685
2686  if (TARGET_MACHO)
2687    {
2688      if (SSE_REGNO_P (regno) && TARGET_SSE)
2689        return true;
2690    }
2691  else
2692    {
2693      if (TARGET_SSE && SSE_REGNO_P (regno)
2694          && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2695        return true;
2696    }
2697  /* RAX is used as a hidden argument to varargs functions (it passes the SSE register count).  */
2698  if (!regno)
2699    return true;
2700  for (i = 0; i < REGPARM_MAX; i++)
2701    if (regno == x86_64_int_parameter_registers[i])
2702      return true;
2703  return false;
2704}
2705
2706/* Return true if we do not know how to pass TYPE solely in registers.  */
2707
2708static bool
2709ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2710{
2711  if (must_pass_in_stack_var_size_or_pad (mode, type))
2712    return true;
2713
2714  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
2715     The layout_type routine is crafty and tries to trick us into passing
2716     currently unsupported vector types on the stack by using TImode.  */
2717  return (!TARGET_64BIT && mode == TImode
2718	  && type && TREE_CODE (type) != VECTOR_TYPE);
2719}
2720
2721/* Initialize a variable CUM of type CUMULATIVE_ARGS
2722   for a call to a function whose data type is FNTYPE.
2723   For a library call, FNTYPE is 0.  */
2724
2725void
2726init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
2727		      tree fntype,	/* tree ptr for function decl */
2728		      rtx libname,	/* SYMBOL_REF of library name or 0 */
2729		      tree fndecl)
2730{
2731  static CUMULATIVE_ARGS zero_cum;
2732  tree param, next_param;
2733
2734  if (TARGET_DEBUG_ARG)
2735    {
2736      fprintf (stderr, "\ninit_cumulative_args (");
2737      if (fntype)
2738	fprintf (stderr, "fntype code = %s, ret code = %s",
2739		 tree_code_name[(int) TREE_CODE (fntype)],
2740		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2741      else
2742	fprintf (stderr, "no fntype");
2743
2744      if (libname)
2745	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2746    }
2747
2748  *cum = zero_cum;
2749
2750  /* Set up the number of registers to use for passing arguments.  */
2751  cum->nregs = ix86_regparm;
2752  if (TARGET_SSE)
2753    cum->sse_nregs = SSE_REGPARM_MAX;
2754  if (TARGET_MMX)
2755    cum->mmx_nregs = MMX_REGPARM_MAX;
2756  cum->warn_sse = true;
2757  cum->warn_mmx = true;
2758  cum->maybe_vaarg = false;
2759
2760  /* Use ecx and edx registers if function has fastcall attribute,
2761     else look for regparm information.  */
2762  if (fntype && !TARGET_64BIT)
2763    {
2764      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2765	{
2766	  cum->nregs = 2;
2767	  cum->fastcall = 1;
2768	}
2769      else
2770	cum->nregs = ix86_function_regparm (fntype, fndecl);
2771    }
2772
2773  /* Set up the number of SSE registers used for passing SFmode
2774     and DFmode arguments.  Warn for mismatching ABI.  */
2775  cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2776
2777  /* Determine if this function has variable arguments.  The absence of
2778     variable arguments is indicated by the last argument type being
2779     'void_type_node'.  If there are variable arguments, then we won't
2780     pass anything in registers in 32-bit mode.  */
2781
2782  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2783    {
2784      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2785	   param != 0; param = next_param)
2786	{
2787	  next_param = TREE_CHAIN (param);
2788	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2789	    {
2790	      if (!TARGET_64BIT)
2791		{
2792		  cum->nregs = 0;
2793		  cum->sse_nregs = 0;
2794		  cum->mmx_nregs = 0;
2795		  cum->warn_sse = 0;
2796		  cum->warn_mmx = 0;
2797		  cum->fastcall = 0;
2798		  cum->float_in_sse = 0;
2799		}
2800	      cum->maybe_vaarg = true;
2801	    }
2802	}
2803    }
2804  if ((!fntype && !libname)
2805      || (fntype && !TYPE_ARG_TYPES (fntype)))
2806    cum->maybe_vaarg = true;
2807
2808  if (TARGET_DEBUG_ARG)
2809    fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2810
2811  return;
2812}
2813
2814/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
2815   But in the case of vector types, it is some vector mode.
2816
2817   When we have only some of our vector ISA extensions enabled, there
2818   are some modes for which vector_mode_supported_p is false.  For these
2819   modes, the generic vector support in gcc will choose some non-vector mode
2820   in order to implement the type.  By computing the natural mode, we'll
2821   select the proper ABI location for the operand and not depend on whatever
2822   the middle-end decides to do with these vector types.  */
2823
2824static enum machine_mode
2825type_natural_mode (tree type)
2826{
2827  enum machine_mode mode = TYPE_MODE (type);
2828
2829  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2830    {
2831      HOST_WIDE_INT size = int_size_in_bytes (type);
2832      if ((size == 8 || size == 16)
2833	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
2834	  && TYPE_VECTOR_SUBPARTS (type) > 1)
2835	{
2836	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2837
2838	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2839	    mode = MIN_MODE_VECTOR_FLOAT;
2840	  else
2841	    mode = MIN_MODE_VECTOR_INT;
2842
2843	  /* Get the mode which has this inner mode and number of units.  */
2844	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2845	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2846		&& GET_MODE_INNER (mode) == innermode)
2847	      return mode;
2848
2849	  gcc_unreachable ();
2850	}
2851    }
2852
2853  return mode;
2854}
2855
2856/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
2857   this may not agree with the mode that the type system has chosen for the
2858   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
2859   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
2860
2861static rtx
2862gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2863		     unsigned int regno)
2864{
2865  rtx tmp;
2866
2867  if (orig_mode != BLKmode)
2868    tmp = gen_rtx_REG (orig_mode, regno);
2869  else
2870    {
2871      tmp = gen_rtx_REG (mode, regno);
2872      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2873      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2874    }
2875
2876  return tmp;
2877}
2878
2879/* x86-64 register passing implementation.  See the x86-64 ABI for details.
2880   The goal of this code is to classify each 8-byte chunk (eightbyte) of the
2881   incoming argument by register class and assign registers accordingly.  */
2882
2883/* Return the union class of CLASS1 and CLASS2.
2884   See the x86-64 PS ABI for details.  */
2885
2886static enum x86_64_reg_class
2887merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2888{
2889  /* Rule #1: If both classes are equal, this is the resulting class.  */
2890  if (class1 == class2)
2891    return class1;
2892
2893  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2894     the other class.  */
2895  if (class1 == X86_64_NO_CLASS)
2896    return class2;
2897  if (class2 == X86_64_NO_CLASS)
2898    return class1;
2899
2900  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
2901  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2902    return X86_64_MEMORY_CLASS;
2903
2904  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
2905  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2906      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2907    return X86_64_INTEGERSI_CLASS;
2908  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2909      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2910    return X86_64_INTEGER_CLASS;
2911
2912  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2913     MEMORY is used.  */
2914  if (class1 == X86_64_X87_CLASS
2915      || class1 == X86_64_X87UP_CLASS
2916      || class1 == X86_64_COMPLEX_X87_CLASS
2917      || class2 == X86_64_X87_CLASS
2918      || class2 == X86_64_X87UP_CLASS
2919      || class2 == X86_64_COMPLEX_X87_CLASS)
2920    return X86_64_MEMORY_CLASS;
2921
2922  /* Rule #6: Otherwise class SSE is used.  */
2923  return X86_64_SSE_CLASS;
2924}
2925
2926/* Classify the argument of type TYPE and mode MODE.
2927   CLASSES will be filled by the register class used to pass each word
2928   of the operand.  The number of words is returned.  In case the parameter
2929   should be passed in memory, 0 is returned. As a special case for zero
2930   sized containers, classes[0] will be NO_CLASS and 1 is returned.
2931
2932   BIT_OFFSET is used internally for handling records; it specifies the
2933   offset in bits, modulo 256 to avoid overflow cases.
2934
2935   See the x86-64 PS ABI for details.
2936*/
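
/* As a worked example (illustrative only, assuming the usual x86-64 struct
   layout): a 16-byte

       struct s { double d; int i; };

   classifies as { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS }: the first
   eightbyte (d) ends up in an SSE register and the second (i) in a
   general-purpose register.  A 24-byte structure exceeds the 16-byte limit
   checked below and is passed in memory (classify_argument returns 0).  */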
2937
2938static int
2939classify_argument (enum machine_mode mode, tree type,
2940		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2941{
2942  HOST_WIDE_INT bytes =
2943    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2944  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2945
2946  /* Variable sized entities are always passed/returned in memory.  */
2947  if (bytes < 0)
2948    return 0;
2949
2950  if (mode != VOIDmode
2951      && targetm.calls.must_pass_in_stack (mode, type))
2952    return 0;
2953
2954  if (type && AGGREGATE_TYPE_P (type))
2955    {
2956      int i;
2957      tree field;
2958      enum x86_64_reg_class subclasses[MAX_CLASSES];
2959
2960      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
2961      if (bytes > 16)
2962	return 0;
2963
2964      for (i = 0; i < words; i++)
2965	classes[i] = X86_64_NO_CLASS;
2966
2967      /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
2968	 signal the memory class, so handle this as a special case.  */
2969      if (!words)
2970	{
2971	  classes[0] = X86_64_NO_CLASS;
2972	  return 1;
2973	}
2974
2975      /* Classify each field of record and merge classes.  */
2976      switch (TREE_CODE (type))
2977	{
2978	case RECORD_TYPE:
2979	  /* For classes first merge in the field of the subclasses.  */
2980	  if (TYPE_BINFO (type))
2981	    {
2982	      tree binfo, base_binfo;
2983	      int basenum;
2984
2985	      for (binfo = TYPE_BINFO (type), basenum = 0;
2986		   BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2987		{
2988		   int num;
2989		   int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2990		   tree type = BINFO_TYPE (base_binfo);
2991
2992		   num = classify_argument (TYPE_MODE (type),
2993					    type, subclasses,
2994					    (offset + bit_offset) % 256);
2995		   if (!num)
2996		     return 0;
2997		   for (i = 0; i < num; i++)
2998		     {
2999		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
3000		       classes[i + pos] =
3001			 merge_classes (subclasses[i], classes[i + pos]);
3002		     }
3003		}
3004	    }
3005	  /* And now merge the fields of the structure.  */
3006	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3007	    {
3008	      if (TREE_CODE (field) == FIELD_DECL)
3009		{
3010		  int num;
3011
3012		  if (TREE_TYPE (field) == error_mark_node)
3013		    continue;
3014
3015		  /* Bitfields are always classified as integer.  Handle them
3016		     early, since later code would consider them to be
3017		     misaligned integers.  */
3018		  if (DECL_BIT_FIELD (field))
3019		    {
3020		      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3021			   i < ((int_bit_position (field) + (bit_offset % 64))
3022			        + tree_low_cst (DECL_SIZE (field), 0)
3023				+ 63) / 8 / 8; i++)
3024			classes[i] =
3025			  merge_classes (X86_64_INTEGER_CLASS,
3026					 classes[i]);
3027		    }
3028		  else
3029		    {
3030		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3031					       TREE_TYPE (field), subclasses,
3032					       (int_bit_position (field)
3033						+ bit_offset) % 256);
3034		      if (!num)
3035			return 0;
3036		      for (i = 0; i < num; i++)
3037			{
3038			  int pos =
3039			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3040			  classes[i + pos] =
3041			    merge_classes (subclasses[i], classes[i + pos]);
3042			}
3043		    }
3044		}
3045	    }
3046	  break;
3047
3048	case ARRAY_TYPE:
3049	  /* Arrays are handled as small records.  */
3050	  {
3051	    int num;
3052	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3053				     TREE_TYPE (type), subclasses, bit_offset);
3054	    if (!num)
3055	      return 0;
3056
3057	    /* The partial classes are now full classes.  */
3058	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3059	      subclasses[0] = X86_64_SSE_CLASS;
3060	    if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3061	      subclasses[0] = X86_64_INTEGER_CLASS;
3062
3063	    for (i = 0; i < words; i++)
3064	      classes[i] = subclasses[i % num];
3065
3066	    break;
3067	  }
3068	case UNION_TYPE:
3069	case QUAL_UNION_TYPE:
3070	  /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
3072
3073	  /* Unions are not derived.  */
3074	  gcc_assert (!TYPE_BINFO (type)
3075		      || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3076	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3077	    {
3078	      if (TREE_CODE (field) == FIELD_DECL)
3079		{
3080		  int num;
3081
3082		  if (TREE_TYPE (field) == error_mark_node)
3083		    continue;
3084
3085		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3086					   TREE_TYPE (field), subclasses,
3087					   bit_offset);
3088		  if (!num)
3089		    return 0;
3090		  for (i = 0; i < num; i++)
3091		    classes[i] = merge_classes (subclasses[i], classes[i]);
3092		}
3093	    }
3094	  break;
3095
3096	default:
3097	  gcc_unreachable ();
3098	}
3099
3100      /* Final merger cleanup.  */
3101      for (i = 0; i < words; i++)
3102	{
3103	  /* If one class is MEMORY, everything should be passed in
3104	     memory.  */
3105	  if (classes[i] == X86_64_MEMORY_CLASS)
3106	    return 0;
3107
3108	  /* The X86_64_SSEUP_CLASS should always be preceded by
3109	     X86_64_SSE_CLASS.  */
3110	  if (classes[i] == X86_64_SSEUP_CLASS
3111	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3112	    classes[i] = X86_64_SSE_CLASS;
3113
3114	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
3115	  if (classes[i] == X86_64_X87UP_CLASS
3116	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3117	    classes[i] = X86_64_SSE_CLASS;
3118	}
3119      return words;
3120    }
3121
3122  /* Compute alignment needed.  We align all types to natural boundaries with
3123     exception of XFmode that is aligned to 64bits.  */
3124  if (mode != VOIDmode && mode != BLKmode)
3125    {
3126      int mode_alignment = GET_MODE_BITSIZE (mode);
3127
3128      if (mode == XFmode)
3129	mode_alignment = 128;
3130      else if (mode == XCmode)
3131	mode_alignment = 256;
3132      if (COMPLEX_MODE_P (mode))
3133	mode_alignment /= 2;
3134      /* Misaligned fields are always returned in memory.  */
3135      if (bit_offset % mode_alignment)
3136	return 0;
3137    }
3138
3139  /* For V1xx modes, just use the base mode.  */
3140  if (VECTOR_MODE_P (mode)
3141      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3142    mode = GET_MODE_INNER (mode);
3143
3144  /* Classification of atomic types.  */
3145  switch (mode)
3146    {
3147    case SDmode:
3148    case DDmode:
3149      classes[0] = X86_64_SSE_CLASS;
3150      return 1;
3151    case TDmode:
3152      classes[0] = X86_64_SSE_CLASS;
3153      classes[1] = X86_64_SSEUP_CLASS;
3154      return 2;
3155    case DImode:
3156    case SImode:
3157    case HImode:
3158    case QImode:
3159    case CSImode:
3160    case CHImode:
3161    case CQImode:
3162      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3163	classes[0] = X86_64_INTEGERSI_CLASS;
3164      else
3165	classes[0] = X86_64_INTEGER_CLASS;
3166      return 1;
3167    case CDImode:
3168    case TImode:
3169      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3170      return 2;
3171    case CTImode:
3172      return 0;
3173    case SFmode:
3174      if (!(bit_offset % 64))
3175	classes[0] = X86_64_SSESF_CLASS;
3176      else
3177	classes[0] = X86_64_SSE_CLASS;
3178      return 1;
3179    case DFmode:
3180      classes[0] = X86_64_SSEDF_CLASS;
3181      return 1;
3182    case XFmode:
3183      classes[0] = X86_64_X87_CLASS;
3184      classes[1] = X86_64_X87UP_CLASS;
3185      return 2;
3186    case TFmode:
3187      classes[0] = X86_64_SSE_CLASS;
3188      classes[1] = X86_64_SSEUP_CLASS;
3189      return 2;
3190    case SCmode:
3191      classes[0] = X86_64_SSE_CLASS;
3192      return 1;
3193    case DCmode:
3194      classes[0] = X86_64_SSEDF_CLASS;
3195      classes[1] = X86_64_SSEDF_CLASS;
3196      return 2;
3197    case XCmode:
3198      classes[0] = X86_64_COMPLEX_X87_CLASS;
3199      return 1;
3200    case TCmode:
3201      /* This mode is larger than 16 bytes.  */
3202      return 0;
3203    case V4SFmode:
3204    case V4SImode:
3205    case V16QImode:
3206    case V8HImode:
3207    case V2DFmode:
3208    case V2DImode:
3209      classes[0] = X86_64_SSE_CLASS;
3210      classes[1] = X86_64_SSEUP_CLASS;
3211      return 2;
3212    case V2SFmode:
3213    case V2SImode:
3214    case V4HImode:
3215    case V8QImode:
3216      classes[0] = X86_64_SSE_CLASS;
3217      return 1;
3218    case BLKmode:
3219    case VOIDmode:
3220      return 0;
3221    default:
3222      gcc_assert (VECTOR_MODE_P (mode));
3223
3224      if (bytes > 16)
3225	return 0;
3226
3227      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3228
3229      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3230	classes[0] = X86_64_INTEGERSI_CLASS;
3231      else
3232	classes[0] = X86_64_INTEGER_CLASS;
3233      classes[1] = X86_64_INTEGER_CLASS;
3234      return 1 + (bytes > 8);
3235    }
3236}
3237
3238/* Examine the argument and return the number of registers required in each
3239   class.  Return 0 iff the parameter should be passed in memory.  */
3240static int
3241examine_argument (enum machine_mode mode, tree type, int in_return,
3242		  int *int_nregs, int *sse_nregs)
3243{
3244  enum x86_64_reg_class class[MAX_CLASSES];
3245  int n = classify_argument (mode, type, class, 0);
3246
3247  *int_nregs = 0;
3248  *sse_nregs = 0;
3249  if (!n)
3250    return 0;
3251  for (n--; n >= 0; n--)
3252    switch (class[n])
3253      {
3254      case X86_64_INTEGER_CLASS:
3255      case X86_64_INTEGERSI_CLASS:
3256	(*int_nregs)++;
3257	break;
3258      case X86_64_SSE_CLASS:
3259      case X86_64_SSESF_CLASS:
3260      case X86_64_SSEDF_CLASS:
3261	(*sse_nregs)++;
3262	break;
3263      case X86_64_NO_CLASS:
3264      case X86_64_SSEUP_CLASS:
3265	break;
3266      case X86_64_X87_CLASS:
3267      case X86_64_X87UP_CLASS:
3268	if (!in_return)
3269	  return 0;
3270	break;
3271      case X86_64_COMPLEX_X87_CLASS:
3272	return in_return ? 2 : 0;
3273      case X86_64_MEMORY_CLASS:
3274	gcc_unreachable ();
3275      }
3276  return 1;
3277}
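
/* For the illustrative struct { double d; int i; } used above,
   examine_argument sets *int_nregs = 1 and *sse_nregs = 1; for an argument
   that classifies to memory (e.g. any aggregate larger than 16 bytes) it
   returns 0.  */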
3278
3279/* Construct container for the argument used by GCC interface.  See
3280   FUNCTION_ARG for the detailed description.  */
3281
3282static rtx
3283construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3284		     tree type, int in_return, int nintregs, int nsseregs,
3285		     const int *intreg, int sse_regno)
3286{
3287  /* The following variables hold the static issued_error state.  */
3288  static bool issued_sse_arg_error;
3289  static bool issued_sse_ret_error;
3290  static bool issued_x87_ret_error;
3291
3292  enum machine_mode tmpmode;
3293  int bytes =
3294    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3295  enum x86_64_reg_class class[MAX_CLASSES];
3296  int n;
3297  int i;
3298  int nexps = 0;
3299  int needed_sseregs, needed_intregs;
3300  rtx exp[MAX_CLASSES];
3301  rtx ret;
3302
3303  n = classify_argument (mode, type, class, 0);
3304  if (TARGET_DEBUG_ARG)
3305    {
3306      if (!n)
3307	fprintf (stderr, "Memory class\n");
3308      else
3309	{
3310	  fprintf (stderr, "Classes:");
3311	  for (i = 0; i < n; i++)
3312	    {
3313	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3314	    }
3315	   fprintf (stderr, "\n");
3316	}
3317    }
3318  if (!n)
3319    return NULL;
3320  if (!examine_argument (mode, type, in_return, &needed_intregs,
3321			 &needed_sseregs))
3322    return NULL;
3323  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3324    return NULL;
3325
3326  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
3327     some less clueful developer tries to use floating-point anyway.  */
3328  if (needed_sseregs && !TARGET_SSE)
3329    {
3330      if (in_return)
3331	{
3332	  if (!issued_sse_ret_error)
3333	    {
3334	      error ("SSE register return with SSE disabled");
3335	      issued_sse_ret_error = true;
3336	    }
3337	}
3338      else if (!issued_sse_arg_error)
3339	{
3340	  error ("SSE register argument with SSE disabled");
3341	  issued_sse_arg_error = true;
3342	}
3343      return NULL;
3344    }
3345
3346  /* Likewise, error if the ABI requires us to return values in the
3347     x87 registers and the user specified -mno-80387.  */
3348  if (!TARGET_80387 && in_return)
3349    for (i = 0; i < n; i++)
3350      if (class[i] == X86_64_X87_CLASS
3351	  || class[i] == X86_64_X87UP_CLASS
3352	  || class[i] == X86_64_COMPLEX_X87_CLASS)
3353	{
3354	  if (!issued_x87_ret_error)
3355	    {
3356	      error ("x87 register return with x87 disabled");
3357	      issued_x87_ret_error = true;
3358	    }
3359	  return NULL;
3360	}
3361
3362  /* First construct the simple cases.  Avoid SCmode, since we want to use
3363     a single register to pass this type.  */
3364  if (n == 1 && mode != SCmode)
3365    switch (class[0])
3366      {
3367      case X86_64_INTEGER_CLASS:
3368      case X86_64_INTEGERSI_CLASS:
3369	return gen_rtx_REG (mode, intreg[0]);
3370      case X86_64_SSE_CLASS:
3371      case X86_64_SSESF_CLASS:
3372      case X86_64_SSEDF_CLASS:
3373	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3374      case X86_64_X87_CLASS:
3375      case X86_64_COMPLEX_X87_CLASS:
3376	return gen_rtx_REG (mode, FIRST_STACK_REG);
3377      case X86_64_NO_CLASS:
3378	/* Zero sized array, struct or class.  */
3379	return NULL;
3380      default:
3381	gcc_unreachable ();
3382      }
3383  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3384      && mode != BLKmode)
3385    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3386  if (n == 2
3387      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3388    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3389  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3390      && class[1] == X86_64_INTEGER_CLASS
3391      && (mode == CDImode || mode == TImode || mode == TFmode)
3392      && intreg[0] + 1 == intreg[1])
3393    return gen_rtx_REG (mode, intreg[0]);
3394
3395  /* Otherwise figure out the entries of the PARALLEL.  */
3396  for (i = 0; i < n; i++)
3397    {
3398      switch (class[i])
3399        {
3400	  case X86_64_NO_CLASS:
3401	    break;
3402	  case X86_64_INTEGER_CLASS:
3403	  case X86_64_INTEGERSI_CLASS:
3404	    /* Merge TImodes on aligned occasions here too.  */
3405	    if (i * 8 + 8 > bytes)
3406	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3407	    else if (class[i] == X86_64_INTEGERSI_CLASS)
3408	      tmpmode = SImode;
3409	    else
3410	      tmpmode = DImode;
3411	    /* We've requested 24 bytes we don't have a mode for.  Use DImode.  */
3412	    if (tmpmode == BLKmode)
3413	      tmpmode = DImode;
3414	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3415					       gen_rtx_REG (tmpmode, *intreg),
3416					       GEN_INT (i*8));
3417	    intreg++;
3418	    break;
3419	  case X86_64_SSESF_CLASS:
3420	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3421					       gen_rtx_REG (SFmode,
3422							    SSE_REGNO (sse_regno)),
3423					       GEN_INT (i*8));
3424	    sse_regno++;
3425	    break;
3426	  case X86_64_SSEDF_CLASS:
3427	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3428					       gen_rtx_REG (DFmode,
3429							    SSE_REGNO (sse_regno)),
3430					       GEN_INT (i*8));
3431	    sse_regno++;
3432	    break;
3433	  case X86_64_SSE_CLASS:
3434	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3435	      tmpmode = TImode;
3436	    else
3437	      tmpmode = DImode;
3438	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3439					       gen_rtx_REG (tmpmode,
3440							    SSE_REGNO (sse_regno)),
3441					       GEN_INT (i*8));
3442	    if (tmpmode == TImode)
3443	      i++;
3444	    sse_regno++;
3445	    break;
3446	  default:
3447	    gcc_unreachable ();
3448	}
3449    }
3450
3451  /* Empty aligned struct, union or class.  */
3452  if (nexps == 0)
3453    return NULL;
3454
3455  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3456  for (i = 0; i < nexps; i++)
3457    XVECEXP (ret, 0, i) = exp [i];
3458  return ret;
3459}
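
/* Sketch of the result for the illustrative struct { double d; int i; }
   (register names are the conventional ones, shown only for exposition):
   its classes are { SSEDF, INTEGER }, none of the simple cases above match,
   so the loop builds a PARALLEL roughly of the form

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the low eightbyte travels in %xmm0 and the high eightbyte in %rdi.  */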
3460
3461/* Update the data in CUM to advance over an argument
3462   of mode MODE and data type TYPE.
3463   (TYPE is null for libcalls where that information may not be available.)  */
3464
3465void
3466function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3467		      tree type, int named)
3468{
3469  int bytes =
3470    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3471  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3472
3473  if (type)
3474    mode = type_natural_mode (type);
3475
3476  if (TARGET_DEBUG_ARG)
3477    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3478	     "mode=%s, named=%d)\n\n",
3479	     words, cum->words, cum->nregs, cum->sse_nregs,
3480	     GET_MODE_NAME (mode), named);
3481
3482  if (TARGET_64BIT)
3483    {
3484      int int_nregs, sse_nregs;
3485      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3486	cum->words += words;
3487      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3488	{
3489	  cum->nregs -= int_nregs;
3490	  cum->sse_nregs -= sse_nregs;
3491	  cum->regno += int_nregs;
3492	  cum->sse_regno += sse_nregs;
3493	}
3494      else
3495	cum->words += words;
3496    }
3497  else
3498    {
3499      switch (mode)
3500	{
3501	default:
3502	  break;
3503
3504	case BLKmode:
3505	  if (bytes < 0)
3506	    break;
3507	  /* FALLTHRU */
3508
3509	case DImode:
3510	case SImode:
3511	case HImode:
3512	case QImode:
3513	  cum->words += words;
3514	  cum->nregs -= words;
3515	  cum->regno += words;
3516
3517	  if (cum->nregs <= 0)
3518	    {
3519	      cum->nregs = 0;
3520	      cum->regno = 0;
3521	    }
3522	  break;
3523
3524	case DFmode:
3525	  if (cum->float_in_sse < 2)
3526	    break;
3527	case SFmode:
3528	  if (cum->float_in_sse < 1)
3529	    break;
3530	  /* FALLTHRU */
3531
3532	case TImode:
3533	case V16QImode:
3534	case V8HImode:
3535	case V4SImode:
3536	case V2DImode:
3537	case V4SFmode:
3538	case V2DFmode:
3539	  if (!type || !AGGREGATE_TYPE_P (type))
3540	    {
3541	      cum->sse_words += words;
3542	      cum->sse_nregs -= 1;
3543	      cum->sse_regno += 1;
3544	      if (cum->sse_nregs <= 0)
3545		{
3546		  cum->sse_nregs = 0;
3547		  cum->sse_regno = 0;
3548		}
3549	    }
3550	  break;
3551
3552	case V8QImode:
3553	case V4HImode:
3554	case V2SImode:
3555	case V2SFmode:
3556	  if (!type || !AGGREGATE_TYPE_P (type))
3557	    {
3558	      cum->mmx_words += words;
3559	      cum->mmx_nregs -= 1;
3560	      cum->mmx_regno += 1;
3561	      if (cum->mmx_nregs <= 0)
3562		{
3563		  cum->mmx_nregs = 0;
3564		  cum->mmx_regno = 0;
3565		}
3566	    }
3567	  break;
3568	}
3569    }
3570}
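
/* For example (illustration only): on x86-64, after advancing past two int
   arguments, cum->nregs has dropped from 6 to 4 and cum->regno is 2, so the
   next integer argument is assigned the third entry of
   x86_64_int_parameter_registers (%rdx in the usual rdi/rsi/rdx ordering).  */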
3571
3572/* Define where to put the arguments to a function.
3573   Value is zero to push the argument on the stack,
3574   or a hard register in which to store the argument.
3575
3576   MODE is the argument's machine mode.
3577   TYPE is the data type of the argument (as a tree).
3578    This is null for libcalls where that information may
3579    not be available.
3580   CUM is a variable of type CUMULATIVE_ARGS which gives info about
3581    the preceding args and about the function being called.
3582   NAMED is nonzero if this argument is a named parameter
3583    (otherwise it is an extra parameter matching an ellipsis).  */
3584
3585rtx
3586function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3587	      tree type, int named)
3588{
3589  enum machine_mode mode = orig_mode;
3590  rtx ret = NULL_RTX;
3591  int bytes =
3592    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3593  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3594  static bool warnedsse, warnedmmx;
3595
3596  /* To simplify the code below, represent vector types with a vector mode
3597     even if MMX/SSE are not active.  */
3598  if (type && TREE_CODE (type) == VECTOR_TYPE)
3599    mode = type_natural_mode (type);
3600
3601  /* Handle a hidden AL argument containing the number of SSE registers used
3602     by varargs x86-64 functions.  For the i386 ABI just return constm1_rtx
3603     to avoid any AL settings.  */
3604  if (mode == VOIDmode)
3605    {
3606      if (TARGET_64BIT)
3607	return GEN_INT (cum->maybe_vaarg
3608			? (cum->sse_nregs < 0
3609			   ? SSE_REGPARM_MAX
3610			   : cum->sse_regno)
3611			: -1);
3612      else
3613	return constm1_rtx;
3614    }
3615  if (TARGET_64BIT)
3616    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3617			       cum->sse_nregs,
3618			       &x86_64_int_parameter_registers [cum->regno],
3619			       cum->sse_regno);
3620  else
3621    switch (mode)
3622      {
3623	/* For now, pass fp/complex values on the stack.  */
3624      default:
3625	break;
3626
3627      case BLKmode:
3628	if (bytes < 0)
3629	  break;
3630	/* FALLTHRU */
3631      case DImode:
3632      case SImode:
3633      case HImode:
3634      case QImode:
3635	if (words <= cum->nregs)
3636	  {
3637	    int regno = cum->regno;
3638
3639	    /* Fastcall allocates the first two DWORD (SImode) or
3640	       smaller arguments to ECX and EDX.  */
3641	    if (cum->fastcall)
3642	      {
3643	        if (mode == BLKmode || mode == DImode)
3644	          break;
3645
3646	        /* ECX not EAX is the first allocated register.  */
3647	        if (regno == 0)
3648		  regno = 2;
3649	      }
3650	    ret = gen_rtx_REG (mode, regno);
3651	  }
3652	break;
3653      case DFmode:
3654	if (cum->float_in_sse < 2)
3655	  break;
3656      case SFmode:
3657	if (cum->float_in_sse < 1)
3658	  break;
3659	/* FALLTHRU */
3660      case TImode:
3661      case V16QImode:
3662      case V8HImode:
3663      case V4SImode:
3664      case V2DImode:
3665      case V4SFmode:
3666      case V2DFmode:
3667	if (!type || !AGGREGATE_TYPE_P (type))
3668	  {
3669	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3670	      {
3671		warnedsse = true;
3672		warning (0, "SSE vector argument without SSE enabled "
3673			 "changes the ABI");
3674	      }
3675	    if (cum->sse_nregs)
3676	      ret = gen_reg_or_parallel (mode, orig_mode,
3677					 cum->sse_regno + FIRST_SSE_REG);
3678	  }
3679	break;
3680      case V8QImode:
3681      case V4HImode:
3682      case V2SImode:
3683      case V2SFmode:
3684	if (!type || !AGGREGATE_TYPE_P (type))
3685	  {
3686	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3687	      {
3688		warnedmmx = true;
3689		warning (0, "MMX vector argument without MMX enabled "
3690			 "changes the ABI");
3691	      }
3692	    if (cum->mmx_nregs)
3693	      ret = gen_reg_or_parallel (mode, orig_mode,
3694					 cum->mmx_regno + FIRST_MMX_REG);
3695	  }
3696	break;
3697      }
3698
3699  if (TARGET_DEBUG_ARG)
3700    {
3701      fprintf (stderr,
3702	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3703	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3704
3705      if (ret)
3706	print_simple_rtl (stderr, ret);
3707      else
3708	fprintf (stderr, ", stack");
3709
3710      fprintf (stderr, " )\n");
3711    }
3712
3713  return ret;
3714}
3715
3716/* A C expression that indicates when an argument must be passed by
3717   reference.  If nonzero for an argument, a copy of that argument is
3718   made in memory and a pointer to the argument is passed instead of
3719   the argument itself.  The pointer is passed in whatever way is
3720   appropriate for passing a pointer to that type.  */
3721
3722static bool
3723ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3724			enum machine_mode mode ATTRIBUTE_UNUSED,
3725			tree type, bool named ATTRIBUTE_UNUSED)
3726{
3727  if (!TARGET_64BIT)
3728    return 0;
3729
3730  if (type && int_size_in_bytes (type) == -1)
3731    {
3732      if (TARGET_DEBUG_ARG)
3733	fprintf (stderr, "function_arg_pass_by_reference\n");
3734      return 1;
3735    }
3736
3737  return 0;
3738}
3739
3740/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
3741   passing ABI.  Only called if TARGET_SSE.  */
3742static bool
3743contains_128bit_aligned_vector_p (tree type)
3744{
3745  enum machine_mode mode = TYPE_MODE (type);
3746  if (SSE_REG_MODE_P (mode)
3747      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3748    return true;
3749  if (TYPE_ALIGN (type) < 128)
3750    return false;
3751
3752  if (AGGREGATE_TYPE_P (type))
3753    {
3754      /* Walk the aggregates recursively.  */
3755      switch (TREE_CODE (type))
3756	{
3757	case RECORD_TYPE:
3758	case UNION_TYPE:
3759	case QUAL_UNION_TYPE:
3760	  {
3761	    tree field;
3762
3763	    if (TYPE_BINFO (type))
3764	      {
3765		tree binfo, base_binfo;
3766		int i;
3767
3768		for (binfo = TYPE_BINFO (type), i = 0;
3769		     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3770		  if (contains_128bit_aligned_vector_p
3771		      (BINFO_TYPE (base_binfo)))
3772		    return true;
3773	      }
3774	    /* And now merge the fields of structure.  */
3775	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3776	      {
3777		if (TREE_CODE (field) == FIELD_DECL
3778		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3779		  return true;
3780	      }
3781	    break;
3782	  }
3783
3784	case ARRAY_TYPE:
3785	  /* Just for use if some language passes arrays by value.  */
3786	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3787	    return true;
3788	  break;
3789
3790	default:
3791	  gcc_unreachable ();
3792	}
3793    }
3794  return false;
3795}
3796
3797/* Gives the alignment boundary, in bits, of an argument with the
3798   specified mode and type.  */
3799
3800int
3801ix86_function_arg_boundary (enum machine_mode mode, tree type)
3802{
3803  int align;
3804  if (type)
3805    align = TYPE_ALIGN (type);
3806  else
3807    align = GET_MODE_ALIGNMENT (mode);
3808  if (align < PARM_BOUNDARY)
3809    align = PARM_BOUNDARY;
3810  if (!TARGET_64BIT)
3811    {
3812      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
3813	 make an exception for SSE modes since these require 128bit
3814	 alignment.
3815
3816	 The handling here differs from field_alignment.  ICC aligns MMX
3817	 arguments to 4 byte boundaries, while structure fields are aligned
3818	 to 8 byte boundaries.  */
3819      if (!TARGET_SSE)
3820	align = PARM_BOUNDARY;
3821      else if (!type)
3822	{
3823	  if (!SSE_REG_MODE_P (mode))
3824	    align = PARM_BOUNDARY;
3825	}
3826      else
3827	{
3828	  if (!contains_128bit_aligned_vector_p (type))
3829	    align = PARM_BOUNDARY;
3830	}
3831    }
3832  if (align > 128)
3833    align = 128;
3834  return align;
3835}
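
/* For example (illustration, 32-bit case): an __m128 argument, or a struct
   containing one, is placed on a 128-bit boundary, while a plain double is
   left at PARM_BOUNDARY; with SSE disabled everything stays at
   PARM_BOUNDARY.  */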
3836
3837/* Return true if N is a possible register number of function value.  */
3838bool
3839ix86_function_value_regno_p (int regno)
3840{
3841  if (TARGET_MACHO)
3842    {
3843      if (!TARGET_64BIT)
3844        {
3845          return ((regno) == 0
3846                  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3847                  || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3848        }
3849      return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3850              || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3851              || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3852      }
3853  else
3854    {
3855      if (regno == 0
3856          || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3857          || (regno == FIRST_SSE_REG && TARGET_SSE))
3858        return true;
3859
3860      if (!TARGET_64BIT
3861          && (regno == FIRST_MMX_REG && TARGET_MMX))
3862	    return true;
3863
3864      return false;
3865    }
3866}
3867
3868/* Define how to find the value returned by a function.
3869   VALTYPE is the data type of the value (as a tree).
3870   If the precise function being called is known, FUNC is its FUNCTION_DECL;
3871   otherwise, FUNC is 0.  */
3872rtx
3873ix86_function_value (tree valtype, tree fntype_or_decl,
3874		     bool outgoing ATTRIBUTE_UNUSED)
3875{
3876  enum machine_mode natmode = type_natural_mode (valtype);
3877
3878  if (TARGET_64BIT)
3879    {
3880      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3881				     1, REGPARM_MAX, SSE_REGPARM_MAX,
3882				     x86_64_int_return_registers, 0);
3883      /* For zero-sized structures, construct_container returns NULL, but we
3884	 need to keep the rest of the compiler happy by returning a meaningful value.  */
3885      if (!ret)
3886	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3887      return ret;
3888    }
3889  else
3890    {
3891      tree fn = NULL_TREE, fntype;
3892      if (fntype_or_decl
3893	  && DECL_P (fntype_or_decl))
3894        fn = fntype_or_decl;
3895      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3896      return gen_rtx_REG (TYPE_MODE (valtype),
3897			  ix86_value_regno (natmode, fn, fntype));
3898    }
3899}
3900
3901/* Return true iff type is returned in memory.  */
3902int
3903ix86_return_in_memory (tree type)
3904{
3905  int needed_intregs, needed_sseregs, size;
3906  enum machine_mode mode = type_natural_mode (type);
3907
3908  if (TARGET_64BIT)
3909    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3910
3911  if (mode == BLKmode)
3912    return 1;
3913
3914  size = int_size_in_bytes (type);
3915
3916  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3917    return 0;
3918
3919  if (VECTOR_MODE_P (mode) || mode == TImode)
3920    {
3921      /* User-created vectors small enough to fit in EAX.  */
3922      if (size < 8)
3923	return 0;
3924
3925      /* MMX/3dNow values are returned in MM0,
3926	 except when it doesn't exist.  */
3927      if (size == 8)
3928	return (TARGET_MMX ? 0 : 1);
3929
3930      /* SSE values are returned in XMM0, except when it doesn't exist.  */
3931      if (size == 16)
3932	return (TARGET_SSE ? 0 : 1);
3933    }
3934
3935  if (mode == XFmode)
3936    return 0;
3937
3938  if (mode == TDmode)
3939    return 1;
3940
3941  if (size > 12)
3942    return 1;
3943  return 0;
3944}
3945
3946/* When returning SSE vector types, we have a choice of either
3947     (1) being ABI incompatible with a -march switch, or
3948     (2) generating an error.
3949   Given no good solution, I think the safest thing is one warning.
3950   The user won't be able to use -Werror, but....
3951
3952   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3953   called in response to actually generating a caller or callee that
3954   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
3955   via aggregate_value_p for general type probing from tree-ssa.  */
3956
3957static rtx
3958ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3959{
3960  static bool warnedsse, warnedmmx;
3961
3962  if (type)
3963    {
3964      /* Look at the return type of the function, not the function type.  */
3965      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3966
3967      if (!TARGET_SSE && !warnedsse)
3968	{
3969	  if (mode == TImode
3970	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3971	    {
3972	      warnedsse = true;
3973	      warning (0, "SSE vector return without SSE enabled "
3974		       "changes the ABI");
3975	    }
3976	}
3977
3978      if (!TARGET_MMX && !warnedmmx)
3979	{
3980	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3981	    {
3982	      warnedmmx = true;
3983	      warning (0, "MMX vector return without MMX enabled "
3984		       "changes the ABI");
3985	    }
3986	}
3987    }
3988
3989  return NULL;
3990}
3991
3992/* Define how to find the value returned by a library function
3993   assuming the value has mode MODE.  */
3994rtx
3995ix86_libcall_value (enum machine_mode mode)
3996{
3997  if (TARGET_64BIT)
3998    {
3999      switch (mode)
4000	{
4001	case SFmode:
4002	case SCmode:
4003	case DFmode:
4004	case DCmode:
4005	case TFmode:
4006	case SDmode:
4007	case DDmode:
4008	case TDmode:
4009	  return gen_rtx_REG (mode, FIRST_SSE_REG);
4010	case XFmode:
4011	case XCmode:
4012	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4013	case TCmode:
4014	  return NULL;
4015	default:
4016	  return gen_rtx_REG (mode, 0);
4017	}
4018    }
4019  else
4020    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4021}
4022
4023/* Given a mode, return the register to use for a return value.  */
4024
4025static int
4026ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4027{
4028  gcc_assert (!TARGET_64BIT);
4029
4030  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4031     we normally prevent this case when mmx is not available.  However
4032     some ABIs may require the result to be returned like DImode.  */
4033  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4034    return TARGET_MMX ? FIRST_MMX_REG : 0;
4035
4036  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
4037     we prevent this case when sse is not available.  However some ABIs
4038     may require the result to be returned like integer TImode.  */
4039  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4040    return TARGET_SSE ? FIRST_SSE_REG : 0;
4041
4042  /* Decimal floating point values can go in %eax, unlike other float modes.  */
4043  if (DECIMAL_FLOAT_MODE_P (mode))
4044    return 0;
4045
4046  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
4047  if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4048    return 0;
4049
4050  /* Floating point return values in %st(0), except for local functions when
4051     SSE math is enabled or for functions with sseregparm attribute.  */
4052  if ((func || fntype)
4053      && (mode == SFmode || mode == DFmode))
4054    {
4055      int sse_level = ix86_function_sseregparm (fntype, func);
4056      if ((sse_level >= 1 && mode == SFmode)
4057	  || (sse_level == 2 && mode == DFmode))
4058        return FIRST_SSE_REG;
4059    }
4060
4061  return FIRST_FLOAT_REG;
4062}
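
/* For example (illustration): an ordinary float return comes back in %st(0);
   when ix86_function_sseregparm reports SSE return for the callee (the
   sseregparm attribute, or a local function built with SSE math), the same
   SFmode value is returned in %xmm0 instead.  */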
4063
4064/* Create the va_list data type.  */
4065
4066static tree
4067ix86_build_builtin_va_list (void)
4068{
4069  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4070
4071  /* For i386 we use plain pointer to argument area.  */
4072  if (!TARGET_64BIT)
4073    return build_pointer_type (char_type_node);
4074
4075  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4076  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4077
4078  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4079		      unsigned_type_node);
4080  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4081		      unsigned_type_node);
4082  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4083		      ptr_type_node);
4084  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4085		      ptr_type_node);
4086
4087  va_list_gpr_counter_field = f_gpr;
4088  va_list_fpr_counter_field = f_fpr;
4089
4090  DECL_FIELD_CONTEXT (f_gpr) = record;
4091  DECL_FIELD_CONTEXT (f_fpr) = record;
4092  DECL_FIELD_CONTEXT (f_ovf) = record;
4093  DECL_FIELD_CONTEXT (f_sav) = record;
4094
4095  TREE_CHAIN (record) = type_decl;
4096  TYPE_NAME (record) = type_decl;
4097  TYPE_FIELDS (record) = f_gpr;
4098  TREE_CHAIN (f_gpr) = f_fpr;
4099  TREE_CHAIN (f_fpr) = f_ovf;
4100  TREE_CHAIN (f_ovf) = f_sav;
4101
4102  layout_type (record);
4103
4104  /* The correct type is an array type of one element.  */
4105  return build_array_type (record, build_index_type (size_zero_node));
4106}
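
/* The record built above corresponds roughly to this C declaration (a sketch
   of the familiar x86-64 va_list layout, not emitted literally):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */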
4107
4108/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
4109
4110static void
4111ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4112			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
4113			     int no_rtl)
4114{
4115  CUMULATIVE_ARGS next_cum;
4116  rtx save_area = NULL_RTX, mem;
4117  rtx label;
4118  rtx label_ref;
4119  rtx tmp_reg;
4120  rtx nsse_reg;
4121  int set;
4122  tree fntype;
4123  int stdarg_p;
4124  int i;
4125
4126  if (!TARGET_64BIT)
4127    return;
4128
4129  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4130    return;
4131
4132  /* Indicate to allocate space on the stack for varargs save area.  */
4133  ix86_save_varrargs_registers = 1;
4134
4135  cfun->stack_alignment_needed = 128;
4136
4137  fntype = TREE_TYPE (current_function_decl);
4138  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4139	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4140		  != void_type_node));
4141
4142  /* For varargs, we do not want to skip the dummy va_dcl argument.
4143     For stdargs, we do want to skip the last named argument.  */
4144  next_cum = *cum;
4145  if (stdarg_p)
4146    function_arg_advance (&next_cum, mode, type, 1);
4147
4148  if (!no_rtl)
4149    save_area = frame_pointer_rtx;
4150
4151  set = get_varargs_alias_set ();
4152
4153  for (i = next_cum.regno;
4154       i < ix86_regparm
4155       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4156       i++)
4157    {
4158      mem = gen_rtx_MEM (Pmode,
4159			 plus_constant (save_area, i * UNITS_PER_WORD));
4160      MEM_NOTRAP_P (mem) = 1;
4161      set_mem_alias_set (mem, set);
4162      emit_move_insn (mem, gen_rtx_REG (Pmode,
4163					x86_64_int_parameter_registers[i]));
4164    }
4165
4166  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4167    {
4168      /* Now emit code to save SSE registers.  The AX parameter contains the
4169	 number of SSE parameter registers used to call this function.  We use
4170	 the sse_prologue_save insn template, which produces a computed jump
4171	 across the SSE saves.  We need some preparation work to get this working.  */
4172
4173      label = gen_label_rtx ();
4174      label_ref = gen_rtx_LABEL_REF (Pmode, label);
4175
4176      /* Compute the address to jump to:
4177         label - eax*4 + nnamed_sse_arguments*4  */
4178      tmp_reg = gen_reg_rtx (Pmode);
4179      nsse_reg = gen_reg_rtx (Pmode);
4180      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4181      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4182			      gen_rtx_MULT (Pmode, nsse_reg,
4183					    GEN_INT (4))));
4184      if (next_cum.sse_regno)
4185	emit_move_insn
4186	  (nsse_reg,
4187	   gen_rtx_CONST (DImode,
4188			  gen_rtx_PLUS (DImode,
4189					label_ref,
4190					GEN_INT (next_cum.sse_regno * 4))));
4191      else
4192	emit_move_insn (nsse_reg, label_ref);
4193      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4194
4195      /* Compute the address of the memory block we save into.  We always use
4196	 a pointer pointing 127 bytes after the first byte to store, which keeps
4197	 the instruction size limited to 4 bytes.  */
4198      tmp_reg = gen_reg_rtx (Pmode);
4199      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4200			      plus_constant (save_area,
4201					     8 * REGPARM_MAX + 127)));
4202      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4203      MEM_NOTRAP_P (mem) = 1;
4204      set_mem_alias_set (mem, set);
4205      set_mem_align (mem, BITS_PER_WORD);
4206
4207      /* And finally do the dirty job!  */
4208      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4209					GEN_INT (next_cum.sse_regno), label));
4210    }
4211
4212}
4213
4214/* Implement va_start.  */
4215
4216void
4217ix86_va_start (tree valist, rtx nextarg)
4218{
4219  HOST_WIDE_INT words, n_gpr, n_fpr;
4220  tree f_gpr, f_fpr, f_ovf, f_sav;
4221  tree gpr, fpr, ovf, sav, t;
4222  tree type;
4223
4224  /* Only 64bit target needs something special.  */
4225  if (!TARGET_64BIT)
4226    {
4227      std_expand_builtin_va_start (valist, nextarg);
4228      return;
4229    }
4230
4231  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4232  f_fpr = TREE_CHAIN (f_gpr);
4233  f_ovf = TREE_CHAIN (f_fpr);
4234  f_sav = TREE_CHAIN (f_ovf);
4235
4236  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4237  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4238  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4239  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4240  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4241
4242  /* Count number of gp and fp argument registers used.  */
4243  words = current_function_args_info.words;
4244  n_gpr = current_function_args_info.regno;
4245  n_fpr = current_function_args_info.sse_regno;
4246
4247  if (TARGET_DEBUG_ARG)
4248    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4249	     (int) words, (int) n_gpr, (int) n_fpr);
4250
4251  if (cfun->va_list_gpr_size)
4252    {
4253      type = TREE_TYPE (gpr);
4254      t = build2 (MODIFY_EXPR, type, gpr,
4255		  build_int_cst (type, n_gpr * 8));
4256      TREE_SIDE_EFFECTS (t) = 1;
4257      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4258    }
4259
4260  if (cfun->va_list_fpr_size)
4261    {
4262      type = TREE_TYPE (fpr);
4263      t = build2 (MODIFY_EXPR, type, fpr,
4264		  build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4265      TREE_SIDE_EFFECTS (t) = 1;
4266      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4267    }
4268
4269  /* Find the overflow area.  */
4270  type = TREE_TYPE (ovf);
4271  t = make_tree (type, virtual_incoming_args_rtx);
4272  if (words != 0)
4273    t = build2 (PLUS_EXPR, type, t,
4274	        build_int_cst (type, words * UNITS_PER_WORD));
4275  t = build2 (MODIFY_EXPR, type, ovf, t);
4276  TREE_SIDE_EFFECTS (t) = 1;
4277  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4278
4279  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4280    {
4281      /* Find the register save area.
4282	 The prologue of the function saves it right above the stack frame.  */
4283      type = TREE_TYPE (sav);
4284      t = make_tree (type, frame_pointer_rtx);
4285      t = build2 (MODIFY_EXPR, type, sav, t);
4286      TREE_SIDE_EFFECTS (t) = 1;
4287      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4288    }
4289}
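
/* Worked example (illustrative, assuming REGPARM_MAX == 6): for a prototype
   that consumes two integer registers and one SSE register before the
   ellipsis, va_start initializes gp_offset = 2 * 8 = 16, fp_offset =
   6 * 8 + 1 * 16 = 64, overflow_arg_area to the first stack-passed argument,
   and reg_save_area to the block saved by the prologue.  */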
4290
4291/* Implement va_arg.  */
4292
4293tree
4294ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4295{
4296  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4297  tree f_gpr, f_fpr, f_ovf, f_sav;
4298  tree gpr, fpr, ovf, sav, t;
4299  int size, rsize;
4300  tree lab_false, lab_over = NULL_TREE;
4301  tree addr, t2;
4302  rtx container;
4303  int indirect_p = 0;
4304  tree ptrtype;
4305  enum machine_mode nat_mode;
4306
4307  /* Only 64bit target needs something special.  */
4308  if (!TARGET_64BIT)
4309    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4310
4311  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4312  f_fpr = TREE_CHAIN (f_gpr);
4313  f_ovf = TREE_CHAIN (f_fpr);
4314  f_sav = TREE_CHAIN (f_ovf);
4315
4316  valist = build_va_arg_indirect_ref (valist);
4317  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4318  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4319  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4320  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4321
4322  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4323  if (indirect_p)
4324    type = build_pointer_type (type);
4325  size = int_size_in_bytes (type);
4326  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4327
4328  nat_mode = type_natural_mode (type);
4329  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4330				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4331
4332  /* Pull the value out of the saved registers.  */
4333
4334  addr = create_tmp_var (ptr_type_node, "addr");
4335  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4336
4337  if (container)
4338    {
4339      int needed_intregs, needed_sseregs;
4340      bool need_temp;
4341      tree int_addr, sse_addr;
4342
4343      lab_false = create_artificial_label ();
4344      lab_over = create_artificial_label ();
4345
4346      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4347
4348      need_temp = (!REG_P (container)
4349		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
4350		       || TYPE_ALIGN (type) > 128));
4351
4352      /* In case we are passing a structure, verify that it is a consecutive
4353         block in the register save area.  If not, we need to do moves.  */
4354      if (!need_temp && !REG_P (container))
4355	{
4356	  /* Verify that all registers are strictly consecutive.  */
4357	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4358	    {
4359	      int i;
4360
4361	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4362		{
4363		  rtx slot = XVECEXP (container, 0, i);
4364		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4365		      || INTVAL (XEXP (slot, 1)) != i * 16)
4366		    need_temp = 1;
4367		}
4368	    }
4369	  else
4370	    {
4371	      int i;
4372
4373	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4374		{
4375		  rtx slot = XVECEXP (container, 0, i);
4376		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4377		      || INTVAL (XEXP (slot, 1)) != i * 8)
4378		    need_temp = 1;
4379		}
4380	    }
4381	}
4382      if (!need_temp)
4383	{
4384	  int_addr = addr;
4385	  sse_addr = addr;
4386	}
4387      else
4388	{
4389	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
4390	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4391	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4392	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4393	}
4394
4395      /* First ensure that we fit completely in registers.  */
4396      if (needed_intregs)
4397	{
4398	  t = build_int_cst (TREE_TYPE (gpr),
4399			     (REGPARM_MAX - needed_intregs + 1) * 8);
4400	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4401	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4402	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4403	  gimplify_and_add (t, pre_p);
4404	}
4405      if (needed_sseregs)
4406	{
4407	  t = build_int_cst (TREE_TYPE (fpr),
4408			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4409			     + REGPARM_MAX * 8);
4410	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4411	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4412	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4413	  gimplify_and_add (t, pre_p);
4414	}
4415
4416      /* Compute index to start of area used for integer regs.  */
4417      if (needed_intregs)
4418	{
4419	  /* int_addr = gpr + sav; */
4420	  t = fold_convert (ptr_type_node, gpr);
4421	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4422	  t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4423	  gimplify_and_add (t, pre_p);
4424	}
4425      if (needed_sseregs)
4426	{
4427	  /* sse_addr = fpr + sav; */
4428	  t = fold_convert (ptr_type_node, fpr);
4429	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4430	  t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4431	  gimplify_and_add (t, pre_p);
4432	}
4433      if (need_temp)
4434	{
4435	  int i;
4436	  tree temp = create_tmp_var (type, "va_arg_tmp");
4437
4438	  /* addr = &temp; */
4439	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4440	  t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4441	  gimplify_and_add (t, pre_p);
4442
4443	  for (i = 0; i < XVECLEN (container, 0); i++)
4444	    {
4445	      rtx slot = XVECEXP (container, 0, i);
4446	      rtx reg = XEXP (slot, 0);
4447	      enum machine_mode mode = GET_MODE (reg);
4448	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4449	      tree addr_type = build_pointer_type (piece_type);
4450	      tree src_addr, src;
4451	      int src_offset;
4452	      tree dest_addr, dest;
4453
4454	      if (SSE_REGNO_P (REGNO (reg)))
4455		{
4456		  src_addr = sse_addr;
4457		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4458		}
4459	      else
4460		{
4461		  src_addr = int_addr;
4462		  src_offset = REGNO (reg) * 8;
4463		}
4464	      src_addr = fold_convert (addr_type, src_addr);
4465	      src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4466				       size_int (src_offset)));
4467	      src = build_va_arg_indirect_ref (src_addr);
4468
4469	      dest_addr = fold_convert (addr_type, addr);
4470	      dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4471					size_int (INTVAL (XEXP (slot, 1)))));
4472	      dest = build_va_arg_indirect_ref (dest_addr);
4473
4474	      t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4475	      gimplify_and_add (t, pre_p);
4476	    }
4477	}
4478
4479      if (needed_intregs)
4480	{
4481	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4482		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4483	  t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4484	  gimplify_and_add (t, pre_p);
4485	}
4486      if (needed_sseregs)
4487	{
4488	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4489		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4490	  t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4491	  gimplify_and_add (t, pre_p);
4492	}
4493
4494      t = build1 (GOTO_EXPR, void_type_node, lab_over);
4495      gimplify_and_add (t, pre_p);
4496
4497      t = build1 (LABEL_EXPR, void_type_node, lab_false);
4498      append_to_statement_list (t, pre_p);
4499    }
4500
4501  /* ... otherwise out of the overflow area.  */
4502
4503  /* Care for on-stack alignment if needed.  */
4504  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4505      || integer_zerop (TYPE_SIZE (type)))
4506    t = ovf;
4507  else
4508    {
4509      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4510      t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4511		  build_int_cst (TREE_TYPE (ovf), align - 1));
4512      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4513		  build_int_cst (TREE_TYPE (t), -align));
4514    }
4515  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4516
4517  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4518  gimplify_and_add (t2, pre_p);
4519
4520  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4521	      build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4522  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4523  gimplify_and_add (t, pre_p);
4524
4525  if (container)
4526    {
4527      t = build1 (LABEL_EXPR, void_type_node, lab_over);
4528      append_to_statement_list (t, pre_p);
4529    }
4530
4531  ptrtype = build_pointer_type (type);
4532  addr = fold_convert (ptrtype, addr);
4533
4534  if (indirect_p)
4535    addr = build_va_arg_indirect_ref (addr);
4536  return build_va_arg_indirect_ref (addr);
4537}
4538
4539/* Return nonzero if OPNUM's MEM should be matched
4540   in movabs* patterns.  */
4541
4542int
4543ix86_check_movabs (rtx insn, int opnum)
4544{
4545  rtx set, mem;
4546
4547  set = PATTERN (insn);
4548  if (GET_CODE (set) == PARALLEL)
4549    set = XVECEXP (set, 0, 0);
4550  gcc_assert (GET_CODE (set) == SET);
4551  mem = XEXP (set, opnum);
4552  while (GET_CODE (mem) == SUBREG)
4553    mem = SUBREG_REG (mem);
4554  gcc_assert (GET_CODE (mem) == MEM);
4555  return (volatile_ok || !MEM_VOLATILE_P (mem));
4556}
4557
4558/* Initialize the table of extra 80387 mathematical constants.  */
4559
4560static void
4561init_ext_80387_constants (void)
4562{
4563  static const char * cst[5] =
4564  {
4565    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
4566    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
4567    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
4568    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
4569    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
4570  };
4571  int i;
4572
4573  for (i = 0; i < 5; i++)
4574    {
4575      real_from_string (&ext_80387_constants_table[i], cst[i]);
4576      /* Ensure each constant is rounded to XFmode precision.  */
4577      real_convert (&ext_80387_constants_table[i],
4578		    XFmode, &ext_80387_constants_table[i]);
4579    }
4580
4581  ext_80387_constants_init = 1;
4582}
4583
4584/* Return a positive code selecting the special 80387 instruction that can
4585   load constant X, 0 if there is none, or -1 if X is not an FP constant.  */
4586
4587int
4588standard_80387_constant_p (rtx x)
4589{
4590  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4591    return -1;
4592
4593  if (x == CONST0_RTX (GET_MODE (x)))
4594    return 1;
4595  if (x == CONST1_RTX (GET_MODE (x)))
4596    return 2;
4597
4598  /* For XFmode constants, try to find a special 80387 instruction when
4599     optimizing for size or on those CPUs that benefit from them.  */
4600  if (GET_MODE (x) == XFmode
4601      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4602    {
4603      REAL_VALUE_TYPE r;
4604      int i;
4605
4606      if (! ext_80387_constants_init)
4607	init_ext_80387_constants ();
4608
4609      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4610      for (i = 0; i < 5; i++)
4611        if (real_identical (&r, &ext_80387_constants_table[i]))
4612	  return i + 3;
4613    }
4614
4615  return 0;
4616}
4617
4618/* Return the opcode of the special instruction to be used to load
4619   the constant X.  */
4620
4621const char *
4622standard_80387_constant_opcode (rtx x)
4623{
4624  switch (standard_80387_constant_p (x))
4625    {
4626    case 1:
4627      return "fldz";
4628    case 2:
4629      return "fld1";
4630    case 3:
4631      return "fldlg2";
4632    case 4:
4633      return "fldln2";
4634    case 5:
4635      return "fldl2e";
4636    case 6:
4637      return "fldl2t";
4638    case 7:
4639      return "fldpi";
4640    default:
4641      gcc_unreachable ();
4642    }
4643}
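
/* Illustrative only: a minimal sketch of how the two routines above are
   typically combined when picking the template for loading an FP constant.
   The helper name `example_load_fp_constant' is hypothetical and not part
   of this port; it is kept inside #if 0 so it is never compiled.  */
#if 0
static const char *
example_load_fp_constant (rtx operand)
{
  /* A positive return value means OPERAND has a dedicated fld* insn.  */
  if (standard_80387_constant_p (operand) > 0)
    return standard_80387_constant_opcode (operand);

  /* Otherwise the caller must fall back to loading from memory.  */
  return NULL;
}
#endif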
4644
4645/* Return the CONST_DOUBLE representing the 80387 constant that is
4646   loaded by the specified special instruction.  The argument IDX
4647   matches the return value from standard_80387_constant_p.  */
4648
4649rtx
4650standard_80387_constant_rtx (int idx)
4651{
4652  int i;
4653
4654  if (! ext_80387_constants_init)
4655    init_ext_80387_constants ();
4656
4657  switch (idx)
4658    {
4659    case 3:
4660    case 4:
4661    case 5:
4662    case 6:
4663    case 7:
4664      i = idx - 3;
4665      break;
4666
4667    default:
4668      gcc_unreachable ();
4669    }
4670
4671  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4672				       XFmode);
4673}
4674
4675/* Return 1 if MODE is a valid mode for SSE.  */
4676static int
4677standard_sse_mode_p (enum machine_mode mode)
4678{
4679  switch (mode)
4680    {
4681    case V16QImode:
4682    case V8HImode:
4683    case V4SImode:
4684    case V2DImode:
4685    case V4SFmode:
4686    case V2DFmode:
4687      return 1;
4688
4689    default:
4690      return 0;
4691    }
4692}
4693
4694/* Return 1 if X is an FP constant that we can load into an SSE register
4695   without using memory.  */
4696int
4697standard_sse_constant_p (rtx x)
4698{
4699  enum machine_mode mode = GET_MODE (x);
4700
4701  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4702    return 1;
4703  if (vector_all_ones_operand (x, mode)
4704      && standard_sse_mode_p (mode))
4705    return TARGET_SSE2 ? 2 : -1;
4706
4707  return 0;
4708}
4709
4710/* Return the opcode of the special instruction to be used to load
4711   the constant X.  */
4712
4713const char *
4714standard_sse_constant_opcode (rtx insn, rtx x)
4715{
4716  switch (standard_sse_constant_p (x))
4717    {
4718    case 1:
4719      if (get_attr_mode (insn) == MODE_V4SF)
4720        return "xorps\t%0, %0";
4721      else if (get_attr_mode (insn) == MODE_V2DF)
4722        return "xorpd\t%0, %0";
4723      else
4724        return "pxor\t%0, %0";
4725    case 2:
4726      return "pcmpeqd\t%0, %0";
4727    }
4728  gcc_unreachable ();
4729}
4730
4731/* Return 1 if OP contains a symbol reference.  */
4732
4733int
4734symbolic_reference_mentioned_p (rtx op)
4735{
4736  const char *fmt;
4737  int i;
4738
4739  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4740    return 1;
4741
4742  fmt = GET_RTX_FORMAT (GET_CODE (op));
4743  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4744    {
4745      if (fmt[i] == 'E')
4746	{
4747	  int j;
4748
4749	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4750	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4751	      return 1;
4752	}
4753
4754      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4755	return 1;
4756    }
4757
4758  return 0;
4759}
4760
4761/* Return 1 if it is appropriate to emit `ret' instructions in the
4762   body of a function.  Do this only if the epilogue is simple, needing a
4763   couple of insns.  Prior to reloading, we can't tell how many registers
4764   must be saved, so return 0 then.  Return 0 if there is no frame
4765   marker to de-allocate.  */
4766
4767int
4768ix86_can_use_return_insn_p (void)
4769{
4770  struct ix86_frame frame;
4771
4772  if (! reload_completed || frame_pointer_needed)
4773    return 0;
4774
4775  /* Don't allow popping more than 32768 bytes of arguments, since that's
4776     all we can do with one return instruction.  */
4777  if (current_function_pops_args
4778      && current_function_args_size >= 32768)
4779    return 0;
4780
4781  ix86_compute_frame_layout (&frame);
4782  return frame.to_allocate == 0 && frame.nregs == 0;
4783}
4784
4785/* Value should be nonzero if functions must have frame pointers.
4786   Zero means the frame pointer need not be set up (and parms may
4787   be accessed via the stack pointer) in functions that seem suitable.  */
4788
4789int
4790ix86_frame_pointer_required (void)
4791{
4792  /* If we accessed previous frames, then the generated code expects
4793     to be able to access the saved ebp value in our frame.  */
4794  if (cfun->machine->accesses_prev_frame)
4795    return 1;
4796
4797  /* Several x86 OSes need a frame pointer for other reasons,
4798     usually pertaining to setjmp.  */
4799  if (SUBTARGET_FRAME_POINTER_REQUIRED)
4800    return 1;
4801
4802  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4803     the frame pointer by default.  Turn it back on now if we've not
4804     got a leaf function.  */
4805  if (TARGET_OMIT_LEAF_FRAME_POINTER
4806      && (!current_function_is_leaf
4807	  || ix86_current_function_calls_tls_descriptor))
4808    return 1;
4809
4810  if (current_function_profile)
4811    return 1;
4812
4813  return 0;
4814}
4815
4816/* Record that the current function accesses previous call frames.  */
4817
4818void
4819ix86_setup_frame_addresses (void)
4820{
4821  cfun->machine->accesses_prev_frame = 1;
4822}
4823
4824#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4825# define USE_HIDDEN_LINKONCE 1
4826#else
4827# define USE_HIDDEN_LINKONCE 0
4828#endif
4829
4830static int pic_labels_used;
4831
4832/* Fills in the label name that should be used for a pc thunk for
4833   the given register.  */
4834
4835static void
4836get_pc_thunk_name (char name[32], unsigned int regno)
4837{
4838  gcc_assert (!TARGET_64BIT);
4839
4840  if (USE_HIDDEN_LINKONCE)
4841    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4842  else
4843    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4844}
4845
4846
4847/* Emit the pc thunks used by -fpic code.  Each thunk loads its register
4848   with the caller's return address and then returns.  */
4849
4850void
4851ix86_file_end (void)
4852{
4853  rtx xops[2];
4854  int regno;
4855
4856  for (regno = 0; regno < 8; ++regno)
4857    {
4858      char name[32];
4859
4860      if (! ((pic_labels_used >> regno) & 1))
4861	continue;
4862
4863      get_pc_thunk_name (name, regno);
4864
4865#if TARGET_MACHO
4866      if (TARGET_MACHO)
4867	{
4868	  switch_to_section (darwin_sections[text_coal_section]);
4869	  fputs ("\t.weak_definition\t", asm_out_file);
4870	  assemble_name (asm_out_file, name);
4871	  fputs ("\n\t.private_extern\t", asm_out_file);
4872	  assemble_name (asm_out_file, name);
4873	  fputs ("\n", asm_out_file);
4874	  ASM_OUTPUT_LABEL (asm_out_file, name);
4875	}
4876      else
4877#endif
4878      if (USE_HIDDEN_LINKONCE)
4879	{
4880	  tree decl;
4881
4882	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
4883			     error_mark_node);
4884	  TREE_PUBLIC (decl) = 1;
4885	  TREE_STATIC (decl) = 1;
4886	  DECL_ONE_ONLY (decl) = 1;
4887
4888	  (*targetm.asm_out.unique_section) (decl, 0);
4889	  switch_to_section (get_named_section (decl, NULL, 0));
4890
4891	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
4892	  fputs ("\t.hidden\t", asm_out_file);
4893	  assemble_name (asm_out_file, name);
4894	  fputc ('\n', asm_out_file);
4895	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4896	}
4897      else
4898	{
4899	  switch_to_section (text_section);
4900	  ASM_OUTPUT_LABEL (asm_out_file, name);
4901	}
4902
4903      xops[0] = gen_rtx_REG (SImode, regno);
4904      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4905      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4906      output_asm_insn ("ret", xops);
4907    }
4908
4909  if (NEED_INDICATE_EXEC_STACK)
4910    file_end_indicate_exec_stack ();
4911}
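
/* For reference, a sketch of the assembly the loop above emits for a
   pc thunk for %ebx (assuming USE_HIDDEN_LINKONCE and ELF output; the
   section and symbol directives are abbreviated):

	.globl	__i686.get_pc_thunk.bx
	.hidden	__i686.get_pc_thunk.bx
   __i686.get_pc_thunk.bx:
	movl	(%esp), %ebx	# load the return address,
	ret			# i.e. the address right after the call
*/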
4912
4913/* Emit code for the SET_GOT patterns.  */
4914
4915const char *
4916output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4917{
4918  rtx xops[3];
4919
4920  xops[0] = dest;
4921  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4922
4923  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4924    {
4925      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4926
4927      if (!flag_pic)
4928	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4929      else
4930	output_asm_insn ("call\t%a2", xops);
4931
4932#if TARGET_MACHO
4933      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
4934         is what will be referenced by the Mach-O PIC subsystem.  */
4935      if (!label)
4936	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4937#endif
4938
4939      (*targetm.asm_out.internal_label) (asm_out_file, "L",
4940				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4941
4942      if (flag_pic)
4943	output_asm_insn ("pop{l}\t%0", xops);
4944    }
4945  else
4946    {
4947      char name[32];
4948      get_pc_thunk_name (name, REGNO (dest));
4949      pic_labels_used |= 1 << REGNO (dest);
4950
4951      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4952      xops[2] = gen_rtx_MEM (QImode, xops[2]);
4953      output_asm_insn ("call\t%X2", xops);
4954      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
4955         is what will be referenced by the Mach-O PIC subsystem.  */
4956#if TARGET_MACHO
4957      if (!label)
4958	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4959      else
4960        targetm.asm_out.internal_label (asm_out_file, "L",
4961					   CODE_LABEL_NUMBER (label));
4962#endif
4963    }
4964
4965  if (TARGET_MACHO)
4966    return "";
4967
4968  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4969    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4970  else
4971    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4972
4973  return "";
4974}
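
/* For reference, a sketch of what the non-thunk PIC path above expands to
   when loading the GOT pointer into %ebx (the label name .L1 is
   illustrative; the real label is generated internally):

	call	.L1
   .L1:	popl	%ebx			# %ebx = address of .L1
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L1], %ebx
*/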
4975
4976/* Generate an "push" pattern for input ARG.  */
4977
4978static rtx
4979gen_push (rtx arg)
4980{
4981  return gen_rtx_SET (VOIDmode,
4982		      gen_rtx_MEM (Pmode,
4983				   gen_rtx_PRE_DEC (Pmode,
4984						    stack_pointer_rtx)),
4985		      arg);
4986}
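
/* For reference, the pattern built above has the shape (shown for 32-bit,
   where Pmode is SImode):

	(set (mem:SI (pre_dec:SI (reg:SI sp))) ARG)

   i.e. an ordinary push of ARG onto the stack.  */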
4987
4988/* Return the number of an unused call-clobbered register that is available
4989   for the entire function, or INVALID_REGNUM if there is none.  */
4990
4991static unsigned int
4992ix86_select_alt_pic_regnum (void)
4993{
4994  if (current_function_is_leaf && !current_function_profile
4995      && !ix86_current_function_calls_tls_descriptor)
4996    {
4997      int i;
4998      for (i = 2; i >= 0; --i)
4999        if (!regs_ever_live[i])
5000	  return i;
5001    }
5002
5003  return INVALID_REGNUM;
5004}
5005
5006/* Return 1 if we need to save REGNO.  */
5007static int
5008ix86_save_reg (unsigned int regno, int maybe_eh_return)
5009{
5010  if (pic_offset_table_rtx
5011      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5012      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5013	  || current_function_profile
5014	  || current_function_calls_eh_return
5015	  || current_function_uses_const_pool))
5016    {
5017      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5018	return 0;
5019      return 1;
5020    }
5021
5022  if (current_function_calls_eh_return && maybe_eh_return)
5023    {
5024      unsigned i;
5025      for (i = 0; ; i++)
5026	{
5027	  unsigned test = EH_RETURN_DATA_REGNO (i);
5028	  if (test == INVALID_REGNUM)
5029	    break;
5030	  if (test == regno)
5031	    return 1;
5032	}
5033    }
5034
5035  if (cfun->machine->force_align_arg_pointer
5036      && regno == REGNO (cfun->machine->force_align_arg_pointer))
5037    return 1;
5038
5039  return (regs_ever_live[regno]
5040	  && !call_used_regs[regno]
5041	  && !fixed_regs[regno]
5042	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5043}
5044
5045/* Return number of registers to be saved on the stack.  */
5046
5047static int
5048ix86_nsaved_regs (void)
5049{
5050  int nregs = 0;
5051  int regno;
5052
5053  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5054    if (ix86_save_reg (regno, true))
5055      nregs++;
5056  return nregs;
5057}
5058
5059/* Return the offset between two registers, one to be eliminated, and the other
5060   its replacement, at the start of a routine.  */
5061
5062HOST_WIDE_INT
5063ix86_initial_elimination_offset (int from, int to)
5064{
5065  struct ix86_frame frame;
5066  ix86_compute_frame_layout (&frame);
5067
5068  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5069    return frame.hard_frame_pointer_offset;
5070  else if (from == FRAME_POINTER_REGNUM
5071	   && to == HARD_FRAME_POINTER_REGNUM)
5072    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5073  else
5074    {
5075      gcc_assert (to == STACK_POINTER_REGNUM);
5076
5077      if (from == ARG_POINTER_REGNUM)
5078	return frame.stack_pointer_offset;
5079
5080      gcc_assert (from == FRAME_POINTER_REGNUM);
5081      return frame.stack_pointer_offset - frame.frame_pointer_offset;
5082    }
5083}
5084
5085/* Fill in the ix86_frame structure describing the frame of the current function.  */
5086
5087static void
5088ix86_compute_frame_layout (struct ix86_frame *frame)
5089{
5090  HOST_WIDE_INT total_size;
5091  unsigned int stack_alignment_needed;
5092  HOST_WIDE_INT offset;
5093  unsigned int preferred_alignment;
5094  HOST_WIDE_INT size = get_frame_size ();
5095
5096  frame->nregs = ix86_nsaved_regs ();
5097  total_size = size;
5098
5099  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5100  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5101
5102  /* During reload iteration the number of registers saved can change.
5103     Recompute the value as needed.  Do not recompute when the number of
5104     registers didn't change, as reload makes multiple calls to this function
5105     and does not expect the decision to change within a single iteration.  */
5106  if (!optimize_size
5107      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5108    {
5109      int count = frame->nregs;
5110
5111      cfun->machine->use_fast_prologue_epilogue_nregs = count;
5112      /* The fast prologue uses moves instead of pushes to save registers.  This
5113         is significantly longer, but also executes faster, as modern hardware
5114         can execute the moves in parallel but can't do that for push/pop.
5115
5116	 Be careful about choosing which prologue to emit:  when the function
5117	 takes many instructions to execute we may use the slow version, as well
5118	 as when the function is known to be outside a hot spot (known with
5119	 profile feedback only).  Weight the size of the function by the number
5120	 of registers to save, as it is cheap to use one or two push instructions
5121	 but very slow to use many of them.  */
5122      if (count)
5123	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5124      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5125	  || (flag_branch_probabilities
5126	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5127        cfun->machine->use_fast_prologue_epilogue = false;
5128      else
5129        cfun->machine->use_fast_prologue_epilogue
5130	   = !expensive_function_p (count);
5131    }
5132  if (TARGET_PROLOGUE_USING_MOVE
5133      && cfun->machine->use_fast_prologue_epilogue)
5134    frame->save_regs_using_mov = true;
5135  else
5136    frame->save_regs_using_mov = false;
5137
5138
5139  /* Skip return address and saved base pointer.  */
5140  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5141
5142  frame->hard_frame_pointer_offset = offset;
5143
5144  /* Do some sanity checking of stack_alignment_needed and
5145     preferred_alignment, since the i386 port is the only one using these
5146     features and they may break easily.  */
5147
5148  gcc_assert (!size || stack_alignment_needed);
5149  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5150  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5151  gcc_assert (stack_alignment_needed
5152	      <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5153
5154  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5155    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5156
5157  /* Register save area */
5158  offset += frame->nregs * UNITS_PER_WORD;
5159
5160  /* Va-arg area */
5161  if (ix86_save_varrargs_registers)
5162    {
5163      offset += X86_64_VARARGS_SIZE;
5164      frame->va_arg_size = X86_64_VARARGS_SIZE;
5165    }
5166  else
5167    frame->va_arg_size = 0;
5168
5169  /* Align start of frame for local function.  */
5170  frame->padding1 = ((offset + stack_alignment_needed - 1)
5171		     & -stack_alignment_needed) - offset;
5172
5173  offset += frame->padding1;
5174
5175  /* Frame pointer points here.  */
5176  frame->frame_pointer_offset = offset;
5177
5178  offset += size;
5179
5180  /* Add the outgoing arguments area.  It can be skipped if we eliminated
5181     all the function calls as dead code.
5182     Skipping is however impossible when the function calls alloca, as the
5183     alloca expander assumes that the last current_function_outgoing_args_size
5184     bytes of the stack frame are unused.  */
5185  if (ACCUMULATE_OUTGOING_ARGS
5186      && (!current_function_is_leaf || current_function_calls_alloca
5187	  || ix86_current_function_calls_tls_descriptor))
5188    {
5189      offset += current_function_outgoing_args_size;
5190      frame->outgoing_arguments_size = current_function_outgoing_args_size;
5191    }
5192  else
5193    frame->outgoing_arguments_size = 0;
5194
5195  /* Align stack boundary.  Only needed if we're calling another function
5196     or using alloca.  */
5197  if (!current_function_is_leaf || current_function_calls_alloca
5198      || ix86_current_function_calls_tls_descriptor)
5199    frame->padding2 = ((offset + preferred_alignment - 1)
5200		       & -preferred_alignment) - offset;
5201  else
5202    frame->padding2 = 0;
5203
5204  offset += frame->padding2;
5205
5206  /* We've reached end of stack frame.  */
5207  frame->stack_pointer_offset = offset;
5208
5209  /* Size prologue needs to allocate.  */
5210  frame->to_allocate =
5211    (size + frame->padding1 + frame->padding2
5212     + frame->outgoing_arguments_size + frame->va_arg_size);
5213
5214  if ((!frame->to_allocate && frame->nregs <= 1)
5215      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5216    frame->save_regs_using_mov = false;
5217
5218  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5219      && current_function_is_leaf
5220      && !ix86_current_function_calls_tls_descriptor)
5221    {
5222      frame->red_zone_size = frame->to_allocate;
5223      if (frame->save_regs_using_mov)
5224	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5225      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5226	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5227    }
5228  else
5229    frame->red_zone_size = 0;
5230  frame->to_allocate -= frame->red_zone_size;
5231  frame->stack_pointer_offset -= frame->red_zone_size;
5232#if 0
5233  fprintf (stderr, "nregs: %i\n", frame->nregs);
5234  fprintf (stderr, "size: %i\n", size);
5235  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5236  fprintf (stderr, "padding1: %i\n", frame->padding1);
5237  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5238  fprintf (stderr, "padding2: %i\n", frame->padding2);
5239  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5240  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5241  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5242  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5243	   frame->hard_frame_pointer_offset);
5244  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5245#endif
5246}
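
/* For reference, a sketch of the stack frame the routine above describes,
   from higher to lower addresses (each area is present only when nonzero):

	return address			<- entry stack pointer
	saved frame pointer (if any)	<- hard_frame_pointer_offset
	register save area (nregs words)
	va-arg save area
	padding1 (aligns the locals)	<- frame_pointer_offset
	local variables (get_frame_size)
	outgoing argument area
	padding2 (aligns the new esp)	<- stack_pointer_offset

   The paddings use the usual round-up idiom
   ((offset + align - 1) & -align) - offset; for example, offset 20 with a
   16-byte alignment gives a padding of 12.  */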
5247
5248/* Emit code to save registers in the prologue.  */
5249
5250static void
5251ix86_emit_save_regs (void)
5252{
5253  unsigned int regno;
5254  rtx insn;
5255
5256  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5257    if (ix86_save_reg (regno, true))
5258      {
5259	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5260	RTX_FRAME_RELATED_P (insn) = 1;
5261      }
5262}
5263
5264/* Emit code to save registers using MOV insns.  The first register
5265   is saved at POINTER + OFFSET.  */
5266static void
5267ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5268{
5269  unsigned int regno;
5270  rtx insn;
5271
5272  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5273    if (ix86_save_reg (regno, true))
5274      {
5275	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5276					       Pmode, offset),
5277			       gen_rtx_REG (Pmode, regno));
5278	RTX_FRAME_RELATED_P (insn) = 1;
5279	offset += UNITS_PER_WORD;
5280      }
5281}
5282
5283/* Expand prologue or epilogue stack adjustment.
5284   The pattern exists to put a dependency on all ebp-based memory accesses.
5285   STYLE should be negative if instructions should be marked as frame related,
5286   zero if the %r11 register is live and cannot be freely used, and positive
5287   otherwise.  */
5288
5289static void
5290pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5291{
5292  rtx insn;
5293
5294  if (! TARGET_64BIT)
5295    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5296  else if (x86_64_immediate_operand (offset, DImode))
5297    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5298  else
5299    {
5300      rtx r11;
5301      /* r11 is used by indirect sibcall return as well, set before the
5302	 epilogue and used after the epilogue.  ATM indirect sibcall
5303	 shouldn't be used together with huge frame sizes in one
5304	 function because of the frame_size check in sibcall.c.  */
5305      gcc_assert (style);
5306      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5307      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5308      if (style < 0)
5309	RTX_FRAME_RELATED_P (insn) = 1;
5310      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5311							       offset));
5312    }
5313  if (style < 0)
5314    RTX_FRAME_RELATED_P (insn) = 1;
5315}
5316
5317/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
5318
5319static rtx
5320ix86_internal_arg_pointer (void)
5321{
5322  bool has_force_align_arg_pointer =
5323    (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5324			    TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5325  if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5326       && DECL_NAME (current_function_decl)
5327       && MAIN_NAME_P (DECL_NAME (current_function_decl))
5328       && DECL_FILE_SCOPE_P (current_function_decl))
5329      || ix86_force_align_arg_pointer
5330      || has_force_align_arg_pointer)
5331    {
5332      /* Nested functions can't realign the stack due to a register
5333	 conflict.  */
5334      if (DECL_CONTEXT (current_function_decl)
5335	  && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5336	{
5337	  if (ix86_force_align_arg_pointer)
5338	    warning (0, "-mstackrealign ignored for nested functions");
5339	  if (has_force_align_arg_pointer)
5340	    error ("%s not supported for nested functions",
5341		   ix86_force_align_arg_pointer_string);
5342	  return virtual_incoming_args_rtx;
5343	}
5344      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5345      return copy_to_reg (cfun->machine->force_align_arg_pointer);
5346    }
5347  else
5348    return virtual_incoming_args_rtx;
5349}
5350
5351/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5352   This is called from dwarf2out.c to emit call frame instructions
5353   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5354static void
5355ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5356{
5357  rtx unspec = SET_SRC (pattern);
5358  gcc_assert (GET_CODE (unspec) == UNSPEC);
5359
5360  switch (index)
5361    {
5362    case UNSPEC_REG_SAVE:
5363      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5364			      SET_DEST (pattern));
5365      break;
5366    case UNSPEC_DEF_CFA:
5367      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5368			 INTVAL (XVECEXP (unspec, 0, 0)));
5369      break;
5370    default:
5371      gcc_unreachable ();
5372    }
5373}
5374
5375/* Expand the prologue into a bunch of separate insns.  */
5376
5377void
5378ix86_expand_prologue (void)
5379{
5380  rtx insn;
5381  bool pic_reg_used;
5382  struct ix86_frame frame;
5383  HOST_WIDE_INT allocate;
5384
5385  ix86_compute_frame_layout (&frame);
5386
5387  if (cfun->machine->force_align_arg_pointer)
5388    {
5389      rtx x, y;
5390
5391      /* Grab the argument pointer.  */
5392      x = plus_constant (stack_pointer_rtx, 4);
5393      y = cfun->machine->force_align_arg_pointer;
5394      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5395      RTX_FRAME_RELATED_P (insn) = 1;
5396
5397      /* The unwind info consists of two parts: install the fafp as the cfa,
5398	 and record the fafp as the "save register" of the stack pointer.
5399	 The latter is there so that the unwinder can see where it
5400	 should restore the stack pointer across the `and' insn below.  */
5401      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5402      x = gen_rtx_SET (VOIDmode, y, x);
5403      RTX_FRAME_RELATED_P (x) = 1;
5404      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5405			  UNSPEC_REG_SAVE);
5406      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5407      RTX_FRAME_RELATED_P (y) = 1;
5408      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5409      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5410      REG_NOTES (insn) = x;
5411
5412      /* Align the stack.  */
5413      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5414			     GEN_INT (-16)));
5415
5416      /* And here we cheat like madmen with the unwind info.  We force the
5417	 cfa register back to sp+4, which is exactly what it was at the
5418	 start of the function.  Re-pushing the return address results in
5419	 the return at the same spot relative to the cfa, and thus is
5420	 correct wrt the unwind info.  */
5421      x = cfun->machine->force_align_arg_pointer;
5422      x = gen_frame_mem (Pmode, plus_constant (x, -4));
5423      insn = emit_insn (gen_push (x));
5424      RTX_FRAME_RELATED_P (insn) = 1;
5425
5426      x = GEN_INT (4);
5427      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5428      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5429      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5430      REG_NOTES (insn) = x;
5431    }
5432
5433  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
5434     slower on all targets.  Also sdb doesn't like it.  */
5435
5436  if (frame_pointer_needed)
5437    {
5438      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5439      RTX_FRAME_RELATED_P (insn) = 1;
5440
5441      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5442      RTX_FRAME_RELATED_P (insn) = 1;
5443    }
5444
5445  allocate = frame.to_allocate;
5446
5447  if (!frame.save_regs_using_mov)
5448    ix86_emit_save_regs ();
5449  else
5450    allocate += frame.nregs * UNITS_PER_WORD;
5451
5452  /* When using the red zone we may start saving registers before allocating
5453     the stack frame, saving one cycle of the prologue.  */
5454  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5455    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5456				   : stack_pointer_rtx,
5457				   -frame.nregs * UNITS_PER_WORD);
5458
5459  if (allocate == 0)
5460    ;
5461  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5462    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5463			       GEN_INT (-allocate), -1);
5464  else
5465    {
5466      /* Only valid for Win32.  */
5467      rtx eax = gen_rtx_REG (SImode, 0);
5468      bool eax_live = ix86_eax_live_at_start_p ();
5469      rtx t;
5470
5471      gcc_assert (!TARGET_64BIT);
5472
5473      if (eax_live)
5474	{
5475	  emit_insn (gen_push (eax));
5476	  allocate -= 4;
5477	}
5478
5479      emit_move_insn (eax, GEN_INT (allocate));
5480
5481      insn = emit_insn (gen_allocate_stack_worker (eax));
5482      RTX_FRAME_RELATED_P (insn) = 1;
5483      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5484      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5485      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5486					    t, REG_NOTES (insn));
5487
5488      if (eax_live)
5489	{
5490	  if (frame_pointer_needed)
5491	    t = plus_constant (hard_frame_pointer_rtx,
5492			       allocate
5493			       - frame.to_allocate
5494			       - frame.nregs * UNITS_PER_WORD);
5495	  else
5496	    t = plus_constant (stack_pointer_rtx, allocate);
5497	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5498	}
5499    }
5500
5501  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5502    {
5503      if (!frame_pointer_needed || !frame.to_allocate)
5504        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5505      else
5506        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5507				       -frame.nregs * UNITS_PER_WORD);
5508    }
5509
5510  pic_reg_used = false;
5511  if (pic_offset_table_rtx
5512      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5513	  || current_function_profile))
5514    {
5515      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5516
5517      if (alt_pic_reg_used != INVALID_REGNUM)
5518	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5519
5520      pic_reg_used = true;
5521    }
5522
5523  if (pic_reg_used)
5524    {
5525      if (TARGET_64BIT)
5526        insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5527      else
5528        insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5529
5530      /* Even with accurate pre-reload life analysis, we can wind up
5531	 deleting all references to the pic register after reload.
5532	 Consider if cross-jumping unifies two sides of a branch
5533	 controlled by a comparison vs the only read from a global.
5534	 In which case, allow the set_got to be deleted, though we're
5535	 too late to do anything about the ebx save in the prologue.  */
5536      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5537    }
5538
5539  /* Prevent function calls from being scheduled before the call to mcount.
5540     In the pic_reg_used case, make sure that the GOT load isn't deleted.  */
5541  if (current_function_profile)
5542    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5543}
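
/* For reference, a sketch of a typical 32-bit prologue produced by the
   expansion above, for a function that needs a frame pointer, saves %esi
   and %ebx with pushes, and allocates 24 bytes of locals (the exact shape
   depends on the choices discussed above):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%esi
	pushl	%ebx
	subl	$24, %esp
*/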
5544
5545/* Emit code to restore saved registers using MOV insns.  First register
5546   is restored from POINTER + OFFSET.  */
5547static void
5548ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5549				  int maybe_eh_return)
5550{
5551  int regno;
5552  rtx base_address = gen_rtx_MEM (Pmode, pointer);
5553
5554  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5555    if (ix86_save_reg (regno, maybe_eh_return))
5556      {
5557	/* Ensure that adjust_address won't be forced to produce a pointer
5558	   outside the range allowed by the x86-64 instruction set.  */
5559	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5560	  {
5561	    rtx r11;
5562
5563	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5564	    emit_move_insn (r11, GEN_INT (offset));
5565	    emit_insn (gen_adddi3 (r11, r11, pointer));
5566	    base_address = gen_rtx_MEM (Pmode, r11);
5567	    offset = 0;
5568	  }
5569	emit_move_insn (gen_rtx_REG (Pmode, regno),
5570			adjust_address (base_address, Pmode, offset));
5571	offset += UNITS_PER_WORD;
5572      }
5573}
5574
5575/* Restore function stack, frame, and registers.  */
5576
5577void
5578ix86_expand_epilogue (int style)
5579{
5580  int regno;
5581  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5582  struct ix86_frame frame;
5583  HOST_WIDE_INT offset;
5584
5585  ix86_compute_frame_layout (&frame);
5586
5587  /* Calculate start of saved registers relative to ebp.  Special care
5588     must be taken for the normal return case of a function using
5589     eh_return: the eax and edx registers are marked as saved, but not
5590     restored along this path.  */
5591  offset = frame.nregs;
5592  if (current_function_calls_eh_return && style != 2)
5593    offset -= 2;
5594  offset *= -UNITS_PER_WORD;
5595
5596  /* If we're only restoring one register and sp is not valid then
5597     use a move instruction to restore the register, since it's
5598     less work than reloading sp and popping the register.
5599
5600     The default code results in a stack adjustment using an add/lea
5601     instruction, while this code results in a LEAVE instruction (or discrete
5602     equivalent), so it is profitable in some other cases as well, especially
5603     when there are no registers to restore.  We also use this code when
5604     TARGET_USE_LEAVE and there is exactly one register to pop.  This
5605     heuristic may need some tuning in the future.  */
5606  if ((!sp_valid && frame.nregs <= 1)
5607      || (TARGET_EPILOGUE_USING_MOVE
5608	  && cfun->machine->use_fast_prologue_epilogue
5609	  && (frame.nregs > 1 || frame.to_allocate))
5610      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5611      || (frame_pointer_needed && TARGET_USE_LEAVE
5612	  && cfun->machine->use_fast_prologue_epilogue
5613	  && frame.nregs == 1)
5614      || current_function_calls_eh_return)
5615    {
5616      /* Restore registers.  We can use ebp or esp to address the memory
5617	 locations.  If both are available, default to ebp, since offsets
5618	 are known to be small.  The only exception is esp pointing directly to
5619	 the end of the block of saved registers, where we may simplify the
5620	 addressing mode.  */
5621
5622      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5623	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5624					  frame.to_allocate, style == 2);
5625      else
5626	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5627					  offset, style == 2);
5628
5629      /* eh_return epilogues need %ecx added to the stack pointer.  */
5630      if (style == 2)
5631	{
5632	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5633
5634	  if (frame_pointer_needed)
5635	    {
5636	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5637	      tmp = plus_constant (tmp, UNITS_PER_WORD);
5638	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5639
5640	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5641	      emit_move_insn (hard_frame_pointer_rtx, tmp);
5642
5643	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5644					 const0_rtx, style);
5645	    }
5646	  else
5647	    {
5648	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5649	      tmp = plus_constant (tmp, (frame.to_allocate
5650                                         + frame.nregs * UNITS_PER_WORD));
5651	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5652	    }
5653	}
5654      else if (!frame_pointer_needed)
5655	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5656				   GEN_INT (frame.to_allocate
5657					    + frame.nregs * UNITS_PER_WORD),
5658				   style);
5659      /* If not an i386, mov & pop is faster than "leave".  */
5660      else if (TARGET_USE_LEAVE || optimize_size
5661	       || !cfun->machine->use_fast_prologue_epilogue)
5662	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5663      else
5664	{
5665	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5666				     hard_frame_pointer_rtx,
5667				     const0_rtx, style);
5668	  if (TARGET_64BIT)
5669	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5670	  else
5671	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5672	}
5673    }
5674  else
5675    {
5676      /* First step is to deallocate the stack frame so that we can
5677	 pop the registers.  */
5678      if (!sp_valid)
5679	{
5680	  gcc_assert (frame_pointer_needed);
5681	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5682				     hard_frame_pointer_rtx,
5683				     GEN_INT (offset), style);
5684	}
5685      else if (frame.to_allocate)
5686	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5687				   GEN_INT (frame.to_allocate), style);
5688
5689      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5690	if (ix86_save_reg (regno, false))
5691	  {
5692	    if (TARGET_64BIT)
5693	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5694	    else
5695	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5696	  }
5697      if (frame_pointer_needed)
5698	{
5699	  /* Leave results in shorter dependency chains on CPUs that are
5700	     able to grok it fast.  */
5701	  if (TARGET_USE_LEAVE)
5702	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5703	  else if (TARGET_64BIT)
5704	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5705	  else
5706	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5707	}
5708    }
5709
5710  if (cfun->machine->force_align_arg_pointer)
5711    {
5712      emit_insn (gen_addsi3 (stack_pointer_rtx,
5713			     cfun->machine->force_align_arg_pointer,
5714			     GEN_INT (-4)));
5715    }
5716
5717  /* Sibcall epilogues don't want a return instruction.  */
5718  if (style == 0)
5719    return;
5720
5721  if (current_function_pops_args && current_function_args_size)
5722    {
5723      rtx popc = GEN_INT (current_function_pops_args);
5724
5725      /* i386 can only pop 64K bytes.  If asked to pop more, pop
5726	 return address, do explicit add, and jump indirectly to the
5727	 caller.  */
5728
5729      if (current_function_pops_args >= 65536)
5730	{
5731	  rtx ecx = gen_rtx_REG (SImode, 2);
5732
5733	  /* There is no "pascal" calling convention in 64bit ABI.  */
5734	  gcc_assert (!TARGET_64BIT);
5735
5736	  emit_insn (gen_popsi1 (ecx));
5737	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5738	  emit_jump_insn (gen_return_indirect_internal (ecx));
5739	}
5740      else
5741	emit_jump_insn (gen_return_pop_internal (popc));
5742    }
5743  else
5744    emit_jump_insn (gen_return_internal ());
5745}
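
/* For reference, the matching epilogue for the prologue sketch above, on
   the pop path (no eh_return and no popped arguments; "leave" may be used
   instead of the final pop on targets that prefer it):

	addl	$24, %esp
	popl	%ebx
	popl	%esi
	popl	%ebp
	ret
*/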
5746
5747/* Reset any state that compilation of the function may have modified.  */
5748
5749static void
5750ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5751			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5752{
5753  if (pic_offset_table_rtx)
5754    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5755#if TARGET_MACHO
5756  /* Mach-O doesn't support labels at the end of objects, so if
5757     it looks like we might want one, insert a NOP.  */
5758  {
5759    rtx insn = get_last_insn ();
5760    while (insn
5761	   && NOTE_P (insn)
5762	   && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5763      insn = PREV_INSN (insn);
5764    if (insn
5765	&& (LABEL_P (insn)
5766	    || (NOTE_P (insn)
5767		&& NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5768      fputs ("\tnop\n", file);
5769  }
5770#endif
5771
5772}
5773
5774/* Extract the parts of an RTL expression that is a valid memory address
5775   for an instruction.  Return 0 if the structure of the address is
5776   grossly off.  Return -1 if the address contains ASHIFT, so it is not
5777   strictly valid but is still used to compute the length of a lea insn.  */
5778
5779int
5780ix86_decompose_address (rtx addr, struct ix86_address *out)
5781{
5782  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5783  rtx base_reg, index_reg;
5784  HOST_WIDE_INT scale = 1;
5785  rtx scale_rtx = NULL_RTX;
5786  int retval = 1;
5787  enum ix86_address_seg seg = SEG_DEFAULT;
5788
5789  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5790    base = addr;
5791  else if (GET_CODE (addr) == PLUS)
5792    {
5793      rtx addends[4], op;
5794      int n = 0, i;
5795
5796      op = addr;
5797      do
5798	{
5799	  if (n >= 4)
5800	    return 0;
5801	  addends[n++] = XEXP (op, 1);
5802	  op = XEXP (op, 0);
5803	}
5804      while (GET_CODE (op) == PLUS);
5805      if (n >= 4)
5806	return 0;
5807      addends[n] = op;
5808
5809      for (i = n; i >= 0; --i)
5810	{
5811	  op = addends[i];
5812	  switch (GET_CODE (op))
5813	    {
5814	    case MULT:
5815	      if (index)
5816		return 0;
5817	      index = XEXP (op, 0);
5818	      scale_rtx = XEXP (op, 1);
5819	      break;
5820
5821	    case UNSPEC:
5822	      if (XINT (op, 1) == UNSPEC_TP
5823	          && TARGET_TLS_DIRECT_SEG_REFS
5824	          && seg == SEG_DEFAULT)
5825		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5826	      else
5827		return 0;
5828	      break;
5829
5830	    case REG:
5831	    case SUBREG:
5832	      if (!base)
5833		base = op;
5834	      else if (!index)
5835		index = op;
5836	      else
5837		return 0;
5838	      break;
5839
5840	    case CONST:
5841	    case CONST_INT:
5842	    case SYMBOL_REF:
5843	    case LABEL_REF:
5844	      if (disp)
5845		return 0;
5846	      disp = op;
5847	      break;
5848
5849	    default:
5850	      return 0;
5851	    }
5852	}
5853    }
5854  else if (GET_CODE (addr) == MULT)
5855    {
5856      index = XEXP (addr, 0);		/* index*scale */
5857      scale_rtx = XEXP (addr, 1);
5858    }
5859  else if (GET_CODE (addr) == ASHIFT)
5860    {
5861      rtx tmp;
5862
5863      /* We're called for lea too, which implements ashift on occasion.  */
5864      index = XEXP (addr, 0);
5865      tmp = XEXP (addr, 1);
5866      if (GET_CODE (tmp) != CONST_INT)
5867	return 0;
5868      scale = INTVAL (tmp);
5869      if ((unsigned HOST_WIDE_INT) scale > 3)
5870	return 0;
5871      scale = 1 << scale;
5872      retval = -1;
5873    }
5874  else
5875    disp = addr;			/* displacement */
5876
5877  /* Extract the integral value of scale.  */
5878  if (scale_rtx)
5879    {
5880      if (GET_CODE (scale_rtx) != CONST_INT)
5881	return 0;
5882      scale = INTVAL (scale_rtx);
5883    }
5884
5885  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5886  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5887
5888  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
5889  if (base_reg && index_reg && scale == 1
5890      && (index_reg == arg_pointer_rtx
5891	  || index_reg == frame_pointer_rtx
5892	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5893    {
5894      rtx tmp;
5895      tmp = base, base = index, index = tmp;
5896      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5897    }
5898
5899  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
5900  if ((base_reg == hard_frame_pointer_rtx
5901       || base_reg == frame_pointer_rtx
5902       || base_reg == arg_pointer_rtx) && !disp)
5903    disp = const0_rtx;
5904
5905  /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
5906     Avoid this by transforming it to [%esi+0].  */
5907  if (ix86_tune == PROCESSOR_K6 && !optimize_size
5908      && base_reg && !index_reg && !disp
5909      && REG_P (base_reg)
5910      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5911    disp = const0_rtx;
5912
5913  /* Special case: encode reg+reg instead of reg*2.  */
5914  if (!base && index && scale && scale == 2)
5915    base = index, base_reg = index_reg, scale = 1;
5916
5917  /* Special case: scaling cannot be encoded without base or displacement.  */
5918  if (!base && !disp && index && scale != 1)
5919    disp = const0_rtx;
5920
5921  out->base = base;
5922  out->index = index;
5923  out->disp = disp;
5924  out->scale = scale;
5925  out->seg = seg;
5926
5927  return retval;
5928}
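
/* For reference, a worked example of the decomposition above: the operand
   of "movl 8(%ebx,%esi,4), %eax" reaches this function as an address of
   roughly the form

	(plus (plus (mult (reg esi) (const_int 4))
		    (reg ebx))
	      (const_int 8))

   and decomposes into base = %ebx, index = %esi, scale = 4, disp = 8 and
   seg = SEG_DEFAULT.  */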
5929
5930/* Return the cost of the memory address X.
5931   For i386, it is better to use a complex address than let gcc copy
5932   the address into a reg and make a new pseudo.  But not if the address
5933   requires two regs - that would mean more pseudos with longer
5934   lifetimes.  */
5935static int
5936ix86_address_cost (rtx x)
5937{
5938  struct ix86_address parts;
5939  int cost = 1;
5940  int ok = ix86_decompose_address (x, &parts);
5941
5942  gcc_assert (ok);
5943
5944  if (parts.base && GET_CODE (parts.base) == SUBREG)
5945    parts.base = SUBREG_REG (parts.base);
5946  if (parts.index && GET_CODE (parts.index) == SUBREG)
5947    parts.index = SUBREG_REG (parts.index);
5948
5949  /* More complex memory references are better.  */
5950  if (parts.disp && parts.disp != const0_rtx)
5951    cost--;
5952  if (parts.seg != SEG_DEFAULT)
5953    cost--;
5954
5955  /* Attempt to minimize number of registers in the address.  */
5956  if ((parts.base
5957       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5958      || (parts.index
5959	  && (!REG_P (parts.index)
5960	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5961    cost++;
5962
5963  if (parts.base
5964      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5965      && parts.index
5966      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5967      && parts.base != parts.index)
5968    cost++;
5969
5970  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5971     since its predecode logic can't detect the length of such instructions
5972     and they degenerate to vector decoding.  Increase the cost of such
5973     addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
5974     to split such addresses or even refuse such addresses at all.
5975
5976     The following addressing modes are affected:
5977      [base+scale*index]
5978      [scale*index+disp]
5979      [base+index]
5980
5981     The first and last cases may be avoidable by explicitly coding the zero
5982     into the memory address, but I don't have an AMD-K6 machine handy to
5983     check this theory.  */
5984
5985  if (TARGET_K6
5986      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5987	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5988	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5989    cost += 10;
5990
5991  return cost;
5992}
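
/* For reference, some example values of the cost computed above, assuming
   hard registers and a non-K6 target:

	(%ebx)			-> 1	base only, no displacement
	8(%ebx)			-> 0	a displacement lowers the cost
	8(%ebx,%esi,4)		-> 0
	(pseudo)		-> 2	pseudo registers raise the cost
	(pseudo1 + pseudo2)	-> 3	two distinct pseudos add another point
*/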
5993
5994/* If X is a machine specific address (i.e. a symbol or label being
5995   referenced as a displacement from the GOT implemented using an
5996   UNSPEC), then return the base term.  Otherwise return X.  */
5997
5998rtx
5999ix86_find_base_term (rtx x)
6000{
6001  rtx term;
6002
6003  if (TARGET_64BIT)
6004    {
6005      if (GET_CODE (x) != CONST)
6006	return x;
6007      term = XEXP (x, 0);
6008      if (GET_CODE (term) == PLUS
6009	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
6010	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6011	term = XEXP (term, 0);
6012      if (GET_CODE (term) != UNSPEC
6013	  || XINT (term, 1) != UNSPEC_GOTPCREL)
6014	return x;
6015
6016      term = XVECEXP (term, 0, 0);
6017
6018      if (GET_CODE (term) != SYMBOL_REF
6019	  && GET_CODE (term) != LABEL_REF)
6020	return x;
6021
6022      return term;
6023    }
6024
6025  term = ix86_delegitimize_address (x);
6026
6027  if (GET_CODE (term) != SYMBOL_REF
6028      && GET_CODE (term) != LABEL_REF)
6029    return x;
6030
6031  return term;
6032}
6033
6034/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6035   this is used to form addresses to local data when -fPIC is in
6036   use.  */
6037
6038static bool
6039darwin_local_data_pic (rtx disp)
6040{
6041  if (GET_CODE (disp) == MINUS)
6042    {
6043      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6044          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6045        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6046          {
6047            const char *sym_name = XSTR (XEXP (disp, 1), 0);
6048            if (! strcmp (sym_name, "<pic base>"))
6049              return true;
6050          }
6051    }
6052
6053  return false;
6054}
6055
6056/* Determine if a given RTX is a valid constant.  We already know this
6057   satisfies CONSTANT_P.  */
6058
6059bool
6060legitimate_constant_p (rtx x)
6061{
6062  switch (GET_CODE (x))
6063    {
6064    case CONST:
6065      x = XEXP (x, 0);
6066
6067      if (GET_CODE (x) == PLUS)
6068	{
6069	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6070	    return false;
6071	  x = XEXP (x, 0);
6072	}
6073
6074      if (TARGET_MACHO && darwin_local_data_pic (x))
6075	return true;
6076
6077      /* Only some unspecs are valid as "constants".  */
6078      if (GET_CODE (x) == UNSPEC)
6079	switch (XINT (x, 1))
6080	  {
6081	  case UNSPEC_GOTOFF:
6082	    return TARGET_64BIT;
6083	  case UNSPEC_TPOFF:
6084	  case UNSPEC_NTPOFF:
6085	    x = XVECEXP (x, 0, 0);
6086	    return (GET_CODE (x) == SYMBOL_REF
6087		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6088	  case UNSPEC_DTPOFF:
6089	    x = XVECEXP (x, 0, 0);
6090	    return (GET_CODE (x) == SYMBOL_REF
6091		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6092	  default:
6093	    return false;
6094	  }
6095
6096      /* We must have drilled down to a symbol.  */
6097      if (GET_CODE (x) == LABEL_REF)
6098	return true;
6099      if (GET_CODE (x) != SYMBOL_REF)
6100	return false;
6101      /* FALLTHRU */
6102
6103    case SYMBOL_REF:
6104      /* TLS symbols are never valid.  */
6105      if (SYMBOL_REF_TLS_MODEL (x))
6106	return false;
6107      break;
6108
6109    case CONST_DOUBLE:
6110      if (GET_MODE (x) == TImode
6111	  && x != CONST0_RTX (TImode)
6112          && !TARGET_64BIT)
6113	return false;
6114      break;
6115
6116    case CONST_VECTOR:
6117      if (x == CONST0_RTX (GET_MODE (x)))
6118	return true;
6119      return false;
6120
6121    default:
6122      break;
6123    }
6124
6125  /* Otherwise we handle everything else in the move patterns.  */
6126  return true;
6127}
6128
6129/* Determine if it's legal to put X into the constant pool.  This
6130   is not possible for the address of thread-local symbols, which
6131   is checked above.  */
6132
6133static bool
6134ix86_cannot_force_const_mem (rtx x)
6135{
6136  /* We can always put integral constants and vectors in memory.  */
6137  switch (GET_CODE (x))
6138    {
6139    case CONST_INT:
6140    case CONST_DOUBLE:
6141    case CONST_VECTOR:
6142      return false;
6143
6144    default:
6145      break;
6146    }
6147  return !legitimate_constant_p (x);
6148}
6149
6150/* Determine if a given RTX is a valid constant address.  */
6151
6152bool
6153constant_address_p (rtx x)
6154{
6155  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6156}
6157
6158/* Nonzero if the constant value X is a legitimate general operand
6159   when generating PIC code.  It is given that flag_pic is on and
6160   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
6161
6162bool
6163legitimate_pic_operand_p (rtx x)
6164{
6165  rtx inner;
6166
6167  switch (GET_CODE (x))
6168    {
6169    case CONST:
6170      inner = XEXP (x, 0);
6171      if (GET_CODE (inner) == PLUS
6172	  && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6173	inner = XEXP (inner, 0);
6174
6175      /* Only some unspecs are valid as "constants".  */
6176      if (GET_CODE (inner) == UNSPEC)
6177	switch (XINT (inner, 1))
6178	  {
6179	  case UNSPEC_GOTOFF:
6180	    return TARGET_64BIT;
6181	  case UNSPEC_TPOFF:
6182	    x = XVECEXP (inner, 0, 0);
6183	    return (GET_CODE (x) == SYMBOL_REF
6184		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6185	  default:
6186	    return false;
6187	  }
6188      /* FALLTHRU */
6189
6190    case SYMBOL_REF:
6191    case LABEL_REF:
6192      return legitimate_pic_address_disp_p (x);
6193
6194    default:
6195      return true;
6196    }
6197}
6198
6199/* Determine if a given CONST RTX is a valid memory displacement
6200   in PIC mode.  */
6201
6202int
6203legitimate_pic_address_disp_p (rtx disp)
6204{
6205  bool saw_plus;
6206
6207  /* In 64bit mode we can allow direct addresses of symbols and labels
6208     when they are not dynamic symbols.  */
6209  if (TARGET_64BIT)
6210    {
6211      rtx op0 = disp, op1;
6212
6213      switch (GET_CODE (disp))
6214	{
6215	case LABEL_REF:
6216	  return true;
6217
6218	case CONST:
6219	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
6220	    break;
6221	  op0 = XEXP (XEXP (disp, 0), 0);
6222	  op1 = XEXP (XEXP (disp, 0), 1);
6223	  if (GET_CODE (op1) != CONST_INT
6224	      || INTVAL (op1) >= 16*1024*1024
6225	      || INTVAL (op1) < -16*1024*1024)
6226            break;
6227	  if (GET_CODE (op0) == LABEL_REF)
6228	    return true;
6229	  if (GET_CODE (op0) != SYMBOL_REF)
6230	    break;
6231	  /* FALLTHRU */
6232
6233	case SYMBOL_REF:
6234	  /* TLS references should always be enclosed in UNSPEC.  */
6235	  if (SYMBOL_REF_TLS_MODEL (op0))
6236	    return false;
6237	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6238	    return true;
6239	  break;
6240
6241	default:
6242	  break;
6243	}
6244    }
6245  if (GET_CODE (disp) != CONST)
6246    return 0;
6247  disp = XEXP (disp, 0);
6248
6249  if (TARGET_64BIT)
6250    {
6251      /* It is unsafe to allow PLUS expressions; that would limit the allowed
6252         distance of GOT tables.  We should not need these anyway.  */
6253      if (GET_CODE (disp) != UNSPEC
6254	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
6255	      && XINT (disp, 1) != UNSPEC_GOTOFF))
6256	return 0;
6257
6258      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6259	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6260	return 0;
6261      return 1;
6262    }
6263
6264  saw_plus = false;
6265  if (GET_CODE (disp) == PLUS)
6266    {
6267      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6268	return 0;
6269      disp = XEXP (disp, 0);
6270      saw_plus = true;
6271    }
6272
6273  if (TARGET_MACHO && darwin_local_data_pic (disp))
6274    return 1;
6275
6276  if (GET_CODE (disp) != UNSPEC)
6277    return 0;
6278
6279  switch (XINT (disp, 1))
6280    {
6281    case UNSPEC_GOT:
6282      if (saw_plus)
6283	return false;
6284      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6285    case UNSPEC_GOTOFF:
6286      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6287	 While the ABI also specifies a 32bit relocation, we never produce it
6288	 in the small PIC model at all.  */
6289      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6290	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6291	  && !TARGET_64BIT)
6292        return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6293      return false;
6294    case UNSPEC_GOTTPOFF:
6295    case UNSPEC_GOTNTPOFF:
6296    case UNSPEC_INDNTPOFF:
6297      if (saw_plus)
6298	return false;
6299      disp = XVECEXP (disp, 0, 0);
6300      return (GET_CODE (disp) == SYMBOL_REF
6301	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6302    case UNSPEC_NTPOFF:
6303      disp = XVECEXP (disp, 0, 0);
6304      return (GET_CODE (disp) == SYMBOL_REF
6305	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6306    case UNSPEC_DTPOFF:
6307      disp = XVECEXP (disp, 0, 0);
6308      return (GET_CODE (disp) == SYMBOL_REF
6309	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6310    }
6311
6312  return 0;
6313}
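
/* For illustration (roughly, not exhaustive): in 32-bit PIC code the
   function above accepts displacements such as

     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
     (const (plus (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF) (const_int 4)))

   for local symbols and (const (unspec [(symbol_ref "bar")] UNSPEC_GOT))
   for a GOT slot, while in 64-bit mode only the UNSPEC_GOTPCREL and
   UNSPEC_GOTOFF forms (besides the plain local symbols and labels handled
   at the top) are allowed.  */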
6314
6315/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6316   memory address for an instruction.  The MODE argument is the machine mode
6317   for the MEM expression that wants to use this address.
6318
6319   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
6320   convert common non-canonical forms to canonical form so that they will
6321   be recognized.  */
6322
6323int
6324legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6325{
6326  struct ix86_address parts;
6327  rtx base, index, disp;
6328  HOST_WIDE_INT scale;
6329  const char *reason = NULL;
6330  rtx reason_rtx = NULL_RTX;
6331
6332  if (TARGET_DEBUG_ADDR)
6333    {
6334      fprintf (stderr,
6335	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6336	       GET_MODE_NAME (mode), strict);
6337      debug_rtx (addr);
6338    }
6339
6340  if (ix86_decompose_address (addr, &parts) <= 0)
6341    {
6342      reason = "decomposition failed";
6343      goto report_error;
6344    }
6345
6346  base = parts.base;
6347  index = parts.index;
6348  disp = parts.disp;
6349  scale = parts.scale;
6350
6351  /* Validate base register.
6352
6353     Don't allow SUBREGs that span more than a word here.  They can lead to spill
6354     failures when the base is one word out of a two-word structure, which is
6355     represented internally as a DImode int.  */
6356
6357  if (base)
6358    {
6359      rtx reg;
6360      reason_rtx = base;
6361
6362      if (REG_P (base))
6363  	reg = base;
6364      else if (GET_CODE (base) == SUBREG
6365	       && REG_P (SUBREG_REG (base))
6366	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6367		  <= UNITS_PER_WORD)
6368  	reg = SUBREG_REG (base);
6369      else
6370	{
6371	  reason = "base is not a register";
6372	  goto report_error;
6373	}
6374
6375      if (GET_MODE (base) != Pmode)
6376	{
6377	  reason = "base is not in Pmode";
6378	  goto report_error;
6379	}
6380
6381      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6382	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6383	{
6384	  reason = "base is not valid";
6385	  goto report_error;
6386	}
6387    }
6388
6389  /* Validate index register.
6390
6391     Don't allow SUBREGs that span more than a word here -- same as above.  */
6392
6393  if (index)
6394    {
6395      rtx reg;
6396      reason_rtx = index;
6397
6398      if (REG_P (index))
6399  	reg = index;
6400      else if (GET_CODE (index) == SUBREG
6401	       && REG_P (SUBREG_REG (index))
6402	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6403		  <= UNITS_PER_WORD)
6404  	reg = SUBREG_REG (index);
6405      else
6406	{
6407	  reason = "index is not a register";
6408	  goto report_error;
6409	}
6410
6411      if (GET_MODE (index) != Pmode)
6412	{
6413	  reason = "index is not in Pmode";
6414	  goto report_error;
6415	}
6416
6417      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6418	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6419	{
6420	  reason = "index is not valid";
6421	  goto report_error;
6422	}
6423    }
6424
6425  /* Validate scale factor.  */
6426  if (scale != 1)
6427    {
6428      reason_rtx = GEN_INT (scale);
6429      if (!index)
6430	{
6431	  reason = "scale without index";
6432	  goto report_error;
6433	}
6434
6435      if (scale != 2 && scale != 4 && scale != 8)
6436	{
6437	  reason = "scale is not a valid multiplier";
6438	  goto report_error;
6439	}
6440    }
6441
6442  /* Validate displacement.  */
6443  if (disp)
6444    {
6445      reason_rtx = disp;
6446
6447      if (GET_CODE (disp) == CONST
6448	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6449	switch (XINT (XEXP (disp, 0), 1))
6450	  {
6451	  /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
6452	     used.  While the ABI also specifies 32bit relocations, we don't
6453	     produce them at all and use IP-relative addressing instead.  */
6454	  case UNSPEC_GOT:
6455	  case UNSPEC_GOTOFF:
6456	    gcc_assert (flag_pic);
6457	    if (!TARGET_64BIT)
6458	      goto is_legitimate_pic;
6459	    reason = "64bit address unspec";
6460	    goto report_error;
6461
6462	  case UNSPEC_GOTPCREL:
6463	    gcc_assert (flag_pic);
6464	    goto is_legitimate_pic;
6465
6466	  case UNSPEC_GOTTPOFF:
6467	  case UNSPEC_GOTNTPOFF:
6468	  case UNSPEC_INDNTPOFF:
6469	  case UNSPEC_NTPOFF:
6470	  case UNSPEC_DTPOFF:
6471	    break;
6472
6473	  default:
6474	    reason = "invalid address unspec";
6475	    goto report_error;
6476	  }
6477
6478      else if (SYMBOLIC_CONST (disp)
6479	       && (flag_pic
6480		   || (TARGET_MACHO
6481#if TARGET_MACHO
6482		       && MACHOPIC_INDIRECT
6483		       && !machopic_operand_p (disp)
6484#endif
6485	       )))
6486	{
6487
6488	is_legitimate_pic:
6489	  if (TARGET_64BIT && (index || base))
6490	    {
6491	      /* foo@dtpoff(%rX) is ok.  */
6492	      if (GET_CODE (disp) != CONST
6493		  || GET_CODE (XEXP (disp, 0)) != PLUS
6494		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6495		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6496		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6497		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6498		{
6499		  reason = "non-constant pic memory reference";
6500		  goto report_error;
6501		}
6502	    }
6503	  else if (! legitimate_pic_address_disp_p (disp))
6504	    {
6505	      reason = "displacement is an invalid pic construct";
6506	      goto report_error;
6507	    }
6508
6509          /* This code used to verify that a symbolic pic displacement
6510	     includes the pic_offset_table_rtx register.
6511
6512	     While this is a good idea, these constructs may unfortunately
6513	     be created by the "adds using lea" optimization for incorrect
6514	     code like:
6515
6516	     int a;
6517	     int foo(int i)
6518	       {
6519	         return *(&a+i);
6520	       }
6521
6522	     This code is nonsensical, but results in addressing the
6523	     GOT table with pic_offset_table_rtx as the base.  We can't
6524	     simply refuse it, since it gets matched by the "addsi3"
6525	     pattern, which later gets split to an lea when the output
6526	     register differs from the input.  While this could be
6527	     handled by a separate addsi pattern for this case that
6528	     never results in an lea, disabling this test seems to be
6529	     the easier and correct fix for the crash.  */
6530	}
6531      else if (GET_CODE (disp) != LABEL_REF
6532	       && GET_CODE (disp) != CONST_INT
6533	       && (GET_CODE (disp) != CONST
6534		   || !legitimate_constant_p (disp))
6535	       && (GET_CODE (disp) != SYMBOL_REF
6536		   || !legitimate_constant_p (disp)))
6537	{
6538	  reason = "displacement is not constant";
6539	  goto report_error;
6540	}
6541      else if (TARGET_64BIT
6542	       && !x86_64_immediate_operand (disp, VOIDmode))
6543	{
6544	  reason = "displacement is out of range";
6545	  goto report_error;
6546	}
6547    }
6548
6549  /* Everything looks valid.  */
6550  if (TARGET_DEBUG_ADDR)
6551    fprintf (stderr, "Success.\n");
6552  return TRUE;
6553
6554 report_error:
6555  if (TARGET_DEBUG_ADDR)
6556    {
6557      fprintf (stderr, "Error: %s\n", reason);
6558      debug_rtx (reason_rtx);
6559    }
6560  return FALSE;
6561}
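
/* For illustration: the most general address accepted above decomposes into
   base + index*scale + displacement, e.g.

     (plus (plus (reg:SI bx) (mult:SI (reg:SI si) (const_int 4)))
           (const_int 16))

   corresponds to the AT&T operand 16(%ebx,%esi,4); the scale must be 1, 2,
   4 or 8 and both registers must be in Pmode.  */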
6562
6563/* Return a unique alias set for the GOT.  */
6564
6565static HOST_WIDE_INT
6566ix86_GOT_alias_set (void)
6567{
6568  static HOST_WIDE_INT set = -1;
6569  if (set == -1)
6570    set = new_alias_set ();
6571  return set;
6572}
6573
6574/* Return a legitimate reference for ORIG (an address) using the
6575   register REG.  If REG is 0, a new pseudo is generated.
6576
6577   There are two types of references that must be handled:
6578
6579   1. Global data references must load the address from the GOT, via
6580      the PIC reg.  An insn is emitted to do this load, and the reg is
6581      returned.
6582
6583   2. Static data references, constant pool addresses, and code labels
6584      compute the address as an offset from the GOT, whose base is in
6585      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
6586      differentiate them from global data objects.  The returned
6587      address is the PIC reg + an unspec constant.
6588
6589   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6590   reg also appears in the address.  */
6591
6592static rtx
6593legitimize_pic_address (rtx orig, rtx reg)
6594{
6595  rtx addr = orig;
6596  rtx new = orig;
6597  rtx base;
6598
6599#if TARGET_MACHO
6600  if (TARGET_MACHO && !TARGET_64BIT)
6601    {
6602      if (reg == 0)
6603	reg = gen_reg_rtx (Pmode);
6604      /* Use the generic Mach-O PIC machinery.  */
6605      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6606    }
6607#endif
6608
6609  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6610    new = addr;
6611  else if (TARGET_64BIT
6612	   && ix86_cmodel != CM_SMALL_PIC
6613	   && local_symbolic_operand (addr, Pmode))
6614    {
6615      rtx tmpreg;
6616      /* This symbol may be referenced via a displacement from the PIC
6617	 base address (@GOTOFF).  */
6618
6619      if (reload_in_progress)
6620	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6621      if (GET_CODE (addr) == CONST)
6622	addr = XEXP (addr, 0);
6623      if (GET_CODE (addr) == PLUS)
6624	  {
6625            new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6626	    new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6627	  }
6628	else
6629          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6630      new = gen_rtx_CONST (Pmode, new);
6631      if (!reg)
6632        tmpreg = gen_reg_rtx (Pmode);
6633      else
6634	tmpreg = reg;
6635      emit_move_insn (tmpreg, new);
6636
6637      if (reg != 0)
6638	{
6639	  new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6640				     tmpreg, 1, OPTAB_DIRECT);
6641	  new = reg;
6642	}
6643      else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6644    }
6645  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6646    {
6647      /* This symbol may be referenced via a displacement from the PIC
6648	 base address (@GOTOFF).  */
6649
6650      if (reload_in_progress)
6651	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6652      if (GET_CODE (addr) == CONST)
6653	addr = XEXP (addr, 0);
6654      if (GET_CODE (addr) == PLUS)
6655	  {
6656            new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6657	    new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6658	  }
6659	else
6660          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6661      new = gen_rtx_CONST (Pmode, new);
6662      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6663
6664      if (reg != 0)
6665	{
6666	  emit_move_insn (reg, new);
6667	  new = reg;
6668	}
6669    }
6670  else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6671    {
6672      if (TARGET_64BIT)
6673	{
6674	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6675	  new = gen_rtx_CONST (Pmode, new);
6676	  new = gen_const_mem (Pmode, new);
6677	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6678
6679	  if (reg == 0)
6680	    reg = gen_reg_rtx (Pmode);
6681	  /* Use gen_movsi directly; otherwise the address is loaded
6682	     into a register for CSE.  We don't want to CSE these addresses;
6683	     instead we CSE the addresses loaded from the GOT table, so skip this.  */
6684	  emit_insn (gen_movsi (reg, new));
6685	  new = reg;
6686	}
6687      else
6688	{
6689	  /* This symbol must be referenced via a load from the
6690	     Global Offset Table (@GOT).  */
6691
6692	  if (reload_in_progress)
6693	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6694	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6695	  new = gen_rtx_CONST (Pmode, new);
6696	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6697	  new = gen_const_mem (Pmode, new);
6698	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6699
6700	  if (reg == 0)
6701	    reg = gen_reg_rtx (Pmode);
6702	  emit_move_insn (reg, new);
6703	  new = reg;
6704	}
6705    }
6706  else
6707    {
6708      if (GET_CODE (addr) == CONST_INT
6709	  && !x86_64_immediate_operand (addr, VOIDmode))
6710	{
6711	  if (reg)
6712	    {
6713	      emit_move_insn (reg, addr);
6714	      new = reg;
6715	    }
6716	  else
6717	    new = force_reg (Pmode, addr);
6718	}
6719      else if (GET_CODE (addr) == CONST)
6720	{
6721	  addr = XEXP (addr, 0);
6722
6723	  /* We must match stuff we generated before.  Assume the only
6724	     unspecs that can get here are ours.  Not that we could do
6725	     anything with them anyway....  */
6726	  if (GET_CODE (addr) == UNSPEC
6727	      || (GET_CODE (addr) == PLUS
6728		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6729	    return orig;
6730	  gcc_assert (GET_CODE (addr) == PLUS);
6731	}
6732      if (GET_CODE (addr) == PLUS)
6733	{
6734	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6735
6736	  /* Check first to see if this is a constant offset from a @GOTOFF
6737	     symbol reference.  */
6738	  if (local_symbolic_operand (op0, Pmode)
6739	      && GET_CODE (op1) == CONST_INT)
6740	    {
6741	      if (!TARGET_64BIT)
6742		{
6743		  if (reload_in_progress)
6744		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6745		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6746					UNSPEC_GOTOFF);
6747		  new = gen_rtx_PLUS (Pmode, new, op1);
6748		  new = gen_rtx_CONST (Pmode, new);
6749		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6750
6751		  if (reg != 0)
6752		    {
6753		      emit_move_insn (reg, new);
6754		      new = reg;
6755		    }
6756		}
6757	      else
6758		{
6759		  if (INTVAL (op1) < -16*1024*1024
6760		      || INTVAL (op1) >= 16*1024*1024)
6761		    {
6762		      if (!x86_64_immediate_operand (op1, Pmode))
6763			op1 = force_reg (Pmode, op1);
6764		      new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6765		    }
6766		}
6767	    }
6768	  else
6769	    {
6770	      base = legitimize_pic_address (XEXP (addr, 0), reg);
6771	      new  = legitimize_pic_address (XEXP (addr, 1),
6772					     base == reg ? NULL_RTX : reg);
6773
6774	      if (GET_CODE (new) == CONST_INT)
6775		new = plus_constant (base, INTVAL (new));
6776	      else
6777		{
6778		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6779		    {
6780		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6781		      new = XEXP (new, 1);
6782		    }
6783		  new = gen_rtx_PLUS (Pmode, base, new);
6784		}
6785	    }
6786	}
6787    }
6788  return new;
6789}
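
/* For illustration (32-bit PIC): a reference to a global symbol "foo" is
   rewritten above into a load from its GOT slot,

     (mem (plus (reg) (const (unspec [(symbol_ref "foo")] UNSPEC_GOT))))

   i.e. foo@GOT(%ebx) with the usual PIC register, while a local symbol
   becomes a plain @GOTOFF offset added to pic_offset_table_rtx.  */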
6790
6791/* Load the thread pointer.  If TO_REG is true, force it into a register.  */
6792
6793static rtx
6794get_thread_pointer (int to_reg)
6795{
6796  rtx tp, reg, insn;
6797
6798  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6799  if (!to_reg)
6800    return tp;
6801
6802  reg = gen_reg_rtx (Pmode);
6803  insn = gen_rtx_SET (VOIDmode, reg, tp);
6804  insn = emit_insn (insn);
6805
6806  return reg;
6807}
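
/* On GNU/Linux targets, for example, the UNSPEC_TP above ends up as a read
   of the TLS segment base (%gs:0 in 32-bit code, %fs:0 in 64-bit code);
   callers then add a @TPOFF/@NTPOFF displacement to it.  */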
6808
6809/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
6810   false if we expect this to be used for a memory address and true if
6811   we expect to load the address into a register.  */
6812
6813static rtx
6814legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6815{
6816  rtx dest, base, off, pic, tp;
6817  int type;
6818
6819  switch (model)
6820    {
6821    case TLS_MODEL_GLOBAL_DYNAMIC:
6822      dest = gen_reg_rtx (Pmode);
6823      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6824
6825      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6826	{
6827	  rtx rax = gen_rtx_REG (Pmode, 0), insns;
6828
6829	  start_sequence ();
6830	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6831	  insns = get_insns ();
6832	  end_sequence ();
6833
6834	  emit_libcall_block (insns, dest, rax, x);
6835	}
6836      else if (TARGET_64BIT && TARGET_GNU2_TLS)
6837	emit_insn (gen_tls_global_dynamic_64 (dest, x));
6838      else
6839	emit_insn (gen_tls_global_dynamic_32 (dest, x));
6840
6841      if (TARGET_GNU2_TLS)
6842	{
6843	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6844
6845	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6846	}
6847      break;
6848
6849    case TLS_MODEL_LOCAL_DYNAMIC:
6850      base = gen_reg_rtx (Pmode);
6851      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6852
6853      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6854	{
6855	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6856
6857	  start_sequence ();
6858	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6859	  insns = get_insns ();
6860	  end_sequence ();
6861
6862	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6863	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6864	  emit_libcall_block (insns, base, rax, note);
6865	}
6866      else if (TARGET_64BIT && TARGET_GNU2_TLS)
6867	emit_insn (gen_tls_local_dynamic_base_64 (base));
6868      else
6869	emit_insn (gen_tls_local_dynamic_base_32 (base));
6870
6871      if (TARGET_GNU2_TLS)
6872	{
6873	  rtx x = ix86_tls_module_base ();
6874
6875	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
6876			       gen_rtx_MINUS (Pmode, x, tp));
6877	}
6878
6879      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6880      off = gen_rtx_CONST (Pmode, off);
6881
6882      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6883
6884      if (TARGET_GNU2_TLS)
6885	{
6886	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6887
6888	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6889	}
6890
6891      break;
6892
6893    case TLS_MODEL_INITIAL_EXEC:
6894      if (TARGET_64BIT)
6895	{
6896	  pic = NULL;
6897	  type = UNSPEC_GOTNTPOFF;
6898	}
6899      else if (flag_pic)
6900	{
6901	  if (reload_in_progress)
6902	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6903	  pic = pic_offset_table_rtx;
6904	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6905	}
6906      else if (!TARGET_ANY_GNU_TLS)
6907	{
6908	  pic = gen_reg_rtx (Pmode);
6909	  emit_insn (gen_set_got (pic));
6910	  type = UNSPEC_GOTTPOFF;
6911	}
6912      else
6913	{
6914	  pic = NULL;
6915	  type = UNSPEC_INDNTPOFF;
6916	}
6917
6918      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6919      off = gen_rtx_CONST (Pmode, off);
6920      if (pic)
6921	off = gen_rtx_PLUS (Pmode, pic, off);
6922      off = gen_const_mem (Pmode, off);
6923      set_mem_alias_set (off, ix86_GOT_alias_set ());
6924
6925      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6926	{
6927          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6928	  off = force_reg (Pmode, off);
6929	  return gen_rtx_PLUS (Pmode, base, off);
6930	}
6931      else
6932	{
6933	  base = get_thread_pointer (true);
6934	  dest = gen_reg_rtx (Pmode);
6935	  emit_insn (gen_subsi3 (dest, base, off));
6936	}
6937      break;
6938
6939    case TLS_MODEL_LOCAL_EXEC:
6940      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6941			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6942			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6943      off = gen_rtx_CONST (Pmode, off);
6944
6945      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6946	{
6947	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6948	  return gen_rtx_PLUS (Pmode, base, off);
6949	}
6950      else
6951	{
6952	  base = get_thread_pointer (true);
6953	  dest = gen_reg_rtx (Pmode);
6954	  emit_insn (gen_subsi3 (dest, base, off));
6955	}
6956      break;
6957
6958    default:
6959      gcc_unreachable ();
6960    }
6961
6962  return dest;
6963}
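
/* A rough sketch of the initial-exec case above on 64-bit ELF: the thread
   pointer offset of the variable is loaded from the GOT with something like
   "movq x@GOTTPOFF(%rip), %reg" and added to the thread pointer (the %fs
   base), either explicitly or as a %fs-relative memory reference, while
   local-exec uses the constant x@TPOFF displacement directly.  */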
6964
6965/* Try machine-dependent ways of modifying an illegitimate address
6966   to be legitimate.  If we find one, return the new, valid address.
6967   This macro is used in only one place: `memory_address' in explow.c.
6968
6969   OLDX is the address as it was before break_out_memory_refs was called.
6970   In some cases it is useful to look at this to decide what needs to be done.
6971
6972   MODE and WIN are passed so that this macro can use
6973   GO_IF_LEGITIMATE_ADDRESS.
6974
6975   It is always safe for this macro to do nothing.  It exists to recognize
6976   opportunities to optimize the output.
6977
6978   For the 80386, we handle X+REG by loading X into a register R and
6979   using R+REG.  R will go in a general reg and indexing will be used.
6980   However, if REG is a broken-out memory address or multiplication,
6981   nothing needs to be done because REG can certainly go in a general reg.
6982
6983   When -fpic is used, special handling is needed for symbolic references.
6984   See comments by legitimize_pic_address in i386.c for details.  */
6985
6986rtx
6987legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6988{
6989  int changed = 0;
6990  unsigned log;
6991
6992  if (TARGET_DEBUG_ADDR)
6993    {
6994      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6995	       GET_MODE_NAME (mode));
6996      debug_rtx (x);
6997    }
6998
6999  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7000  if (log)
7001    return legitimize_tls_address (x, log, false);
7002  if (GET_CODE (x) == CONST
7003      && GET_CODE (XEXP (x, 0)) == PLUS
7004      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7005      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7006    {
7007      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7008      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7009    }
7010
7011  if (flag_pic && SYMBOLIC_CONST (x))
7012    return legitimize_pic_address (x, 0);
7013
7014  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7015  if (GET_CODE (x) == ASHIFT
7016      && GET_CODE (XEXP (x, 1)) == CONST_INT
7017      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7018    {
7019      changed = 1;
7020      log = INTVAL (XEXP (x, 1));
7021      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7022			GEN_INT (1 << log));
7023    }
7024
7025  if (GET_CODE (x) == PLUS)
7026    {
7027      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7028
7029      if (GET_CODE (XEXP (x, 0)) == ASHIFT
7030	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7031	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7032	{
7033	  changed = 1;
7034	  log = INTVAL (XEXP (XEXP (x, 0), 1));
7035	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
7036				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7037				      GEN_INT (1 << log));
7038	}
7039
7040      if (GET_CODE (XEXP (x, 1)) == ASHIFT
7041	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7042	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7043	{
7044	  changed = 1;
7045	  log = INTVAL (XEXP (XEXP (x, 1), 1));
7046	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
7047				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7048				      GEN_INT (1 << log));
7049	}
7050
7051      /* Put multiply first if it isn't already.  */
7052      if (GET_CODE (XEXP (x, 1)) == MULT)
7053	{
7054	  rtx tmp = XEXP (x, 0);
7055	  XEXP (x, 0) = XEXP (x, 1);
7056	  XEXP (x, 1) = tmp;
7057	  changed = 1;
7058	}
7059
7060      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7061	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
7062	 created by virtual register instantiation, register elimination, and
7063	 similar optimizations.  */
7064      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7065	{
7066	  changed = 1;
7067	  x = gen_rtx_PLUS (Pmode,
7068			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
7069					  XEXP (XEXP (x, 1), 0)),
7070			    XEXP (XEXP (x, 1), 1));
7071	}
7072
7073      /* Canonicalize
7074	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7075	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
7076      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7077	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7078	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7079	       && CONSTANT_P (XEXP (x, 1)))
7080	{
7081	  rtx constant;
7082	  rtx other = NULL_RTX;
7083
7084	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7085	    {
7086	      constant = XEXP (x, 1);
7087	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7088	    }
7089	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7090	    {
7091	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7092	      other = XEXP (x, 1);
7093	    }
7094	  else
7095	    constant = 0;
7096
7097	  if (constant)
7098	    {
7099	      changed = 1;
7100	      x = gen_rtx_PLUS (Pmode,
7101				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7102					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
7103				plus_constant (other, INTVAL (constant)));
7104	    }
7105	}
7106
7107      if (changed && legitimate_address_p (mode, x, FALSE))
7108	return x;
7109
7110      if (GET_CODE (XEXP (x, 0)) == MULT)
7111	{
7112	  changed = 1;
7113	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7114	}
7115
7116      if (GET_CODE (XEXP (x, 1)) == MULT)
7117	{
7118	  changed = 1;
7119	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7120	}
7121
7122      if (changed
7123	  && GET_CODE (XEXP (x, 1)) == REG
7124	  && GET_CODE (XEXP (x, 0)) == REG)
7125	return x;
7126
7127      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7128	{
7129	  changed = 1;
7130	  x = legitimize_pic_address (x, 0);
7131	}
7132
7133      if (changed && legitimate_address_p (mode, x, FALSE))
7134	return x;
7135
7136      if (GET_CODE (XEXP (x, 0)) == REG)
7137	{
7138	  rtx temp = gen_reg_rtx (Pmode);
7139	  rtx val  = force_operand (XEXP (x, 1), temp);
7140	  if (val != temp)
7141	    emit_move_insn (temp, val);
7142
7143	  XEXP (x, 1) = temp;
7144	  return x;
7145	}
7146
7147      else if (GET_CODE (XEXP (x, 1)) == REG)
7148	{
7149	  rtx temp = gen_reg_rtx (Pmode);
7150	  rtx val  = force_operand (XEXP (x, 0), temp);
7151	  if (val != temp)
7152	    emit_move_insn (temp, val);
7153
7154	  XEXP (x, 0) = temp;
7155	  return x;
7156	}
7157    }
7158
7159  return x;
7160}
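
/* For example, the non-canonical address
     (plus (reg:SI ax) (ashift:SI (reg:SI dx) (const_int 2)))
   is rewritten above so that the shift becomes a multiplication by 4,
   allowing the scaled-index addressing mode (%eax,%edx,4) to be used.  */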
7161
7162/* Print an integer constant expression in assembler syntax.  Addition
7163   and subtraction are the only arithmetic that may appear in these
7164   expressions.  FILE is the stdio stream to write to, X is the rtx, and
7165   CODE is the operand print code from the output string.  */
7166
7167static void
7168output_pic_addr_const (FILE *file, rtx x, int code)
7169{
7170  char buf[256];
7171
7172  switch (GET_CODE (x))
7173    {
7174    case PC:
7175      gcc_assert (flag_pic);
7176      putc ('.', file);
7177      break;
7178
7179    case SYMBOL_REF:
7180      if (! TARGET_MACHO || TARGET_64BIT)
7181	output_addr_const (file, x);
7182      else
7183	{
7184	  const char *name = XSTR (x, 0);
7185
7186	  /* Mark the decl as referenced so that cgraph will output the function.  */
7187	  if (SYMBOL_REF_DECL (x))
7188	    mark_decl_referenced (SYMBOL_REF_DECL (x));
7189
7190#if TARGET_MACHO
7191	  if (MACHOPIC_INDIRECT
7192	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7193	    name = machopic_indirection_name (x, /*stub_p=*/true);
7194#endif
7195	  assemble_name (file, name);
7196	}
7197      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7198	fputs ("@PLT", file);
7199      break;
7200
7201    case LABEL_REF:
7202      x = XEXP (x, 0);
7203      /* FALLTHRU */
7204    case CODE_LABEL:
7205      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7206      assemble_name (asm_out_file, buf);
7207      break;
7208
7209    case CONST_INT:
7210      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7211      break;
7212
7213    case CONST:
7214      /* This used to output parentheses around the expression,
7215	 but that does not work on the 386 (either ATT or BSD assembler).  */
7216      output_pic_addr_const (file, XEXP (x, 0), code);
7217      break;
7218
7219    case CONST_DOUBLE:
7220      if (GET_MODE (x) == VOIDmode)
7221	{
7222	  /* We can use %d if the number is <32 bits and positive.  */
7223	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7224	    fprintf (file, "0x%lx%08lx",
7225		     (unsigned long) CONST_DOUBLE_HIGH (x),
7226		     (unsigned long) CONST_DOUBLE_LOW (x));
7227	  else
7228	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7229	}
7230      else
7231	/* We can't handle floating point constants;
7232	   PRINT_OPERAND must handle them.  */
7233	output_operand_lossage ("floating constant misused");
7234      break;
7235
7236    case PLUS:
7237      /* Some assemblers need integer constants to appear first.  */
7238      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7239	{
7240	  output_pic_addr_const (file, XEXP (x, 0), code);
7241	  putc ('+', file);
7242	  output_pic_addr_const (file, XEXP (x, 1), code);
7243	}
7244      else
7245	{
7246	  gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7247	  output_pic_addr_const (file, XEXP (x, 1), code);
7248	  putc ('+', file);
7249	  output_pic_addr_const (file, XEXP (x, 0), code);
7250	}
7251      break;
7252
7253    case MINUS:
7254      if (!TARGET_MACHO)
7255	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7256      output_pic_addr_const (file, XEXP (x, 0), code);
7257      putc ('-', file);
7258      output_pic_addr_const (file, XEXP (x, 1), code);
7259      if (!TARGET_MACHO)
7260	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7261      break;
7262
7263     case UNSPEC:
7264       gcc_assert (XVECLEN (x, 0) == 1);
7265       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7266       switch (XINT (x, 1))
7267	{
7268	case UNSPEC_GOT:
7269	  fputs ("@GOT", file);
7270	  break;
7271	case UNSPEC_GOTOFF:
7272	  fputs ("@GOTOFF", file);
7273	  break;
7274	case UNSPEC_GOTPCREL:
7275	  fputs ("@GOTPCREL(%rip)", file);
7276	  break;
7277	case UNSPEC_GOTTPOFF:
7278	  /* FIXME: This might be @TPOFF in Sun ld too.  */
7279	  fputs ("@GOTTPOFF", file);
7280	  break;
7281	case UNSPEC_TPOFF:
7282	  fputs ("@TPOFF", file);
7283	  break;
7284	case UNSPEC_NTPOFF:
7285	  if (TARGET_64BIT)
7286	    fputs ("@TPOFF", file);
7287	  else
7288	    fputs ("@NTPOFF", file);
7289	  break;
7290	case UNSPEC_DTPOFF:
7291	  fputs ("@DTPOFF", file);
7292	  break;
7293	case UNSPEC_GOTNTPOFF:
7294	  if (TARGET_64BIT)
7295	    fputs ("@GOTTPOFF(%rip)", file);
7296	  else
7297	    fputs ("@GOTNTPOFF", file);
7298	  break;
7299	case UNSPEC_INDNTPOFF:
7300	  fputs ("@INDNTPOFF", file);
7301	  break;
7302	default:
7303	  output_operand_lossage ("invalid UNSPEC as operand");
7304	  break;
7305	}
7306       break;
7307
7308    default:
7309      output_operand_lossage ("invalid expression as operand");
7310    }
7311}
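
/* For example, the displacement
     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   is printed above as "foo@GOTOFF", and the 64-bit form using
   UNSPEC_GOTPCREL is printed as "foo@GOTPCREL(%rip)".  */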
7312
7313/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7314   We need to emit DTP-relative relocations.  */
7315
7316static void
7317i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7318{
7319  fputs (ASM_LONG, file);
7320  output_addr_const (file, x);
7321  fputs ("@DTPOFF", file);
7322  switch (size)
7323    {
7324    case 4:
7325      break;
7326    case 8:
7327      fputs (", 0", file);
7328      break;
7329    default:
7330      gcc_unreachable ();
7331   }
7332}
7333
7334/* In the name of slightly smaller debug output, and to cater to
7335   general assembler lossage, recognize PIC+GOTOFF and turn it back
7336   into a direct symbol reference.
7337
7338   On Darwin, this is necessary to avoid a crash, because Darwin
7339   has a different PIC label for each routine but the DWARF debugging
7340   information is not associated with any particular routine, so it's
7341   necessary to remove references to the PIC label from RTL stored by
7342   the DWARF output code.  */
7343
7344static rtx
7345ix86_delegitimize_address (rtx orig_x)
7346{
7347  rtx x = orig_x;
7348  /* reg_addend is NULL or a multiple of some register.  */
7349  rtx reg_addend = NULL_RTX;
7350  /* const_addend is NULL or a const_int.  */
7351  rtx const_addend = NULL_RTX;
7352  /* This is the result, or NULL.  */
7353  rtx result = NULL_RTX;
7354
7355  if (GET_CODE (x) == MEM)
7356    x = XEXP (x, 0);
7357
7358  if (TARGET_64BIT)
7359    {
7360      if (GET_CODE (x) != CONST
7361	  || GET_CODE (XEXP (x, 0)) != UNSPEC
7362	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7363	  || GET_CODE (orig_x) != MEM)
7364	return orig_x;
7365      return XVECEXP (XEXP (x, 0), 0, 0);
7366    }
7367
7368  if (GET_CODE (x) != PLUS
7369      || GET_CODE (XEXP (x, 1)) != CONST)
7370    return orig_x;
7371
7372  if (GET_CODE (XEXP (x, 0)) == REG
7373      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7374    /* %ebx + GOT/GOTOFF */
7375    ;
7376  else if (GET_CODE (XEXP (x, 0)) == PLUS)
7377    {
7378      /* %ebx + %reg * scale + GOT/GOTOFF */
7379      reg_addend = XEXP (x, 0);
7380      if (GET_CODE (XEXP (reg_addend, 0)) == REG
7381	  && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7382	reg_addend = XEXP (reg_addend, 1);
7383      else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7384	       && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7385	reg_addend = XEXP (reg_addend, 0);
7386      else
7387	return orig_x;
7388      if (GET_CODE (reg_addend) != REG
7389	  && GET_CODE (reg_addend) != MULT
7390	  && GET_CODE (reg_addend) != ASHIFT)
7391	return orig_x;
7392    }
7393  else
7394    return orig_x;
7395
7396  x = XEXP (XEXP (x, 1), 0);
7397  if (GET_CODE (x) == PLUS
7398      && GET_CODE (XEXP (x, 1)) == CONST_INT)
7399    {
7400      const_addend = XEXP (x, 1);
7401      x = XEXP (x, 0);
7402    }
7403
7404  if (GET_CODE (x) == UNSPEC
7405      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7406	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7407    result = XVECEXP (x, 0, 0);
7408
7409  if (TARGET_MACHO && darwin_local_data_pic (x)
7410      && GET_CODE (orig_x) != MEM)
7411    result = XEXP (x, 0);
7412
7413  if (! result)
7414    return orig_x;
7415
7416  if (const_addend)
7417    result = gen_rtx_PLUS (Pmode, result, const_addend);
7418  if (reg_addend)
7419    result = gen_rtx_PLUS (Pmode, reg_addend, result);
7420  return result;
7421}
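
/* For example, the 32-bit PIC address
     (plus (reg) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   with the PIC register as the base is turned back into (symbol_ref "foo")
   above, so the DWARF output code sees a plain symbol reference.  */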
7422
7423static void
7424put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7425		    int fp, FILE *file)
7426{
7427  const char *suffix;
7428
7429  if (mode == CCFPmode || mode == CCFPUmode)
7430    {
7431      enum rtx_code second_code, bypass_code;
7432      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7433      gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7434      code = ix86_fp_compare_code_to_integer (code);
7435      mode = CCmode;
7436    }
7437  if (reverse)
7438    code = reverse_condition (code);
7439
7440  switch (code)
7441    {
7442    case EQ:
7443      suffix = "e";
7444      break;
7445    case NE:
7446      suffix = "ne";
7447      break;
7448    case GT:
7449      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7450      suffix = "g";
7451      break;
7452    case GTU:
7453      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7454	 Those same assemblers have the same but opposite lossage on cmov.  */
7455      gcc_assert (mode == CCmode);
7456      suffix = fp ? "nbe" : "a";
7457      break;
7458    case LT:
7459      switch (mode)
7460	{
7461	case CCNOmode:
7462	case CCGOCmode:
7463	  suffix = "s";
7464	  break;
7465
7466	case CCmode:
7467	case CCGCmode:
7468	  suffix = "l";
7469	  break;
7470
7471	default:
7472	  gcc_unreachable ();
7473	}
7474      break;
7475    case LTU:
7476      gcc_assert (mode == CCmode);
7477      suffix = "b";
7478      break;
7479    case GE:
7480      switch (mode)
7481	{
7482	case CCNOmode:
7483	case CCGOCmode:
7484	  suffix = "ns";
7485	  break;
7486
7487	case CCmode:
7488	case CCGCmode:
7489	  suffix = "ge";
7490	  break;
7491
7492	default:
7493	  gcc_unreachable ();
7494	}
7495      break;
7496    case GEU:
7497      /* ??? As above.  */
7498      gcc_assert (mode == CCmode);
7499      suffix = fp ? "nb" : "ae";
7500      break;
7501    case LE:
7502      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7503      suffix = "le";
7504      break;
7505    case LEU:
7506      gcc_assert (mode == CCmode);
7507      suffix = "be";
7508      break;
7509    case UNORDERED:
7510      suffix = fp ? "u" : "p";
7511      break;
7512    case ORDERED:
7513      suffix = fp ? "nu" : "np";
7514      break;
7515    default:
7516      gcc_unreachable ();
7517    }
7518  fputs (suffix, file);
7519}
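
/* For example, a signed greater-than comparison (GT in CCGCmode) yields the
   suffix "g", so callers emit "setg", "cmovg" or "jg"; the unsigned GTU
   yields "a" ("nbe" for fcmov, see above), and REVERSE selects the inverse
   condition instead.  */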
7520
7521/* Print the name of register X to FILE based on its machine mode and number.
7522   If CODE is 'w', pretend the mode is HImode.
7523   If CODE is 'b', pretend the mode is QImode.
7524   If CODE is 'k', pretend the mode is SImode.
7525   If CODE is 'q', pretend the mode is DImode.
7526   If CODE is 'h', pretend the reg is the 'high' byte register.
7527   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
7528
7529void
7530print_reg (rtx x, int code, FILE *file)
7531{
7532  gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7533	      && REGNO (x) != FRAME_POINTER_REGNUM
7534	      && REGNO (x) != FLAGS_REG
7535	      && REGNO (x) != FPSR_REG);
7536
7537  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7538    putc ('%', file);
7539
7540  if (code == 'w' || MMX_REG_P (x))
7541    code = 2;
7542  else if (code == 'b')
7543    code = 1;
7544  else if (code == 'k')
7545    code = 4;
7546  else if (code == 'q')
7547    code = 8;
7548  else if (code == 'y')
7549    code = 3;
7550  else if (code == 'h')
7551    code = 0;
7552  else
7553    code = GET_MODE_SIZE (GET_MODE (x));
7554
7555  /* Irritatingly, AMD extended registers use a different naming convention
7556     from the normal registers.  */
7557  if (REX_INT_REG_P (x))
7558    {
7559      gcc_assert (TARGET_64BIT);
7560      switch (code)
7561	{
7562	  case 0:
7563	    error ("extended registers have no high halves");
7564	    break;
7565	  case 1:
7566	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7567	    break;
7568	  case 2:
7569	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7570	    break;
7571	  case 4:
7572	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7573	    break;
7574	  case 8:
7575	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7576	    break;
7577	  default:
7578	    error ("unsupported operand size for extended register");
7579	    break;
7580	}
7581      return;
7582    }
7583  switch (code)
7584    {
7585    case 3:
7586      if (STACK_TOP_P (x))
7587	{
7588	  fputs ("st(0)", file);
7589	  break;
7590	}
7591      /* FALLTHRU */
7592    case 8:
7593    case 4:
7594    case 12:
7595      if (! ANY_FP_REG_P (x))
7596	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7597      /* FALLTHRU */
7598    case 16:
7599    case 2:
7600    normal:
7601      fputs (hi_reg_name[REGNO (x)], file);
7602      break;
7603    case 1:
7604      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7605	goto normal;
7606      fputs (qi_reg_name[REGNO (x)], file);
7607      break;
7608    case 0:
7609      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7610	goto normal;
7611      fputs (qi_high_reg_name[REGNO (x)], file);
7612      break;
7613    default:
7614      gcc_unreachable ();
7615    }
7616}
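
/* For example, in AT&T syntax (reg:SI 0), i.e. the ax register, is printed
   as "%al" for code 'b', "%ax" for 'w', "%eax" for 'k' and, in 64-bit code,
   "%rax" for 'q'; an extended register such as r8 comes out as "%r8b",
   "%r8w", "%r8d" or "%r8" depending on the requested width.  */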
7617
7618/* Locate some local-dynamic symbol still in use by this function
7619   so that we can print its name in some tls_local_dynamic_base
7620   pattern.  */
7621
7622static const char *
7623get_some_local_dynamic_name (void)
7624{
7625  rtx insn;
7626
7627  if (cfun->machine->some_ld_name)
7628    return cfun->machine->some_ld_name;
7629
7630  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7631    if (INSN_P (insn)
7632	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7633      return cfun->machine->some_ld_name;
7634
7635  gcc_unreachable ();
7636}
7637
7638static int
7639get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7640{
7641  rtx x = *px;
7642
7643  if (GET_CODE (x) == SYMBOL_REF
7644      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7645    {
7646      cfun->machine->some_ld_name = XSTR (x, 0);
7647      return 1;
7648    }
7649
7650  return 0;
7651}
7652
7653/* Meaning of CODE:
7654   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7655   C -- print opcode suffix for set/cmov insn.
7656   c -- like C, but print reversed condition
7657   F,f -- likewise, but for floating-point.
7658   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7659        otherwise nothing
7660   R -- print the prefix for register names.
7661   z -- print the opcode suffix for the size of the current operand.
7662   * -- print a star (in certain assembler syntax)
7663   A -- print an absolute memory reference.
7664   w -- print the operand as if it's a "word" (HImode) even if it isn't.
7665   s -- print a shift double count, followed by the assembler's argument
7666	delimiter.
7667   b -- print the QImode name of the register for the indicated operand.
7668	%b0 would print %al if operands[0] is reg 0.
7669   w --  likewise, print the HImode name of the register.
7670   k --  likewise, print the SImode name of the register.
7671   q --  likewise, print the DImode name of the register.
7672   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7673   y -- print "st(0)" instead of "st" as a register.
7674   D -- print condition for SSE cmp instruction.
7675   P -- if PIC, print an @PLT suffix.
7676   X -- don't print any sort of PIC '@' suffix for a symbol.
7677   & -- print some in-use local-dynamic symbol name.
7678   H -- print a memory address offset by 8; used for sse high-parts
7679 */
7680
7681void
7682print_operand (FILE *file, rtx x, int code)
7683{
7684  if (code)
7685    {
7686      switch (code)
7687	{
7688	case '*':
7689	  if (ASSEMBLER_DIALECT == ASM_ATT)
7690	    putc ('*', file);
7691	  return;
7692
7693	case '&':
7694	  assemble_name (file, get_some_local_dynamic_name ());
7695	  return;
7696
7697	case 'A':
7698	  switch (ASSEMBLER_DIALECT)
7699	    {
7700	    case ASM_ATT:
7701	      putc ('*', file);
7702	      break;
7703
7704	    case ASM_INTEL:
7705	      /* Intel syntax.  For absolute addresses, registers should not
7706		 be surrounded by brackets.  */
7707	      if (GET_CODE (x) != REG)
7708		{
7709		  putc ('[', file);
7710		  PRINT_OPERAND (file, x, 0);
7711		  putc (']', file);
7712		  return;
7713		}
7714	      break;
7715
7716	    default:
7717	      gcc_unreachable ();
7718	    }
7719
7720	  PRINT_OPERAND (file, x, 0);
7721	  return;
7722
7723
7724	case 'L':
7725	  if (ASSEMBLER_DIALECT == ASM_ATT)
7726	    putc ('l', file);
7727	  return;
7728
7729	case 'W':
7730	  if (ASSEMBLER_DIALECT == ASM_ATT)
7731	    putc ('w', file);
7732	  return;
7733
7734	case 'B':
7735	  if (ASSEMBLER_DIALECT == ASM_ATT)
7736	    putc ('b', file);
7737	  return;
7738
7739	case 'Q':
7740	  if (ASSEMBLER_DIALECT == ASM_ATT)
7741	    putc ('l', file);
7742	  return;
7743
7744	case 'S':
7745	  if (ASSEMBLER_DIALECT == ASM_ATT)
7746	    putc ('s', file);
7747	  return;
7748
7749	case 'T':
7750	  if (ASSEMBLER_DIALECT == ASM_ATT)
7751	    putc ('t', file);
7752	  return;
7753
7754	case 'z':
7755	  /* 387 opcodes don't get size suffixes if the operands are
7756	     registers.  */
7757	  if (STACK_REG_P (x))
7758	    return;
7759
7760	  /* Likewise if using Intel opcodes.  */
7761	  if (ASSEMBLER_DIALECT == ASM_INTEL)
7762	    return;
7763
7764	  /* Derive the opcode suffix from the size of the operand.  */
7765	  switch (GET_MODE_SIZE (GET_MODE (x)))
7766	    {
7767	    case 2:
7768#ifdef HAVE_GAS_FILDS_FISTS
7769	      putc ('s', file);
7770#endif
7771	      return;
7772
7773	    case 4:
7774	      if (GET_MODE (x) == SFmode)
7775		{
7776		  putc ('s', file);
7777		  return;
7778		}
7779	      else
7780		putc ('l', file);
7781	      return;
7782
7783	    case 12:
7784	    case 16:
7785	      putc ('t', file);
7786	      return;
7787
7788	    case 8:
7789	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7790		{
7791#ifdef GAS_MNEMONICS
7792		  putc ('q', file);
7793#else
7794		  putc ('l', file);
7795		  putc ('l', file);
7796#endif
7797		}
7798	      else
7799	        putc ('l', file);
7800	      return;
7801
7802	    default:
7803	      gcc_unreachable ();
7804	    }
7805
7806	case 'b':
7807	case 'w':
7808	case 'k':
7809	case 'q':
7810	case 'h':
7811	case 'y':
7812	case 'X':
7813	case 'P':
7814	  break;
7815
7816	case 's':
7817	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7818	    {
7819	      PRINT_OPERAND (file, x, 0);
7820	      putc (',', file);
7821	    }
7822	  return;
7823
7824	case 'D':
7825	  /* Little bit of braindamage here.  The SSE compare instructions
7826	     use completely different names for the comparisons than the
7827	     fp conditional moves do.  */
7828	  switch (GET_CODE (x))
7829	    {
7830	    case EQ:
7831	    case UNEQ:
7832	      fputs ("eq", file);
7833	      break;
7834	    case LT:
7835	    case UNLT:
7836	      fputs ("lt", file);
7837	      break;
7838	    case LE:
7839	    case UNLE:
7840	      fputs ("le", file);
7841	      break;
7842	    case UNORDERED:
7843	      fputs ("unord", file);
7844	      break;
7845	    case NE:
7846	    case LTGT:
7847	      fputs ("neq", file);
7848	      break;
7849	    case UNGE:
7850	    case GE:
7851	      fputs ("nlt", file);
7852	      break;
7853	    case UNGT:
7854	    case GT:
7855	      fputs ("nle", file);
7856	      break;
7857	    case ORDERED:
7858	      fputs ("ord", file);
7859	      break;
7860	    default:
7861	      gcc_unreachable ();
7862	    }
7863	  return;
7864	case 'O':
7865#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7866	  if (ASSEMBLER_DIALECT == ASM_ATT)
7867	    {
7868	      switch (GET_MODE (x))
7869		{
7870		case HImode: putc ('w', file); break;
7871		case SImode:
7872		case SFmode: putc ('l', file); break;
7873		case DImode:
7874		case DFmode: putc ('q', file); break;
7875		default: gcc_unreachable ();
7876		}
7877	      putc ('.', file);
7878	    }
7879#endif
7880	  return;
7881	case 'C':
7882	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7883	  return;
7884	case 'F':
7885#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7886	  if (ASSEMBLER_DIALECT == ASM_ATT)
7887	    putc ('.', file);
7888#endif
7889	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7890	  return;
7891
7892	  /* Like above, but reverse condition */
7893	case 'c':
7894	  /* Check to see if argument to %c is really a constant
7895	     and not a condition code which needs to be reversed.  */
7896	  if (!COMPARISON_P (x))
7897	  {
7898	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7899	     return;
7900	  }
7901	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7902	  return;
7903	case 'f':
7904#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7905	  if (ASSEMBLER_DIALECT == ASM_ATT)
7906	    putc ('.', file);
7907#endif
7908	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7909	  return;
7910
7911	case 'H':
7912	  /* It doesn't actually matter what mode we use here, as we're
7913	     only going to use this for printing.  */
7914	  x = adjust_address_nv (x, DImode, 8);
7915	  break;
7916
7917	case '+':
7918	  {
7919	    rtx x;
7920
7921	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7922	      return;
7923
7924	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7925	    if (x)
7926	      {
7927		int pred_val = INTVAL (XEXP (x, 0));
7928
7929		if (pred_val < REG_BR_PROB_BASE * 45 / 100
7930		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
7931		  {
7932		    int taken = pred_val > REG_BR_PROB_BASE / 2;
7933		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
7934
7935		    /* Emit hints only in the case the default branch prediction
7936		       heuristics would fail.  */
7937		    if (taken != cputaken)
7938		      {
7939			/* We use 3e (DS) prefix for taken branches and
7940			   2e (CS) prefix for not taken branches.  */
7941			if (taken)
7942			  fputs ("ds ; ", file);
7943			else
7944			  fputs ("cs ; ", file);
7945		      }
7946		  }
7947	      }
7948	    return;
7949	  }
7950	default:
7951	    output_operand_lossage ("invalid operand code '%c'", code);
7952	}
7953    }
7954
7955  if (GET_CODE (x) == REG)
7956    print_reg (x, code, file);
7957
7958  else if (GET_CODE (x) == MEM)
7959    {
7960      /* No `byte ptr' prefix for call instructions.  */
7961      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7962	{
7963	  const char * size;
7964	  switch (GET_MODE_SIZE (GET_MODE (x)))
7965	    {
7966	    case 1: size = "BYTE"; break;
7967	    case 2: size = "WORD"; break;
7968	    case 4: size = "DWORD"; break;
7969	    case 8: size = "QWORD"; break;
7970	    case 12: size = "XWORD"; break;
7971	    case 16: size = "XMMWORD"; break;
7972	    default:
7973	      gcc_unreachable ();
7974	    }
7975
7976	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
7977	  if (code == 'b')
7978	    size = "BYTE";
7979	  else if (code == 'w')
7980	    size = "WORD";
7981	  else if (code == 'k')
7982	    size = "DWORD";
7983
7984	  fputs (size, file);
7985	  fputs (" PTR ", file);
7986	}
7987
7988      x = XEXP (x, 0);
7989      /* Avoid (%rip) for call operands.  */
7990      if (CONSTANT_ADDRESS_P (x) && code == 'P'
7991	       && GET_CODE (x) != CONST_INT)
7992	output_addr_const (file, x);
7993      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7994	output_operand_lossage ("invalid constraints for operand");
7995      else
7996	output_address (x);
7997    }
7998
7999  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8000    {
8001      REAL_VALUE_TYPE r;
8002      long l;
8003
8004      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8005      REAL_VALUE_TO_TARGET_SINGLE (r, l);
8006
8007      if (ASSEMBLER_DIALECT == ASM_ATT)
8008	putc ('$', file);
8009      fprintf (file, "0x%08lx", l);
8010    }
8011
8012  /* These float cases don't actually occur as immediate operands.  */
8013  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8014    {
8015      char dstr[30];
8016
8017      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8018      fprintf (file, "%s", dstr);
8019    }
8020
8021  else if (GET_CODE (x) == CONST_DOUBLE
8022	   && GET_MODE (x) == XFmode)
8023    {
8024      char dstr[30];
8025
8026      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8027      fprintf (file, "%s", dstr);
8028    }
8029
8030  else
8031    {
8032      /* We have patterns that allow zero sets of memory, for instance.
8033	 In 64-bit mode, we should probably support all 8-byte vectors,
8034	 since we can in fact encode that into an immediate.  */
8035      if (GET_CODE (x) == CONST_VECTOR)
8036	{
8037	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8038	  x = const0_rtx;
8039	}
8040
8041      if (code != 'P')
8042	{
8043	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8044	    {
8045	      if (ASSEMBLER_DIALECT == ASM_ATT)
8046		putc ('$', file);
8047	    }
8048	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8049		   || GET_CODE (x) == LABEL_REF)
8050	    {
8051	      if (ASSEMBLER_DIALECT == ASM_ATT)
8052		putc ('$', file);
8053	      else
8054		fputs ("OFFSET FLAT:", file);
8055	    }
8056	}
8057      if (GET_CODE (x) == CONST_INT)
8058	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8059      else if (flag_pic)
8060	output_pic_addr_const (file, x, code);
8061      else
8062	output_addr_const (file, x);
8063    }
8064}
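
/* For example, in a template such as "fadd%z1 %1" (illustrative, not an
   actual pattern) the 'z' code yields "fadds" for an SFmode memory operand
   and "faddl" for DFmode, while x87 register operands get no suffix at
   all.  */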
8065
8066/* Print a memory operand whose address is ADDR.  */
8067
8068void
8069print_operand_address (FILE *file, rtx addr)
8070{
8071  struct ix86_address parts;
8072  rtx base, index, disp;
8073  int scale;
8074  int ok = ix86_decompose_address (addr, &parts);
8075
8076  gcc_assert (ok);
8077
8078  base = parts.base;
8079  index = parts.index;
8080  disp = parts.disp;
8081  scale = parts.scale;
8082
8083  switch (parts.seg)
8084    {
8085    case SEG_DEFAULT:
8086      break;
8087    case SEG_FS:
8088    case SEG_GS:
8089      if (USER_LABEL_PREFIX[0] == 0)
8090	putc ('%', file);
8091      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8092      break;
8093    default:
8094      gcc_unreachable ();
8095    }
8096
8097  if (!base && !index)
8098    {
8099      /* A displacement-only address requires special attention.  */
8100
8101      if (GET_CODE (disp) == CONST_INT)
8102	{
8103	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8104	    {
8105	      if (USER_LABEL_PREFIX[0] == 0)
8106		putc ('%', file);
8107	      fputs ("ds:", file);
8108	    }
8109	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8110	}
8111      else if (flag_pic)
8112	output_pic_addr_const (file, disp, 0);
8113      else
8114	output_addr_const (file, disp);
8115
8116      /* Use the one byte shorter RIP-relative addressing for 64bit mode.  */
8117      if (TARGET_64BIT)
8118	{
8119	  if (GET_CODE (disp) == CONST
8120	      && GET_CODE (XEXP (disp, 0)) == PLUS
8121	      && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8122	    disp = XEXP (XEXP (disp, 0), 0);
8123	  if (GET_CODE (disp) == LABEL_REF
8124	      || (GET_CODE (disp) == SYMBOL_REF
8125		  && SYMBOL_REF_TLS_MODEL (disp) == 0))
8126	    fputs ("(%rip)", file);
8127	}
8128    }
8129  else
8130    {
8131      if (ASSEMBLER_DIALECT == ASM_ATT)
8132	{
8133	  if (disp)
8134	    {
8135	      if (flag_pic)
8136		output_pic_addr_const (file, disp, 0);
8137	      else if (GET_CODE (disp) == LABEL_REF)
8138		output_asm_label (disp);
8139	      else
8140		output_addr_const (file, disp);
8141	    }
8142
8143	  putc ('(', file);
8144	  if (base)
8145	    print_reg (base, 0, file);
8146	  if (index)
8147	    {
8148	      putc (',', file);
8149	      print_reg (index, 0, file);
8150	      if (scale != 1)
8151		fprintf (file, ",%d", scale);
8152	    }
8153	  putc (')', file);
8154	}
8155      else
8156	{
8157	  rtx offset = NULL_RTX;
8158
8159	  if (disp)
8160	    {
8161	      /* Pull out the offset of a symbol; print any symbol itself.  */
8162	      if (GET_CODE (disp) == CONST
8163		  && GET_CODE (XEXP (disp, 0)) == PLUS
8164		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8165		{
8166		  offset = XEXP (XEXP (disp, 0), 1);
8167		  disp = gen_rtx_CONST (VOIDmode,
8168					XEXP (XEXP (disp, 0), 0));
8169		}
8170
8171	      if (flag_pic)
8172		output_pic_addr_const (file, disp, 0);
8173	      else if (GET_CODE (disp) == LABEL_REF)
8174		output_asm_label (disp);
8175	      else if (GET_CODE (disp) == CONST_INT)
8176		offset = disp;
8177	      else
8178		output_addr_const (file, disp);
8179	    }
8180
8181	  putc ('[', file);
8182	  if (base)
8183	    {
8184	      print_reg (base, 0, file);
8185	      if (offset)
8186		{
8187		  if (INTVAL (offset) >= 0)
8188		    putc ('+', file);
8189		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8190		}
8191	    }
8192	  else if (offset)
8193	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8194	  else
8195	    putc ('0', file);
8196
8197	  if (index)
8198	    {
8199	      putc ('+', file);
8200	      print_reg (index, 0, file);
8201	      if (scale != 1)
8202		fprintf (file, "*%d", scale);
8203	    }
8204	  putc (']', file);
8205	}
8206    }
8207}
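/* As an illustration of the two output forms above (not an exhaustive
   list): for an address with base %ebx, index %esi, scale 4 and integer
   displacement 16, the AT&T branch prints "16(%ebx,%esi,4)" while the
   Intel branch prints "[ebx+16+esi*4]"; register-name formatting itself
   is left to print_reg, which handles the dialects elsewhere.  */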
8208
8209bool
8210output_addr_const_extra (FILE *file, rtx x)
8211{
8212  rtx op;
8213
8214  if (GET_CODE (x) != UNSPEC)
8215    return false;
8216
8217  op = XVECEXP (x, 0, 0);
8218  switch (XINT (x, 1))
8219    {
8220    case UNSPEC_GOTTPOFF:
8221      output_addr_const (file, op);
8222      /* FIXME: This might be @TPOFF in Sun ld.  */
8223      fputs ("@GOTTPOFF", file);
8224      break;
8225    case UNSPEC_TPOFF:
8226      output_addr_const (file, op);
8227      fputs ("@TPOFF", file);
8228      break;
8229    case UNSPEC_NTPOFF:
8230      output_addr_const (file, op);
8231      if (TARGET_64BIT)
8232	fputs ("@TPOFF", file);
8233      else
8234	fputs ("@NTPOFF", file);
8235      break;
8236    case UNSPEC_DTPOFF:
8237      output_addr_const (file, op);
8238      fputs ("@DTPOFF", file);
8239      break;
8240    case UNSPEC_GOTNTPOFF:
8241      output_addr_const (file, op);
8242      if (TARGET_64BIT)
8243	fputs ("@GOTTPOFF(%rip)", file);
8244      else
8245	fputs ("@GOTNTPOFF", file);
8246      break;
8247    case UNSPEC_INDNTPOFF:
8248      output_addr_const (file, op);
8249      fputs ("@INDNTPOFF", file);
8250      break;
8251
8252    default:
8253      return false;
8254    }
8255
8256  return true;
8257}
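/* For example, a GOTTPOFF unspec wrapping the symbol foo is printed as
   "foo@GOTTPOFF" (or "foo@GOTTPOFF(%rip)" for the GOTNTPOFF case in
   64-bit mode); these are the TLS relocation suffixes expected by the
   assembler for the corresponding thread-local access models.  */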
8258
8259/* Split one or more DImode RTL references into pairs of SImode
8260   references.  The RTL can be REG, offsettable MEM, integer constant, or
8261   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
8262   split and "num" is its length.  lo_half and hi_half are output arrays
8263   that parallel "operands".  */
8264
8265void
8266split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8267{
8268  while (num--)
8269    {
8270      rtx op = operands[num];
8271
8272      /* simplify_subreg refuses to split volatile memory addresses,
8273         but we still have to handle them.  */
8274      if (GET_CODE (op) == MEM)
8275	{
8276	  lo_half[num] = adjust_address (op, SImode, 0);
8277	  hi_half[num] = adjust_address (op, SImode, 4);
8278	}
8279      else
8280	{
8281	  lo_half[num] = simplify_gen_subreg (SImode, op,
8282					      GET_MODE (op) == VOIDmode
8283					      ? DImode : GET_MODE (op), 0);
8284	  hi_half[num] = simplify_gen_subreg (SImode, op,
8285					      GET_MODE (op) == VOIDmode
8286					      ? DImode : GET_MODE (op), 4);
8287	}
8288    }
8289}
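/* For instance, on this little-endian target a DImode constant such as
   0x0000000100000002 splits into lo_half = (const_int 2) and
   hi_half = (const_int 1), while a DImode MEM at address A splits into
   the SImode MEMs at A and A+4 produced by the adjust_address calls.  */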
8290/* Split one or more TImode RTL references into pairs of DImode
8291   references.  The RTL can be REG, offsettable MEM, integer constant, or
8292   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
8293   split and "num" is its length.  lo_half and hi_half are output arrays
8294   that parallel "operands".  */
8295
8296void
8297split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8298{
8299  while (num--)
8300    {
8301      rtx op = operands[num];
8302
8303      /* simplify_subreg refuses to split volatile memory addresses, but we
8304         still have to handle them.  */
8305      if (GET_CODE (op) == MEM)
8306	{
8307	  lo_half[num] = adjust_address (op, DImode, 0);
8308	  hi_half[num] = adjust_address (op, DImode, 8);
8309	}
8310      else
8311	{
8312	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8313	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8314	}
8315    }
8316}
8317
8318/* Output code to perform a 387 binary operation in INSN, one of PLUS,
8319   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
8320   is the expression of the binary operation.  The output may either be
8321   emitted here, or returned to the caller, like all output_* functions.
8322
8323   There is no guarantee that the operands are the same mode, as they
8324   might be within FLOAT or FLOAT_EXTEND expressions.  */
8325
8326#ifndef SYSV386_COMPAT
8327/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
8328   wants to fix the assemblers because that causes incompatibility
8329   with gcc.  No-one wants to fix gcc because that causes
8330   incompatibility with assemblers...  You can use the option of
8331   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
8332#define SYSV386_COMPAT 1
8333#endif
8334
8335const char *
8336output_387_binary_op (rtx insn, rtx *operands)
8337{
8338  static char buf[30];
8339  const char *p;
8340  const char *ssep;
8341  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8342
8343#ifdef ENABLE_CHECKING
8344  /* Even if we do not want to check the inputs, this documents input
8345     constraints, which helps in understanding the following code.  */
8346  if (STACK_REG_P (operands[0])
8347      && ((REG_P (operands[1])
8348	   && REGNO (operands[0]) == REGNO (operands[1])
8349	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8350	  || (REG_P (operands[2])
8351	      && REGNO (operands[0]) == REGNO (operands[2])
8352	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8353      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8354    ; /* ok */
8355  else
8356    gcc_assert (is_sse);
8357#endif
8358
8359  switch (GET_CODE (operands[3]))
8360    {
8361    case PLUS:
8362      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8363	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8364	p = "fiadd";
8365      else
8366	p = "fadd";
8367      ssep = "add";
8368      break;
8369
8370    case MINUS:
8371      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8372	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8373	p = "fisub";
8374      else
8375	p = "fsub";
8376      ssep = "sub";
8377      break;
8378
8379    case MULT:
8380      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8381	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8382	p = "fimul";
8383      else
8384	p = "fmul";
8385      ssep = "mul";
8386      break;
8387
8388    case DIV:
8389      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8390	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8391	p = "fidiv";
8392      else
8393	p = "fdiv";
8394      ssep = "div";
8395      break;
8396
8397    default:
8398      gcc_unreachable ();
8399    }
8400
8401  if (is_sse)
8402   {
8403      strcpy (buf, ssep);
8404      if (GET_MODE (operands[0]) == SFmode)
8405	strcat (buf, "ss\t{%2, %0|%0, %2}");
8406      else
8407	strcat (buf, "sd\t{%2, %0|%0, %2}");
8408      return buf;
8409   }
8410  strcpy (buf, p);
8411
8412  switch (GET_CODE (operands[3]))
8413    {
8414    case MULT:
8415    case PLUS:
8416      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8417	{
8418	  rtx temp = operands[2];
8419	  operands[2] = operands[1];
8420	  operands[1] = temp;
8421	}
8422
8423      /* We know operands[0] == operands[1].  */
8424
8425      if (GET_CODE (operands[2]) == MEM)
8426	{
8427	  p = "%z2\t%2";
8428	  break;
8429	}
8430
8431      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8432	{
8433	  if (STACK_TOP_P (operands[0]))
8434	    /* How is it that we are storing to a dead operand[2]?
8435	       Well, presumably operands[1] is dead too.  We can't
8436	       store the result to st(0) as st(0) gets popped on this
8437	       instruction.  Instead store to operands[2] (which I
8438	       think has to be st(1)).  st(1) will be popped later.
8439	       gcc <= 2.8.1 didn't have this check and generated
8440	       assembly code that the Unixware assembler rejected.  */
8441	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8442	  else
8443	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8444	  break;
8445	}
8446
8447      if (STACK_TOP_P (operands[0]))
8448	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8449      else
8450	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8451      break;
8452
8453    case MINUS:
8454    case DIV:
8455      if (GET_CODE (operands[1]) == MEM)
8456	{
8457	  p = "r%z1\t%1";
8458	  break;
8459	}
8460
8461      if (GET_CODE (operands[2]) == MEM)
8462	{
8463	  p = "%z2\t%2";
8464	  break;
8465	}
8466
8467      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8468	{
8469#if SYSV386_COMPAT
8470	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8471	     derived assemblers, confusingly reverse the direction of
8472	     the operation for fsub{r} and fdiv{r} when the
8473	     destination register is not st(0).  The Intel assembler
8474	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
8475	     figure out what the hardware really does.  */
8476	  if (STACK_TOP_P (operands[0]))
8477	    p = "{p\t%0, %2|rp\t%2, %0}";
8478	  else
8479	    p = "{rp\t%2, %0|p\t%0, %2}";
8480#else
8481	  if (STACK_TOP_P (operands[0]))
8482	    /* As above for fmul/fadd, we can't store to st(0).  */
8483	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8484	  else
8485	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8486#endif
8487	  break;
8488	}
8489
8490      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8491	{
8492#if SYSV386_COMPAT
8493	  if (STACK_TOP_P (operands[0]))
8494	    p = "{rp\t%0, %1|p\t%1, %0}";
8495	  else
8496	    p = "{p\t%1, %0|rp\t%0, %1}";
8497#else
8498	  if (STACK_TOP_P (operands[0]))
8499	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
8500	  else
8501	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
8502#endif
8503	  break;
8504	}
8505
8506      if (STACK_TOP_P (operands[0]))
8507	{
8508	  if (STACK_TOP_P (operands[1]))
8509	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8510	  else
8511	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
8512	  break;
8513	}
8514      else if (STACK_TOP_P (operands[1]))
8515	{
8516#if SYSV386_COMPAT
8517	  p = "{\t%1, %0|r\t%0, %1}";
8518#else
8519	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
8520#endif
8521	}
8522      else
8523	{
8524#if SYSV386_COMPAT
8525	  p = "{r\t%2, %0|\t%0, %2}";
8526#else
8527	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8528#endif
8529	}
8530      break;
8531
8532    default:
8533      gcc_unreachable ();
8534    }
8535
8536  strcat (buf, p);
8537  return buf;
8538}
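/* Examples of the templates assembled above: for a PLUS with SSE operands
   in SFmode the result is "addss\t{%2, %0|%0, %2}"; for an x87 PLUS with
   operands[2] in memory it is "fadd%z2\t%2"; and when operands[2] dies and
   operands[0] is not the stack top it is "faddp\t{%2, %0|%0, %2}".  */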
8539
8540/* Return needed mode for entity in optimize_mode_switching pass.  */
8541
8542int
8543ix86_mode_needed (int entity, rtx insn)
8544{
8545  enum attr_i387_cw mode;
8546
8547  /* The mode UNINITIALIZED is used to store the control word after a
8548     function call or ASM pattern.  The mode ANY specifies that the function
8549     has no requirements on the control word and makes no changes to the
8550     bits we are interested in.  */
8551
8552  if (CALL_P (insn)
8553      || (NONJUMP_INSN_P (insn)
8554	  && (asm_noperands (PATTERN (insn)) >= 0
8555	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8556    return I387_CW_UNINITIALIZED;
8557
8558  if (recog_memoized (insn) < 0)
8559    return I387_CW_ANY;
8560
8561  mode = get_attr_i387_cw (insn);
8562
8563  switch (entity)
8564    {
8565    case I387_TRUNC:
8566      if (mode == I387_CW_TRUNC)
8567	return mode;
8568      break;
8569
8570    case I387_FLOOR:
8571      if (mode == I387_CW_FLOOR)
8572	return mode;
8573      break;
8574
8575    case I387_CEIL:
8576      if (mode == I387_CW_CEIL)
8577	return mode;
8578      break;
8579
8580    case I387_MASK_PM:
8581      if (mode == I387_CW_MASK_PM)
8582	return mode;
8583      break;
8584
8585    default:
8586      gcc_unreachable ();
8587    }
8588
8589  return I387_CW_ANY;
8590}
8591
8592/* Output code to initialize the control word copies used by trunc?f?i and
8593   rounding patterns.  MODE selects which control word to prepare; the current
8594   control word is saved and a modified copy is stored in the stack slot for MODE.  */
8595
8596void
8597emit_i387_cw_initialization (int mode)
8598{
8599  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8600  rtx new_mode;
8601
8602  int slot;
8603
8604  rtx reg = gen_reg_rtx (HImode);
8605
8606  emit_insn (gen_x86_fnstcw_1 (stored_mode));
8607  emit_move_insn (reg, stored_mode);
8608
8609  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8610    {
8611      switch (mode)
8612	{
8613	case I387_CW_TRUNC:
8614	  /* round toward zero (truncate) */
8615	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8616	  slot = SLOT_CW_TRUNC;
8617	  break;
8618
8619	case I387_CW_FLOOR:
8620	  /* round down toward -oo */
8621	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8622	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8623	  slot = SLOT_CW_FLOOR;
8624	  break;
8625
8626	case I387_CW_CEIL:
8627	  /* round up toward +oo */
8628	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8629	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8630	  slot = SLOT_CW_CEIL;
8631	  break;
8632
8633	case I387_CW_MASK_PM:
8634	  /* mask precision exception for nearbyint() */
8635	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8636	  slot = SLOT_CW_MASK_PM;
8637	  break;
8638
8639	default:
8640	  gcc_unreachable ();
8641	}
8642    }
8643  else
8644    {
8645      switch (mode)
8646	{
8647	case I387_CW_TRUNC:
8648	  /* round toward zero (truncate) */
8649	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8650	  slot = SLOT_CW_TRUNC;
8651	  break;
8652
8653	case I387_CW_FLOOR:
8654	  /* round down toward -oo */
8655	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8656	  slot = SLOT_CW_FLOOR;
8657	  break;
8658
8659	case I387_CW_CEIL:
8660	  /* round up toward +oo */
8661	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8662	  slot = SLOT_CW_CEIL;
8663	  break;
8664
8665	case I387_CW_MASK_PM:
8666	  /* mask precision exception for nearbyint() */
8667	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8668	  slot = SLOT_CW_MASK_PM;
8669	  break;
8670
8671	default:
8672	  gcc_unreachable ();
8673	}
8674    }
8675
8676  gcc_assert (slot < MAX_386_STACK_LOCALS);
8677
8678  new_mode = assign_386_stack_local (HImode, slot);
8679  emit_move_insn (new_mode, reg);
8680}
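/* The bit twiddling above targets the rounding-control field (bits 10-11)
   and the precision-mask bit (bit 5) of the x87 control word: setting
   0x0c00 selects truncation, 0x0400 rounds toward -oo, 0x0800 rounds
   toward +oo, and 0x0020 masks the precision exception for nearbyint.  */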
8681
8682/* Output code for INSN to convert a float to a signed int.  OPERANDS
8683   are the insn operands.  The output may be [HSD]Imode and the input
8684   operand may be [SDX]Fmode.  */
8685
8686const char *
8687output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8688{
8689  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8690  int dimode_p = GET_MODE (operands[0]) == DImode;
8691  int round_mode = get_attr_i387_cw (insn);
8692
8693  /* Jump through a hoop or two for DImode, since the hardware has no
8694     non-popping instruction.  We used to do this a different way, but
8695     that was somewhat fragile and broke with post-reload splitters.  */
8696  if ((dimode_p || fisttp) && !stack_top_dies)
8697    output_asm_insn ("fld\t%y1", operands);
8698
8699  gcc_assert (STACK_TOP_P (operands[1]));
8700  gcc_assert (GET_CODE (operands[0]) == MEM);
8701
8702  if (fisttp)
8703      output_asm_insn ("fisttp%z0\t%0", operands);
8704  else
8705    {
8706      if (round_mode != I387_CW_ANY)
8707	output_asm_insn ("fldcw\t%3", operands);
8708      if (stack_top_dies || dimode_p)
8709	output_asm_insn ("fistp%z0\t%0", operands);
8710      else
8711	output_asm_insn ("fist%z0\t%0", operands);
8712      if (round_mode != I387_CW_ANY)
8713	output_asm_insn ("fldcw\t%2", operands);
8714    }
8715
8716  return "";
8717}
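/* For a DImode destination with a non-ANY rounding mode the emitted
   sequence looks roughly like

	fld	%y1		(only when the stack top does not die)
	fldcw	%3		(load the adjusted control word)
	fistp%z0 %0		(convert, store and pop)
	fldcw	%2		(restore the saved control word)

   using the two control-word stack slots set up by
   emit_i387_cw_initialization.  */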
8718
8719/* Output code for x87 ffreep insn.  The OPNO argument, which may only
8720   have the values zero or one, indicates the ffreep insn's operand
8721   from the OPERANDS array.  */
8722
8723static const char *
8724output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8725{
8726  if (TARGET_USE_FFREEP)
8727#if HAVE_AS_IX86_FFREEP
8728    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8729#else
8730    switch (REGNO (operands[opno]))
8731      {
8732      case FIRST_STACK_REG + 0: return ".word\t0xc0df";
8733      case FIRST_STACK_REG + 1: return ".word\t0xc1df";
8734      case FIRST_STACK_REG + 2: return ".word\t0xc2df";
8735      case FIRST_STACK_REG + 3: return ".word\t0xc3df";
8736      case FIRST_STACK_REG + 4: return ".word\t0xc4df";
8737      case FIRST_STACK_REG + 5: return ".word\t0xc5df";
8738      case FIRST_STACK_REG + 6: return ".word\t0xc6df";
8739      case FIRST_STACK_REG + 7: return ".word\t0xc7df";
8740      }
8741#endif
8742
8743  return opno ? "fstp\t%y1" : "fstp\t%y0";
8744}
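/* The .word fallbacks above hand-encode the instruction for assemblers
   that lack the ffreep mnemonic: 0xc0df assembles to the bytes df c0,
   i.e. "ffreep %st(0)", 0xc1df to "ffreep %st(1)", and so on up the
   register stack.  */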
8745
8746
8747/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
8748   should be used.  UNORDERED_P is true when fucom should be used.  */
8749
8750const char *
8751output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8752{
8753  int stack_top_dies;
8754  rtx cmp_op0, cmp_op1;
8755  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8756
8757  if (eflags_p)
8758    {
8759      cmp_op0 = operands[0];
8760      cmp_op1 = operands[1];
8761    }
8762  else
8763    {
8764      cmp_op0 = operands[1];
8765      cmp_op1 = operands[2];
8766    }
8767
8768  if (is_sse)
8769    {
8770      if (GET_MODE (operands[0]) == SFmode)
8771	if (unordered_p)
8772	  return "ucomiss\t{%1, %0|%0, %1}";
8773	else
8774	  return "comiss\t{%1, %0|%0, %1}";
8775      else
8776	if (unordered_p)
8777	  return "ucomisd\t{%1, %0|%0, %1}";
8778	else
8779	  return "comisd\t{%1, %0|%0, %1}";
8780    }
8781
8782  gcc_assert (STACK_TOP_P (cmp_op0));
8783
8784  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8785
8786  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8787    {
8788      if (stack_top_dies)
8789	{
8790	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8791	  return output_387_ffreep (operands, 1);
8792	}
8793      else
8794	return "ftst\n\tfnstsw\t%0";
8795    }
8796
8797  if (STACK_REG_P (cmp_op1)
8798      && stack_top_dies
8799      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8800      && REGNO (cmp_op1) != FIRST_STACK_REG)
8801    {
8802      /* If the top of the 387 stack dies, and the other operand is
8803	 also a stack register that dies, then this must be a
8804	 `fcompp' float compare.  */
8805
8806      if (eflags_p)
8807	{
8808	  /* There is no double popping fcomi variant.  Fortunately,
8809	     eflags is immune from the fstp's cc clobbering.  */
8810	  if (unordered_p)
8811	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8812	  else
8813	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8814	  return output_387_ffreep (operands, 0);
8815	}
8816      else
8817	{
8818	  if (unordered_p)
8819	    return "fucompp\n\tfnstsw\t%0";
8820	  else
8821	    return "fcompp\n\tfnstsw\t%0";
8822	}
8823    }
8824  else
8825    {
8826      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
8827
8828      static const char * const alt[16] =
8829      {
8830	"fcom%z2\t%y2\n\tfnstsw\t%0",
8831	"fcomp%z2\t%y2\n\tfnstsw\t%0",
8832	"fucom%z2\t%y2\n\tfnstsw\t%0",
8833	"fucomp%z2\t%y2\n\tfnstsw\t%0",
8834
8835	"ficom%z2\t%y2\n\tfnstsw\t%0",
8836	"ficomp%z2\t%y2\n\tfnstsw\t%0",
8837	NULL,
8838	NULL,
8839
8840	"fcomi\t{%y1, %0|%0, %y1}",
8841	"fcomip\t{%y1, %0|%0, %y1}",
8842	"fucomi\t{%y1, %0|%0, %y1}",
8843	"fucomip\t{%y1, %0|%0, %y1}",
8844
8845	NULL,
8846	NULL,
8847	NULL,
8848	NULL
8849      };
8850
8851      int mask;
8852      const char *ret;
8853
8854      mask  = eflags_p << 3;
8855      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8856      mask |= unordered_p << 1;
8857      mask |= stack_top_dies;
8858
8859      gcc_assert (mask < 16);
8860      ret = alt[mask];
8861      gcc_assert (ret);
8862
8863      return ret;
8864    }
8865}
8866
8867void
8868ix86_output_addr_vec_elt (FILE *file, int value)
8869{
8870  const char *directive = ASM_LONG;
8871
8872#ifdef ASM_QUAD
8873  if (TARGET_64BIT)
8874    directive = ASM_QUAD;
8875#else
8876  gcc_assert (!TARGET_64BIT);
8877#endif
8878
8879  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8880}
8881
8882void
8883ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8884{
8885  if (TARGET_64BIT)
8886    fprintf (file, "%s%s%d-%s%d\n",
8887	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
8888  else if (HAVE_AS_GOTOFF_IN_DATA)
8889    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8890#if TARGET_MACHO
8891  else if (TARGET_MACHO)
8892    {
8893      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8894      machopic_output_function_base_name (file);
8895      fprintf(file, "\n");
8896    }
8897#endif
8898  else
8899    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8900		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8901}
8902
8903/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8904   for the target.  */
8905
8906void
8907ix86_expand_clear (rtx dest)
8908{
8909  rtx tmp;
8910
8911  /* We play register width games, which are only valid after reload.  */
8912  gcc_assert (reload_completed);
8913
8914  /* Avoid HImode and its attendant prefix byte.  */
8915  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8916    dest = gen_rtx_REG (SImode, REGNO (dest));
8917
8918  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8919
8920  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
8921  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8922    {
8923      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8924      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8925    }
8926
8927  emit_insn (tmp);
8928}
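/* The xor form preferred here is both shorter and, on most CPUs, treated
   as a dependency-breaking zero idiom: "xorl %eax, %eax" is 2 bytes
   versus 5 bytes for "movl $0, %eax", at the price of clobbering the
   flags (hence the CLOBBER added above).  */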
8929
8930/* X is an unchanging MEM.  If it is a constant pool reference, return
8931   the constant pool rtx, else NULL.  */
8932
8933rtx
8934maybe_get_pool_constant (rtx x)
8935{
8936  x = ix86_delegitimize_address (XEXP (x, 0));
8937
8938  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8939    return get_pool_constant (x);
8940
8941  return NULL_RTX;
8942}
8943
8944void
8945ix86_expand_move (enum machine_mode mode, rtx operands[])
8946{
8947  int strict = (reload_in_progress || reload_completed);
8948  rtx op0, op1;
8949  enum tls_model model;
8950
8951  op0 = operands[0];
8952  op1 = operands[1];
8953
8954  if (GET_CODE (op1) == SYMBOL_REF)
8955    {
8956      model = SYMBOL_REF_TLS_MODEL (op1);
8957      if (model)
8958	{
8959	  op1 = legitimize_tls_address (op1, model, true);
8960	  op1 = force_operand (op1, op0);
8961	  if (op1 == op0)
8962	    return;
8963	}
8964    }
8965  else if (GET_CODE (op1) == CONST
8966	   && GET_CODE (XEXP (op1, 0)) == PLUS
8967	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8968    {
8969      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8970      if (model)
8971	{
8972	  rtx addend = XEXP (XEXP (op1, 0), 1);
8973	  op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8974	  op1 = force_operand (op1, NULL);
8975	  op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8976				     op0, 1, OPTAB_DIRECT);
8977	  if (op1 == op0)
8978	    return;
8979	}
8980    }
8981
8982  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8983    {
8984      if (TARGET_MACHO && !TARGET_64BIT)
8985	{
8986#if TARGET_MACHO
8987	  if (MACHOPIC_PURE)
8988	    {
8989	      rtx temp = ((reload_in_progress
8990			   || ((op0 && GET_CODE (op0) == REG)
8991			       && mode == Pmode))
8992			  ? op0 : gen_reg_rtx (Pmode));
8993	      op1 = machopic_indirect_data_reference (op1, temp);
8994	      op1 = machopic_legitimize_pic_address (op1, mode,
8995						     temp == op1 ? 0 : temp);
8996	    }
8997	  else if (MACHOPIC_INDIRECT)
8998	    op1 = machopic_indirect_data_reference (op1, 0);
8999	  if (op0 == op1)
9000	    return;
9001#endif
9002	}
9003      else
9004	{
9005	  if (GET_CODE (op0) == MEM)
9006	    op1 = force_reg (Pmode, op1);
9007	  else
9008	    op1 = legitimize_address (op1, op1, Pmode);
9009	}
9010    }
9011  else
9012    {
9013      if (GET_CODE (op0) == MEM
9014	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9015	      || !push_operand (op0, mode))
9016	  && GET_CODE (op1) == MEM)
9017	op1 = force_reg (mode, op1);
9018
9019      if (push_operand (op0, mode)
9020	  && ! general_no_elim_operand (op1, mode))
9021	op1 = copy_to_mode_reg (mode, op1);
9022
9023      /* Force large constants in 64-bit compilation into a register
9024	 so that they get CSEd.  */
9025      if (TARGET_64BIT && mode == DImode
9026	  && immediate_operand (op1, mode)
9027	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
9028	  && !register_operand (op0, mode)
9029	  && optimize && !reload_completed && !reload_in_progress)
9030	op1 = copy_to_mode_reg (mode, op1);
9031
9032      if (FLOAT_MODE_P (mode))
9033	{
9034	  /* If we are loading a floating point constant to a register,
9035	     force the value to memory now, since we'll get better code
9036	     out the back end.  */
9037
9038	  if (strict)
9039	    ;
9040	  else if (GET_CODE (op1) == CONST_DOUBLE)
9041	    {
9042	      op1 = validize_mem (force_const_mem (mode, op1));
9043	      if (!register_operand (op0, mode))
9044		{
9045		  rtx temp = gen_reg_rtx (mode);
9046		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9047		  emit_move_insn (op0, temp);
9048		  return;
9049		}
9050	    }
9051	}
9052    }
9053
9054  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9055}
9056
9057void
9058ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9059{
9060  rtx op0 = operands[0], op1 = operands[1];
9061
9062  /* Force constants other than zero into memory.  We do not know how
9063     the instructions used to build constants modify the upper 64 bits
9064     of the register; once we have that information we may be able
9065     to handle some of them more efficiently.  */
9066  if ((reload_in_progress | reload_completed) == 0
9067      && register_operand (op0, mode)
9068      && CONSTANT_P (op1)
9069      && standard_sse_constant_p (op1) <= 0)
9070    op1 = validize_mem (force_const_mem (mode, op1));
9071
9072  /* Make operand1 a register if it isn't already.  */
9073  if (!no_new_pseudos
9074      && !register_operand (op0, mode)
9075      && !register_operand (op1, mode))
9076    {
9077      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9078      return;
9079    }
9080
9081  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9082}
9083
9084/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
9085   straight to ix86_expand_vector_move.  */
9086
9087void
9088ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9089{
9090  rtx op0, op1, m;
9091
9092  op0 = operands[0];
9093  op1 = operands[1];
9094
9095  if (MEM_P (op1))
9096    {
9097      /* If we're optimizing for size, movups is the smallest.  */
9098      if (optimize_size)
9099	{
9100	  op0 = gen_lowpart (V4SFmode, op0);
9101	  op1 = gen_lowpart (V4SFmode, op1);
9102	  emit_insn (gen_sse_movups (op0, op1));
9103	  return;
9104	}
9105
9106      /* ??? If we have typed data, then it would appear that using
9107	 movdqu is the only way to get unaligned data loaded with
9108	 integer type.  */
9109      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9110	{
9111	  op0 = gen_lowpart (V16QImode, op0);
9112	  op1 = gen_lowpart (V16QImode, op1);
9113	  emit_insn (gen_sse2_movdqu (op0, op1));
9114	  return;
9115	}
9116
9117      if (TARGET_SSE2 && mode == V2DFmode)
9118	{
9119	  rtx zero;
9120
9121	  /* When SSE registers are split into halves, we can avoid
9122	     writing to the top half twice.  */
9123	  if (TARGET_SSE_SPLIT_REGS)
9124	    {
9125	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9126	      zero = op0;
9127	    }
9128	  else
9129	    {
9130	      /* ??? Not sure about the best option for the Intel chips.
9131		 The following would seem to satisfy; the register is
9132		 entirely cleared, breaking the dependency chain.  We
9133		 then store to the upper half, with a dependency depth
9134		 of one.  A rumor has it that Intel recommends two movsd
9135		 followed by an unpacklpd, but this is unconfirmed.  And
9136		 given that the dependency depth of the unpacklpd would
9137		 still be one, I'm not sure why this would be better.  */
9138	      zero = CONST0_RTX (V2DFmode);
9139	    }
9140
9141	  m = adjust_address (op1, DFmode, 0);
9142	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
9143	  m = adjust_address (op1, DFmode, 8);
9144	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
9145	}
9146      else
9147	{
9148	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9149	    emit_move_insn (op0, CONST0_RTX (mode));
9150	  else
9151	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9152
9153	  if (mode != V4SFmode)
9154	    op0 = gen_lowpart (V4SFmode, op0);
9155	  m = adjust_address (op1, V2SFmode, 0);
9156	  emit_insn (gen_sse_loadlps (op0, op0, m));
9157	  m = adjust_address (op1, V2SFmode, 8);
9158	  emit_insn (gen_sse_loadhps (op0, op0, m));
9159	}
9160    }
9161  else if (MEM_P (op0))
9162    {
9163      /* If we're optimizing for size, movups is the smallest.  */
9164      if (optimize_size)
9165	{
9166	  op0 = gen_lowpart (V4SFmode, op0);
9167	  op1 = gen_lowpart (V4SFmode, op1);
9168	  emit_insn (gen_sse_movups (op0, op1));
9169	  return;
9170	}
9171
9172      /* ??? Similar to above, only less clear because of
9173	 "typeless stores".  */
9174      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9175	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9176        {
9177	  op0 = gen_lowpart (V16QImode, op0);
9178	  op1 = gen_lowpart (V16QImode, op1);
9179	  emit_insn (gen_sse2_movdqu (op0, op1));
9180	  return;
9181	}
9182
9183      if (TARGET_SSE2 && mode == V2DFmode)
9184	{
9185	  m = adjust_address (op0, DFmode, 0);
9186	  emit_insn (gen_sse2_storelpd (m, op1));
9187	  m = adjust_address (op0, DFmode, 8);
9188	  emit_insn (gen_sse2_storehpd (m, op1));
9189	}
9190      else
9191	{
9192	  if (mode != V4SFmode)
9193	    op1 = gen_lowpart (V4SFmode, op1);
9194	  m = adjust_address (op0, V2SFmode, 0);
9195	  emit_insn (gen_sse_storelps (m, op1));
9196	  m = adjust_address (op0, V2SFmode, 8);
9197	  emit_insn (gen_sse_storehps (m, op1));
9198	}
9199    }
9200  else
9201    gcc_unreachable ();
9202}
9203
9204/* Expand a push in MODE.  This is some mode for which we do not support
9205   proper push instructions, at least from the registers that we expect
9206   the value to live in.  */
9207
9208void
9209ix86_expand_push (enum machine_mode mode, rtx x)
9210{
9211  rtx tmp;
9212
9213  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9214			     GEN_INT (-GET_MODE_SIZE (mode)),
9215			     stack_pointer_rtx, 1, OPTAB_DIRECT);
9216  if (tmp != stack_pointer_rtx)
9217    emit_move_insn (stack_pointer_rtx, tmp);
9218
9219  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9220  emit_move_insn (tmp, x);
9221}
9222
9223/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
9224   destination to use for the operation.  If different from the true
9225   destination in operands[0], a copy operation will be required.  */
9226
9227rtx
9228ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9229			    rtx operands[])
9230{
9231  int matching_memory;
9232  rtx src1, src2, dst;
9233
9234  dst = operands[0];
9235  src1 = operands[1];
9236  src2 = operands[2];
9237
9238  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9239  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9240      && (rtx_equal_p (dst, src2)
9241	  || immediate_operand (src1, mode)))
9242    {
9243      rtx temp = src1;
9244      src1 = src2;
9245      src2 = temp;
9246    }
9247
9248  /* If the destination is memory, and we do not have matching source
9249     operands, do things in registers.  */
9250  matching_memory = 0;
9251  if (GET_CODE (dst) == MEM)
9252    {
9253      if (rtx_equal_p (dst, src1))
9254	matching_memory = 1;
9255      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9256	       && rtx_equal_p (dst, src2))
9257	matching_memory = 2;
9258      else
9259	dst = gen_reg_rtx (mode);
9260    }
9261
9262  /* Both source operands cannot be in memory.  */
9263  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9264    {
9265      if (matching_memory != 2)
9266	src2 = force_reg (mode, src2);
9267      else
9268	src1 = force_reg (mode, src1);
9269    }
9270
9271  /* If the operation is not commutable, source 1 cannot be a constant
9272     or non-matching memory.  */
9273  if ((CONSTANT_P (src1)
9274       || (!matching_memory && GET_CODE (src1) == MEM))
9275      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9276    src1 = force_reg (mode, src1);
9277
9278  src1 = operands[1] = src1;
9279  src2 = operands[2] = src2;
9280  return dst;
9281}
9282
9283/* Similarly, but assume that the destination has already been
9284   set up properly.  */
9285
9286void
9287ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9288				    enum machine_mode mode, rtx operands[])
9289{
9290  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9291  gcc_assert (dst == operands[0]);
9292}
9293
9294/* Attempt to expand a binary operator.  Make the expansion closer to the
9295   actual machine than just general_operand, which would allow 3 separate
9296   memory references (one output, two input) in a single insn.  */
9297
9298void
9299ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9300			     rtx operands[])
9301{
9302  rtx src1, src2, dst, op, clob;
9303
9304  dst = ix86_fixup_binary_operands (code, mode, operands);
9305  src1 = operands[1];
9306  src2 = operands[2];
9307
9308 /* Emit the instruction.  */
9309
9310  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9311  if (reload_in_progress)
9312    {
9313      /* Reload doesn't know about the flags register, and doesn't know that
9314         it doesn't want to clobber it.  We can only do this with PLUS.  */
9315      gcc_assert (code == PLUS);
9316      emit_insn (op);
9317    }
9318  else
9319    {
9320      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9321      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9322    }
9323
9324  /* Fix up the destination if needed.  */
9325  if (dst != operands[0])
9326    emit_move_insn (operands[0], dst);
9327}
9328
9329/* Return TRUE or FALSE depending on whether the binary operator meets the
9330   appropriate constraints.  */
9331
9332int
9333ix86_binary_operator_ok (enum rtx_code code,
9334			 enum machine_mode mode ATTRIBUTE_UNUSED,
9335			 rtx operands[3])
9336{
9337  /* Both source operands cannot be in memory.  */
9338  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9339    return 0;
9340  /* If the operation is not commutable, source 1 cannot be a constant.  */
9341  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9342    return 0;
9343  /* If the destination is memory, we must have a matching source operand.  */
9344  if (GET_CODE (operands[0]) == MEM
9345      && ! (rtx_equal_p (operands[0], operands[1])
9346	    || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9347		&& rtx_equal_p (operands[0], operands[2]))))
9348    return 0;
9349  /* If the operation is not commutable and the source 1 is memory, we must
9350     have a matching destination.  */
9351  if (GET_CODE (operands[1]) == MEM
9352      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9353      && ! rtx_equal_p (operands[0], operands[1]))
9354    return 0;
9355  return 1;
9356}
9357
9358/* Attempt to expand a unary operator.  Make the expansion closer to the
9359   actual machine than just general_operand, which would allow 2 separate
9360   memory references (one output, one input) in a single insn.  */
9361
9362void
9363ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9364			    rtx operands[])
9365{
9366  int matching_memory;
9367  rtx src, dst, op, clob;
9368
9369  dst = operands[0];
9370  src = operands[1];
9371
9372  /* If the destination is memory, and we do not have matching source
9373     operands, do things in registers.  */
9374  matching_memory = 0;
9375  if (MEM_P (dst))
9376    {
9377      if (rtx_equal_p (dst, src))
9378	matching_memory = 1;
9379      else
9380	dst = gen_reg_rtx (mode);
9381    }
9382
9383  /* When source operand is memory, destination must match.  */
9384  if (MEM_P (src) && !matching_memory)
9385    src = force_reg (mode, src);
9386
9387  /* Emit the instruction.  */
9388
9389  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9390  if (reload_in_progress || code == NOT)
9391    {
9392      /* Reload doesn't know about the flags register, and doesn't know that
9393         it doesn't want to clobber it.  */
9394      gcc_assert (code == NOT);
9395      emit_insn (op);
9396    }
9397  else
9398    {
9399      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9400      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9401    }
9402
9403  /* Fix up the destination if needed.  */
9404  if (dst != operands[0])
9405    emit_move_insn (operands[0], dst);
9406}
9407
9408/* Return TRUE or FALSE depending on whether the unary operator meets the
9409   appropriate constraints.  */
9410
9411int
9412ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9413			enum machine_mode mode ATTRIBUTE_UNUSED,
9414			rtx operands[2] ATTRIBUTE_UNUSED)
9415{
9416  /* If one of operands is memory, source and destination must match.  */
9417  if ((GET_CODE (operands[0]) == MEM
9418       || GET_CODE (operands[1]) == MEM)
9419      && ! rtx_equal_p (operands[0], operands[1]))
9420    return FALSE;
9421  return TRUE;
9422}
9423
9424/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9425   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
9426   true, then replicate the mask for all elements of the vector register.
9427   If INVERT is true, then create a mask excluding the sign bit.  */
9428
9429rtx
9430ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9431{
9432  enum machine_mode vec_mode;
9433  HOST_WIDE_INT hi, lo;
9434  int shift = 63;
9435  rtvec v;
9436  rtx mask;
9437
9438  /* Find the sign bit, sign extended to 2*HWI.  */
9439  if (mode == SFmode)
9440    lo = 0x80000000, hi = lo < 0;
9441  else if (HOST_BITS_PER_WIDE_INT >= 64)
9442    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9443  else
9444    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9445
9446  if (invert)
9447    lo = ~lo, hi = ~hi;
9448
9449  /* Force this value into the low part of a fp vector constant.  */
9450  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9451  mask = gen_lowpart (mode, mask);
9452
9453  if (mode == SFmode)
9454    {
9455      if (vect)
9456	v = gen_rtvec (4, mask, mask, mask, mask);
9457      else
9458	v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9459		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9460      vec_mode = V4SFmode;
9461    }
9462  else
9463    {
9464      if (vect)
9465	v = gen_rtvec (2, mask, mask);
9466      else
9467	v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9468      vec_mode = V2DFmode;
9469    }
9470
9471  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9472}
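/* For example, for DFmode this builds the 64-bit constant
   0x8000000000000000 (just the sign bit), or 0x7fffffffffffffff when
   INVERT is true; for SFmode the 32-bit values are 0x80000000 and
   0x7fffffff.  The value is broadcast to every vector element when VECT
   is true, and placed only in the low element otherwise.  */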
9473
9474/* Generate code for floating point ABS or NEG.  */
9475
9476void
9477ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9478				rtx operands[])
9479{
9480  rtx mask, set, use, clob, dst, src;
9481  bool matching_memory;
9482  bool use_sse = false;
9483  bool vector_mode = VECTOR_MODE_P (mode);
9484  enum machine_mode elt_mode = mode;
9485
9486  if (vector_mode)
9487    {
9488      elt_mode = GET_MODE_INNER (mode);
9489      use_sse = true;
9490    }
9491  else if (TARGET_SSE_MATH)
9492    use_sse = SSE_FLOAT_MODE_P (mode);
9493
9494  /* NEG and ABS performed with SSE use bitwise mask operations.
9495     Create the appropriate mask now.  */
9496  if (use_sse)
9497    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9498  else
9499    mask = NULL_RTX;
9500
9501  dst = operands[0];
9502  src = operands[1];
9503
9504  /* If the destination is memory, and we don't have matching source
9505     operands or we're using the x87, do things in registers.  */
9506  matching_memory = false;
9507  if (MEM_P (dst))
9508    {
9509      if (use_sse && rtx_equal_p (dst, src))
9510	matching_memory = true;
9511      else
9512	dst = gen_reg_rtx (mode);
9513    }
9514  if (MEM_P (src) && !matching_memory)
9515    src = force_reg (mode, src);
9516
9517  if (vector_mode)
9518    {
9519      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9520      set = gen_rtx_SET (VOIDmode, dst, set);
9521      emit_insn (set);
9522    }
9523  else
9524    {
9525      set = gen_rtx_fmt_e (code, mode, src);
9526      set = gen_rtx_SET (VOIDmode, dst, set);
9527      if (mask)
9528        {
9529          use = gen_rtx_USE (VOIDmode, mask);
9530          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9531          emit_insn (gen_rtx_PARALLEL (VOIDmode,
9532				       gen_rtvec (3, set, use, clob)));
9533        }
9534      else
9535	emit_insn (set);
9536    }
9537
9538  if (dst != operands[0])
9539    emit_move_insn (operands[0], dst);
9540}
9541
9542/* Expand a copysign operation.  Special case operand 0 being a constant.  */
9543
9544void
9545ix86_expand_copysign (rtx operands[])
9546{
9547  enum machine_mode mode, vmode;
9548  rtx dest, op0, op1, mask, nmask;
9549
9550  dest = operands[0];
9551  op0 = operands[1];
9552  op1 = operands[2];
9553
9554  mode = GET_MODE (dest);
9555  vmode = mode == SFmode ? V4SFmode : V2DFmode;
9556
9557  if (GET_CODE (op0) == CONST_DOUBLE)
9558    {
9559      rtvec v;
9560
9561      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9562	op0 = simplify_unary_operation (ABS, mode, op0, mode);
9563
9564      if (op0 == CONST0_RTX (mode))
9565	op0 = CONST0_RTX (vmode);
9566      else
9567        {
9568	  if (mode == SFmode)
9569	    v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9570                           CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9571	  else
9572	    v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9573          op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9574	}
9575
9576      mask = ix86_build_signbit_mask (mode, 0, 0);
9577
9578      if (mode == SFmode)
9579	emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9580      else
9581	emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9582    }
9583  else
9584    {
9585      nmask = ix86_build_signbit_mask (mode, 0, 1);
9586      mask = ix86_build_signbit_mask (mode, 0, 0);
9587
9588      if (mode == SFmode)
9589	emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9590      else
9591	emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9592    }
9593}
9594
9595/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
9596   be a constant, and so has already been expanded into a vector constant.  */
9597
9598void
9599ix86_split_copysign_const (rtx operands[])
9600{
9601  enum machine_mode mode, vmode;
9602  rtx dest, op0, op1, mask, x;
9603
9604  dest = operands[0];
9605  op0 = operands[1];
9606  op1 = operands[2];
9607  mask = operands[3];
9608
9609  mode = GET_MODE (dest);
9610  vmode = GET_MODE (mask);
9611
9612  dest = simplify_gen_subreg (vmode, dest, mode, 0);
9613  x = gen_rtx_AND (vmode, dest, mask);
9614  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9615
9616  if (op0 != CONST0_RTX (vmode))
9617    {
9618      x = gen_rtx_IOR (vmode, dest, op0);
9619      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9620    }
9621}
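/* Both split routines implement the usual bit-level identity
   copysign (x, y) = (x & ~signbit) | (y & signbit): the AND isolates one
   side of the sign bit using a mask built by ix86_build_signbit_mask and
   the IOR merges in the other, with the variable case below needing both
   the mask and its complement.  */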
9622
9623/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
9624   so we have to do two masks.  */
9625
9626void
9627ix86_split_copysign_var (rtx operands[])
9628{
9629  enum machine_mode mode, vmode;
9630  rtx dest, scratch, op0, op1, mask, nmask, x;
9631
9632  dest = operands[0];
9633  scratch = operands[1];
9634  op0 = operands[2];
9635  op1 = operands[3];
9636  nmask = operands[4];
9637  mask = operands[5];
9638
9639  mode = GET_MODE (dest);
9640  vmode = GET_MODE (mask);
9641
9642  if (rtx_equal_p (op0, op1))
9643    {
9644      /* Shouldn't happen often (it's useless, obviously), but when it does
9645	 we'd generate incorrect code if we continue below.  */
9646      emit_move_insn (dest, op0);
9647      return;
9648    }
9649
9650  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
9651    {
9652      gcc_assert (REGNO (op1) == REGNO (scratch));
9653
9654      x = gen_rtx_AND (vmode, scratch, mask);
9655      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9656
9657      dest = mask;
9658      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9659      x = gen_rtx_NOT (vmode, dest);
9660      x = gen_rtx_AND (vmode, x, op0);
9661      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9662    }
9663  else
9664    {
9665      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
9666	{
9667	  x = gen_rtx_AND (vmode, scratch, mask);
9668	}
9669      else						/* alternative 2,4 */
9670	{
9671          gcc_assert (REGNO (mask) == REGNO (scratch));
9672          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9673	  x = gen_rtx_AND (vmode, scratch, op1);
9674	}
9675      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9676
9677      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
9678	{
9679	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
9680	  x = gen_rtx_AND (vmode, dest, nmask);
9681	}
9682      else						/* alternative 3,4 */
9683	{
9684          gcc_assert (REGNO (nmask) == REGNO (dest));
9685	  dest = nmask;
9686	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9687	  x = gen_rtx_AND (vmode, dest, op0);
9688	}
9689      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9690    }
9691
9692  x = gen_rtx_IOR (vmode, dest, scratch);
9693  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9694}
9695
9696/* Return TRUE or FALSE depending on whether the first SET in INSN
9697   has source and destination with matching CC modes, and that the
9698   CC mode is at least as constrained as REQ_MODE.  */
9699
9700int
9701ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9702{
9703  rtx set;
9704  enum machine_mode set_mode;
9705
9706  set = PATTERN (insn);
9707  if (GET_CODE (set) == PARALLEL)
9708    set = XVECEXP (set, 0, 0);
9709  gcc_assert (GET_CODE (set) == SET);
9710  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9711
9712  set_mode = GET_MODE (SET_DEST (set));
9713  switch (set_mode)
9714    {
9715    case CCNOmode:
9716      if (req_mode != CCNOmode
9717	  && (req_mode != CCmode
9718	      || XEXP (SET_SRC (set), 1) != const0_rtx))
9719	return 0;
9720      break;
9721    case CCmode:
9722      if (req_mode == CCGCmode)
9723	return 0;
9724      /* FALLTHRU */
9725    case CCGCmode:
9726      if (req_mode == CCGOCmode || req_mode == CCNOmode)
9727	return 0;
9728      /* FALLTHRU */
9729    case CCGOCmode:
9730      if (req_mode == CCZmode)
9731	return 0;
9732      /* FALLTHRU */
9733    case CCZmode:
9734      break;
9735
9736    default:
9737      gcc_unreachable ();
9738    }
9739
9740  return (GET_MODE (SET_SRC (set)) == set_mode);
9741}
9742
9743/* Generate insn patterns to do an integer compare of OPERANDS.  */
9744
9745static rtx
9746ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9747{
9748  enum machine_mode cmpmode;
9749  rtx tmp, flags;
9750
9751  cmpmode = SELECT_CC_MODE (code, op0, op1);
9752  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9753
9754  /* This is very simple, but making the interface the same as in the
9755     FP case makes the rest of the code easier.  */
9756  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9757  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9758
9759  /* Return the test that should be put into the flags user, i.e.
9760     the bcc, scc, or cmov instruction.  */
9761  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9762}
9763
9764/* Figure out whether to use ordered or unordered fp comparisons.
9765   Return the appropriate mode to use.  */
9766
9767enum machine_mode
9768ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9769{
9770  /* ??? In order to make all comparisons reversible, we do all comparisons
9771     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
9772     between trapping and nontrapping forms of all comparisons, we can make
9773     inequality comparisons trapping again, since that results in better code
9774     when using FCOM-based compares.  */
9775  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9776}
9777
9778enum machine_mode
9779ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9780{
9781  if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9782    return ix86_fp_compare_mode (code);
9783  switch (code)
9784    {
9785      /* Only zero flag is needed.  */
9786    case EQ:			/* ZF=0 */
9787    case NE:			/* ZF!=0 */
9788      return CCZmode;
9789      /* Codes needing carry flag.  */
9790    case GEU:			/* CF=0 */
9791    case GTU:			/* CF=0 & ZF=0 */
9792    case LTU:			/* CF=1 */
9793    case LEU:			/* CF=1 | ZF=1 */
9794      return CCmode;
9795      /* Codes possibly doable only with sign flag when
9796         comparing against zero.  */
9797    case GE:			/* SF=OF   or   SF=0 */
9798    case LT:			/* SF<>OF  or   SF=1 */
9799      if (op1 == const0_rtx)
9800	return CCGOCmode;
9801      else
9802	/* For other cases Carry flag is not required.  */
9803	return CCGCmode;
9804      /* Codes doable only with the sign flag when comparing
9805         against zero, but we lack a jump instruction for that,
9806         so we need to use relational tests against the overflow
9807         flag, which therefore has to be zero.  */
9808    case GT:			/* ZF=0 & SF=OF */
9809    case LE:			/* ZF=1 | SF<>OF */
9810      if (op1 == const0_rtx)
9811	return CCNOmode;
9812      else
9813	return CCGCmode;
9814      /* The strcmp pattern does (use flags), and combine may ask us for the
9815	 proper mode.  */
9816    case USE:
9817      return CCmode;
9818    default:
9819      gcc_unreachable ();
9820    }
9821}
9822
9823/* Return the fixed registers used for condition codes.  */
9824
9825static bool
9826ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9827{
9828  *p1 = FLAGS_REG;
9829  *p2 = FPSR_REG;
9830  return true;
9831}
9832
9833/* If two condition code modes are compatible, return a condition code
9834   mode which is compatible with both.  Otherwise, return
9835   VOIDmode.  */
9836
9837static enum machine_mode
9838ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9839{
9840  if (m1 == m2)
9841    return m1;
9842
9843  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9844    return VOIDmode;
9845
9846  if ((m1 == CCGCmode && m2 == CCGOCmode)
9847      || (m1 == CCGOCmode && m2 == CCGCmode))
9848    return CCGCmode;
9849
9850  switch (m1)
9851    {
9852    default:
9853      gcc_unreachable ();
9854
9855    case CCmode:
9856    case CCGCmode:
9857    case CCGOCmode:
9858    case CCNOmode:
9859    case CCZmode:
9860      switch (m2)
9861	{
9862	default:
9863	  return VOIDmode;
9864
9865	case CCmode:
9866	case CCGCmode:
9867	case CCGOCmode:
9868	case CCNOmode:
9869	case CCZmode:
9870	  return CCmode;
9871	}
9872
9873    case CCFPmode:
9874    case CCFPUmode:
9875      /* These are only compatible with themselves, which we already
9876	 checked above.  */
9877      return VOIDmode;
9878    }
9879}
9880
9881/* Return true if we should use an FCOMI instruction for this fp comparison.  */
9882
9883int
9884ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9885{
9886  enum rtx_code swapped_code = swap_condition (code);
9887  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9888	  || (ix86_fp_comparison_cost (swapped_code)
9889	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
9890}
9891
9892/* Swap, force into registers, or otherwise massage the two operands
9893   to a fp comparison.  The operands are updated in place; the new
9894   comparison code is returned.  */
9895
9896static enum rtx_code
9897ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9898{
9899  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9900  rtx op0 = *pop0, op1 = *pop1;
9901  enum machine_mode op_mode = GET_MODE (op0);
9902  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9903
9904  /* All of the unordered compare instructions only work on registers.
9905     The same is true of the fcomi compare instructions.  The XFmode
9906     compare instructions require registers except when comparing
9907     against zero or when converting operand 1 from fixed point to
9908     floating point.  */
9909
9910  if (!is_sse
9911      && (fpcmp_mode == CCFPUmode
9912	  || (op_mode == XFmode
9913	      && ! (standard_80387_constant_p (op0) == 1
9914		    || standard_80387_constant_p (op1) == 1)
9915	      && GET_CODE (op1) != FLOAT)
9916	  || ix86_use_fcomi_compare (code)))
9917    {
9918      op0 = force_reg (op_mode, op0);
9919      op1 = force_reg (op_mode, op1);
9920    }
9921  else
9922    {
9923      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
9924	 things around if they appear profitable, otherwise force op0
9925	 into a register.  */
9926
9927      if (standard_80387_constant_p (op0) == 0
9928	  || (GET_CODE (op0) == MEM
9929	      && ! (standard_80387_constant_p (op1) == 0
9930		    || GET_CODE (op1) == MEM)))
9931	{
9932	  rtx tmp;
9933	  tmp = op0, op0 = op1, op1 = tmp;
9934	  code = swap_condition (code);
9935	}
9936
9937      if (GET_CODE (op0) != REG)
9938	op0 = force_reg (op_mode, op0);
9939
9940      if (CONSTANT_P (op1))
9941	{
9942	  int tmp = standard_80387_constant_p (op1);
9943	  if (tmp == 0)
9944	    op1 = validize_mem (force_const_mem (op_mode, op1));
9945	  else if (tmp == 1)
9946	    {
9947	      if (TARGET_CMOVE)
9948		op1 = force_reg (op_mode, op1);
9949	    }
9950	  else
9951	    op1 = force_reg (op_mode, op1);
9952	}
9953    }
9954
9955  /* Try to rearrange the comparison to make it cheaper.  */
9956  if (ix86_fp_comparison_cost (code)
9957      > ix86_fp_comparison_cost (swap_condition (code))
9958      && (GET_CODE (op1) == REG || !no_new_pseudos))
9959    {
9960      rtx tmp;
9961      tmp = op0, op0 = op1, op1 = tmp;
9962      code = swap_condition (code);
9963      if (GET_CODE (op0) != REG)
9964	op0 = force_reg (op_mode, op0);
9965    }
9966
9967  *pop0 = op0;
9968  *pop1 = op1;
9969  return code;
9970}
9971
9972/* Convert comparison codes we use to represent FP comparison to integer
9973   code that will result in proper branch.  Return UNKNOWN if no such code
9974   is available.  */
9975
9976enum rtx_code
9977ix86_fp_compare_code_to_integer (enum rtx_code code)
9978{
9979  switch (code)
9980    {
9981    case GT:
9982      return GTU;
9983    case GE:
9984      return GEU;
9985    case ORDERED:
9986    case UNORDERED:
9987      return code;
9988      break;
9989    case UNEQ:
9990      return EQ;
9991      break;
9992    case UNLT:
9993      return LTU;
9994      break;
9995    case UNLE:
9996      return LEU;
9997      break;
9998    case LTGT:
9999      return NE;
10000      break;
10001    default:
10002      return UNKNOWN;
10003    }
10004}
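/* The mapping to unsigned conditions reflects the flag pattern left by
   fcomi/fnstsw (see the flag table in ix86_fp_comparison_codes below):
   for instance GT becomes GTU, which tests CF=0 && ZF=0, exactly the ">"
   row of that table.  */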
10005
10006/* Split comparison code CODE into comparisons we can do using branch
10007   instructions.  BYPASS_CODE is comparison code for branch that will
10008   instructions.  BYPASS_CODE is the comparison code for a branch that will
10009   branch around FIRST_CODE and SECOND_CODE.  If one of the branches
10010   is not required, its code is set to UNKNOWN.
10011
10012void
10013ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10014			  enum rtx_code *first_code,
10015			  enum rtx_code *second_code)
10016{
10017  *first_code = code;
10018  *bypass_code = UNKNOWN;
10019  *second_code = UNKNOWN;
10020
10021  /* The fcomi comparison sets flags as follows:
10022
10023     cmp    ZF PF CF
10024     >      0  0  0
10025     <      0  0  1
10026     =      1  0  0
10027     un     1  1  1 */
10028
10029  switch (code)
10030    {
10031    case GT:			/* GTU - CF=0 & ZF=0 */
10032    case GE:			/* GEU - CF=0 */
10033    case ORDERED:		/* PF=0 */
10034    case UNORDERED:		/* PF=1 */
10035    case UNEQ:			/* EQ - ZF=1 */
10036    case UNLT:			/* LTU - CF=1 */
10037    case UNLE:			/* LEU - CF=1 | ZF=1 */
10038    case LTGT:			/* EQ - ZF=0 */
10039      break;
10040    case LT:			/* LTU - CF=1 - fails on unordered */
10041      *first_code = UNLT;
10042      *bypass_code = UNORDERED;
10043      break;
10044    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
10045      *first_code = UNLE;
10046      *bypass_code = UNORDERED;
10047      break;
10048    case EQ:			/* EQ - ZF=1 - fails on unordered */
10049      *first_code = UNEQ;
10050      *bypass_code = UNORDERED;
10051      break;
10052    case NE:			/* NE - ZF=0 - fails on unordered */
10053      *first_code = LTGT;
10054      *second_code = UNORDERED;
10055      break;
10056    case UNGE:			/* GEU - CF=0 - fails on unordered */
10057      *first_code = GE;
10058      *second_code = UNORDERED;
10059      break;
10060    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
10061      *first_code = GT;
10062      *second_code = UNORDERED;
10063      break;
10064    default:
10065      gcc_unreachable ();
10066    }
10067  if (!TARGET_IEEE_FP)
10068    {
10069      *second_code = UNKNOWN;
10070      *bypass_code = UNKNOWN;
10071    }
10072}
10073
10074/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10075   All of the following functions use the number of instructions as the cost metric.
10076   In the future this should be tweaked to compute bytes for optimize_size and to
10077   take into account the performance of various instructions on various CPUs.  */
10078static int
10079ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10080{
10081  if (!TARGET_IEEE_FP)
10082    return 4;
10083  /* The cost of code output by ix86_expand_fp_compare.  */
10084  switch (code)
10085    {
10086    case UNLE:
10087    case UNLT:
10088    case LTGT:
10089    case GT:
10090    case GE:
10091    case UNORDERED:
10092    case ORDERED:
10093    case UNEQ:
10094      return 4;
10095      break;
10096    case LT:
10097    case NE:
10098    case EQ:
10099    case UNGE:
10100      return 5;
10101      break;
10102    case LE:
10103    case UNGT:
10104      return 6;
10105      break;
10106    default:
10107      gcc_unreachable ();
10108    }
10109}
10110
10111/* Return cost of comparison done using fcomi operation.
10112   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10113static int
10114ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10115{
10116  enum rtx_code bypass_code, first_code, second_code;
10117  /* Return an arbitrarily high cost when the instruction is not supported -
10118     this prevents gcc from using it.  */
10119  if (!TARGET_CMOVE)
10120    return 1024;
10121  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10122  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10123}
10124
10125/* Return cost of comparison done using sahf operation.
10126   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10127static int
10128ix86_fp_comparison_sahf_cost (enum rtx_code code)
10129{
10130  enum rtx_code bypass_code, first_code, second_code;
10131  /* Return an arbitrarily high cost when the instruction is not preferred -
10132     this keeps gcc from using it.  */
10133  if (!TARGET_USE_SAHF && !optimize_size)
10134    return 1024;
10135  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10136  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10137}
10138
10139/* Compute cost of the comparison done using any method.
10140   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10141static int
10142ix86_fp_comparison_cost (enum rtx_code code)
10143{
10144  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10145  int min;
10146
10147  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10148  sahf_cost = ix86_fp_comparison_sahf_cost (code);
10149
10150  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10151  if (min > sahf_cost)
10152    min = sahf_cost;
10153  if (min > fcomi_cost)
10154    min = fcomi_cost;
10155  return min;
10156}
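
/* A worked example (added note) under stated assumptions (TARGET_IEEE_FP and
   TARGET_CMOVE): for CODE == LT, ix86_fp_comparison_codes yields first_code
   UNLT with an UNORDERED bypass, so the fcomi cost is 2 + 1 == 3, the sahf
   cost is 3 + 1 == 4 and the arithmetic (fnstsw/test) cost is 5; the minimum
   therefore picks the fcomi sequence.  */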
10157
10158/* Generate insn patterns to do a floating point compare of OPERANDS.  */
10159
10160static rtx
10161ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10162			rtx *second_test, rtx *bypass_test)
10163{
10164  enum machine_mode fpcmp_mode, intcmp_mode;
10165  rtx tmp, tmp2;
10166  int cost = ix86_fp_comparison_cost (code);
10167  enum rtx_code bypass_code, first_code, second_code;
10168
10169  fpcmp_mode = ix86_fp_compare_mode (code);
10170  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10171
10172  if (second_test)
10173    *second_test = NULL_RTX;
10174  if (bypass_test)
10175    *bypass_test = NULL_RTX;
10176
10177  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10178
10179  /* Do fcomi/sahf based test when profitable.  */
10180  if ((bypass_code == UNKNOWN || bypass_test)
10181      && (second_code == UNKNOWN || second_test)
10182      && ix86_fp_comparison_arithmetics_cost (code) > cost)
10183    {
10184      if (TARGET_CMOVE)
10185	{
10186	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10187	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10188			     tmp);
10189	  emit_insn (tmp);
10190	}
10191      else
10192	{
10193	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10194	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10195	  if (!scratch)
10196	    scratch = gen_reg_rtx (HImode);
10197	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10198	  emit_insn (gen_x86_sahf_1 (scratch));
10199	}
10200
10201      /* The FP codes work out to act like unsigned.  */
10202      intcmp_mode = fpcmp_mode;
10203      code = first_code;
10204      if (bypass_code != UNKNOWN)
10205	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10206				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
10207				       const0_rtx);
10208      if (second_code != UNKNOWN)
10209	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10210				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
10211				       const0_rtx);
10212    }
10213  else
10214    {
10215      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
10216      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10217      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10218      if (!scratch)
10219	scratch = gen_reg_rtx (HImode);
10220      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10221
10222      /* In the unordered case, we have to check C2 for NaN's, which
10223	 doesn't happen to work out to anything nice combination-wise.
10224	 So do some bit twiddling on the value we've got in AH to come
10225	 up with an appropriate set of condition codes.  */
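
      /* Note added for clarity (based on the x87 status word layout): after
	 fnstsw the condition bits land in AH as C0 == 0x01, C2 == 0x04 and
	 C3 == 0x40, so a mask of 0x45 tests C0|C2|C3 at once.  E.g. for GT
	 without IEEE checking, "testb $0x45, %ah" followed by sete is true
	 exactly when all three bits are clear, i.e. the fcom result was ">"
	 and ordered.  */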
10226
10227      intcmp_mode = CCNOmode;
10228      switch (code)
10229	{
10230	case GT:
10231	case UNGT:
10232	  if (code == GT || !TARGET_IEEE_FP)
10233	    {
10234	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10235	      code = EQ;
10236	    }
10237	  else
10238	    {
10239	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10240	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10241	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10242	      intcmp_mode = CCmode;
10243	      code = GEU;
10244	    }
10245	  break;
10246	case LT:
10247	case UNLT:
10248	  if (code == LT && TARGET_IEEE_FP)
10249	    {
10250	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10251	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10252	      intcmp_mode = CCmode;
10253	      code = EQ;
10254	    }
10255	  else
10256	    {
10257	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10258	      code = NE;
10259	    }
10260	  break;
10261	case GE:
10262	case UNGE:
10263	  if (code == GE || !TARGET_IEEE_FP)
10264	    {
10265	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10266	      code = EQ;
10267	    }
10268	  else
10269	    {
10270	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10271	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10272					     GEN_INT (0x01)));
10273	      code = NE;
10274	    }
10275	  break;
10276	case LE:
10277	case UNLE:
10278	  if (code == LE && TARGET_IEEE_FP)
10279	    {
10280	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10281	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10282	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10283	      intcmp_mode = CCmode;
10284	      code = LTU;
10285	    }
10286	  else
10287	    {
10288	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10289	      code = NE;
10290	    }
10291	  break;
10292	case EQ:
10293	case UNEQ:
10294	  if (code == EQ && TARGET_IEEE_FP)
10295	    {
10296	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10297	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10298	      intcmp_mode = CCmode;
10299	      code = EQ;
10300	    }
10301	  else
10302	    {
10303	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10304	      code = NE;
10305	      break;
10306	    }
10307	  break;
10308	case NE:
10309	case LTGT:
10310	  if (code == NE && TARGET_IEEE_FP)
10311	    {
10312	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10313	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10314					     GEN_INT (0x40)));
10315	      code = NE;
10316	    }
10317	  else
10318	    {
10319	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10320	      code = EQ;
10321	    }
10322	  break;
10323
10324	case UNORDERED:
10325	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10326	  code = NE;
10327	  break;
10328	case ORDERED:
10329	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10330	  code = EQ;
10331	  break;
10332
10333	default:
10334	  gcc_unreachable ();
10335	}
10336    }
10337
10338  /* Return the test that should be put into the flags user, i.e.
10339     the bcc, scc, or cmov instruction.  */
10340  return gen_rtx_fmt_ee (code, VOIDmode,
10341			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10342			 const0_rtx);
10343}
10344
10345rtx
10346ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10347{
10348  rtx op0, op1, ret;
10349  op0 = ix86_compare_op0;
10350  op1 = ix86_compare_op1;
10351
10352  if (second_test)
10353    *second_test = NULL_RTX;
10354  if (bypass_test)
10355    *bypass_test = NULL_RTX;
10356
10357  if (ix86_compare_emitted)
10358    {
10359      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10360      ix86_compare_emitted = NULL_RTX;
10361    }
10362  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10363    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10364				  second_test, bypass_test);
10365  else
10366    ret = ix86_expand_int_compare (code, op0, op1);
10367
10368  return ret;
10369}
10370
10371/* Return true if the CODE will result in a nontrivial jump sequence.  */
10372bool
10373ix86_fp_jump_nontrivial_p (enum rtx_code code)
10374{
10375  enum rtx_code bypass_code, first_code, second_code;
10376  if (!TARGET_CMOVE)
10377    return true;
10378  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10379  return bypass_code != UNKNOWN || second_code != UNKNOWN;
10380}
10381
10382void
10383ix86_expand_branch (enum rtx_code code, rtx label)
10384{
10385  rtx tmp;
10386
10387  /* If we have emitted a compare insn, go straight to simple.
10388     ix86_expand_compare won't emit anything if ix86_compare_emitted
10389     is non-NULL.  */
10390  if (ix86_compare_emitted)
10391    goto simple;
10392
10393  switch (GET_MODE (ix86_compare_op0))
10394    {
10395    case QImode:
10396    case HImode:
10397    case SImode:
10398      simple:
10399      tmp = ix86_expand_compare (code, NULL, NULL);
10400      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10401				  gen_rtx_LABEL_REF (VOIDmode, label),
10402				  pc_rtx);
10403      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10404      return;
10405
10406    case SFmode:
10407    case DFmode:
10408    case XFmode:
10409      {
10410	rtvec vec;
10411	int use_fcomi;
10412	enum rtx_code bypass_code, first_code, second_code;
10413
10414	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10415					     &ix86_compare_op1);
10416
10417	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10418
10419	/* Check whether we will use the natural sequence with one jump.  If
10420	   so, we can expand the jump early.  Otherwise delay expansion by
10421	   creating a compound insn so as not to confuse the optimizers.  */
10422	if (bypass_code == UNKNOWN && second_code == UNKNOWN
10423	    && TARGET_CMOVE)
10424	  {
10425	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10426				  gen_rtx_LABEL_REF (VOIDmode, label),
10427				  pc_rtx, NULL_RTX, NULL_RTX);
10428	  }
10429	else
10430	  {
10431	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
10432				  ix86_compare_op0, ix86_compare_op1);
10433	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10434					gen_rtx_LABEL_REF (VOIDmode, label),
10435					pc_rtx);
10436	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10437
10438	    use_fcomi = ix86_use_fcomi_compare (code);
10439	    vec = rtvec_alloc (3 + !use_fcomi);
10440	    RTVEC_ELT (vec, 0) = tmp;
10441	    RTVEC_ELT (vec, 1)
10442	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10443	    RTVEC_ELT (vec, 2)
10444	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10445	    if (! use_fcomi)
10446	      RTVEC_ELT (vec, 3)
10447		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10448
10449	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10450	  }
10451	return;
10452      }
10453
10454    case DImode:
10455      if (TARGET_64BIT)
10456	goto simple;
10457    case TImode:
10458      /* Expand DImode/TImode branch into multiple compare+branch.  */
10459      {
10460	rtx lo[2], hi[2], label2;
10461	enum rtx_code code1, code2, code3;
10462	enum machine_mode submode;
10463
10464	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10465	  {
10466	    tmp = ix86_compare_op0;
10467	    ix86_compare_op0 = ix86_compare_op1;
10468	    ix86_compare_op1 = tmp;
10469	    code = swap_condition (code);
10470	  }
10471	if (GET_MODE (ix86_compare_op0) == DImode)
10472	  {
10473	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10474	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10475	    submode = SImode;
10476	  }
10477	else
10478	  {
10479	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10480	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10481	    submode = DImode;
10482	  }
10483
10484	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10485	   avoid two branches.  This costs one extra insn, so disable when
10486	   optimizing for size.  */
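	/* A sketch of the idea (illustrative, with placeholder register
	   names): for a 64-bit "a == b" on ia32 this emits roughly
		xorl	bhi, ahi
		xorl	blo, alo
		orl	ahi, alo
	   and then branches on the single ZF result, instead of comparing
	   and branching on each half separately.  */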
10487
10488	if ((code == EQ || code == NE)
10489	    && (!optimize_size
10490	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
10491	  {
10492	    rtx xor0, xor1;
10493
10494	    xor1 = hi[0];
10495	    if (hi[1] != const0_rtx)
10496	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10497				   NULL_RTX, 0, OPTAB_WIDEN);
10498
10499	    xor0 = lo[0];
10500	    if (lo[1] != const0_rtx)
10501	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10502				   NULL_RTX, 0, OPTAB_WIDEN);
10503
10504	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
10505				NULL_RTX, 0, OPTAB_WIDEN);
10506
10507	    ix86_compare_op0 = tmp;
10508	    ix86_compare_op1 = const0_rtx;
10509	    ix86_expand_branch (code, label);
10510	    return;
10511	  }
10512
10513	/* Otherwise, if we are doing a less-than or greater-or-equal-than
10514	   comparison, op1 is a constant and its low word is zero, then we
10515	   can just examine the high word.  */
10516
10517	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10518	  switch (code)
10519	    {
10520	    case LT: case LTU: case GE: case GEU:
10521	      ix86_compare_op0 = hi[0];
10522	      ix86_compare_op1 = hi[1];
10523	      ix86_expand_branch (code, label);
10524	      return;
10525	    default:
10526	      break;
10527	    }
10528
10529	/* Otherwise, we need two or three jumps.  */
10530
10531	label2 = gen_label_rtx ();
10532
10533	code1 = code;
10534	code2 = swap_condition (code);
10535	code3 = unsigned_condition (code);
10536
10537	switch (code)
10538	  {
10539	  case LT: case GT: case LTU: case GTU:
10540	    break;
10541
10542	  case LE:   code1 = LT;  code2 = GT;  break;
10543	  case GE:   code1 = GT;  code2 = LT;  break;
10544	  case LEU:  code1 = LTU; code2 = GTU; break;
10545	  case GEU:  code1 = GTU; code2 = LTU; break;
10546
10547	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
10548	  case NE:   code2 = UNKNOWN; break;
10549
10550	  default:
10551	    gcc_unreachable ();
10552	  }
10553
10554	/*
10555	 * a < b =>
10556	 *    if (hi(a) < hi(b)) goto true;
10557	 *    if (hi(a) > hi(b)) goto false;
10558	 *    if (lo(a) < lo(b)) goto true;
10559	 *  false:
10560	 */
10561
10562	ix86_compare_op0 = hi[0];
10563	ix86_compare_op1 = hi[1];
10564
10565	if (code1 != UNKNOWN)
10566	  ix86_expand_branch (code1, label);
10567	if (code2 != UNKNOWN)
10568	  ix86_expand_branch (code2, label2);
10569
10570	ix86_compare_op0 = lo[0];
10571	ix86_compare_op1 = lo[1];
10572	ix86_expand_branch (code3, label);
10573
10574	if (code2 != UNKNOWN)
10575	  emit_label (label2);
10576	return;
10577      }
10578
10579    default:
10580      gcc_unreachable ();
10581    }
10582}
10583
10584/* Split branch based on floating point condition.  */
10585void
10586ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10587		      rtx target1, rtx target2, rtx tmp, rtx pushed)
10588{
10589  rtx second, bypass;
10590  rtx label = NULL_RTX;
10591  rtx condition;
10592  int bypass_probability = -1, second_probability = -1, probability = -1;
10593  rtx i;
10594
10595  if (target2 != pc_rtx)
10596    {
10597      rtx tmp = target2;
10598      code = reverse_condition_maybe_unordered (code);
10599      target2 = target1;
10600      target1 = tmp;
10601    }
10602
10603  condition = ix86_expand_fp_compare (code, op1, op2,
10604				      tmp, &second, &bypass);
10605
10606  /* Remove pushed operand from stack.  */
10607  if (pushed)
10608    ix86_free_from_memory (GET_MODE (pushed));
10609
10610  if (split_branch_probability >= 0)
10611    {
10612      /* Distribute the probabilities across the jumps.
10613	 Assume that the BYPASS and SECOND branches always
10614	 test for UNORDERED.  */
10615      probability = split_branch_probability;
10616
10617      /* A value of 1 is low enough that the main probability does not
10618	 need to be updated.  Later we may run some experiments and see
10619	 whether unordered values are more frequent in practice.  */
10620      if (bypass)
10621	bypass_probability = 1;
10622      if (second)
10623	second_probability = 1;
10624    }
10625  if (bypass != NULL_RTX)
10626    {
10627      label = gen_label_rtx ();
10628      i = emit_jump_insn (gen_rtx_SET
10629			  (VOIDmode, pc_rtx,
10630			   gen_rtx_IF_THEN_ELSE (VOIDmode,
10631						 bypass,
10632						 gen_rtx_LABEL_REF (VOIDmode,
10633								    label),
10634						 pc_rtx)));
10635      if (bypass_probability >= 0)
10636	REG_NOTES (i)
10637	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
10638			       GEN_INT (bypass_probability),
10639			       REG_NOTES (i));
10640    }
10641  i = emit_jump_insn (gen_rtx_SET
10642		      (VOIDmode, pc_rtx,
10643		       gen_rtx_IF_THEN_ELSE (VOIDmode,
10644					     condition, target1, target2)));
10645  if (probability >= 0)
10646    REG_NOTES (i)
10647      = gen_rtx_EXPR_LIST (REG_BR_PROB,
10648			   GEN_INT (probability),
10649			   REG_NOTES (i));
10650  if (second != NULL_RTX)
10651    {
10652      i = emit_jump_insn (gen_rtx_SET
10653			  (VOIDmode, pc_rtx,
10654			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10655						 target2)));
10656      if (second_probability >= 0)
10657	REG_NOTES (i)
10658	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
10659			       GEN_INT (second_probability),
10660			       REG_NOTES (i));
10661    }
10662  if (label != NULL_RTX)
10663    emit_label (label);
10664}
10665
10666int
10667ix86_expand_setcc (enum rtx_code code, rtx dest)
10668{
10669  rtx ret, tmp, tmpreg, equiv;
10670  rtx second_test, bypass_test;
10671
10672  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10673    return 0; /* FAIL */
10674
10675  gcc_assert (GET_MODE (dest) == QImode);
10676
10677  ret = ix86_expand_compare (code, &second_test, &bypass_test);
10678  PUT_MODE (ret, QImode);
10679
10680  tmp = dest;
10681  tmpreg = dest;
10682
10683  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10684  if (bypass_test || second_test)
10685    {
10686      rtx test = second_test;
10687      int bypass = 0;
10688      rtx tmp2 = gen_reg_rtx (QImode);
10689      if (bypass_test)
10690	{
10691	  gcc_assert (!second_test);
10692	  test = bypass_test;
10693	  bypass = 1;
10694	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10695	}
10696      PUT_MODE (test, QImode);
10697      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10698
10699      if (bypass)
10700	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10701      else
10702	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10703    }
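
  /* Illustrative sketch (added note): for an IEEE FP "a == b" done via
     fcomi, the main test is UNEQ (sete, true for equal or unordered) and the
     bypass test is UNORDERED; the bypass is reversed to ORDERED (setnp) and
     ANDed in, so the final value is 1 exactly for an ordered, equal pair.  */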
10704
10705  /* Attach a REG_EQUAL note describing the comparison result.  */
10706  if (ix86_compare_op0 && ix86_compare_op1)
10707    {
10708      equiv = simplify_gen_relational (code, QImode,
10709				       GET_MODE (ix86_compare_op0),
10710				       ix86_compare_op0, ix86_compare_op1);
10711      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10712    }
10713
10714  return 1; /* DONE */
10715}
10716
10717/* Expand comparison setting or clearing carry flag.  Return true when
10718   successful and set pop for the operation.  */
10719static bool
10720ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10721{
10722  enum machine_mode mode =
10723    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10724
10725  /* Do not handle DImode compares that go through the special path.
10726     FP compares are handled separately below.  */
10727  if (mode == (TARGET_64BIT ? TImode : DImode))
10728    return false;
10729  if (FLOAT_MODE_P (mode))
10730    {
10731      rtx second_test = NULL, bypass_test = NULL;
10732      rtx compare_op, compare_seq;
10733
10734      /* Shortcut: the following common codes never translate into carry flag compares.  */
10735      if (code == EQ || code == NE || code == UNEQ || code == LTGT
10736	  || code == ORDERED || code == UNORDERED)
10737	return false;
10738
10739      /* These comparisons require the zero flag; swap the operands so they won't.  */
10740      if ((code == GT || code == UNLE || code == LE || code == UNGT)
10741	  && !TARGET_IEEE_FP)
10742	{
10743	  rtx tmp = op0;
10744	  op0 = op1;
10745	  op1 = tmp;
10746	  code = swap_condition (code);
10747	}
10748
10749      /* Try to expand the comparison and verify that we end up with a carry
10750	 flag based comparison.  This fails to be true only when we decide to
10751	 expand the comparison using arithmetic, which is not a common scenario.  */
10752      start_sequence ();
10753      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10754					   &second_test, &bypass_test);
10755      compare_seq = get_insns ();
10756      end_sequence ();
10757
10758      if (second_test || bypass_test)
10759	return false;
10760      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10761	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10762        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10763      else
10764	code = GET_CODE (compare_op);
10765      if (code != LTU && code != GEU)
10766	return false;
10767      emit_insn (compare_seq);
10768      *pop = compare_op;
10769      return true;
10770    }
10771  if (!INTEGRAL_MODE_P (mode))
10772    return false;
10773  switch (code)
10774    {
10775    case LTU:
10776    case GEU:
10777      break;
10778
10779    /* Convert a==0 into (unsigned)a<1.  */
10780    case EQ:
10781    case NE:
10782      if (op1 != const0_rtx)
10783	return false;
10784      op1 = const1_rtx;
10785      code = (code == EQ ? LTU : GEU);
10786      break;
10787
10788    /* Convert a>b into b<a or a>=b-1.  */
10789    case GTU:
10790    case LEU:
10791      if (GET_CODE (op1) == CONST_INT)
10792	{
10793	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10794	  /* Bail out on overflow.  We could still swap the operands, but that
10795	     would force loading the constant into a register.  */
10796	  if (op1 == const0_rtx
10797	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10798	    return false;
10799	  code = (code == GTU ? GEU : LTU);
10800	}
10801      else
10802	{
10803	  rtx tmp = op1;
10804	  op1 = op0;
10805	  op0 = tmp;
10806	  code = (code == GTU ? LTU : GEU);
10807	}
10808      break;
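
    /* For instance (illustrative, not from the original comment): with a
       constant op1 of 5, "a > 5" (GTU) is rewritten as "a >= 6" (GEU), a
       condition that can be read directly from the carry flag after a
       compare against 6.  */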
10809
10810    /* Convert a>=0 into (unsigned)a<0x80000000.  */
10811    case LT:
10812    case GE:
10813      if (mode == DImode || op1 != const0_rtx)
10814	return false;
10815      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10816      code = (code == LT ? GEU : LTU);
10817      break;
10818    case LE:
10819    case GT:
10820      if (mode == DImode || op1 != constm1_rtx)
10821	return false;
10822      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10823      code = (code == LE ? GEU : LTU);
10824      break;
10825
10826    default:
10827      return false;
10828    }
10829  /* Swapping the operands may cause a constant to appear as the first operand.  */
10830  if (!nonimmediate_operand (op0, VOIDmode))
10831    {
10832      if (no_new_pseudos)
10833	return false;
10834      op0 = force_reg (mode, op0);
10835    }
10836  ix86_compare_op0 = op0;
10837  ix86_compare_op1 = op1;
10838  *pop = ix86_expand_compare (code, NULL, NULL);
10839  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10840  return true;
10841}
10842
10843int
10844ix86_expand_int_movcc (rtx operands[])
10845{
10846  enum rtx_code code = GET_CODE (operands[1]), compare_code;
10847  rtx compare_seq, compare_op;
10848  rtx second_test, bypass_test;
10849  enum machine_mode mode = GET_MODE (operands[0]);
10850  bool sign_bit_compare_p = false;;
10851
10852  start_sequence ();
10853  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10854  compare_seq = get_insns ();
10855  end_sequence ();
10856
10857  compare_code = GET_CODE (compare_op);
10858
10859  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10860      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10861    sign_bit_compare_p = true;
10862
10863  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10864     HImode insns, we'd be swallowed in word prefix ops.  */
10865
10866  if ((mode != HImode || TARGET_FAST_PREFIX)
10867      && (mode != (TARGET_64BIT ? TImode : DImode))
10868      && GET_CODE (operands[2]) == CONST_INT
10869      && GET_CODE (operands[3]) == CONST_INT)
10870    {
10871      rtx out = operands[0];
10872      HOST_WIDE_INT ct = INTVAL (operands[2]);
10873      HOST_WIDE_INT cf = INTVAL (operands[3]);
10874      HOST_WIDE_INT diff;
10875
10876      diff = ct - cf;
10877      /* Sign bit compares are better done using shifts than using
10878	 sbb.  */
10879      if (sign_bit_compare_p
10880	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10881					     ix86_compare_op1, &compare_op))
10882	{
10883	  /* Detect overlap between destination and compare sources.  */
10884	  rtx tmp = out;
10885
10886          if (!sign_bit_compare_p)
10887	    {
10888	      bool fpcmp = false;
10889
10890	      compare_code = GET_CODE (compare_op);
10891
10892	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10893		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10894		{
10895		  fpcmp = true;
10896		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
10897		}
10898
10899	      /* To simplify rest of code, restrict to the GEU case.  */
10900	      /* To simplify the rest of the code, restrict to the GEU case.  */
10901		{
10902		  HOST_WIDE_INT tmp = ct;
10903		  ct = cf;
10904		  cf = tmp;
10905		  compare_code = reverse_condition (compare_code);
10906		  code = reverse_condition (code);
10907		}
10908	      else
10909		{
10910		  if (fpcmp)
10911		    PUT_CODE (compare_op,
10912			      reverse_condition_maybe_unordered
10913			        (GET_CODE (compare_op)));
10914		  else
10915		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10916		}
10917	      diff = ct - cf;
10918
10919	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10920		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
10921		tmp = gen_reg_rtx (mode);
10922
10923	      if (mode == DImode)
10924		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10925	      else
10926		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10927	    }
10928	  else
10929	    {
10930	      if (code == GT || code == GE)
10931		code = reverse_condition (code);
10932	      else
10933		{
10934		  HOST_WIDE_INT tmp = ct;
10935		  ct = cf;
10936		  cf = tmp;
10937		  diff = ct - cf;
10938		}
10939	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10940				     ix86_compare_op1, VOIDmode, 0, -1);
10941	    }
10942
10943	  if (diff == 1)
10944	    {
10945	      /*
10946	       * cmpl op0,op1
10947	       * sbbl dest,dest
10948	       * [addl dest, ct]
10949	       *
10950	       * Size 5 - 8.
10951	       */
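	      /* A concrete instance (illustrative): for unsigned operands,
		 "dest = (a >= b) ? 1 : 0" has ct == 1, cf == 0 and diff == 1;
		 the compare sets CF iff a < b, sbb then leaves 0 when a >= b
		 and -1 otherwise, and the final add of ct maps that to 1/0
		 without any branch.  */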
10952	      if (ct)
10953		tmp = expand_simple_binop (mode, PLUS,
10954					   tmp, GEN_INT (ct),
10955					   copy_rtx (tmp), 1, OPTAB_DIRECT);
10956	    }
10957	  else if (cf == -1)
10958	    {
10959	      /*
10960	       * cmpl op0,op1
10961	       * sbbl dest,dest
10962	       * orl $ct, dest
10963	       *
10964	       * Size 8.
10965	       */
10966	      tmp = expand_simple_binop (mode, IOR,
10967					 tmp, GEN_INT (ct),
10968					 copy_rtx (tmp), 1, OPTAB_DIRECT);
10969	    }
10970	  else if (diff == -1 && ct)
10971	    {
10972	      /*
10973	       * cmpl op0,op1
10974	       * sbbl dest,dest
10975	       * notl dest
10976	       * [addl dest, cf]
10977	       *
10978	       * Size 8 - 11.
10979	       */
10980	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10981	      if (cf)
10982		tmp = expand_simple_binop (mode, PLUS,
10983					   copy_rtx (tmp), GEN_INT (cf),
10984					   copy_rtx (tmp), 1, OPTAB_DIRECT);
10985	    }
10986	  else
10987	    {
10988	      /*
10989	       * cmpl op0,op1
10990	       * sbbl dest,dest
10991	       * [notl dest]
10992	       * andl cf - ct, dest
10993	       * [addl dest, ct]
10994	       *
10995	       * Size 8 - 11.
10996	       */
10997
10998	      if (cf == 0)
10999		{
11000		  cf = ct;
11001		  ct = 0;
11002		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11003		}
11004
11005	      tmp = expand_simple_binop (mode, AND,
11006					 copy_rtx (tmp),
11007					 gen_int_mode (cf - ct, mode),
11008					 copy_rtx (tmp), 1, OPTAB_DIRECT);
11009	      if (ct)
11010		tmp = expand_simple_binop (mode, PLUS,
11011					   copy_rtx (tmp), GEN_INT (ct),
11012					   copy_rtx (tmp), 1, OPTAB_DIRECT);
11013	    }
11014
11015	  if (!rtx_equal_p (tmp, out))
11016	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11017
11018	  return 1; /* DONE */
11019	}
11020
11021      if (diff < 0)
11022	{
11023	  HOST_WIDE_INT tmp;
11024	  tmp = ct, ct = cf, cf = tmp;
11025	  diff = -diff;
11026	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11027	    {
11028	      /* We may be reversing an unordered compare to a normal compare,
11029		 which is not valid in general (we may convert a non-trapping
11030		 condition to a trapping one); however, on i386 we currently
11031		 emit all comparisons unordered.  */
11032	      compare_code = reverse_condition_maybe_unordered (compare_code);
11033	      code = reverse_condition_maybe_unordered (code);
11034	    }
11035	  else
11036	    {
11037	      compare_code = reverse_condition (compare_code);
11038	      code = reverse_condition (code);
11039	    }
11040	}
11041
11042      compare_code = UNKNOWN;
11043      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11044	  && GET_CODE (ix86_compare_op1) == CONST_INT)
11045	{
11046	  if (ix86_compare_op1 == const0_rtx
11047	      && (code == LT || code == GE))
11048	    compare_code = code;
11049	  else if (ix86_compare_op1 == constm1_rtx)
11050	    {
11051	      if (code == LE)
11052		compare_code = LT;
11053	      else if (code == GT)
11054		compare_code = GE;
11055	    }
11056	}
11057
11058      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
11059      if (compare_code != UNKNOWN
11060	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11061	  && (cf == -1 || ct == -1))
11062	{
11063	  /* If lea code below could be used, only optimize
11064	     if it results in a 2 insn sequence.  */
11065
11066	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11067		 || diff == 3 || diff == 5 || diff == 9)
11068	      || (compare_code == LT && ct == -1)
11069	      || (compare_code == GE && cf == -1))
11070	    {
11071	      /*
11072	       * notl op1	(if necessary)
11073	       * sarl $31, op1
11074	       * orl cf, op1
11075	       */
11076	      if (ct != -1)
11077		{
11078		  cf = ct;
11079		  ct = -1;
11080		  code = reverse_condition (code);
11081		}
11082
11083	      out = emit_store_flag (out, code, ix86_compare_op0,
11084				     ix86_compare_op1, VOIDmode, 0, -1);
11085
11086	      out = expand_simple_binop (mode, IOR,
11087					 out, GEN_INT (cf),
11088					 out, 1, OPTAB_DIRECT);
11089	      if (out != operands[0])
11090		emit_move_insn (operands[0], out);
11091
11092	      return 1; /* DONE */
11093	    }
11094	}
11095
11096
11097      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11098	   || diff == 3 || diff == 5 || diff == 9)
11099	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11100	  && (mode != DImode
11101	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11102	{
11103	  /*
11104	   * xorl dest,dest
11105	   * cmpl op1,op2
11106	   * setcc dest
11107	   * lea cf(dest*(ct-cf)),dest
11108	   *
11109	   * Size 14.
11110	   *
11111	   * This also catches the degenerate setcc-only case.
11112	   */
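	  /* A concrete instance (illustrative): for ct == 5, cf == 2 the
	     setcc result in {0, 1} is scaled by diff == 3 and offset by cf,
	     e.g. "leal 2(%eax,%eax,2), %eax", selecting 2 or 5 with no
	     branch and no cmov.  */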
11113
11114	  rtx tmp;
11115	  int nops;
11116
11117	  out = emit_store_flag (out, code, ix86_compare_op0,
11118				 ix86_compare_op1, VOIDmode, 0, 1);
11119
11120	  nops = 0;
11121	  /* On x86_64 the lea instruction operates on Pmode, so we need
11122	     to do the arithmetic in the proper mode to match.  */
11123	  if (diff == 1)
11124	    tmp = copy_rtx (out);
11125	  else
11126	    {
11127	      rtx out1;
11128	      out1 = copy_rtx (out);
11129	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11130	      nops++;
11131	      if (diff & 1)
11132		{
11133		  tmp = gen_rtx_PLUS (mode, tmp, out1);
11134		  nops++;
11135		}
11136	    }
11137	  if (cf != 0)
11138	    {
11139	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11140	      nops++;
11141	    }
11142	  if (!rtx_equal_p (tmp, out))
11143	    {
11144	      if (nops == 1)
11145		out = force_operand (tmp, copy_rtx (out));
11146	      else
11147		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11148	    }
11149	  if (!rtx_equal_p (out, operands[0]))
11150	    emit_move_insn (operands[0], copy_rtx (out));
11151
11152	  return 1; /* DONE */
11153	}
11154
11155      /*
11156       * General case:			Jumpful:
11157       *   xorl dest,dest		cmpl op1, op2
11158       *   cmpl op1, op2		movl ct, dest
11159       *   setcc dest			jcc 1f
11160       *   decl dest			movl cf, dest
11161       *   andl (cf-ct),dest		1:
11162       *   addl ct,dest
11163       *
11164       * Size 20.			Size 14.
11165       *
11166       * This is reasonably steep, but branch mispredict costs are
11167       * high on modern cpus, so consider failing only if optimizing
11168       * for space.
11169       */
11170
11171      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11172	  && BRANCH_COST >= 2)
11173	{
11174	  if (cf == 0)
11175	    {
11176	      cf = ct;
11177	      ct = 0;
11178	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11179		/* We may be reversing an unordered compare to a normal compare,
11180		   which is not valid in general (we may convert a non-trapping
11181		   condition to a trapping one); however, on i386 we currently
11182		   emit all comparisons unordered.  */
11183		code = reverse_condition_maybe_unordered (code);
11184	      else
11185		{
11186		  code = reverse_condition (code);
11187		  if (compare_code != UNKNOWN)
11188		    compare_code = reverse_condition (compare_code);
11189		}
11190	    }
11191
11192	  if (compare_code != UNKNOWN)
11193	    {
11194	      /* notl op1	(if needed)
11195		 sarl $31, op1
11196		 andl (cf-ct), op1
11197		 addl ct, op1
11198
11199		 For x < 0 (resp. x <= -1) there will be no notl,
11200		 so if possible swap the constants to get rid of the
11201		 complement.
11202		 True/false will be -1/0 while code below (store flag
11203		 followed by decrement) is 0/-1, so the constants need
11204		 to be exchanged once more.  */
11205
11206	      if (compare_code == GE || !cf)
11207		{
11208		  code = reverse_condition (code);
11209		  compare_code = LT;
11210		}
11211	      else
11212		{
11213		  HOST_WIDE_INT tmp = cf;
11214		  cf = ct;
11215		  ct = tmp;
11216		}
11217
11218	      out = emit_store_flag (out, code, ix86_compare_op0,
11219				     ix86_compare_op1, VOIDmode, 0, -1);
11220	    }
11221	  else
11222	    {
11223	      out = emit_store_flag (out, code, ix86_compare_op0,
11224				     ix86_compare_op1, VOIDmode, 0, 1);
11225
11226	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11227					 copy_rtx (out), 1, OPTAB_DIRECT);
11228	    }
11229
11230	  out = expand_simple_binop (mode, AND, copy_rtx (out),
11231				     gen_int_mode (cf - ct, mode),
11232				     copy_rtx (out), 1, OPTAB_DIRECT);
11233	  if (ct)
11234	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11235				       copy_rtx (out), 1, OPTAB_DIRECT);
11236	  if (!rtx_equal_p (out, operands[0]))
11237	    emit_move_insn (operands[0], copy_rtx (out));
11238
11239	  return 1; /* DONE */
11240	}
11241    }
11242
11243  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11244    {
11245      /* Try a few things more with specific constants and a variable.  */
11246
11247      optab op;
11248      rtx var, orig_out, out, tmp;
11249
11250      if (BRANCH_COST <= 2)
11251	return 0; /* FAIL */
11252
11253      /* If one of the two operands is an interesting constant, recurse to load
11254	 a 0/-1 mask from the condition, then combine the variable with it using AND or IOR.  */
11255
11256      if (GET_CODE (operands[2]) == CONST_INT)
11257	{
11258	  var = operands[3];
11259	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11260	    operands[3] = constm1_rtx, op = and_optab;
11261	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11262	    operands[3] = const0_rtx, op = ior_optab;
11263	  else
11264	    return 0; /* FAIL */
11265	}
11266      else if (GET_CODE (operands[3]) == CONST_INT)
11267	{
11268	  var = operands[2];
11269	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11270	    operands[2] = constm1_rtx, op = and_optab;
11271	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11272	    operands[2] = const0_rtx, op = ior_optab;
11273	  else
11274	    return 0; /* FAIL */
11275	}
11276      else
11277        return 0; /* FAIL */
11278
11279      orig_out = operands[0];
11280      tmp = gen_reg_rtx (mode);
11281      operands[0] = tmp;
11282
11283      /* Recurse to get the constant loaded.  */
11284      if (ix86_expand_int_movcc (operands) == 0)
11285        return 0; /* FAIL */
11286
11287      /* Mask in the interesting variable.  */
11288      out = expand_binop (mode, op, var, tmp, orig_out, 0,
11289			  OPTAB_WIDEN);
11290      if (!rtx_equal_p (out, orig_out))
11291	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11292
11293      return 1; /* DONE */
11294    }
11295
11296  /*
11297   * For comparison with above,
11298   *
11299   * movl cf,dest
11300   * movl ct,tmp
11301   * cmpl op1,op2
11302   * cmovcc tmp,dest
11303   *
11304   * Size 15.
11305   */
11306
11307  if (! nonimmediate_operand (operands[2], mode))
11308    operands[2] = force_reg (mode, operands[2]);
11309  if (! nonimmediate_operand (operands[3], mode))
11310    operands[3] = force_reg (mode, operands[3]);
11311
11312  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11313    {
11314      rtx tmp = gen_reg_rtx (mode);
11315      emit_move_insn (tmp, operands[3]);
11316      operands[3] = tmp;
11317    }
11318  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11319    {
11320      rtx tmp = gen_reg_rtx (mode);
11321      emit_move_insn (tmp, operands[2]);
11322      operands[2] = tmp;
11323    }
11324
11325  if (! register_operand (operands[2], VOIDmode)
11326      && (mode == QImode
11327          || ! register_operand (operands[3], VOIDmode)))
11328    operands[2] = force_reg (mode, operands[2]);
11329
11330  if (mode == QImode
11331      && ! register_operand (operands[3], VOIDmode))
11332    operands[3] = force_reg (mode, operands[3]);
11333
11334  emit_insn (compare_seq);
11335  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11336			  gen_rtx_IF_THEN_ELSE (mode,
11337						compare_op, operands[2],
11338						operands[3])));
11339  if (bypass_test)
11340    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11341			    gen_rtx_IF_THEN_ELSE (mode,
11342				  bypass_test,
11343				  copy_rtx (operands[3]),
11344				  copy_rtx (operands[0]))));
11345  if (second_test)
11346    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11347			    gen_rtx_IF_THEN_ELSE (mode,
11348				  second_test,
11349				  copy_rtx (operands[2]),
11350				  copy_rtx (operands[0]))));
11351
11352  return 1; /* DONE */
11353}
11354
11355/* Swap, force into registers, or otherwise massage the two operands
11356   to an sse comparison with a mask result.  Thus we differ a bit from
11357   ix86_prepare_fp_compare_args which expects to produce a flags result.
11358
11359   The DEST operand exists to help determine whether to commute commutative
11360   operators.  The POP0/POP1 operands are updated in place.  The new
11361   comparison code is returned, or UNKNOWN if not implementable.  */
11362
11363static enum rtx_code
11364ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11365				  rtx *pop0, rtx *pop1)
11366{
11367  rtx tmp;
11368
11369  switch (code)
11370    {
11371    case LTGT:
11372    case UNEQ:
11373      /* We have no LTGT as an operator.  We could implement it with
11374	 NE & ORDERED, but this requires an extra temporary.  It's
11375	 not clear that it's worth it.  */
11376      return UNKNOWN;
11377
11378    case LT:
11379    case LE:
11380    case UNGT:
11381    case UNGE:
11382      /* These are supported directly.  */
11383      break;
11384
11385    case EQ:
11386    case NE:
11387    case UNORDERED:
11388    case ORDERED:
11389      /* For commutative operators, try to canonicalize the destination
11390	 operand to be first in the comparison - this helps reload to
11391	 avoid extra moves.  */
11392      if (!dest || !rtx_equal_p (dest, *pop1))
11393	break;
11394      /* FALLTHRU */
11395
11396    case GE:
11397    case GT:
11398    case UNLE:
11399    case UNLT:
11400      /* These are not supported directly.  Swap the comparison operands
11401	 to transform into something that is supported.  */
11402      tmp = *pop0;
11403      *pop0 = *pop1;
11404      *pop1 = tmp;
11405      code = swap_condition (code);
11406      break;
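
      /* For example (illustrative): GE (a, b) becomes LE (b, a), which maps
	 directly onto the cmple{s,p}{s,d} forms of the SSE compare
	 instructions.  */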
11407
11408    default:
11409      gcc_unreachable ();
11410    }
11411
11412  return code;
11413}
11414
11415/* Detect conditional moves that exactly match min/max operational
11416   semantics.  Note that this is IEEE safe, as long as we don't
11417   interchange the operands.
11418
11419   Returns FALSE if this conditional move doesn't match a MIN/MAX,
11420   and TRUE if the operation is successful and instructions are emitted.  */
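
/* A sketch of a matching case (added note): for "dest = (a < b) ? a : b"
   with cmp_op0 == if_true == a and cmp_op1 == if_false == b, IS_MIN is
   detected; with both flag_finite_math_only and flag_unsafe_math_optimizations
   set this becomes a plain SMIN (e.g. minss/minps), otherwise the
   UNSPEC_IEEE_MIN path is used so the operand order, and hence the
   NaN/signed-zero behaviour, is preserved.  */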
11421
11422static bool
11423ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11424			   rtx cmp_op1, rtx if_true, rtx if_false)
11425{
11426  enum machine_mode mode;
11427  bool is_min;
11428  rtx tmp;
11429
11430  if (code == LT)
11431    ;
11432  else if (code == UNGE)
11433    {
11434      tmp = if_true;
11435      if_true = if_false;
11436      if_false = tmp;
11437    }
11438  else
11439    return false;
11440
11441  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11442    is_min = true;
11443  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11444    is_min = false;
11445  else
11446    return false;
11447
11448  mode = GET_MODE (dest);
11449
11450  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11451     but MODE may be a vector mode and thus not appropriate.  */
11452  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11453    {
11454      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11455      rtvec v;
11456
11457      if_true = force_reg (mode, if_true);
11458      v = gen_rtvec (2, if_true, if_false);
11459      tmp = gen_rtx_UNSPEC (mode, v, u);
11460    }
11461  else
11462    {
11463      code = is_min ? SMIN : SMAX;
11464      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11465    }
11466
11467  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11468  return true;
11469}
11470
11471/* Expand an sse vector comparison.  Return the register with the result.  */
11472
11473static rtx
11474ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11475		     rtx op_true, rtx op_false)
11476{
11477  enum machine_mode mode = GET_MODE (dest);
11478  rtx x;
11479
11480  cmp_op0 = force_reg (mode, cmp_op0);
11481  if (!nonimmediate_operand (cmp_op1, mode))
11482    cmp_op1 = force_reg (mode, cmp_op1);
11483
11484  if (optimize
11485      || reg_overlap_mentioned_p (dest, op_true)
11486      || reg_overlap_mentioned_p (dest, op_false))
11487    dest = gen_reg_rtx (mode);
11488
11489  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11490  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11491
11492  return dest;
11493}
11494
11495/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11496   operations.  This is used for both scalar and vector conditional moves.  */
11497
11498static void
11499ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11500{
11501  enum machine_mode mode = GET_MODE (dest);
11502  rtx t2, t3, x;
11503
11504  if (op_false == CONST0_RTX (mode))
11505    {
11506      op_true = force_reg (mode, op_true);
11507      x = gen_rtx_AND (mode, cmp, op_true);
11508      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11509    }
11510  else if (op_true == CONST0_RTX (mode))
11511    {
11512      op_false = force_reg (mode, op_false);
11513      x = gen_rtx_NOT (mode, cmp);
11514      x = gen_rtx_AND (mode, x, op_false);
11515      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11516    }
11517  else
11518    {
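      /* General case: build the classic mask blend
	 dest = (cmp & op_true) | (~cmp & op_false)
	 using two ANDs and an IOR on the comparison mask.  */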
11519      op_true = force_reg (mode, op_true);
11520      op_false = force_reg (mode, op_false);
11521
11522      t2 = gen_reg_rtx (mode);
11523      if (optimize)
11524	t3 = gen_reg_rtx (mode);
11525      else
11526	t3 = dest;
11527
11528      x = gen_rtx_AND (mode, op_true, cmp);
11529      emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11530
11531      x = gen_rtx_NOT (mode, cmp);
11532      x = gen_rtx_AND (mode, x, op_false);
11533      emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11534
11535      x = gen_rtx_IOR (mode, t3, t2);
11536      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11537    }
11538}
11539
11540/* Expand a floating-point conditional move.  Return true if successful.  */
11541
11542int
11543ix86_expand_fp_movcc (rtx operands[])
11544{
11545  enum machine_mode mode = GET_MODE (operands[0]);
11546  enum rtx_code code = GET_CODE (operands[1]);
11547  rtx tmp, compare_op, second_test, bypass_test;
11548
11549  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11550    {
11551      enum machine_mode cmode;
11552
11553      /* Since we have no cmove for SSE registers, don't force bad register
11554	 allocation just to gain access to it.  Deny movcc when the
11555	 comparison mode doesn't match the move mode.  */
11556      cmode = GET_MODE (ix86_compare_op0);
11557      if (cmode == VOIDmode)
11558	cmode = GET_MODE (ix86_compare_op1);
11559      if (cmode != mode)
11560	return 0;
11561
11562      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11563					       &ix86_compare_op0,
11564					       &ix86_compare_op1);
11565      if (code == UNKNOWN)
11566	return 0;
11567
11568      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11569				     ix86_compare_op1, operands[2],
11570				     operands[3]))
11571	return 1;
11572
11573      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11574				 ix86_compare_op1, operands[2], operands[3]);
11575      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11576      return 1;
11577    }
11578
11579  /* The floating point conditional move instructions don't directly
11580     support conditions resulting from a signed integer comparison.  */
11581
11582  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11583
11587  if (!fcmov_comparison_operator (compare_op, VOIDmode))
11588    {
11589      gcc_assert (!second_test && !bypass_test);
11590      tmp = gen_reg_rtx (QImode);
11591      ix86_expand_setcc (code, tmp);
11592      code = NE;
11593      ix86_compare_op0 = tmp;
11594      ix86_compare_op1 = const0_rtx;
11595      compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
11596    }
11597  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11598    {
11599      tmp = gen_reg_rtx (mode);
11600      emit_move_insn (tmp, operands[3]);
11601      operands[3] = tmp;
11602    }
11603  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11604    {
11605      tmp = gen_reg_rtx (mode);
11606      emit_move_insn (tmp, operands[2]);
11607      operands[2] = tmp;
11608    }
11609
11610  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11611			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
11612						operands[2], operands[3])));
11613  if (bypass_test)
11614    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11615			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11616						  operands[3], operands[0])));
11617  if (second_test)
11618    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11619			    gen_rtx_IF_THEN_ELSE (mode, second_test,
11620						  operands[2], operands[0])));
11621
11622  return 1;
11623}
11624
11625/* Expand a floating-point vector conditional move; a vcond operation
11626   rather than a movcc operation.  */
11627
11628bool
11629ix86_expand_fp_vcond (rtx operands[])
11630{
11631  enum rtx_code code = GET_CODE (operands[3]);
11632  rtx cmp;
11633
11634  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11635					   &operands[4], &operands[5]);
11636  if (code == UNKNOWN)
11637    return false;
11638
11639  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11640				 operands[5], operands[1], operands[2]))
11641    return true;
11642
11643  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11644			     operands[1], operands[2]);
11645  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11646  return true;
11647}
11648
11649/* Expand a signed integral vector conditional move.  */
11650
11651bool
11652ix86_expand_int_vcond (rtx operands[])
11653{
11654  enum machine_mode mode = GET_MODE (operands[0]);
11655  enum rtx_code code = GET_CODE (operands[3]);
11656  bool negate = false;
11657  rtx x, cop0, cop1;
11658
11659  cop0 = operands[4];
11660  cop1 = operands[5];
11661
11662  /* Canonicalize the comparison to EQ, GT, GTU.  */
11663  switch (code)
11664    {
11665    case EQ:
11666    case GT:
11667    case GTU:
11668      break;
11669
11670    case NE:
11671    case LE:
11672    case LEU:
11673      code = reverse_condition (code);
11674      negate = true;
11675      break;
11676
11677    case GE:
11678    case GEU:
11679      code = reverse_condition (code);
11680      negate = true;
11681      /* FALLTHRU */
11682
11683    case LT:
11684    case LTU:
11685      code = swap_condition (code);
11686      x = cop0, cop0 = cop1, cop1 = x;
11687      break;
11688
11689    default:
11690      gcc_unreachable ();
11691    }
11692
11693  /* Unsigned parallel compare is not supported by the hardware.  Play some
11694     tricks to turn this into a signed comparison against 0.  */
11695  if (code == GTU)
11696    {
11697      cop0 = force_reg (mode, cop0);
11698
11699      switch (mode)
11700	{
11701	case V4SImode:
11702	  {
11703	    rtx t1, t2, mask;
11704
11705	    /* Perform a parallel modulo subtraction.  */
11706	    t1 = gen_reg_rtx (mode);
11707	    emit_insn (gen_subv4si3 (t1, cop0, cop1));
11708
11709	    /* Extract the original sign bit of op0.  */
11710	    mask = GEN_INT (-0x80000000);
11711	    mask = gen_rtx_CONST_VECTOR (mode,
11712			gen_rtvec (4, mask, mask, mask, mask));
11713	    mask = force_reg (mode, mask);
11714	    t2 = gen_reg_rtx (mode);
11715	    emit_insn (gen_andv4si3 (t2, cop0, mask));
11716
11717	    /* XOR it back into the result of the subtraction.  This results
11718	       in the sign bit set iff we saw unsigned underflow.  */
11719	    x = gen_reg_rtx (mode);
11720	    emit_insn (gen_xorv4si3 (x, t1, t2));
11721
11722	    code = GT;
11723	  }
11724	  break;
11725
11726	case V16QImode:
11727	case V8HImode:
11728	  /* Perform a parallel unsigned saturating subtraction.  */
11729	  x = gen_reg_rtx (mode);
11730	  emit_insn (gen_rtx_SET (VOIDmode, x,
11731				  gen_rtx_US_MINUS (mode, cop0, cop1)));
11732
11733	  code = EQ;
11734	  negate = !negate;
11735	  break;
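
	  /* Rationale (added note): psubus{b,w} computes MAX (a - b, 0),
	     which is zero exactly when a <= b unsigned; comparing the result
	     against zero with pcmpeq therefore yields the LEU mask, and
	     flipping NEGATE swaps the selected operands so the overall
	     effect is the original GTU selection.  */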
11736
11737	default:
11738	  gcc_unreachable ();
11739	}
11740
11741      cop0 = x;
11742      cop1 = CONST0_RTX (mode);
11743    }
11744
11745  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11746			   operands[1+negate], operands[2-negate]);
11747
11748  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11749			 operands[2-negate]);
11750  return true;
11751}
11752
11753/* Expand conditional increment or decrement using adc/sbb instructions.
11754   The default case using setcc followed by a conditional move can be
11755   done by generic code.  */
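/* A worked example (illustrative): for unsigned a and b,
   "x = x + (a < b)" reaches this function with operands[3] == const1_rtx;
   the LTU carry compare survives unchanged and one of the add<mode>3_carry
   patterns is emitted, i.e. roughly "cmpl b, a ; adcl $0, x".  */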
11756int
11757ix86_expand_int_addcc (rtx operands[])
11758{
11759  enum rtx_code code = GET_CODE (operands[1]);
11760  rtx compare_op;
11761  rtx val = const0_rtx;
11762  bool fpcmp = false;
11763  enum machine_mode mode = GET_MODE (operands[0]);
11764
11765  if (operands[3] != const1_rtx
11766      && operands[3] != constm1_rtx)
11767    return 0;
11768  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11769				       ix86_compare_op1, &compare_op))
11770     return 0;
11771  code = GET_CODE (compare_op);
11772
11773  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11774      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11775    {
11776      fpcmp = true;
11777      code = ix86_fp_compare_code_to_integer (code);
11778    }
11779
11780  if (code != LTU)
11781    {
11782      val = constm1_rtx;
11783      if (fpcmp)
11784	PUT_CODE (compare_op,
11785		  reverse_condition_maybe_unordered
11786		    (GET_CODE (compare_op)));
11787      else
11788	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11789    }
11790  PUT_MODE (compare_op, mode);
11791
11792  /* Construct either adc or sbb insn.  */
11793  if ((code == LTU) == (operands[3] == constm1_rtx))
11794    {
11795      switch (GET_MODE (operands[0]))
11796	{
11797	  case QImode:
11798            emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11799	    break;
11800	  case HImode:
11801            emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11802	    break;
11803	  case SImode:
11804            emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11805	    break;
11806	  case DImode:
11807            emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11808	    break;
11809	  default:
11810	    gcc_unreachable ();
11811	}
11812    }
11813  else
11814    {
11815      switch (GET_MODE (operands[0]))
11816	{
11817	  case QImode:
11818            emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11819	    break;
11820	  case HImode:
11821            emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11822	    break;
11823	  case SImode:
11824            emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11825	    break;
11826	  case DImode:
11827            emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11828	    break;
11829	  default:
11830	    gcc_unreachable ();
11831	}
11832    }
11833  return 1; /* DONE */
11834}
11835
11836
11837/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
11838   works for floating-point parameters and non-offsettable memories.
11839   For pushes, it returns just stack offsets; the values will be saved
11840   in the right order.  At most three parts are generated.  */
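/* For example, on a 32-bit target a DFmode operand yields two SImode
   parts and an XFmode operand three; on a 64-bit target XFmode and
   TFmode operands yield a DImode part plus an SImode or DImode upper
   part respectively.  */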
11841
11842static int
11843ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11844{
11845  int size;
11846
11847  if (!TARGET_64BIT)
11848    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11849  else
11850    size = (GET_MODE_SIZE (mode) + 4) / 8;
11851
11852  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11853  gcc_assert (size >= 2 && size <= 3);
11854
11855  /* Optimize constant pool references into immediates.  This is used by fp
11856     moves, which force all constants to memory to allow combining.  */
11857  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11858    {
11859      rtx tmp = maybe_get_pool_constant (operand);
11860      if (tmp)
11861	operand = tmp;
11862    }
11863
11864  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11865    {
11866      /* The only non-offsettable memories we handle are pushes.  */
11867      int ok = push_operand (operand, VOIDmode);
11868
11869      gcc_assert (ok);
11870
11871      operand = copy_rtx (operand);
11872      PUT_MODE (operand, Pmode);
11873      parts[0] = parts[1] = parts[2] = operand;
11874      return size;
11875    }
11876
11877  if (GET_CODE (operand) == CONST_VECTOR)
11878    {
11879      enum machine_mode imode = int_mode_for_mode (mode);
11880      /* Caution: if we looked through a constant pool memory above,
11881	 the operand may actually have a different mode now.  That's
11882	 ok, since we want to pun this all the way back to an integer.  */
11883      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11884      gcc_assert (operand != NULL);
11885      mode = imode;
11886    }
11887
11888  if (!TARGET_64BIT)
11889    {
11890      if (mode == DImode)
11891	split_di (&operand, 1, &parts[0], &parts[1]);
11892      else
11893	{
11894	  if (REG_P (operand))
11895	    {
11896	      gcc_assert (reload_completed);
11897	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11898	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11899	      if (size == 3)
11900		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11901	    }
11902	  else if (offsettable_memref_p (operand))
11903	    {
11904	      operand = adjust_address (operand, SImode, 0);
11905	      parts[0] = operand;
11906	      parts[1] = adjust_address (operand, SImode, 4);
11907	      if (size == 3)
11908		parts[2] = adjust_address (operand, SImode, 8);
11909	    }
11910	  else if (GET_CODE (operand) == CONST_DOUBLE)
11911	    {
11912	      REAL_VALUE_TYPE r;
11913	      long l[4];
11914
11915	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11916	      switch (mode)
11917		{
11918		case XFmode:
11919		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11920		  parts[2] = gen_int_mode (l[2], SImode);
11921		  break;
11922		case DFmode:
11923		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11924		  break;
11925		default:
11926		  gcc_unreachable ();
11927		}
11928	      parts[1] = gen_int_mode (l[1], SImode);
11929	      parts[0] = gen_int_mode (l[0], SImode);
11930	    }
11931	  else
11932	    gcc_unreachable ();
11933	}
11934    }
11935  else
11936    {
11937      if (mode == TImode)
11938	split_ti (&operand, 1, &parts[0], &parts[1]);
11939      if (mode == XFmode || mode == TFmode)
11940	{
11941	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11942	  if (REG_P (operand))
11943	    {
11944	      gcc_assert (reload_completed);
11945	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11946	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11947	    }
11948	  else if (offsettable_memref_p (operand))
11949	    {
11950	      operand = adjust_address (operand, DImode, 0);
11951	      parts[0] = operand;
11952	      parts[1] = adjust_address (operand, upper_mode, 8);
11953	    }
11954	  else if (GET_CODE (operand) == CONST_DOUBLE)
11955	    {
11956	      REAL_VALUE_TYPE r;
11957	      long l[4];
11958
11959	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11960	      real_to_target (l, &r, mode);
11961
11962	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
11963	      if (HOST_BITS_PER_WIDE_INT >= 64)
11964	        parts[0]
11965		  = gen_int_mode
11966		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11967		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11968		       DImode);
11969	      else
11970	        parts[0] = immed_double_const (l[0], l[1], DImode);
11971
11972	      if (upper_mode == SImode)
11973	        parts[1] = gen_int_mode (l[2], SImode);
11974	      else if (HOST_BITS_PER_WIDE_INT >= 64)
11975	        parts[1]
11976		  = gen_int_mode
11977		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11978		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11979		       DImode);
11980	      else
11981	        parts[1] = immed_double_const (l[2], l[3], DImode);
11982	    }
11983	  else
11984	    gcc_unreachable ();
11985	}
11986    }
11987
11988  return size;
11989}
11990
11991/* Emit insns to perform a move or push of DI, DF, and XF values.
11992   All required insns are emitted here; the caller need not emit any
11993   further moves.  Operands 2-4 receive the destination parts in the
11994   correct order; operands 5-7 receive the source parts.  */
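/* For example, on a 32-bit target a DImode or DFmode move becomes two
   SImode moves and an XFmode move becomes three; the ordering logic
   below makes sure no source part or source address register is
   overwritten before it has been read.  */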
11995
11996void
11997ix86_split_long_move (rtx operands[])
11998{
11999  rtx part[2][3];
12000  int nparts;
12001  int push = 0;
12002  int collisions = 0;
12003  enum machine_mode mode = GET_MODE (operands[0]);
12004
12005  /* The DFmode expanders may ask us to move a double.
12006     For a 64-bit target this is a single move.  By hiding that fact
12007     here we simplify the i386.md splitters.  */
12008  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12009    {
12010      /* Optimize constant pool references into immediates.  This is used by
12011	 fp moves, which force all constants to memory to allow combining.  */
12012
12013      if (GET_CODE (operands[1]) == MEM
12014	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12015	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12016	operands[1] = get_pool_constant (XEXP (operands[1], 0));
12017      if (push_operand (operands[0], VOIDmode))
12018	{
12019	  operands[0] = copy_rtx (operands[0]);
12020	  PUT_MODE (operands[0], Pmode);
12021	}
12022      else
12023        operands[0] = gen_lowpart (DImode, operands[0]);
12024      operands[1] = gen_lowpart (DImode, operands[1]);
12025      emit_move_insn (operands[0], operands[1]);
12026      return;
12027    }
12028
12029  /* The only non-offsettable memory we handle is push.  */
12030  if (push_operand (operands[0], VOIDmode))
12031    push = 1;
12032  else
12033    gcc_assert (GET_CODE (operands[0]) != MEM
12034		|| offsettable_memref_p (operands[0]));
12035
12036  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12037  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12038
12039  /* When emitting a push, take care of source operands that live on the stack.  */
12040  if (push && GET_CODE (operands[1]) == MEM
12041      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12042    {
12043      if (nparts == 3)
12044	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12045				     XEXP (part[1][2], 0));
12046      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12047				   XEXP (part[1][1], 0));
12048    }
12049
12050  /* We need to do the copy in the right order in case an address register
12051     of the source overlaps the destination.  */
12052  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12053    {
12054      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12055	collisions++;
12056      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12057	collisions++;
12058      if (nparts == 3
12059	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12060	collisions++;
12061
12062      /* Collision in the middle part can be handled by reordering.  */
12063      if (collisions == 1 && nparts == 3
12064	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12065	{
12066	  rtx tmp;
12067	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12068	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12069	}
12070
12071      /* If there are more collisions, we can't handle them by reordering.
12072	 Do an lea into the last part and use only one colliding move.  */
12073      else if (collisions > 1)
12074	{
12075	  rtx base;
12076
12077	  collisions = 1;
12078
12079	  base = part[0][nparts - 1];
12080
12081	  /* Handle the case when the last part isn't valid for lea.
12082	     Happens in 64-bit mode storing the 12-byte XFmode.  */
12083	  if (GET_MODE (base) != Pmode)
12084	    base = gen_rtx_REG (Pmode, REGNO (base));
12085
12086	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12087	  part[1][0] = replace_equiv_address (part[1][0], base);
12088	  part[1][1] = replace_equiv_address (part[1][1],
12089				      plus_constant (base, UNITS_PER_WORD));
12090	  if (nparts == 3)
12091	    part[1][2] = replace_equiv_address (part[1][2],
12092				      plus_constant (base, 8));
12093	}
12094    }
12095
12096  if (push)
12097    {
12098      if (!TARGET_64BIT)
12099	{
12100	  if (nparts == 3)
12101	    {
12102	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12103                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12104	      emit_move_insn (part[0][2], part[1][2]);
12105	    }
12106	}
12107      else
12108	{
12109	  /* In 64-bit mode we don't have a 32-bit push available.  If this is a
12110	     register, that is OK - we will just use the larger counterpart.  We also
12111	     retype the memory - this comes from an attempt to avoid a REX prefix
12112	     when moving the second half of a TFmode value.  */
12113	  if (GET_MODE (part[1][1]) == SImode)
12114	    {
12115	      switch (GET_CODE (part[1][1]))
12116		{
12117		case MEM:
12118		  part[1][1] = adjust_address (part[1][1], DImode, 0);
12119		  break;
12120
12121		case REG:
12122		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12123		  break;
12124
12125		default:
12126		  gcc_unreachable ();
12127		}
12128
12129	      if (GET_MODE (part[1][0]) == SImode)
12130		part[1][0] = part[1][1];
12131	    }
12132	}
12133      emit_move_insn (part[0][1], part[1][1]);
12134      emit_move_insn (part[0][0], part[1][0]);
12135      return;
12136    }
12137
12138  /* Choose the correct order so we do not overwrite the source before it is copied.  */
12139  if ((REG_P (part[0][0])
12140       && REG_P (part[1][1])
12141       && (REGNO (part[0][0]) == REGNO (part[1][1])
12142	   || (nparts == 3
12143	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
12144      || (collisions > 0
12145	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12146    {
12147      if (nparts == 3)
12148	{
12149	  operands[2] = part[0][2];
12150	  operands[3] = part[0][1];
12151	  operands[4] = part[0][0];
12152	  operands[5] = part[1][2];
12153	  operands[6] = part[1][1];
12154	  operands[7] = part[1][0];
12155	}
12156      else
12157	{
12158	  operands[2] = part[0][1];
12159	  operands[3] = part[0][0];
12160	  operands[5] = part[1][1];
12161	  operands[6] = part[1][0];
12162	}
12163    }
12164  else
12165    {
12166      if (nparts == 3)
12167	{
12168	  operands[2] = part[0][0];
12169	  operands[3] = part[0][1];
12170	  operands[4] = part[0][2];
12171	  operands[5] = part[1][0];
12172	  operands[6] = part[1][1];
12173	  operands[7] = part[1][2];
12174	}
12175      else
12176	{
12177	  operands[2] = part[0][0];
12178	  operands[3] = part[0][1];
12179	  operands[5] = part[1][0];
12180	  operands[6] = part[1][1];
12181	}
12182    }
12183
12184  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
12185  if (optimize_size)
12186    {
12187      if (GET_CODE (operands[5]) == CONST_INT
12188	  && operands[5] != const0_rtx
12189	  && REG_P (operands[2]))
12190	{
12191	  if (GET_CODE (operands[6]) == CONST_INT
12192	      && INTVAL (operands[6]) == INTVAL (operands[5]))
12193	    operands[6] = operands[2];
12194
12195	  if (nparts == 3
12196	      && GET_CODE (operands[7]) == CONST_INT
12197	      && INTVAL (operands[7]) == INTVAL (operands[5]))
12198	    operands[7] = operands[2];
12199	}
12200
12201      if (nparts == 3
12202	  && GET_CODE (operands[6]) == CONST_INT
12203	  && operands[6] != const0_rtx
12204	  && REG_P (operands[3])
12205	  && GET_CODE (operands[7]) == CONST_INT
12206	  && INTVAL (operands[7]) == INTVAL (operands[6]))
12207	operands[7] = operands[3];
12208    }
12209
12210  emit_move_insn (operands[2], operands[5]);
12211  emit_move_insn (operands[3], operands[6]);
12212  if (nparts == 3)
12213    emit_move_insn (operands[4], operands[7]);
12214
12215  return;
12216}
12217
12218/* Helper function of ix86_split_ashl used to generate an SImode/DImode
12219   left shift by a constant, either using a single shift or
12220   a sequence of add instructions.  */
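/* For example, a constant shift left by 2 may be emitted as two add
   instructions (x << 1 is x + x) when two adds cost no more than one
   constant shift on the selected CPU; otherwise a single shift insn is
   used.  A shift by 1 is always turned into an add.  */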
12221
12222static void
12223ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12224{
12225  if (count == 1)
12226    {
12227      emit_insn ((mode == DImode
12228		  ? gen_addsi3
12229		  : gen_adddi3) (operand, operand, operand));
12230    }
12231  else if (!optimize_size
12232	   && count * ix86_cost->add <= ix86_cost->shift_const)
12233    {
12234      int i;
12235      for (i=0; i<count; i++)
12236	{
12237	  emit_insn ((mode == DImode
12238		      ? gen_addsi3
12239		      : gen_adddi3) (operand, operand, operand));
12240	}
12241    }
12242  else
12243    emit_insn ((mode == DImode
12244		? gen_ashlsi3
12245		: gen_ashldi3) (operand, operand, GEN_INT (count)));
12246}
12247
12248void
12249ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12250{
12251  rtx low[2], high[2];
12252  int count;
12253  const int single_width = mode == DImode ? 32 : 64;
12254
12255  if (GET_CODE (operands[2]) == CONST_INT)
12256    {
12257      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12258      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12259
12260      if (count >= single_width)
12261	{
12262	  emit_move_insn (high[0], low[1]);
12263	  emit_move_insn (low[0], const0_rtx);
12264
12265	  if (count > single_width)
12266	    ix86_expand_ashl_const (high[0], count - single_width, mode);
12267	}
12268      else
12269	{
12270	  if (!rtx_equal_p (operands[0], operands[1]))
12271	    emit_move_insn (operands[0], operands[1]);
12272	  emit_insn ((mode == DImode
12273		     ? gen_x86_shld_1
12274		     : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12275	  ix86_expand_ashl_const (low[0], count, mode);
12276	}
12277      return;
12278    }
12279
12280  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12281
12282  if (operands[1] == const1_rtx)
12283    {
12284      /* Assuming we've chosen QImode-capable registers, then 1 << N
12285	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
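      /* Concretely, for the DImode case: both halves are cleared, the
	 count is tested against the value 32 (bit 5), a setcc writes 1
	 into whichever half that bit selects, and then both halves are
	 shifted by the full count.  A 32-bit shift only uses the low 5
	 bits of the count, so the half holding the 1 ends up shifted by
	 count % 32 while the other half stays zero.  */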
12286      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12287	{
12288	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12289
12290	  ix86_expand_clear (low[0]);
12291	  ix86_expand_clear (high[0]);
12292	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12293
12294	  d = gen_lowpart (QImode, low[0]);
12295	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12296	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
12297	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
12298
12299	  d = gen_lowpart (QImode, high[0]);
12300	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12301	  s = gen_rtx_NE (QImode, flags, const0_rtx);
12302	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
12303	}
12304
12305      /* Otherwise, we can get the same results by manually performing
12306	 a bit extract operation on bit 5/6, and then performing the two
12307	 shifts.  The two methods of getting 0/1 into low/high are exactly
12308	 the same size.  Avoiding the shift in the bit extract case helps
12309	 pentium4 a bit; no one else seems to care much either way.  */
12310      else
12311	{
12312	  rtx x;
12313
12314	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12315	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12316	  else
12317	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12318	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12319
12320	  emit_insn ((mode == DImode
12321		      ? gen_lshrsi3
12322		      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12323	  emit_insn ((mode == DImode
12324		      ? gen_andsi3
12325		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12326	  emit_move_insn (low[0], high[0]);
12327	  emit_insn ((mode == DImode
12328		      ? gen_xorsi3
12329		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12330	}
12331
12332      emit_insn ((mode == DImode
12333		    ? gen_ashlsi3
12334		    : gen_ashldi3) (low[0], low[0], operands[2]));
12335      emit_insn ((mode == DImode
12336		    ? gen_ashlsi3
12337		    : gen_ashldi3) (high[0], high[0], operands[2]));
12338      return;
12339    }
12340
12341  if (operands[1] == constm1_rtx)
12342    {
12343      /* For -1 << N, we can avoid the shld instruction, because we
12344	 know that we're shifting 0...31/63 ones into a -1.  */
12345      emit_move_insn (low[0], constm1_rtx);
12346      if (optimize_size)
12347	emit_move_insn (high[0], low[0]);
12348      else
12349	emit_move_insn (high[0], constm1_rtx);
12350    }
12351  else
12352    {
12353      if (!rtx_equal_p (operands[0], operands[1]))
12354	emit_move_insn (operands[0], operands[1]);
12355
12356      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12357      emit_insn ((mode == DImode
12358		  ? gen_x86_shld_1
12359		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
12360    }
12361
12362  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12363
12364  if (TARGET_CMOVE && scratch)
12365    {
12366      ix86_expand_clear (scratch);
12367      emit_insn ((mode == DImode
12368		  ? gen_x86_shift_adj_1
12369		  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12370    }
12371  else
12372    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12373}
12374
12375void
12376ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12377{
12378  rtx low[2], high[2];
12379  int count;
12380  const int single_width = mode == DImode ? 32 : 64;
12381
12382  if (GET_CODE (operands[2]) == CONST_INT)
12383    {
12384      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12385      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12386
12387      if (count == single_width * 2 - 1)
12388	{
12389	  emit_move_insn (high[0], high[1]);
12390	  emit_insn ((mode == DImode
12391		      ? gen_ashrsi3
12392		      : gen_ashrdi3) (high[0], high[0],
12393				      GEN_INT (single_width - 1)));
12394	  emit_move_insn (low[0], high[0]);
12395
12396	}
12397      else if (count >= single_width)
12398	{
12399	  emit_move_insn (low[0], high[1]);
12400	  emit_move_insn (high[0], low[0]);
12401	  emit_insn ((mode == DImode
12402		      ? gen_ashrsi3
12403		      : gen_ashrdi3) (high[0], high[0],
12404				      GEN_INT (single_width - 1)));
12405	  if (count > single_width)
12406	    emit_insn ((mode == DImode
12407			? gen_ashrsi3
12408			: gen_ashrdi3) (low[0], low[0],
12409					GEN_INT (count - single_width)));
12410	}
12411      else
12412	{
12413	  if (!rtx_equal_p (operands[0], operands[1]))
12414	    emit_move_insn (operands[0], operands[1]);
12415	  emit_insn ((mode == DImode
12416		      ? gen_x86_shrd_1
12417		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12418	  emit_insn ((mode == DImode
12419		      ? gen_ashrsi3
12420		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12421	}
12422    }
12423  else
12424    {
12425      if (!rtx_equal_p (operands[0], operands[1]))
12426	emit_move_insn (operands[0], operands[1]);
12427
12428      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12429
12430      emit_insn ((mode == DImode
12431		  ? gen_x86_shrd_1
12432		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12433      emit_insn ((mode == DImode
12434		  ? gen_ashrsi3
12435		  : gen_ashrdi3)  (high[0], high[0], operands[2]));
12436
12437      if (TARGET_CMOVE && scratch)
12438	{
12439	  emit_move_insn (scratch, high[0]);
12440	  emit_insn ((mode == DImode
12441		      ? gen_ashrsi3
12442		      : gen_ashrdi3) (scratch, scratch,
12443				      GEN_INT (single_width - 1)));
12444	  emit_insn ((mode == DImode
12445		      ? gen_x86_shift_adj_1
12446		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12447					 scratch));
12448	}
12449      else
12450	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12451    }
12452}
12453
12454void
12455ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12456{
12457  rtx low[2], high[2];
12458  int count;
12459  const int single_width = mode == DImode ? 32 : 64;
12460
12461  if (GET_CODE (operands[2]) == CONST_INT)
12462    {
12463      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12464      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12465
12466      if (count >= single_width)
12467	{
12468	  emit_move_insn (low[0], high[1]);
12469	  ix86_expand_clear (high[0]);
12470
12471	  if (count > single_width)
12472	    emit_insn ((mode == DImode
12473			? gen_lshrsi3
12474			: gen_lshrdi3) (low[0], low[0],
12475					GEN_INT (count - single_width)));
12476	}
12477      else
12478	{
12479	  if (!rtx_equal_p (operands[0], operands[1]))
12480	    emit_move_insn (operands[0], operands[1]);
12481	  emit_insn ((mode == DImode
12482		      ? gen_x86_shrd_1
12483		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12484	  emit_insn ((mode == DImode
12485		      ? gen_lshrsi3
12486		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12487	}
12488    }
12489  else
12490    {
12491      if (!rtx_equal_p (operands[0], operands[1]))
12492	emit_move_insn (operands[0], operands[1]);
12493
12494      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12495
12496      emit_insn ((mode == DImode
12497		  ? gen_x86_shrd_1
12498		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12499      emit_insn ((mode == DImode
12500		  ? gen_lshrsi3
12501		  : gen_lshrdi3) (high[0], high[0], operands[2]));
12502
12503      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
12504      if (TARGET_CMOVE && scratch)
12505	{
12506	  ix86_expand_clear (scratch);
12507	  emit_insn ((mode == DImode
12508		      ? gen_x86_shift_adj_1
12509		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12510					       scratch));
12511	}
12512      else
12513	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12514    }
12515}
12516
12517/* Helper function for the string operations below.  Test whether VARIABLE
12518   has the VALUE bits clear (i.e. is suitably aligned); if so, jump to the returned label.  */
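/* Concretely, this emits the equivalent of
     tmp = VARIABLE & VALUE;  if (tmp == 0) goto label;
   and returns the label, so callers place the code handling the unaligned
   residue between this call and the matching emit_label.  */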
12519static rtx
12520ix86_expand_aligntest (rtx variable, int value)
12521{
12522  rtx label = gen_label_rtx ();
12523  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12524  if (GET_MODE (variable) == DImode)
12525    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12526  else
12527    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12528  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12529			   1, label);
12530  return label;
12531}
12532
12533/* Decrease COUNTREG by VALUE.  */
12534static void
12535ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12536{
12537  if (GET_MODE (countreg) == DImode)
12538    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12539  else
12540    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12541}
12542
12543/* Zero-extend EXP (possibly SImode) into a Pmode register.  */
12544rtx
12545ix86_zero_extend_to_Pmode (rtx exp)
12546{
12547  rtx r;
12548  if (GET_MODE (exp) == VOIDmode)
12549    return force_reg (Pmode, exp);
12550  if (GET_MODE (exp) == Pmode)
12551    return copy_to_mode_reg (Pmode, exp);
12552  r = gen_reg_rtx (Pmode);
12553  emit_insn (gen_zero_extendsidi2 (r, exp));
12554  return r;
12555}
12556
12557/* Expand string move (memcpy) operation.  Use i386 string operations when
12558   profitable.  ix86_expand_clrmem contains similar code.  */
12559int
12560ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12561{
12562  rtx srcreg, destreg, countreg, srcexp, destexp;
12563  enum machine_mode counter_mode;
12564  HOST_WIDE_INT align = 0;
12565  unsigned HOST_WIDE_INT count = 0;
12566
12567  if (GET_CODE (align_exp) == CONST_INT)
12568    align = INTVAL (align_exp);
12569
12570  /* Can't use any of this if the user has appropriated esi or edi.  */
12571  if (global_regs[4] || global_regs[5])
12572    return 0;
12573
12574  /* This simple hack avoids all inlining code and simplifies code below.  */
12575  if (!TARGET_ALIGN_STRINGOPS)
12576    align = 64;
12577
12578  if (GET_CODE (count_exp) == CONST_INT)
12579    {
12580      count = INTVAL (count_exp);
12581      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12582	return 0;
12583    }
12584
12585  /* Figure out proper mode for counter.  For 32bits it is always SImode,
12586     for 64bits use SImode when possible, otherwise DImode.
12587     Set count to number of bytes copied when known at compile time.  */
12588  if (!TARGET_64BIT
12589      || GET_MODE (count_exp) == SImode
12590      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12591    counter_mode = SImode;
12592  else
12593    counter_mode = DImode;
12594
12595  gcc_assert (counter_mode == SImode || counter_mode == DImode);
12596
12597  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12598  if (destreg != XEXP (dst, 0))
12599    dst = replace_equiv_address_nv (dst, destreg);
12600  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12601  if (srcreg != XEXP (src, 0))
12602    src = replace_equiv_address_nv (src, srcreg);
12603
12604  /* When optimizing for size, emit a simple rep ; movsb instruction for
12605     counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12606     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12607     The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12608     count / 4 + (count & 3); the other sequence is either 4 or 7 bytes,
12609     but we don't know whether the upper 24 (resp. 56) bits of %ecx will be
12610     known to be zero or not.  The rep; movsb sequence causes higher
12611     register pressure though, so take that into account.  */
12612
12613  if ((!optimize || optimize_size)
12614      && (count == 0
12615	  || ((count & 0x03)
12616	      && (!optimize_size
12617		  || count > 5 * 4
12618		  || (count & 3) + count / 4 > 6))))
12619    {
12620      emit_insn (gen_cld ());
12621      countreg = ix86_zero_extend_to_Pmode (count_exp);
12622      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12623      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12624      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12625			      destexp, srcexp));
12626    }
12627
12628  /* For constant aligned (or small unaligned) copies use rep movsl
12629     followed by code copying the rest.  For PentiumPro ensure 8 byte
12630     alignment to allow rep movsl acceleration.  */
12631
12632  else if (count != 0
12633	   && (align >= 8
12634	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12635	       || optimize_size || count < (unsigned int) 64))
12636    {
12637      unsigned HOST_WIDE_INT offset = 0;
12638      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12639      rtx srcmem, dstmem;
12640
12641      emit_insn (gen_cld ());
12642      if (count & ~(size - 1))
12643	{
12644	  if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12645	    {
12646	      enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12647
12648	      while (offset < (count & ~(size - 1)))
12649		{
12650		  srcmem = adjust_automodify_address_nv (src, movs_mode,
12651							 srcreg, offset);
12652		  dstmem = adjust_automodify_address_nv (dst, movs_mode,
12653							 destreg, offset);
12654		  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12655		  offset += size;
12656		}
12657	    }
12658	  else
12659	    {
12660	      countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12661				  & (TARGET_64BIT ? -1 : 0x3fffffff));
12662	      countreg = copy_to_mode_reg (counter_mode, countreg);
12663	      countreg = ix86_zero_extend_to_Pmode (countreg);
12664
12665	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
12666					GEN_INT (size == 4 ? 2 : 3));
12667	      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12668	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12669
12670	      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12671				      countreg, destexp, srcexp));
12672	      offset = count & ~(size - 1);
12673	    }
12674	}
12675      if (size == 8 && (count & 0x04))
12676	{
12677	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12678						 offset);
12679	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12680						 offset);
12681	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12682	  offset += 4;
12683	}
12684      if (count & 0x02)
12685	{
12686	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12687						 offset);
12688	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12689						 offset);
12690	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12691	  offset += 2;
12692	}
12693      if (count & 0x01)
12694	{
12695	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12696						 offset);
12697	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12698						 offset);
12699	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12700	}
12701    }
12702  /* The generic code based on the glibc implementation:
12703     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12704     allowing accelerated copying there)
12705     - copy the data using rep movsl
12706     - copy the rest.  */
12707  else
12708    {
12709      rtx countreg2;
12710      rtx label = NULL;
12711      rtx srcmem, dstmem;
12712      int desired_alignment = (TARGET_PENTIUMPRO
12713			       && (count == 0 || count >= (unsigned int) 260)
12714			       ? 8 : UNITS_PER_WORD);
12715      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
12716      dst = change_address (dst, BLKmode, destreg);
12717      src = change_address (src, BLKmode, srcreg);
12718
12719      /* If we don't know anything about the alignment, default to the
12720         library version, since it is usually equally fast and results in
12721         shorter code.
12722
12723	 Also emit a call when we know that the count is large and call overhead
12724	 will not be important.  */
12725      if (!TARGET_INLINE_ALL_STRINGOPS
12726	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12727	return 0;
12728
12729      if (TARGET_SINGLE_STRINGOP)
12730	emit_insn (gen_cld ());
12731
12732      countreg2 = gen_reg_rtx (Pmode);
12733      countreg = copy_to_mode_reg (counter_mode, count_exp);
12734
12735      /* We don't use loops to align the destination or to copy parts smaller
12736         than 4 bytes, because gcc is able to optimize such code better (in
12737         the case the destination or the count really is aligned, gcc is often
12738         able to predict the branches) and also it is friendlier to the
12739         hardware branch prediction.
12740
12741         Using loops is beneficial for the generic case, because we can
12742         handle small counts using the loops.  Many CPUs (such as the Athlon)
12743         have large REP prefix setup costs.
12744
12745         This is quite costly.  Maybe we can revisit this decision later or
12746         add some customizability to this code.  */
12747
12748      if (count == 0 && align < desired_alignment)
12749	{
12750	  label = gen_label_rtx ();
12751	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12752				   LEU, 0, counter_mode, 1, label);
12753	}
12754      if (align <= 1)
12755	{
12756	  rtx label = ix86_expand_aligntest (destreg, 1);
12757	  srcmem = change_address (src, QImode, srcreg);
12758	  dstmem = change_address (dst, QImode, destreg);
12759	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12760	  ix86_adjust_counter (countreg, 1);
12761	  emit_label (label);
12762	  LABEL_NUSES (label) = 1;
12763	}
12764      if (align <= 2)
12765	{
12766	  rtx label = ix86_expand_aligntest (destreg, 2);
12767	  srcmem = change_address (src, HImode, srcreg);
12768	  dstmem = change_address (dst, HImode, destreg);
12769	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12770	  ix86_adjust_counter (countreg, 2);
12771	  emit_label (label);
12772	  LABEL_NUSES (label) = 1;
12773	}
12774      if (align <= 4 && desired_alignment > 4)
12775	{
12776	  rtx label = ix86_expand_aligntest (destreg, 4);
12777	  srcmem = change_address (src, SImode, srcreg);
12778	  dstmem = change_address (dst, SImode, destreg);
12779	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12780	  ix86_adjust_counter (countreg, 4);
12781	  emit_label (label);
12782	  LABEL_NUSES (label) = 1;
12783	}
12784
12785      if (label && desired_alignment > 4 && !TARGET_64BIT)
12786	{
12787	  emit_label (label);
12788	  LABEL_NUSES (label) = 1;
12789	  label = NULL_RTX;
12790	}
12791      if (!TARGET_SINGLE_STRINGOP)
12792	emit_insn (gen_cld ());
12793      if (TARGET_64BIT)
12794	{
12795	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12796				  GEN_INT (3)));
12797	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12798	}
12799      else
12800	{
12801	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12802	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12803	}
12804      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12805      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12806      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12807			      countreg2, destexp, srcexp));
12808
12809      if (label)
12810	{
12811	  emit_label (label);
12812	  LABEL_NUSES (label) = 1;
12813	}
12814      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12815	{
12816	  srcmem = change_address (src, SImode, srcreg);
12817	  dstmem = change_address (dst, SImode, destreg);
12818	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12819	}
12820      if ((align <= 4 || count == 0) && TARGET_64BIT)
12821	{
12822	  rtx label = ix86_expand_aligntest (countreg, 4);
12823	  srcmem = change_address (src, SImode, srcreg);
12824	  dstmem = change_address (dst, SImode, destreg);
12825	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12826	  emit_label (label);
12827	  LABEL_NUSES (label) = 1;
12828	}
12829      if (align > 2 && count != 0 && (count & 2))
12830	{
12831	  srcmem = change_address (src, HImode, srcreg);
12832	  dstmem = change_address (dst, HImode, destreg);
12833	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12834	}
12835      if (align <= 2 || count == 0)
12836	{
12837	  rtx label = ix86_expand_aligntest (countreg, 2);
12838	  srcmem = change_address (src, HImode, srcreg);
12839	  dstmem = change_address (dst, HImode, destreg);
12840	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12841	  emit_label (label);
12842	  LABEL_NUSES (label) = 1;
12843	}
12844      if (align > 1 && count != 0 && (count & 1))
12845	{
12846	  srcmem = change_address (src, QImode, srcreg);
12847	  dstmem = change_address (dst, QImode, destreg);
12848	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12849	}
12850      if (align <= 1 || count == 0)
12851	{
12852	  rtx label = ix86_expand_aligntest (countreg, 1);
12853	  srcmem = change_address (src, QImode, srcreg);
12854	  dstmem = change_address (dst, QImode, destreg);
12855	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12856	  emit_label (label);
12857	  LABEL_NUSES (label) = 1;
12858	}
12859    }
12860
12861  return 1;
12862}
12863
12864/* Expand string clear operation (bzero).  Use i386 string operations when
12865   profitable.  ix86_expand_movmem contains similar code.  */
12866int
12867ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12868{
12869  rtx destreg, zeroreg, countreg, destexp;
12870  enum machine_mode counter_mode;
12871  HOST_WIDE_INT align = 0;
12872  unsigned HOST_WIDE_INT count = 0;
12873
12874  if (GET_CODE (align_exp) == CONST_INT)
12875    align = INTVAL (align_exp);
12876
12877  /* Can't use any of this if the user has appropriated esi.  */
12878  if (global_regs[4])
12879    return 0;
12880
12881  /* This simple hack avoids all inlining code and simplifies code below.  */
12882  if (!TARGET_ALIGN_STRINGOPS)
12883    align = 32;
12884
12885  if (GET_CODE (count_exp) == CONST_INT)
12886    {
12887      count = INTVAL (count_exp);
12888      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12889	return 0;
12890    }
12891  /* Figure out proper mode for counter.  For 32bits it is always SImode,
12892     for 64bits use SImode when possible, otherwise DImode.
12893     Set count to number of bytes copied when known at compile time.  */
12894  if (!TARGET_64BIT
12895      || GET_MODE (count_exp) == SImode
12896      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12897    counter_mode = SImode;
12898  else
12899    counter_mode = DImode;
12900
12901  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12902  if (destreg != XEXP (dst, 0))
12903    dst = replace_equiv_address_nv (dst, destreg);
12904
12905
12906  /* When optimizing for size, emit a simple rep ; stosb instruction for
12907     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
12908     sequence is 7 bytes long, so if optimizing for size and the count is
12909     small enough that some stosl, stosw and stosb instructions without
12910     rep are shorter, fall through to the next arm below.  */
12911
12912  if ((!optimize || optimize_size)
12913      && (count == 0
12914	  || ((count & 0x03)
12915	      && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12916    {
12917      emit_insn (gen_cld ());
12918
12919      countreg = ix86_zero_extend_to_Pmode (count_exp);
12920      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12921      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12922      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12923    }
12924  else if (count != 0
12925	   && (align >= 8
12926	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12927	       || optimize_size || count < (unsigned int) 64))
12928    {
12929      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12930      unsigned HOST_WIDE_INT offset = 0;
12931
12932      emit_insn (gen_cld ());
12933
12934      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12935      if (count & ~(size - 1))
12936	{
12937	  unsigned HOST_WIDE_INT repcount;
12938	  unsigned int max_nonrep;
12939
12940	  repcount = count >> (size == 4 ? 2 : 3);
12941	  if (!TARGET_64BIT)
12942	    repcount &= 0x3fffffff;
12943
12944	  /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12945	     movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12946	     bytes.  In both cases the latter seems to be faster for small
12947	     values of N.  */
12948	  max_nonrep = size == 4 ? 7 : 4;
12949	  if (!optimize_size)
12950	    switch (ix86_tune)
12951	      {
12952	      case PROCESSOR_PENTIUM4:
12953	      case PROCESSOR_NOCONA:
12954	        max_nonrep = 3;
12955	        break;
12956	      default:
12957	        break;
12958	      }
12959
12960	  if (repcount <= max_nonrep)
12961	    while (repcount-- > 0)
12962	      {
12963		rtx mem = adjust_automodify_address_nv (dst,
12964							GET_MODE (zeroreg),
12965							destreg, offset);
12966		emit_insn (gen_strset (destreg, mem, zeroreg));
12967		offset += size;
12968	      }
12969	  else
12970	    {
12971	      countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12972	      countreg = ix86_zero_extend_to_Pmode (countreg);
12973	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
12974					GEN_INT (size == 4 ? 2 : 3));
12975	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12976	      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12977				       destexp));
12978	      offset = count & ~(size - 1);
12979	    }
12980	}
12981      if (size == 8 && (count & 0x04))
12982	{
12983	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12984						  offset);
12985	  emit_insn (gen_strset (destreg, mem,
12986				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12987	  offset += 4;
12988	}
12989      if (count & 0x02)
12990	{
12991	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12992						  offset);
12993	  emit_insn (gen_strset (destreg, mem,
12994				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12995	  offset += 2;
12996	}
12997      if (count & 0x01)
12998	{
12999	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13000						  offset);
13001	  emit_insn (gen_strset (destreg, mem,
13002				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13003	}
13004    }
13005  else
13006    {
13007      rtx countreg2;
13008      rtx label = NULL;
13009      /* Compute desired alignment of the string operation.  */
13010      int desired_alignment = (TARGET_PENTIUMPRO
13011			       && (count == 0 || count >= (unsigned int) 260)
13012			       ? 8 : UNITS_PER_WORD);
13013
13014      /* If we don't know anything about the alignment, default to the
13015         library version, since it is usually equally fast and results in
13016         shorter code.
13017
13018	 Also emit a call when we know that the count is large and call overhead
13019	 will not be important.  */
13020      if (!TARGET_INLINE_ALL_STRINGOPS
13021	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13022	return 0;
13023
13024      if (TARGET_SINGLE_STRINGOP)
13025	emit_insn (gen_cld ());
13026
13027      countreg2 = gen_reg_rtx (Pmode);
13028      countreg = copy_to_mode_reg (counter_mode, count_exp);
13029      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13030      /* Get rid of MEM_OFFSET, it won't be accurate.  */
13031      dst = change_address (dst, BLKmode, destreg);
13032
13033      if (count == 0 && align < desired_alignment)
13034	{
13035	  label = gen_label_rtx ();
13036	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13037				   LEU, 0, counter_mode, 1, label);
13038	}
13039      if (align <= 1)
13040	{
13041	  rtx label = ix86_expand_aligntest (destreg, 1);
13042	  emit_insn (gen_strset (destreg, dst,
13043				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13044	  ix86_adjust_counter (countreg, 1);
13045	  emit_label (label);
13046	  LABEL_NUSES (label) = 1;
13047	}
13048      if (align <= 2)
13049	{
13050	  rtx label = ix86_expand_aligntest (destreg, 2);
13051	  emit_insn (gen_strset (destreg, dst,
13052				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13053	  ix86_adjust_counter (countreg, 2);
13054	  emit_label (label);
13055	  LABEL_NUSES (label) = 1;
13056	}
13057      if (align <= 4 && desired_alignment > 4)
13058	{
13059	  rtx label = ix86_expand_aligntest (destreg, 4);
13060	  emit_insn (gen_strset (destreg, dst,
13061				 (TARGET_64BIT
13062				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13063				  : zeroreg)));
13064	  ix86_adjust_counter (countreg, 4);
13065	  emit_label (label);
13066	  LABEL_NUSES (label) = 1;
13067	}
13068
13069      if (label && desired_alignment > 4 && !TARGET_64BIT)
13070	{
13071	  emit_label (label);
13072	  LABEL_NUSES (label) = 1;
13073	  label = NULL_RTX;
13074	}
13075
13076      if (!TARGET_SINGLE_STRINGOP)
13077	emit_insn (gen_cld ());
13078      if (TARGET_64BIT)
13079	{
13080	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13081				  GEN_INT (3)));
13082	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13083	}
13084      else
13085	{
13086	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13087	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13088	}
13089      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13090      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13091
13092      if (label)
13093	{
13094	  emit_label (label);
13095	  LABEL_NUSES (label) = 1;
13096	}
13097
13098      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13099	emit_insn (gen_strset (destreg, dst,
13100			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
13101      if (TARGET_64BIT && (align <= 4 || count == 0))
13102	{
13103	  rtx label = ix86_expand_aligntest (countreg, 4);
13104	  emit_insn (gen_strset (destreg, dst,
13105				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13106	  emit_label (label);
13107	  LABEL_NUSES (label) = 1;
13108	}
13109      if (align > 2 && count != 0 && (count & 2))
13110	emit_insn (gen_strset (destreg, dst,
13111			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
13112      if (align <= 2 || count == 0)
13113	{
13114	  rtx label = ix86_expand_aligntest (countreg, 2);
13115	  emit_insn (gen_strset (destreg, dst,
13116				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13117	  emit_label (label);
13118	  LABEL_NUSES (label) = 1;
13119	}
13120      if (align > 1 && count != 0 && (count & 1))
13121	emit_insn (gen_strset (destreg, dst,
13122			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
13123      if (align <= 1 || count == 0)
13124	{
13125	  rtx label = ix86_expand_aligntest (countreg, 1);
13126	  emit_insn (gen_strset (destreg, dst,
13127				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13128	  emit_label (label);
13129	  LABEL_NUSES (label) = 1;
13130	}
13131    }
13132  return 1;
13133}
13134
13135/* Expand strlen.  */
13136int
13137ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13138{
13139  rtx addr, scratch1, scratch2, scratch3, scratch4;
13140
13141  /* The generic case of the strlen expander is long.  Avoid expanding
13142     it unless TARGET_INLINE_ALL_STRINGOPS.  */
13143
13144  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13145      && !TARGET_INLINE_ALL_STRINGOPS
13146      && !optimize_size
13147      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13148    return 0;
13149
13150  addr = force_reg (Pmode, XEXP (src, 0));
13151  scratch1 = gen_reg_rtx (Pmode);
13152
13153  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13154      && !optimize_size)
13155    {
13156      /* Well it seems that some optimizer does not combine a call like
13157         foo(strlen(bar), strlen(bar));
13158         when the move and the subtraction are done here.  It does calculate
13159         the length just once when these instructions are done inside
13160         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
13161         often used and I use one fewer register for the lifetime of
13162         output_strlen_unroll() this is better.  */
13163
13164      emit_move_insn (out, addr);
13165
13166      ix86_expand_strlensi_unroll_1 (out, src, align);
13167
13168      /* strlensi_unroll_1 returns the address of the zero at the end of
13169         the string, like memchr(), so compute the length by subtracting
13170         the start address.  */
13171      if (TARGET_64BIT)
13172	emit_insn (gen_subdi3 (out, out, addr));
13173      else
13174	emit_insn (gen_subsi3 (out, out, addr));
13175    }
13176  else
13177    {
13178      rtx unspec;
13179      scratch2 = gen_reg_rtx (Pmode);
13180      scratch3 = gen_reg_rtx (Pmode);
13181      scratch4 = force_reg (Pmode, constm1_rtx);
13182
13183      emit_move_insn (scratch3, addr);
13184      eoschar = force_reg (QImode, eoschar);
13185
13186      emit_insn (gen_cld ());
13187      src = replace_equiv_address_nv (src, scratch3);
13188
13189      /* If .md starts supporting :P, this can be done in .md.  */
13190      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13191						 scratch4), UNSPEC_SCAS);
13192      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13193      if (TARGET_64BIT)
13194	{
13195	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13196	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13197	}
13198      else
13199	{
13200	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13201	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13202	}
13203    }
13204  return 1;
13205}
13206
13207/* Expand the appropriate insns for doing strlen if not just doing
13208   repnz; scasb
13209
13210   out = result, initialized with the start address
13211   align_rtx = alignment of the address.
13212   scratch = scratch register, initialized with the start address when
13213	not aligned, otherwise undefined
13214
13215   This is just the body. It needs the initializations mentioned above and
13216   some address computing at the end.  These things are done in i386.md.  */
13217
13218static void
13219ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13220{
13221  int align;
13222  rtx tmp;
13223  rtx align_2_label = NULL_RTX;
13224  rtx align_3_label = NULL_RTX;
13225  rtx align_4_label = gen_label_rtx ();
13226  rtx end_0_label = gen_label_rtx ();
13227  rtx mem;
13228  rtx tmpreg = gen_reg_rtx (SImode);
13229  rtx scratch = gen_reg_rtx (SImode);
13230  rtx cmp;
13231
13232  align = 0;
13233  if (GET_CODE (align_rtx) == CONST_INT)
13234    align = INTVAL (align_rtx);
13235
13236  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
13237
13238  /* Is there a known alignment and is it less than 4?  */
13239  if (align < 4)
13240    {
13241      rtx scratch1 = gen_reg_rtx (Pmode);
13242      emit_move_insn (scratch1, out);
13243      /* Is there a known alignment and is it not 2? */
13244      if (align != 2)
13245	{
13246	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13247	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13248
13249	  /* Leave just the two lower bits.  */
13250	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13251				    NULL_RTX, 0, OPTAB_WIDEN);
13252
13253	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13254				   Pmode, 1, align_4_label);
13255	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13256				   Pmode, 1, align_2_label);
13257	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13258				   Pmode, 1, align_3_label);
13259	}
13260      else
13261        {
13262	  /* Since the alignment is 2, we have to check 0 or 2 bytes;
13263	     check whether the pointer is aligned to 4 bytes.  */
13264
13265	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13266				    NULL_RTX, 0, OPTAB_WIDEN);
13267
13268	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13269				   Pmode, 1, align_4_label);
13270        }
13271
13272      mem = change_address (src, QImode, out);
13273
13274      /* Now compare the bytes.  */
13275
13276      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
13277      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13278			       QImode, 1, end_0_label);
13279
13280      /* Increment the address.  */
13281      if (TARGET_64BIT)
13282	emit_insn (gen_adddi3 (out, out, const1_rtx));
13283      else
13284	emit_insn (gen_addsi3 (out, out, const1_rtx));
13285
13286      /* Not needed with an alignment of 2 */
13287      if (align != 2)
13288	{
13289	  emit_label (align_2_label);
13290
13291	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13292				   end_0_label);
13293
13294	  if (TARGET_64BIT)
13295	    emit_insn (gen_adddi3 (out, out, const1_rtx));
13296	  else
13297	    emit_insn (gen_addsi3 (out, out, const1_rtx));
13298
13299	  emit_label (align_3_label);
13300	}
13301
13302      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13303			       end_0_label);
13304
13305      if (TARGET_64BIT)
13306	emit_insn (gen_adddi3 (out, out, const1_rtx));
13307      else
13308	emit_insn (gen_addsi3 (out, out, const1_rtx));
13309    }
13310
13311  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
13312     align this loop; it only makes the program larger and does not
13313     speed it up.  */
13314  emit_label (align_4_label);
13315
13316  mem = change_address (src, SImode, out);
13317  emit_move_insn (scratch, mem);
13318  if (TARGET_64BIT)
13319    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13320  else
13321    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13322
13323  /* This formula yields a nonzero result iff one of the bytes is zero.
13324     This saves three branches inside the loop and many cycles.  */
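  /* In C terms the insns below compute ((x - 0x01010101) & ~x) & 0x80808080:
     subtracting 1 borrows out of a zero byte and sets its top bit, while
     the ~x factor filters out bytes whose top bit was already set.  */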
13325
13326  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13327  emit_insn (gen_one_cmplsi2 (scratch, scratch));
13328  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13329  emit_insn (gen_andsi3 (tmpreg, tmpreg,
13330			 gen_int_mode (0x80808080, SImode)));
13331  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13332			   align_4_label);
13333
13334  if (TARGET_CMOVE)
13335    {
13336       rtx reg = gen_reg_rtx (SImode);
13337       rtx reg2 = gen_reg_rtx (Pmode);
13338       emit_move_insn (reg, tmpreg);
13339       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13340
13341       /* If zero is not in the first two bytes, move two bytes forward.  */
13342       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13343       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13344       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13345       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13346			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
13347						     reg,
13348						     tmpreg)));
13349       /* Emit lea manually to avoid clobbering of flags.  */
13350       emit_insn (gen_rtx_SET (SImode, reg2,
13351			       gen_rtx_PLUS (Pmode, out, const2_rtx)));
13352
13353       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13354       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13355       emit_insn (gen_rtx_SET (VOIDmode, out,
13356			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13357						     reg2,
13358						     out)));
13359
13360    }
13361  else
13362    {
13363       rtx end_2_label = gen_label_rtx ();
13364       /* Is zero in the first two bytes? */
13365
13366       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13367       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13368       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13369       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13370                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13371                            pc_rtx);
13372       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13373       JUMP_LABEL (tmp) = end_2_label;
13374
13375       /* Not in the first two.  Move two bytes forward.  */
13376       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13377       if (TARGET_64BIT)
13378	 emit_insn (gen_adddi3 (out, out, const2_rtx));
13379       else
13380	 emit_insn (gen_addsi3 (out, out, const2_rtx));
13381
13382       emit_label (end_2_label);
13383
13384    }
13385
13386  /* Avoid branch in fixing the byte.  */
13387  tmpreg = gen_lowpart (QImode, tmpreg);
13388  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13389  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13390  if (TARGET_64BIT)
13391    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13392  else
13393    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13394
13395  emit_label (end_0_label);
13396}
13397
13398void
13399ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13400		  rtx callarg2 ATTRIBUTE_UNUSED,
13401		  rtx pop, int sibcall)
13402{
13403  rtx use = NULL, call;
13404
13405  if (pop == const0_rtx)
13406    pop = NULL;
13407  gcc_assert (!TARGET_64BIT || !pop);
13408
13409  if (TARGET_MACHO && !TARGET_64BIT)
13410    {
13411#if TARGET_MACHO
13412      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13413	fnaddr = machopic_indirect_call_target (fnaddr);
13414#endif
13415    }
13416  else
13417    {
13418      /* Static functions and indirect calls don't need the pic register.  */
13419      if (! TARGET_64BIT && flag_pic
13420	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13421	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13422	use_reg (&use, pic_offset_table_rtx);
13423    }
13424
13425  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13426    {
13427      rtx al = gen_rtx_REG (QImode, 0);
13428      emit_move_insn (al, callarg2);
13429      use_reg (&use, al);
13430    }
13431
13432  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13433    {
13434      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13435      fnaddr = gen_rtx_MEM (QImode, fnaddr);
13436    }
13437  if (sibcall && TARGET_64BIT
13438      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13439    {
13440      rtx addr;
13441      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13442      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13443      emit_move_insn (fnaddr, addr);
13444      fnaddr = gen_rtx_MEM (QImode, fnaddr);
13445    }
13446
13447  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13448  if (retval)
13449    call = gen_rtx_SET (VOIDmode, retval, call);
13450  if (pop)
13451    {
13452      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13453      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13454      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13455    }
13456
13457  call = emit_call_insn (call);
13458  if (use)
13459    CALL_INSN_FUNCTION_USAGE (call) = use;
13460}
13461
13462
13463/* Clear stack slot assignments remembered from previous functions.
13464   This is called from INIT_EXPANDERS once before RTL is emitted for each
13465   function.  */
13466
13467static struct machine_function *
13468ix86_init_machine_status (void)
13469{
13470  struct machine_function *f;
13471
13472  f = ggc_alloc_cleared (sizeof (struct machine_function));
13473  f->use_fast_prologue_epilogue_nregs = -1;
13474  f->tls_descriptor_call_expanded_p = 0;
13475
13476  return f;
13477}
13478
13479/* Return a MEM corresponding to a stack slot with mode MODE.
13480   Allocate a new slot if necessary.
13481
13482   The RTL for a function can have several slots available: N is
13483   which slot to use.  */
13484
13485rtx
13486assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13487{
13488  struct stack_local_entry *s;
13489
13490  gcc_assert (n < MAX_386_STACK_LOCALS);
13491
13492  /* Virtual slot is valid only before vregs are instantiated.  */
13493  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13494
13495  for (s = ix86_stack_locals; s; s = s->next)
13496    if (s->mode == mode && s->n == n)
13497      return s->rtl;
13498
13499  s = (struct stack_local_entry *)
13500    ggc_alloc (sizeof (struct stack_local_entry));
13501  s->n = n;
13502  s->mode = mode;
13503  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13504
13505  s->next = ix86_stack_locals;
13506  ix86_stack_locals = s;
13507  return s->rtl;
13508}
13509
13510/* Construct the SYMBOL_REF for the tls_get_addr function.  */
13511
13512static GTY(()) rtx ix86_tls_symbol;
13513rtx
13514ix86_tls_get_addr (void)
13515{
13516
13517  if (!ix86_tls_symbol)
13518    {
13519      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13520					    (TARGET_ANY_GNU_TLS
13521					     && !TARGET_64BIT)
13522					    ? "___tls_get_addr"
13523					    : "__tls_get_addr");
13524    }
13525
13526  return ix86_tls_symbol;
13527}
13528
13529/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
13530
13531static GTY(()) rtx ix86_tls_module_base_symbol;
13532rtx
13533ix86_tls_module_base (void)
13534{
13535
13536  if (!ix86_tls_module_base_symbol)
13537    {
13538      ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13539							"_TLS_MODULE_BASE_");
13540      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13541	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13542    }
13543
13544  return ix86_tls_module_base_symbol;
13545}
13546
13547/* Calculate the length of the memory address in the instruction
13548   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
13549
13550int
13551memory_address_length (rtx addr)
13552{
13553  struct ix86_address parts;
13554  rtx base, index, disp;
13555  int len;
13556  int ok;
13557
13558  if (GET_CODE (addr) == PRE_DEC
13559      || GET_CODE (addr) == POST_INC
13560      || GET_CODE (addr) == PRE_MODIFY
13561      || GET_CODE (addr) == POST_MODIFY)
13562    return 0;
13563
13564  ok = ix86_decompose_address (addr, &parts);
13565  gcc_assert (ok);
13566
13567  if (parts.base && GET_CODE (parts.base) == SUBREG)
13568    parts.base = SUBREG_REG (parts.base);
13569  if (parts.index && GET_CODE (parts.index) == SUBREG)
13570    parts.index = SUBREG_REG (parts.index);
13571
13572  base = parts.base;
13573  index = parts.index;
13574  disp = parts.disp;
13575  len = 0;
13576
13577  /* Rule of thumb:
13578       - esp as the base always wants an index,
13579       - ebp as the base always wants a displacement.  */
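  /* Illustrative values returned below (the modrm byte itself is not
     counted):  (%eax) -> 0,  (%esp) -> 1 (SIB byte),  8(%ebp) -> 1 (disp8),
     foo -> 4 (disp32),  8(%ebx,%ecx,4) -> 2 (disp8 plus SIB byte).  */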
13580
13581  /* Register Indirect.  */
13582  if (base && !index && !disp)
13583    {
13584      /* esp (for its index) and ebp (for its displacement) need
13585	 the two-byte modrm form.  */
13586      if (addr == stack_pointer_rtx
13587	  || addr == arg_pointer_rtx
13588	  || addr == frame_pointer_rtx
13589	  || addr == hard_frame_pointer_rtx)
13590	len = 1;
13591    }
13592
13593  /* Direct Addressing.  */
13594  else if (disp && !base && !index)
13595    len = 4;
13596
13597  else
13598    {
13599      /* Find the length of the displacement constant.  */
13600      if (disp)
13601	{
13602	  if (base && satisfies_constraint_K (disp))
13603	    len = 1;
13604	  else
13605	    len = 4;
13606	}
13607      /* ebp always wants a displacement.  */
13608      else if (base == hard_frame_pointer_rtx)
13609        len = 1;
13610
13611      /* An index requires the two-byte modrm form....  */
13612      if (index
13613	  /* ...like esp, which always wants an index.  */
13614	  || base == stack_pointer_rtx
13615	  || base == arg_pointer_rtx
13616	  || base == frame_pointer_rtx)
13617	len += 1;
13618    }
13619
13620  return len;
13621}
13622
13623/* Compute default value for "length_immediate" attribute.  When SHORTFORM
13624   is set, expect that the insn has an 8-bit immediate alternative.  */
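/* For example (illustrative): with SHORTFORM set, "addl $4, %eax" counts
   1 byte because the constant satisfies the signed-8-bit 'K' constraint,
   while "addl $1000, %eax" falls through to the MODE_SI case and counts 4.  */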
13625int
13626ix86_attr_length_immediate_default (rtx insn, int shortform)
13627{
13628  int len = 0;
13629  int i;
13630  extract_insn_cached (insn);
13631  for (i = recog_data.n_operands - 1; i >= 0; --i)
13632    if (CONSTANT_P (recog_data.operand[i]))
13633      {
13634	gcc_assert (!len);
13635	if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13636	  len = 1;
13637	else
13638	  {
13639	    switch (get_attr_mode (insn))
13640	      {
13641		case MODE_QI:
13642		  len += 1;
13643		  break;
13644		case MODE_HI:
13645		  len += 2;
13646		  break;
13647		case MODE_SI:
13648		  len += 4;
13649		  break;
13650		/* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
13651		case MODE_DI:
13652		  len += 4;
13653		  break;
13654		default:
13655		  fatal_insn ("unknown insn mode", insn);
13656	      }
13657	  }
13658      }
13659  return len;
13660}
13661/* Compute default value for "length_address" attribute.  */
13662int
13663ix86_attr_length_address_default (rtx insn)
13664{
13665  int i;
13666
13667  if (get_attr_type (insn) == TYPE_LEA)
13668    {
13669      rtx set = PATTERN (insn);
13670
13671      if (GET_CODE (set) == PARALLEL)
13672	set = XVECEXP (set, 0, 0);
13673
13674      gcc_assert (GET_CODE (set) == SET);
13675
13676      return memory_address_length (SET_SRC (set));
13677    }
13678
13679  extract_insn_cached (insn);
13680  for (i = recog_data.n_operands - 1; i >= 0; --i)
13681    if (GET_CODE (recog_data.operand[i]) == MEM)
13682      {
13683	return memory_address_length (XEXP (recog_data.operand[i], 0));
13685      }
13686  return 0;
13687}
13688
13689/* Return the maximum number of instructions a cpu can issue.  */
13690
13691static int
13692ix86_issue_rate (void)
13693{
13694  switch (ix86_tune)
13695    {
13696    case PROCESSOR_PENTIUM:
13697    case PROCESSOR_K6:
13698      return 2;
13699
13700    case PROCESSOR_PENTIUMPRO:
13701    case PROCESSOR_PENTIUM4:
13702    case PROCESSOR_ATHLON:
13703    case PROCESSOR_K8:
13704    case PROCESSOR_NOCONA:
13705    case PROCESSOR_GENERIC32:
13706    case PROCESSOR_GENERIC64:
13707      return 3;
13708
13709    default:
13710      return 1;
13711    }
13712}
13713
13714/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13715   by DEP_INSN and nothing else set by DEP_INSN.  */
13716
13717static int
13718ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13719{
13720  rtx set, set2;
13721
13722  /* Simplify the test for uninteresting insns.  */
13723  if (insn_type != TYPE_SETCC
13724      && insn_type != TYPE_ICMOV
13725      && insn_type != TYPE_FCMOV
13726      && insn_type != TYPE_IBR)
13727    return 0;
13728
13729  if ((set = single_set (dep_insn)) != 0)
13730    {
13731      set = SET_DEST (set);
13732      set2 = NULL_RTX;
13733    }
13734  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13735	   && XVECLEN (PATTERN (dep_insn), 0) == 2
13736	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13737	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13738    {
13739      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13740      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13741    }
13742  else
13743    return 0;
13744
13745  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13746    return 0;
13747
13748  /* This test is true if the dependent insn reads the flags but
13749     not any other potentially set register.  */
13750  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13751    return 0;
13752
13753  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13754    return 0;
13755
13756  return 1;
13757}
13758
13759/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13760   address with operands set by DEP_INSN.  */
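/* Illustrative case: on the original Pentium a load such as
   "movl 4(%eax), %ebx" issued right after "addl $4, %eax" hits the address
   generation interlock, which is why ix86_adjust_cost adds a cycle of
   latency whenever this predicate holds for PROCESSOR_PENTIUM.  */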
13761
13762static int
13763ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13764{
13765  rtx addr;
13766
13767  if (insn_type == TYPE_LEA
13768      && TARGET_PENTIUM)
13769    {
13770      addr = PATTERN (insn);
13771
13772      if (GET_CODE (addr) == PARALLEL)
13773	addr = XVECEXP (addr, 0, 0);
13774
13775      gcc_assert (GET_CODE (addr) == SET);
13776
13777      addr = SET_SRC (addr);
13778    }
13779  else
13780    {
13781      int i;
13782      extract_insn_cached (insn);
13783      for (i = recog_data.n_operands - 1; i >= 0; --i)
13784	if (GET_CODE (recog_data.operand[i]) == MEM)
13785	  {
13786	    addr = XEXP (recog_data.operand[i], 0);
13787	    goto found;
13788	  }
13789      return 0;
13790    found:;
13791    }
13792
13793  return modified_in_p (addr, dep_insn);
13794}
13795
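/* Adjust the scheduling cost COST of the dependence LINK between DEP_INSN
   and INSN for the CPU selected by -mtune; used as the
   TARGET_SCHED_ADJUST_COST hook.  */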
13796static int
13797ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13798{
13799  enum attr_type insn_type, dep_insn_type;
13800  enum attr_memory memory;
13801  rtx set, set2;
13802  int dep_insn_code_number;
13803
13804  /* Anti and output dependencies have zero cost on all CPUs.  */
13805  if (REG_NOTE_KIND (link) != 0)
13806    return 0;
13807
13808  dep_insn_code_number = recog_memoized (dep_insn);
13809
13810  /* If we can't recognize the insns, we can't really do anything.  */
13811  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13812    return cost;
13813
13814  insn_type = get_attr_type (insn);
13815  dep_insn_type = get_attr_type (dep_insn);
13816
13817  switch (ix86_tune)
13818    {
13819    case PROCESSOR_PENTIUM:
13820      /* Address Generation Interlock adds a cycle of latency.  */
13821      if (ix86_agi_dependent (insn, dep_insn, insn_type))
13822	cost += 1;
13823
13824      /* ??? Compares pair with jump/setcc.  */
13825      if (ix86_flags_dependent (insn, dep_insn, insn_type))
13826	cost = 0;
13827
13828      /* Floating point stores require the value to be ready one cycle earlier.  */
13829      if (insn_type == TYPE_FMOV
13830	  && get_attr_memory (insn) == MEMORY_STORE
13831	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
13832	cost += 1;
13833      break;
13834
13835    case PROCESSOR_PENTIUMPRO:
13836      memory = get_attr_memory (insn);
13837
13838      /* INT->FP conversion is expensive.  */
13839      if (get_attr_fp_int_src (dep_insn))
13840	cost += 5;
13841
13842      /* There is one cycle extra latency between an FP op and a store.  */
13843      if (insn_type == TYPE_FMOV
13844	  && (set = single_set (dep_insn)) != NULL_RTX
13845	  && (set2 = single_set (insn)) != NULL_RTX
13846	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13847	  && GET_CODE (SET_DEST (set2)) == MEM)
13848	cost += 1;
13849
13850      /* Show the ability of the reorder buffer to hide the latency of a load
13851	 by executing it in parallel with the previous instruction when the
13852	 previous instruction is not needed to compute the address.  */
13853      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13854	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
13855	{
13856	  /* Claim that moves take one cycle, as the core can issue one load
13857	     at a time and the next load can start a cycle later.  */
13858	  if (dep_insn_type == TYPE_IMOV
13859	      || dep_insn_type == TYPE_FMOV)
13860	    cost = 1;
13861	  else if (cost > 1)
13862	    cost--;
13863	}
13864      break;
13865
13866    case PROCESSOR_K6:
13867      memory = get_attr_memory (insn);
13868
13869      /* The esp dependency is resolved before the instruction is really
13870         finished.  */
13871      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13872	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13873	return 1;
13874
13875      /* INT->FP conversion is expensive.  */
13876      if (get_attr_fp_int_src (dep_insn))
13877	cost += 5;
13878
13879      /* Show the ability of the reorder buffer to hide the latency of a load
13880	 by executing it in parallel with the previous instruction when the
13881	 previous instruction is not needed to compute the address.  */
13882      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13883	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
13884	{
13885	  /* Claim that moves take one cycle, as the core can issue one load
13886	     at a time and the next load can start a cycle later.  */
13887	  if (dep_insn_type == TYPE_IMOV
13888	      || dep_insn_type == TYPE_FMOV)
13889	    cost = 1;
13890	  else if (cost > 2)
13891	    cost -= 2;
13892	  else
13893	    cost = 1;
13894	}
13895      break;
13896
13897    case PROCESSOR_ATHLON:
13898    case PROCESSOR_K8:
13899    case PROCESSOR_GENERIC32:
13900    case PROCESSOR_GENERIC64:
13901      memory = get_attr_memory (insn);
13902
13903      /* Show the ability of the reorder buffer to hide the latency of a load
13904	 by executing it in parallel with the previous instruction when the
13905	 previous instruction is not needed to compute the address.  */
13906      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13907	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
13908	{
13909	  enum attr_unit unit = get_attr_unit (insn);
13910	  int loadcost = 3;
13911
13912	  /* Because of the difference between the length of integer and
13913	     floating unit pipeline preparation stages, the memory operands
13914	     for floating point are cheaper.
13915
13916	     ??? For Athlon the difference is most probably 2.  */
13917	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13918	    loadcost = 3;
13919	  else
13920	    loadcost = TARGET_ATHLON ? 2 : 0;
13921
13922	  if (cost >= loadcost)
13923	    cost -= loadcost;
13924	  else
13925	    cost = 0;
13926	}
13927
13928    default:
13929      break;
13930    }
13931
13932  return cost;
13933}
13934
13935/* How many alternative schedules to try.  This should be as wide as the
13936   scheduling freedom in the DFA, but no wider.  Making this value too
13937   large results in extra work for the scheduler.  */
13938
13939static int
13940ia32_multipass_dfa_lookahead (void)
13941{
13942  if (ix86_tune == PROCESSOR_PENTIUM)
13943    return 2;
13944
13945  if (ix86_tune == PROCESSOR_PENTIUMPRO
13946      || ix86_tune == PROCESSOR_K6)
13947    return 1;
13948
13949  else
13950    return 0;
13951}
13952
13953
13954/* Compute the alignment given to a constant that is being placed in memory.
13955   EXP is the constant and ALIGN is the alignment that the object would
13956   ordinarily have.
13957   The value of this function is used instead of that alignment to align
13958   the object.  */
13959
13960int
13961ix86_constant_alignment (tree exp, int align)
13962{
13963  if (TREE_CODE (exp) == REAL_CST)
13964    {
13965      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13966	return 64;
13967      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13968	return 128;
13969    }
13970  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13971      	   && !TARGET_NO_ALIGN_LONG_STRINGS
13972	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13973    return BITS_PER_WORD;
13974
13975  return align;
13976}
13977
13978/* Compute the alignment for a static variable.
13979   TYPE is the data type, and ALIGN is the alignment that
13980   the object would ordinarily have.  The value of this function is used
13981   instead of that alignment to align the object.  */
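/* For instance (illustrative): when not optimizing for size, a 64-byte
   struct declared with only its natural alignment is bumped to 256-bit
   (32-byte) alignment by the max_align test below, and on x86-64 any
   aggregate of 16 bytes or more is given at least 128-bit alignment, per
   the ABI note in the function.  */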
13982
13983int
13984ix86_data_alignment (tree type, int align)
13985{
13986  int max_align = optimize_size ? BITS_PER_WORD : 256;
13987
13988  if (AGGREGATE_TYPE_P (type)
13989      && TYPE_SIZE (type)
13990      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13991      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13992	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13993      && align < max_align)
13994    align = max_align;
13995
13996  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13997     to a 16-byte boundary.  */
13998  if (TARGET_64BIT)
13999    {
14000      if (AGGREGATE_TYPE_P (type)
14001	   && TYPE_SIZE (type)
14002	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14003	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14004	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14005	return 128;
14006    }
14007
14008  if (TREE_CODE (type) == ARRAY_TYPE)
14009    {
14010      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14011	return 64;
14012      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14013	return 128;
14014    }
14015  else if (TREE_CODE (type) == COMPLEX_TYPE)
14016    {
14017
14018      if (TYPE_MODE (type) == DCmode && align < 64)
14019	return 64;
14020      if (TYPE_MODE (type) == XCmode && align < 128)
14021	return 128;
14022    }
14023  else if ((TREE_CODE (type) == RECORD_TYPE
14024	    || TREE_CODE (type) == UNION_TYPE
14025	    || TREE_CODE (type) == QUAL_UNION_TYPE)
14026	   && TYPE_FIELDS (type))
14027    {
14028      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14029	return 64;
14030      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14031	return 128;
14032    }
14033  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14034	   || TREE_CODE (type) == INTEGER_TYPE)
14035    {
14036      if (TYPE_MODE (type) == DFmode && align < 64)
14037	return 64;
14038      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14039	return 128;
14040    }
14041
14042  return align;
14043}
14044
14045/* Compute the alignment for a local variable.
14046   TYPE is the data type, and ALIGN is the alignment that
14047   the object would ordinarily have.  The value of this macro is used
14048   instead of that alignment to align the object.  */
14049
14050int
14051ix86_local_alignment (tree type, int align)
14052{
14053  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14054     to a 16-byte boundary.  */
14055  if (TARGET_64BIT)
14056    {
14057      if (AGGREGATE_TYPE_P (type)
14058	   && TYPE_SIZE (type)
14059	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14060	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14061	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14062	return 128;
14063    }
14064  if (TREE_CODE (type) == ARRAY_TYPE)
14065    {
14066      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14067	return 64;
14068      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14069	return 128;
14070    }
14071  else if (TREE_CODE (type) == COMPLEX_TYPE)
14072    {
14073      if (TYPE_MODE (type) == DCmode && align < 64)
14074	return 64;
14075      if (TYPE_MODE (type) == XCmode && align < 128)
14076	return 128;
14077    }
14078  else if ((TREE_CODE (type) == RECORD_TYPE
14079	    || TREE_CODE (type) == UNION_TYPE
14080	    || TREE_CODE (type) == QUAL_UNION_TYPE)
14081	   && TYPE_FIELDS (type))
14082    {
14083      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14084	return 64;
14085      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14086	return 128;
14087    }
14088  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14089	   || TREE_CODE (type) == INTEGER_TYPE)
14090    {
14091
14092      if (TYPE_MODE (type) == DFmode && align < 64)
14093	return 64;
14094      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14095	return 128;
14096    }
14097  return align;
14098}
14099
14100/* Emit RTL insns to initialize the variable parts of a trampoline.
14101   FNADDR is an RTX for the address of the function's pure code.
14102   CXT is an RTX for the static chain value for the function.  */
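/* For reference, the constants emitted below spell out these sequences:

     32-bit:   b9 <cxt:4>		movl   $cxt, %ecx
	       e9 <disp:4>		jmp    fnaddr	(pc-relative)

     64-bit:   41 bb <fnaddr:4>		movl   $fnaddr, %r11d	(short form)
	  or   49 bb <fnaddr:8>		movabs $fnaddr, %r11
	       49 ba <cxt:8>		movabs $cxt, %r10
	       49 ff e3			jmp    *%r11  */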
14103void
14104x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14105{
14106  if (!TARGET_64BIT)
14107    {
14108      /* Compute offset from the end of the jmp to the target function.  */
14109      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14110			       plus_constant (tramp, 10),
14111			       NULL_RTX, 1, OPTAB_DIRECT);
14112      emit_move_insn (gen_rtx_MEM (QImode, tramp),
14113		      gen_int_mode (0xb9, QImode));
14114      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14115      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14116		      gen_int_mode (0xe9, QImode));
14117      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14118    }
14119  else
14120    {
14121      int offset = 0;
14122      /* Try to load the address using the shorter movl instead of movabs.
14123         We may want to support movq for kernel mode, but the kernel does not
14124         use trampolines at the moment.  */
14125      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14126	{
14127	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
14128	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14129			  gen_int_mode (0xbb41, HImode));
14130	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14131			  gen_lowpart (SImode, fnaddr));
14132	  offset += 6;
14133	}
14134      else
14135	{
14136	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14137			  gen_int_mode (0xbb49, HImode));
14138	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14139			  fnaddr);
14140	  offset += 10;
14141	}
14142      /* Load static chain using movabs to r10.  */
14143      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14144		      gen_int_mode (0xba49, HImode));
14145      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14146		      cxt);
14147      offset += 10;
14148      /* Jump to r11.  */
14149      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14150		      gen_int_mode (0xff49, HImode));
14151      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14152		      gen_int_mode (0xe3, QImode));
14153      offset += 3;
14154      gcc_assert (offset <= TRAMPOLINE_SIZE);
14155    }
14156
14157#ifdef ENABLE_EXECUTE_STACK
14158  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14159		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14160#endif
14161}
14162
14163/* Codes for all the SSE/MMX builtins.  */
14164enum ix86_builtins
14165{
14166  IX86_BUILTIN_ADDPS,
14167  IX86_BUILTIN_ADDSS,
14168  IX86_BUILTIN_DIVPS,
14169  IX86_BUILTIN_DIVSS,
14170  IX86_BUILTIN_MULPS,
14171  IX86_BUILTIN_MULSS,
14172  IX86_BUILTIN_SUBPS,
14173  IX86_BUILTIN_SUBSS,
14174
14175  IX86_BUILTIN_CMPEQPS,
14176  IX86_BUILTIN_CMPLTPS,
14177  IX86_BUILTIN_CMPLEPS,
14178  IX86_BUILTIN_CMPGTPS,
14179  IX86_BUILTIN_CMPGEPS,
14180  IX86_BUILTIN_CMPNEQPS,
14181  IX86_BUILTIN_CMPNLTPS,
14182  IX86_BUILTIN_CMPNLEPS,
14183  IX86_BUILTIN_CMPNGTPS,
14184  IX86_BUILTIN_CMPNGEPS,
14185  IX86_BUILTIN_CMPORDPS,
14186  IX86_BUILTIN_CMPUNORDPS,
14187  IX86_BUILTIN_CMPEQSS,
14188  IX86_BUILTIN_CMPLTSS,
14189  IX86_BUILTIN_CMPLESS,
14190  IX86_BUILTIN_CMPNEQSS,
14191  IX86_BUILTIN_CMPNLTSS,
14192  IX86_BUILTIN_CMPNLESS,
14193  IX86_BUILTIN_CMPNGTSS,
14194  IX86_BUILTIN_CMPNGESS,
14195  IX86_BUILTIN_CMPORDSS,
14196  IX86_BUILTIN_CMPUNORDSS,
14197
14198  IX86_BUILTIN_COMIEQSS,
14199  IX86_BUILTIN_COMILTSS,
14200  IX86_BUILTIN_COMILESS,
14201  IX86_BUILTIN_COMIGTSS,
14202  IX86_BUILTIN_COMIGESS,
14203  IX86_BUILTIN_COMINEQSS,
14204  IX86_BUILTIN_UCOMIEQSS,
14205  IX86_BUILTIN_UCOMILTSS,
14206  IX86_BUILTIN_UCOMILESS,
14207  IX86_BUILTIN_UCOMIGTSS,
14208  IX86_BUILTIN_UCOMIGESS,
14209  IX86_BUILTIN_UCOMINEQSS,
14210
14211  IX86_BUILTIN_CVTPI2PS,
14212  IX86_BUILTIN_CVTPS2PI,
14213  IX86_BUILTIN_CVTSI2SS,
14214  IX86_BUILTIN_CVTSI642SS,
14215  IX86_BUILTIN_CVTSS2SI,
14216  IX86_BUILTIN_CVTSS2SI64,
14217  IX86_BUILTIN_CVTTPS2PI,
14218  IX86_BUILTIN_CVTTSS2SI,
14219  IX86_BUILTIN_CVTTSS2SI64,
14220
14221  IX86_BUILTIN_MAXPS,
14222  IX86_BUILTIN_MAXSS,
14223  IX86_BUILTIN_MINPS,
14224  IX86_BUILTIN_MINSS,
14225
14226  IX86_BUILTIN_LOADUPS,
14227  IX86_BUILTIN_STOREUPS,
14228  IX86_BUILTIN_MOVSS,
14229
14230  IX86_BUILTIN_MOVHLPS,
14231  IX86_BUILTIN_MOVLHPS,
14232  IX86_BUILTIN_LOADHPS,
14233  IX86_BUILTIN_LOADLPS,
14234  IX86_BUILTIN_STOREHPS,
14235  IX86_BUILTIN_STORELPS,
14236
14237  IX86_BUILTIN_MASKMOVQ,
14238  IX86_BUILTIN_MOVMSKPS,
14239  IX86_BUILTIN_PMOVMSKB,
14240
14241  IX86_BUILTIN_MOVNTPS,
14242  IX86_BUILTIN_MOVNTQ,
14243
14244  IX86_BUILTIN_LOADDQU,
14245  IX86_BUILTIN_STOREDQU,
14246
14247  IX86_BUILTIN_PACKSSWB,
14248  IX86_BUILTIN_PACKSSDW,
14249  IX86_BUILTIN_PACKUSWB,
14250
14251  IX86_BUILTIN_PADDB,
14252  IX86_BUILTIN_PADDW,
14253  IX86_BUILTIN_PADDD,
14254  IX86_BUILTIN_PADDQ,
14255  IX86_BUILTIN_PADDSB,
14256  IX86_BUILTIN_PADDSW,
14257  IX86_BUILTIN_PADDUSB,
14258  IX86_BUILTIN_PADDUSW,
14259  IX86_BUILTIN_PSUBB,
14260  IX86_BUILTIN_PSUBW,
14261  IX86_BUILTIN_PSUBD,
14262  IX86_BUILTIN_PSUBQ,
14263  IX86_BUILTIN_PSUBSB,
14264  IX86_BUILTIN_PSUBSW,
14265  IX86_BUILTIN_PSUBUSB,
14266  IX86_BUILTIN_PSUBUSW,
14267
14268  IX86_BUILTIN_PAND,
14269  IX86_BUILTIN_PANDN,
14270  IX86_BUILTIN_POR,
14271  IX86_BUILTIN_PXOR,
14272
14273  IX86_BUILTIN_PAVGB,
14274  IX86_BUILTIN_PAVGW,
14275
14276  IX86_BUILTIN_PCMPEQB,
14277  IX86_BUILTIN_PCMPEQW,
14278  IX86_BUILTIN_PCMPEQD,
14279  IX86_BUILTIN_PCMPGTB,
14280  IX86_BUILTIN_PCMPGTW,
14281  IX86_BUILTIN_PCMPGTD,
14282
14283  IX86_BUILTIN_PMADDWD,
14284
14285  IX86_BUILTIN_PMAXSW,
14286  IX86_BUILTIN_PMAXUB,
14287  IX86_BUILTIN_PMINSW,
14288  IX86_BUILTIN_PMINUB,
14289
14290  IX86_BUILTIN_PMULHUW,
14291  IX86_BUILTIN_PMULHW,
14292  IX86_BUILTIN_PMULLW,
14293
14294  IX86_BUILTIN_PSADBW,
14295  IX86_BUILTIN_PSHUFW,
14296
14297  IX86_BUILTIN_PSLLW,
14298  IX86_BUILTIN_PSLLD,
14299  IX86_BUILTIN_PSLLQ,
14300  IX86_BUILTIN_PSRAW,
14301  IX86_BUILTIN_PSRAD,
14302  IX86_BUILTIN_PSRLW,
14303  IX86_BUILTIN_PSRLD,
14304  IX86_BUILTIN_PSRLQ,
14305  IX86_BUILTIN_PSLLWI,
14306  IX86_BUILTIN_PSLLDI,
14307  IX86_BUILTIN_PSLLQI,
14308  IX86_BUILTIN_PSRAWI,
14309  IX86_BUILTIN_PSRADI,
14310  IX86_BUILTIN_PSRLWI,
14311  IX86_BUILTIN_PSRLDI,
14312  IX86_BUILTIN_PSRLQI,
14313
14314  IX86_BUILTIN_PUNPCKHBW,
14315  IX86_BUILTIN_PUNPCKHWD,
14316  IX86_BUILTIN_PUNPCKHDQ,
14317  IX86_BUILTIN_PUNPCKLBW,
14318  IX86_BUILTIN_PUNPCKLWD,
14319  IX86_BUILTIN_PUNPCKLDQ,
14320
14321  IX86_BUILTIN_SHUFPS,
14322
14323  IX86_BUILTIN_RCPPS,
14324  IX86_BUILTIN_RCPSS,
14325  IX86_BUILTIN_RSQRTPS,
14326  IX86_BUILTIN_RSQRTSS,
14327  IX86_BUILTIN_SQRTPS,
14328  IX86_BUILTIN_SQRTSS,
14329
14330  IX86_BUILTIN_UNPCKHPS,
14331  IX86_BUILTIN_UNPCKLPS,
14332
14333  IX86_BUILTIN_ANDPS,
14334  IX86_BUILTIN_ANDNPS,
14335  IX86_BUILTIN_ORPS,
14336  IX86_BUILTIN_XORPS,
14337
14338  IX86_BUILTIN_EMMS,
14339  IX86_BUILTIN_LDMXCSR,
14340  IX86_BUILTIN_STMXCSR,
14341  IX86_BUILTIN_SFENCE,
14342
14343  /* 3DNow! Original */
14344  IX86_BUILTIN_FEMMS,
14345  IX86_BUILTIN_PAVGUSB,
14346  IX86_BUILTIN_PF2ID,
14347  IX86_BUILTIN_PFACC,
14348  IX86_BUILTIN_PFADD,
14349  IX86_BUILTIN_PFCMPEQ,
14350  IX86_BUILTIN_PFCMPGE,
14351  IX86_BUILTIN_PFCMPGT,
14352  IX86_BUILTIN_PFMAX,
14353  IX86_BUILTIN_PFMIN,
14354  IX86_BUILTIN_PFMUL,
14355  IX86_BUILTIN_PFRCP,
14356  IX86_BUILTIN_PFRCPIT1,
14357  IX86_BUILTIN_PFRCPIT2,
14358  IX86_BUILTIN_PFRSQIT1,
14359  IX86_BUILTIN_PFRSQRT,
14360  IX86_BUILTIN_PFSUB,
14361  IX86_BUILTIN_PFSUBR,
14362  IX86_BUILTIN_PI2FD,
14363  IX86_BUILTIN_PMULHRW,
14364
14365  /* 3DNow! Athlon Extensions */
14366  IX86_BUILTIN_PF2IW,
14367  IX86_BUILTIN_PFNACC,
14368  IX86_BUILTIN_PFPNACC,
14369  IX86_BUILTIN_PI2FW,
14370  IX86_BUILTIN_PSWAPDSI,
14371  IX86_BUILTIN_PSWAPDSF,
14372
14373  /* SSE2 */
14374  IX86_BUILTIN_ADDPD,
14375  IX86_BUILTIN_ADDSD,
14376  IX86_BUILTIN_DIVPD,
14377  IX86_BUILTIN_DIVSD,
14378  IX86_BUILTIN_MULPD,
14379  IX86_BUILTIN_MULSD,
14380  IX86_BUILTIN_SUBPD,
14381  IX86_BUILTIN_SUBSD,
14382
14383  IX86_BUILTIN_CMPEQPD,
14384  IX86_BUILTIN_CMPLTPD,
14385  IX86_BUILTIN_CMPLEPD,
14386  IX86_BUILTIN_CMPGTPD,
14387  IX86_BUILTIN_CMPGEPD,
14388  IX86_BUILTIN_CMPNEQPD,
14389  IX86_BUILTIN_CMPNLTPD,
14390  IX86_BUILTIN_CMPNLEPD,
14391  IX86_BUILTIN_CMPNGTPD,
14392  IX86_BUILTIN_CMPNGEPD,
14393  IX86_BUILTIN_CMPORDPD,
14394  IX86_BUILTIN_CMPUNORDPD,
14395  IX86_BUILTIN_CMPNEPD,
14396  IX86_BUILTIN_CMPEQSD,
14397  IX86_BUILTIN_CMPLTSD,
14398  IX86_BUILTIN_CMPLESD,
14399  IX86_BUILTIN_CMPNEQSD,
14400  IX86_BUILTIN_CMPNLTSD,
14401  IX86_BUILTIN_CMPNLESD,
14402  IX86_BUILTIN_CMPORDSD,
14403  IX86_BUILTIN_CMPUNORDSD,
14404  IX86_BUILTIN_CMPNESD,
14405
14406  IX86_BUILTIN_COMIEQSD,
14407  IX86_BUILTIN_COMILTSD,
14408  IX86_BUILTIN_COMILESD,
14409  IX86_BUILTIN_COMIGTSD,
14410  IX86_BUILTIN_COMIGESD,
14411  IX86_BUILTIN_COMINEQSD,
14412  IX86_BUILTIN_UCOMIEQSD,
14413  IX86_BUILTIN_UCOMILTSD,
14414  IX86_BUILTIN_UCOMILESD,
14415  IX86_BUILTIN_UCOMIGTSD,
14416  IX86_BUILTIN_UCOMIGESD,
14417  IX86_BUILTIN_UCOMINEQSD,
14418
14419  IX86_BUILTIN_MAXPD,
14420  IX86_BUILTIN_MAXSD,
14421  IX86_BUILTIN_MINPD,
14422  IX86_BUILTIN_MINSD,
14423
14424  IX86_BUILTIN_ANDPD,
14425  IX86_BUILTIN_ANDNPD,
14426  IX86_BUILTIN_ORPD,
14427  IX86_BUILTIN_XORPD,
14428
14429  IX86_BUILTIN_SQRTPD,
14430  IX86_BUILTIN_SQRTSD,
14431
14432  IX86_BUILTIN_UNPCKHPD,
14433  IX86_BUILTIN_UNPCKLPD,
14434
14435  IX86_BUILTIN_SHUFPD,
14436
14437  IX86_BUILTIN_LOADUPD,
14438  IX86_BUILTIN_STOREUPD,
14439  IX86_BUILTIN_MOVSD,
14440
14441  IX86_BUILTIN_LOADHPD,
14442  IX86_BUILTIN_LOADLPD,
14443
14444  IX86_BUILTIN_CVTDQ2PD,
14445  IX86_BUILTIN_CVTDQ2PS,
14446
14447  IX86_BUILTIN_CVTPD2DQ,
14448  IX86_BUILTIN_CVTPD2PI,
14449  IX86_BUILTIN_CVTPD2PS,
14450  IX86_BUILTIN_CVTTPD2DQ,
14451  IX86_BUILTIN_CVTTPD2PI,
14452
14453  IX86_BUILTIN_CVTPI2PD,
14454  IX86_BUILTIN_CVTSI2SD,
14455  IX86_BUILTIN_CVTSI642SD,
14456
14457  IX86_BUILTIN_CVTSD2SI,
14458  IX86_BUILTIN_CVTSD2SI64,
14459  IX86_BUILTIN_CVTSD2SS,
14460  IX86_BUILTIN_CVTSS2SD,
14461  IX86_BUILTIN_CVTTSD2SI,
14462  IX86_BUILTIN_CVTTSD2SI64,
14463
14464  IX86_BUILTIN_CVTPS2DQ,
14465  IX86_BUILTIN_CVTPS2PD,
14466  IX86_BUILTIN_CVTTPS2DQ,
14467
14468  IX86_BUILTIN_MOVNTI,
14469  IX86_BUILTIN_MOVNTPD,
14470  IX86_BUILTIN_MOVNTDQ,
14471
14472  /* SSE2 MMX */
14473  IX86_BUILTIN_MASKMOVDQU,
14474  IX86_BUILTIN_MOVMSKPD,
14475  IX86_BUILTIN_PMOVMSKB128,
14476
14477  IX86_BUILTIN_PACKSSWB128,
14478  IX86_BUILTIN_PACKSSDW128,
14479  IX86_BUILTIN_PACKUSWB128,
14480
14481  IX86_BUILTIN_PADDB128,
14482  IX86_BUILTIN_PADDW128,
14483  IX86_BUILTIN_PADDD128,
14484  IX86_BUILTIN_PADDQ128,
14485  IX86_BUILTIN_PADDSB128,
14486  IX86_BUILTIN_PADDSW128,
14487  IX86_BUILTIN_PADDUSB128,
14488  IX86_BUILTIN_PADDUSW128,
14489  IX86_BUILTIN_PSUBB128,
14490  IX86_BUILTIN_PSUBW128,
14491  IX86_BUILTIN_PSUBD128,
14492  IX86_BUILTIN_PSUBQ128,
14493  IX86_BUILTIN_PSUBSB128,
14494  IX86_BUILTIN_PSUBSW128,
14495  IX86_BUILTIN_PSUBUSB128,
14496  IX86_BUILTIN_PSUBUSW128,
14497
14498  IX86_BUILTIN_PAND128,
14499  IX86_BUILTIN_PANDN128,
14500  IX86_BUILTIN_POR128,
14501  IX86_BUILTIN_PXOR128,
14502
14503  IX86_BUILTIN_PAVGB128,
14504  IX86_BUILTIN_PAVGW128,
14505
14506  IX86_BUILTIN_PCMPEQB128,
14507  IX86_BUILTIN_PCMPEQW128,
14508  IX86_BUILTIN_PCMPEQD128,
14509  IX86_BUILTIN_PCMPGTB128,
14510  IX86_BUILTIN_PCMPGTW128,
14511  IX86_BUILTIN_PCMPGTD128,
14512
14513  IX86_BUILTIN_PMADDWD128,
14514
14515  IX86_BUILTIN_PMAXSW128,
14516  IX86_BUILTIN_PMAXUB128,
14517  IX86_BUILTIN_PMINSW128,
14518  IX86_BUILTIN_PMINUB128,
14519
14520  IX86_BUILTIN_PMULUDQ,
14521  IX86_BUILTIN_PMULUDQ128,
14522  IX86_BUILTIN_PMULHUW128,
14523  IX86_BUILTIN_PMULHW128,
14524  IX86_BUILTIN_PMULLW128,
14525
14526  IX86_BUILTIN_PSADBW128,
14527  IX86_BUILTIN_PSHUFHW,
14528  IX86_BUILTIN_PSHUFLW,
14529  IX86_BUILTIN_PSHUFD,
14530
14531  IX86_BUILTIN_PSLLW128,
14532  IX86_BUILTIN_PSLLD128,
14533  IX86_BUILTIN_PSLLQ128,
14534  IX86_BUILTIN_PSRAW128,
14535  IX86_BUILTIN_PSRAD128,
14536  IX86_BUILTIN_PSRLW128,
14537  IX86_BUILTIN_PSRLD128,
14538  IX86_BUILTIN_PSRLQ128,
14539  IX86_BUILTIN_PSLLDQI128,
14540  IX86_BUILTIN_PSLLWI128,
14541  IX86_BUILTIN_PSLLDI128,
14542  IX86_BUILTIN_PSLLQI128,
14543  IX86_BUILTIN_PSRAWI128,
14544  IX86_BUILTIN_PSRADI128,
14545  IX86_BUILTIN_PSRLDQI128,
14546  IX86_BUILTIN_PSRLWI128,
14547  IX86_BUILTIN_PSRLDI128,
14548  IX86_BUILTIN_PSRLQI128,
14549
14550  IX86_BUILTIN_PUNPCKHBW128,
14551  IX86_BUILTIN_PUNPCKHWD128,
14552  IX86_BUILTIN_PUNPCKHDQ128,
14553  IX86_BUILTIN_PUNPCKHQDQ128,
14554  IX86_BUILTIN_PUNPCKLBW128,
14555  IX86_BUILTIN_PUNPCKLWD128,
14556  IX86_BUILTIN_PUNPCKLDQ128,
14557  IX86_BUILTIN_PUNPCKLQDQ128,
14558
14559  IX86_BUILTIN_CLFLUSH,
14560  IX86_BUILTIN_MFENCE,
14561  IX86_BUILTIN_LFENCE,
14562
14563  /* Prescott New Instructions.  */
14564  IX86_BUILTIN_ADDSUBPS,
14565  IX86_BUILTIN_HADDPS,
14566  IX86_BUILTIN_HSUBPS,
14567  IX86_BUILTIN_MOVSHDUP,
14568  IX86_BUILTIN_MOVSLDUP,
14569  IX86_BUILTIN_ADDSUBPD,
14570  IX86_BUILTIN_HADDPD,
14571  IX86_BUILTIN_HSUBPD,
14572  IX86_BUILTIN_LDDQU,
14573
14574  IX86_BUILTIN_MONITOR,
14575  IX86_BUILTIN_MWAIT,
14576
14577  IX86_BUILTIN_VEC_INIT_V2SI,
14578  IX86_BUILTIN_VEC_INIT_V4HI,
14579  IX86_BUILTIN_VEC_INIT_V8QI,
14580  IX86_BUILTIN_VEC_EXT_V2DF,
14581  IX86_BUILTIN_VEC_EXT_V2DI,
14582  IX86_BUILTIN_VEC_EXT_V4SF,
14583  IX86_BUILTIN_VEC_EXT_V4SI,
14584  IX86_BUILTIN_VEC_EXT_V8HI,
14585  IX86_BUILTIN_VEC_EXT_V16QI,
14586  IX86_BUILTIN_VEC_EXT_V2SI,
14587  IX86_BUILTIN_VEC_EXT_V4HI,
14588  IX86_BUILTIN_VEC_SET_V8HI,
14589  IX86_BUILTIN_VEC_SET_V4HI,
14590
14591  IX86_BUILTIN_MAX
14592};
14593
14594#define def_builtin(MASK, NAME, TYPE, CODE)				\
14595do {									\
14596  if ((MASK) & target_flags						\
14597      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
14598    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
14599				 NULL, NULL_TREE);			\
14600} while (0)
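/* A typical use later in this file looks like the following, where the
   function type tree is built by the init routine (names shown only as an
   illustration):

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   The MASK_64BIT clause keeps builtins tagged 64-bit-only from being
   registered in 32-bit compilations, since the first test already passes
   as soon as any other bit of MASK is enabled in target_flags.  */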
14601
14602/* Bits for builtin_description.flag.  */
14603
14604/* Set when we don't support the comparison natively, and should
14605   swap_comparison in order to support it.  */
14606#define BUILTIN_DESC_SWAP_OPERANDS	1
14607
14608struct builtin_description
14609{
14610  const unsigned int mask;
14611  const enum insn_code icode;
14612  const char *const name;
14613  const enum ix86_builtins code;
14614  const enum rtx_code comparison;
14615  const unsigned int flag;
14616};
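/* The bdesc_* tables below are walked by the builtin setup and expansion
   code later in this file: each entry gives the target mask that gates the
   builtin, the insn pattern used to expand it, its user-visible name (or 0
   for entries registered by hand), the IX86_BUILTIN_* code and, for
   comparisons, the rtx comparison code together with the operand-swap
   flag.  */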
14617
14618static const struct builtin_description bdesc_comi[] =
14619{
14620  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14621  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14622  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14623  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14624  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14625  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14626  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14627  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14628  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14629  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14630  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14631  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14632  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14633  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14634  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14635  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14636  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14637  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14638  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14639  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14640  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14641  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14642  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14643  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14644};
14645
14646static const struct builtin_description bdesc_2arg[] =
14647{
14648  /* SSE */
14649  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14650  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14651  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14652  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14653  { MASK_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14654  { MASK_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14655  { MASK_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14656  { MASK_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14657
14658  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14659  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14660  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14661  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14662    BUILTIN_DESC_SWAP_OPERANDS },
14663  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14664    BUILTIN_DESC_SWAP_OPERANDS },
14665  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14666  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14667  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14668  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14669  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14670    BUILTIN_DESC_SWAP_OPERANDS },
14671  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14672    BUILTIN_DESC_SWAP_OPERANDS },
14673  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14674  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14675  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14676  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14677  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14678  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14679  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14680  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14681  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14682    BUILTIN_DESC_SWAP_OPERANDS },
14683  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14684    BUILTIN_DESC_SWAP_OPERANDS },
14685  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
14686
14687  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14688  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14689  { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14690  { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14691
14692  { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14693  { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14694  { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14695  { MASK_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14696
14697  { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14698  { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14699  { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14700  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14701  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14702
14703  /* MMX */
14704  { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14705  { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14706  { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14707  { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14708  { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14709  { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14710  { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14711  { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14712
14713  { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14714  { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14715  { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14716  { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14717  { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14718  { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14719  { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14720  { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14721
14722  { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14723  { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14724  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14725
14726  { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14727  { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14728  { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14729  { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14730
14731  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14732  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14733
14734  { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14735  { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14736  { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14737  { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14738  { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14739  { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14740
14741  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14742  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14743  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14744  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14745
14746  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14747  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14748  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14749  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14750  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14751  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14752
14753  /* Special.  */
14754  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14755  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14756  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14757
14758  { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14759  { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14760  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14761
14762  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14763  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14764  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14765  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14766  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14767  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14768
14769  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14770  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14771  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14772  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14773  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14774  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14775
14776  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14777  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14778  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14779  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14780
14781  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14782  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14783
14784  /* SSE2 */
14785  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14786  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14787  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14788  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14789  { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14790  { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14791  { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14792  { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14793
14794  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14795  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14796  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14797  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14798    BUILTIN_DESC_SWAP_OPERANDS },
14799  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14800    BUILTIN_DESC_SWAP_OPERANDS },
14801  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14802  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14803  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14804  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14805  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14806    BUILTIN_DESC_SWAP_OPERANDS },
14807  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14808    BUILTIN_DESC_SWAP_OPERANDS },
14809  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14810  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14811  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14812  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14813  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14814  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14815  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14816  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14817  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14818
14819  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14820  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14821  { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14822  { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14823
14824  { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14825  { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14826  { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14827  { MASK_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14828
14829  { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14830  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14831  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14832
14833  /* SSE2 MMX */
14834  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14835  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14836  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14837  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14838  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14839  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14840  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14841  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14842
14843  { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14844  { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14845  { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14846  { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14847  { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14848  { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14849  { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14850  { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14851
14852  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14853  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14854
14855  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14856  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14857  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14858  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14859
14860  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14861  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14862
14863  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14864  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14865  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14866  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14867  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14868  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14869
14870  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14871  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14872  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14873  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14874
14875  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14876  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14877  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14878  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14879  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14880  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14881  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14882  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14883
14884  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14885  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14886  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14887
14888  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14889  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14890
14891  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14892  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14893
14894  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14895  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14896  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14897
14898  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14899  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14900  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14901
14902  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14903  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14904
14905  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14906
14907  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14908  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14909  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14910  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14911
14912  /* SSE3 */
14913  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14914  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14915  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14916  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14917  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14918  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
14919};
14920
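/* Builtins that take a single operand.  The name field is unused (0) for
   these entries; each builtin is registered individually in
   ix86_init_mmx_sse_builtins with an explicit type, and this table is then
   consulted for the insn code when calls to them are expanded.  */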
14921static const struct builtin_description bdesc_1arg[] =
14922{
14923  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14924  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14925
14926  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14927  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14928  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14929
14930  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14931  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14932  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14933  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14934  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14935  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14936
14937  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14938  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14939
14940  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14941
14942  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14943  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14944
14945  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14946  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14947  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14948  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14949  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14950
14951  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14952
14953  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14954  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14955  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14956  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14957
14958  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14959  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14960  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14961
14962  /* SSE3 */
14963  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14964  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14965};
14966
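/* Register the x86 builtin functions.  In this version they are all
   MMX/SSE builtins, so there is nothing to do unless MMX is enabled.  */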
14967static void
14968ix86_init_builtins (void)
14969{
14970  if (TARGET_MMX)
14971    ix86_init_mmx_sse_builtins ();
14972}
14973
14974/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
14975   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
14976   builtins.  */
14977static void
14978ix86_init_mmx_sse_builtins (void)
14979{
14980  const struct builtin_description * d;
14981  size_t i;
14982
14983  tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14984  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14985  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14986  tree V2DI_type_node
14987    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14988  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14989  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14990  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14991  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14992  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14993  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14994
14995  tree pchar_type_node = build_pointer_type (char_type_node);
14996  tree pcchar_type_node = build_pointer_type (
14997			     build_type_variant (char_type_node, 1, 0));
14998  tree pfloat_type_node = build_pointer_type (float_type_node);
14999  tree pcfloat_type_node = build_pointer_type (
15000			     build_type_variant (float_type_node, 1, 0));
15001  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15002  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15003  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15004
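  /* The local trees below name function types as RESULT_ftype_ARGS:
     e.g. v4sf_ftype_v4sf_v2si is the type of a function taking (V4SF, V2SI)
     arguments and returning a V4SF value.  */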
15005  /* Comparisons.  */
15006  tree int_ftype_v4sf_v4sf
15007    = build_function_type_list (integer_type_node,
15008				V4SF_type_node, V4SF_type_node, NULL_TREE);
15009  tree v4si_ftype_v4sf_v4sf
15010    = build_function_type_list (V4SI_type_node,
15011				V4SF_type_node, V4SF_type_node, NULL_TREE);
15012  /* MMX/SSE/integer conversions.  */
15013  tree int_ftype_v4sf
15014    = build_function_type_list (integer_type_node,
15015				V4SF_type_node, NULL_TREE);
15016  tree int64_ftype_v4sf
15017    = build_function_type_list (long_long_integer_type_node,
15018				V4SF_type_node, NULL_TREE);
15019  tree int_ftype_v8qi
15020    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15021  tree v4sf_ftype_v4sf_int
15022    = build_function_type_list (V4SF_type_node,
15023				V4SF_type_node, integer_type_node, NULL_TREE);
15024  tree v4sf_ftype_v4sf_int64
15025    = build_function_type_list (V4SF_type_node,
15026				V4SF_type_node, long_long_integer_type_node,
15027				NULL_TREE);
15028  tree v4sf_ftype_v4sf_v2si
15029    = build_function_type_list (V4SF_type_node,
15030				V4SF_type_node, V2SI_type_node, NULL_TREE);
15031
15032  /* Miscellaneous.  */
15033  tree v8qi_ftype_v4hi_v4hi
15034    = build_function_type_list (V8QI_type_node,
15035				V4HI_type_node, V4HI_type_node, NULL_TREE);
15036  tree v4hi_ftype_v2si_v2si
15037    = build_function_type_list (V4HI_type_node,
15038				V2SI_type_node, V2SI_type_node, NULL_TREE);
15039  tree v4sf_ftype_v4sf_v4sf_int
15040    = build_function_type_list (V4SF_type_node,
15041				V4SF_type_node, V4SF_type_node,
15042				integer_type_node, NULL_TREE);
15043  tree v2si_ftype_v4hi_v4hi
15044    = build_function_type_list (V2SI_type_node,
15045				V4HI_type_node, V4HI_type_node, NULL_TREE);
15046  tree v4hi_ftype_v4hi_int
15047    = build_function_type_list (V4HI_type_node,
15048				V4HI_type_node, integer_type_node, NULL_TREE);
15049  tree v4hi_ftype_v4hi_di
15050    = build_function_type_list (V4HI_type_node,
15051				V4HI_type_node, long_long_unsigned_type_node,
15052				NULL_TREE);
15053  tree v2si_ftype_v2si_di
15054    = build_function_type_list (V2SI_type_node,
15055				V2SI_type_node, long_long_unsigned_type_node,
15056				NULL_TREE);
15057  tree void_ftype_void
15058    = build_function_type (void_type_node, void_list_node);
15059  tree void_ftype_unsigned
15060    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15061  tree void_ftype_unsigned_unsigned
15062    = build_function_type_list (void_type_node, unsigned_type_node,
15063				unsigned_type_node, NULL_TREE);
15064  tree void_ftype_pcvoid_unsigned_unsigned
15065    = build_function_type_list (void_type_node, const_ptr_type_node,
15066				unsigned_type_node, unsigned_type_node,
15067				NULL_TREE);
15068  tree unsigned_ftype_void
15069    = build_function_type (unsigned_type_node, void_list_node);
15070  tree v2si_ftype_v4sf
15071    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15072  /* Loads/stores.  */
15073  tree void_ftype_v8qi_v8qi_pchar
15074    = build_function_type_list (void_type_node,
15075				V8QI_type_node, V8QI_type_node,
15076				pchar_type_node, NULL_TREE);
15077  tree v4sf_ftype_pcfloat
15078    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15079  /* @@@ the type is bogus */
15080  tree v4sf_ftype_v4sf_pv2si
15081    = build_function_type_list (V4SF_type_node,
15082				V4SF_type_node, pv2si_type_node, NULL_TREE);
15083  tree void_ftype_pv2si_v4sf
15084    = build_function_type_list (void_type_node,
15085				pv2si_type_node, V4SF_type_node, NULL_TREE);
15086  tree void_ftype_pfloat_v4sf
15087    = build_function_type_list (void_type_node,
15088				pfloat_type_node, V4SF_type_node, NULL_TREE);
15089  tree void_ftype_pdi_di
15090    = build_function_type_list (void_type_node,
15091				pdi_type_node, long_long_unsigned_type_node,
15092				NULL_TREE);
15093  tree void_ftype_pv2di_v2di
15094    = build_function_type_list (void_type_node,
15095				pv2di_type_node, V2DI_type_node, NULL_TREE);
15096  /* Normal vector unops.  */
15097  tree v4sf_ftype_v4sf
15098    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15099
15100  /* Normal vector binops.  */
15101  tree v4sf_ftype_v4sf_v4sf
15102    = build_function_type_list (V4SF_type_node,
15103				V4SF_type_node, V4SF_type_node, NULL_TREE);
15104  tree v8qi_ftype_v8qi_v8qi
15105    = build_function_type_list (V8QI_type_node,
15106				V8QI_type_node, V8QI_type_node, NULL_TREE);
15107  tree v4hi_ftype_v4hi_v4hi
15108    = build_function_type_list (V4HI_type_node,
15109				V4HI_type_node, V4HI_type_node, NULL_TREE);
15110  tree v2si_ftype_v2si_v2si
15111    = build_function_type_list (V2SI_type_node,
15112				V2SI_type_node, V2SI_type_node, NULL_TREE);
15113  tree di_ftype_di_di
15114    = build_function_type_list (long_long_unsigned_type_node,
15115				long_long_unsigned_type_node,
15116				long_long_unsigned_type_node, NULL_TREE);
15117
15118  tree v2si_ftype_v2sf
15119    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15120  tree v2sf_ftype_v2si
15121    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15122  tree v2si_ftype_v2si
15123    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15124  tree v2sf_ftype_v2sf
15125    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15126  tree v2sf_ftype_v2sf_v2sf
15127    = build_function_type_list (V2SF_type_node,
15128				V2SF_type_node, V2SF_type_node, NULL_TREE);
15129  tree v2si_ftype_v2sf_v2sf
15130    = build_function_type_list (V2SI_type_node,
15131				V2SF_type_node, V2SF_type_node, NULL_TREE);
15132  tree pint_type_node    = build_pointer_type (integer_type_node);
15133  tree pdouble_type_node = build_pointer_type (double_type_node);
15134  tree pcdouble_type_node = build_pointer_type (
15135				build_type_variant (double_type_node, 1, 0));
15136  tree int_ftype_v2df_v2df
15137    = build_function_type_list (integer_type_node,
15138				V2DF_type_node, V2DF_type_node, NULL_TREE);
15139
15140  tree void_ftype_pcvoid
15141    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15142  tree v4sf_ftype_v4si
15143    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15144  tree v4si_ftype_v4sf
15145    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15146  tree v2df_ftype_v4si
15147    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15148  tree v4si_ftype_v2df
15149    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15150  tree v2si_ftype_v2df
15151    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15152  tree v4sf_ftype_v2df
15153    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15154  tree v2df_ftype_v2si
15155    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15156  tree v2df_ftype_v4sf
15157    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15158  tree int_ftype_v2df
15159    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15160  tree int64_ftype_v2df
15161    = build_function_type_list (long_long_integer_type_node,
15162				V2DF_type_node, NULL_TREE);
15163  tree v2df_ftype_v2df_int
15164    = build_function_type_list (V2DF_type_node,
15165				V2DF_type_node, integer_type_node, NULL_TREE);
15166  tree v2df_ftype_v2df_int64
15167    = build_function_type_list (V2DF_type_node,
15168				V2DF_type_node, long_long_integer_type_node,
15169				NULL_TREE);
15170  tree v4sf_ftype_v4sf_v2df
15171    = build_function_type_list (V4SF_type_node,
15172				V4SF_type_node, V2DF_type_node, NULL_TREE);
15173  tree v2df_ftype_v2df_v4sf
15174    = build_function_type_list (V2DF_type_node,
15175				V2DF_type_node, V4SF_type_node, NULL_TREE);
15176  tree v2df_ftype_v2df_v2df_int
15177    = build_function_type_list (V2DF_type_node,
15178				V2DF_type_node, V2DF_type_node,
15179				integer_type_node,
15180				NULL_TREE);
15181  tree v2df_ftype_v2df_pcdouble
15182    = build_function_type_list (V2DF_type_node,
15183				V2DF_type_node, pcdouble_type_node, NULL_TREE);
15184  tree void_ftype_pdouble_v2df
15185    = build_function_type_list (void_type_node,
15186				pdouble_type_node, V2DF_type_node, NULL_TREE);
15187  tree void_ftype_pint_int
15188    = build_function_type_list (void_type_node,
15189				pint_type_node, integer_type_node, NULL_TREE);
15190  tree void_ftype_v16qi_v16qi_pchar
15191    = build_function_type_list (void_type_node,
15192				V16QI_type_node, V16QI_type_node,
15193				pchar_type_node, NULL_TREE);
15194  tree v2df_ftype_pcdouble
15195    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15196  tree v2df_ftype_v2df_v2df
15197    = build_function_type_list (V2DF_type_node,
15198				V2DF_type_node, V2DF_type_node, NULL_TREE);
15199  tree v16qi_ftype_v16qi_v16qi
15200    = build_function_type_list (V16QI_type_node,
15201				V16QI_type_node, V16QI_type_node, NULL_TREE);
15202  tree v8hi_ftype_v8hi_v8hi
15203    = build_function_type_list (V8HI_type_node,
15204				V8HI_type_node, V8HI_type_node, NULL_TREE);
15205  tree v4si_ftype_v4si_v4si
15206    = build_function_type_list (V4SI_type_node,
15207				V4SI_type_node, V4SI_type_node, NULL_TREE);
15208  tree v2di_ftype_v2di_v2di
15209    = build_function_type_list (V2DI_type_node,
15210				V2DI_type_node, V2DI_type_node, NULL_TREE);
15211  tree v2di_ftype_v2df_v2df
15212    = build_function_type_list (V2DI_type_node,
15213				V2DF_type_node, V2DF_type_node, NULL_TREE);
15214  tree v2df_ftype_v2df
15215    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15216  tree v2di_ftype_v2di_int
15217    = build_function_type_list (V2DI_type_node,
15218				V2DI_type_node, integer_type_node, NULL_TREE);
15219  tree v4si_ftype_v4si_int
15220    = build_function_type_list (V4SI_type_node,
15221				V4SI_type_node, integer_type_node, NULL_TREE);
15222  tree v8hi_ftype_v8hi_int
15223    = build_function_type_list (V8HI_type_node,
15224				V8HI_type_node, integer_type_node, NULL_TREE);
15225  tree v4si_ftype_v8hi_v8hi
15226    = build_function_type_list (V4SI_type_node,
15227				V8HI_type_node, V8HI_type_node, NULL_TREE);
15228  tree di_ftype_v8qi_v8qi
15229    = build_function_type_list (long_long_unsigned_type_node,
15230				V8QI_type_node, V8QI_type_node, NULL_TREE);
15231  tree di_ftype_v2si_v2si
15232    = build_function_type_list (long_long_unsigned_type_node,
15233				V2SI_type_node, V2SI_type_node, NULL_TREE);
15234  tree v2di_ftype_v16qi_v16qi
15235    = build_function_type_list (V2DI_type_node,
15236				V16QI_type_node, V16QI_type_node, NULL_TREE);
15237  tree v2di_ftype_v4si_v4si
15238    = build_function_type_list (V2DI_type_node,
15239				V4SI_type_node, V4SI_type_node, NULL_TREE);
15240  tree int_ftype_v16qi
15241    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15242  tree v16qi_ftype_pcchar
15243    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15244  tree void_ftype_pchar_v16qi
15245    = build_function_type_list (void_type_node,
15246			        pchar_type_node, V16QI_type_node, NULL_TREE);
15247
15248  tree float80_type;
15249  tree float128_type;
15250  tree ftype;
15251
15252  /* The __float80 type.  */
15253  if (TYPE_MODE (long_double_type_node) == XFmode)
15254    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15255					       "__float80");
15256  else
15257    {
15258      /* long double is not the 80-bit type, so build __float80 separately.  */
15259      float80_type = make_node (REAL_TYPE);
15260      TYPE_PRECISION (float80_type) = 80;
15261      layout_type (float80_type);
15262      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15263    }
15264
15265  if (TARGET_64BIT)
15266    {
15267      float128_type = make_node (REAL_TYPE);
15268      TYPE_PRECISION (float128_type) = 128;
15269      layout_type (float128_type);
15270      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15271    }
15272
15273  /* Add all builtins that are more or less simple operations on two
15274     operands.  */
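  /* The mode of insn operand 1 selects the function type below.  Entries
     with a null name are skipped here; those builtins are registered
     individually later with explicit types.  */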
15275  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15276    {
15277      /* Use one of the operands; the target can have a different mode for
15278	 mask-generating compares.  */
15279      enum machine_mode mode;
15280      tree type;
15281
15282      if (d->name == 0)
15283	continue;
15284      mode = insn_data[d->icode].operand[1].mode;
15285
15286      switch (mode)
15287	{
15288	case V16QImode:
15289	  type = v16qi_ftype_v16qi_v16qi;
15290	  break;
15291	case V8HImode:
15292	  type = v8hi_ftype_v8hi_v8hi;
15293	  break;
15294	case V4SImode:
15295	  type = v4si_ftype_v4si_v4si;
15296	  break;
15297	case V2DImode:
15298	  type = v2di_ftype_v2di_v2di;
15299	  break;
15300	case V2DFmode:
15301	  type = v2df_ftype_v2df_v2df;
15302	  break;
15303	case V4SFmode:
15304	  type = v4sf_ftype_v4sf_v4sf;
15305	  break;
15306	case V8QImode:
15307	  type = v8qi_ftype_v8qi_v8qi;
15308	  break;
15309	case V4HImode:
15310	  type = v4hi_ftype_v4hi_v4hi;
15311	  break;
15312	case V2SImode:
15313	  type = v2si_ftype_v2si_v2si;
15314	  break;
15315	case DImode:
15316	  type = di_ftype_di_di;
15317	  break;
15318
15319	default:
15320	  gcc_unreachable ();
15321	}
15322
15323      /* Override for comparisons.  */
15324      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15325	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15326	type = v4si_ftype_v4sf_v4sf;
15327
15328      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15329	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15330	type = v2di_ftype_v2df_v2df;
15331
15332      def_builtin (d->mask, d->name, type, d->code);
15333    }
15334
15335  /* Add the remaining MMX insns with somewhat more complicated types.  */
15336  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15337  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15338  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15339  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15340
15341  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15342  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15343  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15344
15345  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15346  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15347
15348  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15349  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15350
15351  /* comi/ucomi insns.  */
15352  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15353    if (d->mask == MASK_SSE2)
15354      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15355    else
15356      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15357
15358  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15359  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15360  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15361
15362  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15363  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15364  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15365  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15366  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15367  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15368  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15369  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15370  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15371  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15372  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15373
15374  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15375
15376  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15377  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15378
15379  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15380  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15381  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15382  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15383
15384  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15385  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15386  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15387  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15388
15389  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15390
15391  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15392
15393  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15394  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15395  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15396  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15397  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15398  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15399
15400  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15401
15402  /* Original 3DNow!  */
15403  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15404  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15405  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15406  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15407  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15408  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15409  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15410  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15411  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15412  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15413  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15414  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15415  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15416  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15417  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15418  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15419  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15420  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15421  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15422  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15423
15424  /* 3DNow! extension as used in the Athlon CPU.  */
15425  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15426  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15427  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15428  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15429  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15430  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15431
15432  /* SSE2 */
15433  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15434
15435  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15436  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15437
15438  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15439  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15440
15441  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15442  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15443  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15444  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15445  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15446
15447  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15448  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15449  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15450  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15451
15452  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15453  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15454
15455  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15456
15457  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15458  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15459
15460  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15461  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15462  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15463  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15464  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15465
15466  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15467
15468  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15469  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15470  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15471  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15472
15473  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15474  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15475  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15476
15477  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15478  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15479  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15480  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15481
15482  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15483  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15484  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15485
15486  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15487  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15488
15489  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15490  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15491
15492  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
15493  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
15494  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15495
15496  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
15497  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
15498  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15499
15500  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
15501  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
15502
15503  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15504  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15505  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15506  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15507
15508  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15509  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15510  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15511  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15512
15513  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15514  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15515
15516  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15517
15518  /* Prescott New Instructions (SSE3).  */
15519  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15520	       void_ftype_pcvoid_unsigned_unsigned,
15521	       IX86_BUILTIN_MONITOR);
15522  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15523	       void_ftype_unsigned_unsigned,
15524	       IX86_BUILTIN_MWAIT);
15525  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15526	       v4sf_ftype_v4sf,
15527	       IX86_BUILTIN_MOVSHDUP);
15528  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15529	       v4sf_ftype_v4sf,
15530	       IX86_BUILTIN_MOVSLDUP);
15531  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15532	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15533
15534  /* Access to the vec_init patterns.  */
15535  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15536				    integer_type_node, NULL_TREE);
15537  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15538	       ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15539
15540  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15541				    short_integer_type_node,
15542				    short_integer_type_node,
15543				    short_integer_type_node, NULL_TREE);
15544  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15545	       ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15546
15547  ftype = build_function_type_list (V8QI_type_node, char_type_node,
15548				    char_type_node, char_type_node,
15549				    char_type_node, char_type_node,
15550				    char_type_node, char_type_node,
15551				    char_type_node, NULL_TREE);
15552  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15553	       ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15554
15555  /* Access to the vec_extract patterns.  */
15556  ftype = build_function_type_list (double_type_node, V2DF_type_node,
15557				    integer_type_node, NULL_TREE);
15558  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
15559	       ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15560
15561  ftype = build_function_type_list (long_long_integer_type_node,
15562				    V2DI_type_node, integer_type_node,
15563				    NULL_TREE);
15564  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
15565	       ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15566
15567  ftype = build_function_type_list (float_type_node, V4SF_type_node,
15568				    integer_type_node, NULL_TREE);
15569  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15570	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15571
15572  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15573				    integer_type_node, NULL_TREE);
15574  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
15575	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15576
15577  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15578				    integer_type_node, NULL_TREE);
15579  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
15580	       ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15581
15582  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15583				    integer_type_node, NULL_TREE);
15584  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15585	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15586
15587  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15588				    integer_type_node, NULL_TREE);
15589  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15590	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15591
15592  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15593				    integer_type_node, NULL_TREE);
15594  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
15595
15596  /* Access to the vec_set patterns.  */
15597  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15598				    intHI_type_node,
15599				    integer_type_node, NULL_TREE);
15600  def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
15601	       ftype, IX86_BUILTIN_VEC_SET_V8HI);
15602
15603  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15604				    intHI_type_node,
15605				    integer_type_node, NULL_TREE);
15606  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15607	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
15608}
15609
15610/* Errors in the source file can cause expand_expr to return const0_rtx
15611   where we expect a vector.  To avoid crashing, use one of the vector
15612   clear instructions.  */
15613static rtx
15614safe_vector_operand (rtx x, enum machine_mode mode)
15615{
15616  if (x == const0_rtx)
15617    x = CONST0_RTX (mode);
15618  return x;
15619}
15620
15621/* Subroutine of ix86_expand_builtin to take care of binop insns.  */
15622
15623static rtx
15624ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15625{
15626  rtx pat, xops[3];
15627  tree arg0 = TREE_VALUE (arglist);
15628  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15629  rtx op0 = expand_normal (arg0);
15630  rtx op1 = expand_normal (arg1);
15631  enum machine_mode tmode = insn_data[icode].operand[0].mode;
15632  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15633  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15634
15635  if (VECTOR_MODE_P (mode0))
15636    op0 = safe_vector_operand (op0, mode0);
15637  if (VECTOR_MODE_P (mode1))
15638    op1 = safe_vector_operand (op1, mode1);
15639
15640  if (optimize || !target
15641      || GET_MODE (target) != tmode
15642      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15643    target = gen_reg_rtx (tmode);
15644
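  /* If an SImode value was passed where the insn pattern expects a TImode
     operand, load it into the low element of an XMM register and use the
     TImode lowpart of that register instead.  */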
15645  if (GET_MODE (op1) == SImode && mode1 == TImode)
15646    {
15647      rtx x = gen_reg_rtx (V4SImode);
15648      emit_insn (gen_sse2_loadd (x, op1));
15649      op1 = gen_lowpart (TImode, x);
15650    }
15651
15652  /* The insn must want input operands in the same modes as the
15653     result.  */
15654  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15655	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15656
15657  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15658    op0 = copy_to_mode_reg (mode0, op0);
15659  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15660    op1 = copy_to_mode_reg (mode1, op1);
15661
15662  /* ??? Using ix86_fixup_binary_operands is problematic when
15663     we've got mismatched modes.  Fake it.  */
15664
15665  xops[0] = target;
15666  xops[1] = op0;
15667  xops[2] = op1;
15668
15669  if (tmode == mode0 && tmode == mode1)
15670    {
15671      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15672      op0 = xops[1];
15673      op1 = xops[2];
15674    }
15675  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15676    {
15677      op0 = force_reg (mode0, op0);
15678      op1 = force_reg (mode1, op1);
15679      target = gen_reg_rtx (tmode);
15680    }
15681
15682  pat = GEN_FCN (icode) (target, op0, op1);
15683  if (! pat)
15684    return 0;
15685  emit_insn (pat);
15686  return target;
15687}
15688
15689/* Subroutine of ix86_expand_builtin to take care of stores.  */
15690
15691static rtx
15692ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15693{
15694  rtx pat;
15695  tree arg0 = TREE_VALUE (arglist);
15696  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15697  rtx op0 = expand_normal (arg0);
15698  rtx op1 = expand_normal (arg1);
15699  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15700  enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15701
15702  if (VECTOR_MODE_P (mode1))
15703    op1 = safe_vector_operand (op1, mode1);
15704
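  /* The first argument is a pointer; make it the MEM destination.  The
     second argument is the value being stored.  */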
15705  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15706  op1 = copy_to_mode_reg (mode1, op1);
15707
15708  pat = GEN_FCN (icode) (op0, op1);
15709  if (pat)
15710    emit_insn (pat);
15711  return 0;
15712}
15713
15714/* Subroutine of ix86_expand_builtin to take care of unop insns.  */
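/* DO_LOAD nonzero means ARG0 is a pointer and the operand is loaded from
   the memory it points to.  */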
15715
15716static rtx
15717ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15718			  rtx target, int do_load)
15719{
15720  rtx pat;
15721  tree arg0 = TREE_VALUE (arglist);
15722  rtx op0 = expand_normal (arg0);
15723  enum machine_mode tmode = insn_data[icode].operand[0].mode;
15724  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15725
15726  if (optimize || !target
15727      || GET_MODE (target) != tmode
15728      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15729    target = gen_reg_rtx (tmode);
15730  if (do_load)
15731    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15732  else
15733    {
15734      if (VECTOR_MODE_P (mode0))
15735	op0 = safe_vector_operand (op0, mode0);
15736
15737      if ((optimize && !register_operand (op0, mode0))
15738	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15739	op0 = copy_to_mode_reg (mode0, op0);
15740    }
15741
15742  pat = GEN_FCN (icode) (target, op0);
15743  if (! pat)
15744    return 0;
15745  emit_insn (pat);
15746  return target;
15747}
15748
15749/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15750   sqrtss, rsqrtss, rcpss.  */
15751
15752static rtx
15753ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15754{
15755  rtx pat;
15756  tree arg0 = TREE_VALUE (arglist);
15757  rtx op1, op0 = expand_normal (arg0);
15758  enum machine_mode tmode = insn_data[icode].operand[0].mode;
15759  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15760
15761  if (optimize || !target
15762      || GET_MODE (target) != tmode
15763      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15764    target = gen_reg_rtx (tmode);
15765
15766  if (VECTOR_MODE_P (mode0))
15767    op0 = safe_vector_operand (op0, mode0);
15768
15769  if ((optimize && !register_operand (op0, mode0))
15770      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15771    op0 = copy_to_mode_reg (mode0, op0);
15772
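  /* These scalar patterns take a second vector operand whose upper elements
     pass through to the result; the builtin supplies the same value for
     both, so only the low element is actually computed.  */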
15773  op1 = op0;
15774  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15775    op1 = copy_to_mode_reg (mode0, op1);
15776
15777  pat = GEN_FCN (icode) (target, op0, op1);
15778  if (! pat)
15779    return 0;
15780  emit_insn (pat);
15781  return target;
15782}
15783
15784/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
15785
15786static rtx
15787ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15788			 rtx target)
15789{
15790  rtx pat;
15791  tree arg0 = TREE_VALUE (arglist);
15792  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15793  rtx op0 = expand_normal (arg0);
15794  rtx op1 = expand_normal (arg1);
15795  rtx op2;
15796  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15797  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15798  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15799  enum rtx_code comparison = d->comparison;
15800
15801  if (VECTOR_MODE_P (mode0))
15802    op0 = safe_vector_operand (op0, mode0);
15803  if (VECTOR_MODE_P (mode1))
15804    op1 = safe_vector_operand (op1, mode1);
15805
15806  /* Swap operands if we have a comparison that isn't available in
15807     hardware.  */
15808  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15809    {
15810      rtx tmp = gen_reg_rtx (mode1);
15811      emit_move_insn (tmp, op1);
15812      op1 = op0;
15813      op0 = tmp;
15814    }
15815
15816  if (optimize || !target
15817      || GET_MODE (target) != tmode
15818      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15819    target = gen_reg_rtx (tmode);
15820
15821  if ((optimize && !register_operand (op0, mode0))
15822      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15823    op0 = copy_to_mode_reg (mode0, op0);
15824  if ((optimize && !register_operand (op1, mode1))
15825      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15826    op1 = copy_to_mode_reg (mode1, op1);
15827
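  /* Pass the comparison code to the insn as an extra operand so the pattern
     can emit the matching cmpps/cmppd variant.  */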
15828  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15829  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15830  if (! pat)
15831    return 0;
15832  emit_insn (pat);
15833  return target;
15834}
15835
15836/* Subroutine of ix86_expand_builtin to take care of comi insns.  */
15837
15838static rtx
15839ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15840		      rtx target)
15841{
15842  rtx pat;
15843  tree arg0 = TREE_VALUE (arglist);
15844  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15845  rtx op0 = expand_normal (arg0);
15846  rtx op1 = expand_normal (arg1);
15847  rtx op2;
15848  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15849  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15850  enum rtx_code comparison = d->comparison;
15851
15852  if (VECTOR_MODE_P (mode0))
15853    op0 = safe_vector_operand (op0, mode0);
15854  if (VECTOR_MODE_P (mode1))
15855    op1 = safe_vector_operand (op1, mode1);
15856
15857  /* Swap operands if we have a comparison that isn't available in
15858     hardware.  */
15859  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15860    {
15861      rtx tmp = op1;
15862      op1 = op0;
15863      op0 = tmp;
15864    }
15865
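  /* The comi insn only sets the condition flags.  Materialize the boolean
     result by zeroing an SImode pseudo and then writing its low byte, via
     STRICT_LOW_PART, from a comparison of the insn's destination (the
     flags); the full SImode register is what gets returned.  */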
15866  target = gen_reg_rtx (SImode);
15867  emit_move_insn (target, const0_rtx);
15868  target = gen_rtx_SUBREG (QImode, target, 0);
15869
15870  if ((optimize && !register_operand (op0, mode0))
15871      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15872    op0 = copy_to_mode_reg (mode0, op0);
15873  if ((optimize && !register_operand (op1, mode1))
15874      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15875    op1 = copy_to_mode_reg (mode1, op1);
15876
15877  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15878  pat = GEN_FCN (d->icode) (op0, op1);
15879  if (! pat)
15880    return 0;
15881  emit_insn (pat);
15882  emit_insn (gen_rtx_SET (VOIDmode,
15883			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15884			  gen_rtx_fmt_ee (comparison, QImode,
15885					  SET_DEST (pat),
15886					  const0_rtx)));
15887
15888  return SUBREG_REG (target);
15889}
15890
15891/* Return the integer constant in ARG.  Constrain it to be in the range
15892   of the subparts of VEC_TYPE; issue an error if not.  */
15893
15894static int
15895get_element_number (tree vec_type, tree arg)
15896{
15897  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15898
15899  if (!host_integerp (arg, 1)
15900      || (elt = tree_low_cst (arg, 1), elt > max))
15901    {
15902      error ("selector must be an integer constant in the range 0..%wi", max);
15903      return 0;
15904    }
15905
15906  return elt;
15907}
15908
15909/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
15910   ix86_expand_vector_init.  We DO have language-level syntax for this, in
15911   the form of  (type){ init-list }.  However, since the compiler cannot
15912   place the required emms instructions itself, we can't allow the use of MMX
15913   registers unless the user explicitly asks for it.  So we do *not* define
15914   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
15915   we have builtins invoked by mmintrin.h that give us license to emit
15916   these sorts of instructions.  */
15917
15918static rtx
15919ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15920{
15921  enum machine_mode tmode = TYPE_MODE (type);
15922  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15923  int i, n_elt = GET_MODE_NUNITS (tmode);
15924  rtvec v = rtvec_alloc (n_elt);
15925
15926  gcc_assert (VECTOR_MODE_P (tmode));
15927
15928  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15929    {
15930      rtx x = expand_normal (TREE_VALUE (arglist));
15931      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15932    }
15933
15934  gcc_assert (arglist == NULL);
15935
15936  if (!target || !register_operand (target, tmode))
15937    target = gen_reg_rtx (tmode);
15938
15939  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15940  return target;
15941}
15942
15943/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
15944   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
15945   had a language-level syntax for referencing vector elements.  */
15946
15947static rtx
15948ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15949{
15950  enum machine_mode tmode, mode0;
15951  tree arg0, arg1;
15952  int elt;
15953  rtx op0;
15954
15955  arg0 = TREE_VALUE (arglist);
15956  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15957
15958  op0 = expand_normal (arg0);
15959  elt = get_element_number (TREE_TYPE (arg0), arg1);
15960
15961  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15962  mode0 = TYPE_MODE (TREE_TYPE (arg0));
15963  gcc_assert (VECTOR_MODE_P (mode0));
15964
15965  op0 = force_reg (mode0, op0);
15966
15967  if (optimize || !target || !register_operand (target, tmode))
15968    target = gen_reg_rtx (tmode);
15969
15970  ix86_expand_vector_extract (true, target, op0, elt);
15971
15972  return target;
15973}
15974
15975/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
15976   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
15977   a language-level syntax for referencing vector elements.  */
15978
15979static rtx
15980ix86_expand_vec_set_builtin (tree arglist)
15981{
15982  enum machine_mode tmode, mode1;
15983  tree arg0, arg1, arg2;
15984  int elt;
15985  rtx op0, op1, target;
15986
15987  arg0 = TREE_VALUE (arglist);
15988  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15989  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15990
15991  tmode = TYPE_MODE (TREE_TYPE (arg0));
15992  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15993  gcc_assert (VECTOR_MODE_P (tmode));
15994
15995  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15996  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15997  elt = get_element_number (TREE_TYPE (arg0), arg2);
15998
15999  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16000    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16001
16002  op0 = force_reg (tmode, op0);
16003  op1 = force_reg (mode1, op1);
16004
16005  /* OP0 is the source operand of these builtins and shouldn't be
16006     modified.  Create a copy, use it, and return it as the target.  */
16007  target = gen_reg_rtx (tmode);
16008  emit_move_insn (target, op0);
16009  ix86_expand_vector_set (true, target, op1, elt);
16010
16011  return target;
16012}
16013
16014/* Expand an expression EXP that calls a built-in function,
16015   with result going to TARGET if that's convenient
16016   (and in mode MODE if that's convenient).
16017   SUBTARGET may be used as the target for computing one of EXP's operands.
16018   IGNORE is nonzero if the value is to be ignored.  */
16019
16020static rtx
16021ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16022		     enum machine_mode mode ATTRIBUTE_UNUSED,
16023		     int ignore ATTRIBUTE_UNUSED)
16024{
16025  const struct builtin_description *d;
16026  size_t i;
16027  enum insn_code icode;
16028  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16029  tree arglist = TREE_OPERAND (exp, 1);
16030  tree arg0, arg1, arg2;
16031  rtx op0, op1, op2, pat;
16032  enum machine_mode tmode, mode0, mode1, mode2;
16033  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16034
16035  switch (fcode)
16036    {
16037    case IX86_BUILTIN_EMMS:
16038      emit_insn (gen_mmx_emms ());
16039      return 0;
16040
16041    case IX86_BUILTIN_SFENCE:
16042      emit_insn (gen_sse_sfence ());
16043      return 0;
16044
16045    case IX86_BUILTIN_MASKMOVQ:
16046    case IX86_BUILTIN_MASKMOVDQU:
16047      icode = (fcode == IX86_BUILTIN_MASKMOVQ
16048	       ? CODE_FOR_mmx_maskmovq
16049	       : CODE_FOR_sse2_maskmovdqu);
16050      /* Note the arg order is different from the operand order.  */
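      /* The builtin passes the address last, but the insn wants the memory
	 destination as operand 0; rotate the arguments accordingly.  */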
16051      arg1 = TREE_VALUE (arglist);
16052      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16053      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16054      op0 = expand_normal (arg0);
16055      op1 = expand_normal (arg1);
16056      op2 = expand_normal (arg2);
16057      mode0 = insn_data[icode].operand[0].mode;
16058      mode1 = insn_data[icode].operand[1].mode;
16059      mode2 = insn_data[icode].operand[2].mode;
16060
16061      op0 = force_reg (Pmode, op0);
16062      op0 = gen_rtx_MEM (mode1, op0);
16063
16064      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16065	op0 = copy_to_mode_reg (mode0, op0);
16066      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16067	op1 = copy_to_mode_reg (mode1, op1);
16068      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16069	op2 = copy_to_mode_reg (mode2, op2);
16070      pat = GEN_FCN (icode) (op0, op1, op2);
16071      if (! pat)
16072	return 0;
16073      emit_insn (pat);
16074      return 0;
16075
16076    case IX86_BUILTIN_SQRTSS:
16077      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16078    case IX86_BUILTIN_RSQRTSS:
16079      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16080    case IX86_BUILTIN_RCPSS:
16081      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16082
16083    case IX86_BUILTIN_LOADUPS:
16084      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16085
16086    case IX86_BUILTIN_STOREUPS:
16087      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16088
16089    case IX86_BUILTIN_LOADHPS:
16090    case IX86_BUILTIN_LOADLPS:
16091    case IX86_BUILTIN_LOADHPD:
16092    case IX86_BUILTIN_LOADLPD:
16093      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16094	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16095	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16096	       : CODE_FOR_sse2_loadlpd);
16097      arg0 = TREE_VALUE (arglist);
16098      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16099      op0 = expand_normal (arg0);
16100      op1 = expand_normal (arg1);
16101      tmode = insn_data[icode].operand[0].mode;
16102      mode0 = insn_data[icode].operand[1].mode;
16103      mode1 = insn_data[icode].operand[2].mode;
16104
16105      op0 = force_reg (mode0, op0);
16106      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16107      if (optimize || target == 0
16108	  || GET_MODE (target) != tmode
16109	  || !register_operand (target, tmode))
16110	target = gen_reg_rtx (tmode);
16111      pat = GEN_FCN (icode) (target, op0, op1);
16112      if (! pat)
16113	return 0;
16114      emit_insn (pat);
16115      return target;
16116
16117    case IX86_BUILTIN_STOREHPS:
16118    case IX86_BUILTIN_STORELPS:
16119      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16120	       : CODE_FOR_sse_storelps);
16121      arg0 = TREE_VALUE (arglist);
16122      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16123      op0 = expand_normal (arg0);
16124      op1 = expand_normal (arg1);
16125      mode0 = insn_data[icode].operand[0].mode;
16126      mode1 = insn_data[icode].operand[1].mode;
16127
16128      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16129      op1 = force_reg (mode1, op1);
16130
16131      pat = GEN_FCN (icode) (op0, op1);
16132      if (! pat)
16133	return 0;
16134      emit_insn (pat);
16135      return const0_rtx;
16136
16137    case IX86_BUILTIN_MOVNTPS:
16138      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16139    case IX86_BUILTIN_MOVNTQ:
16140      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16141
16142    case IX86_BUILTIN_LDMXCSR:
16143      op0 = expand_normal (TREE_VALUE (arglist));
16144      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16145      emit_move_insn (target, op0);
16146      emit_insn (gen_sse_ldmxcsr (target));
16147      return 0;
16148
16149    case IX86_BUILTIN_STMXCSR:
16150      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16151      emit_insn (gen_sse_stmxcsr (target));
16152      return copy_to_mode_reg (SImode, target);
16153
16154    case IX86_BUILTIN_SHUFPS:
16155    case IX86_BUILTIN_SHUFPD:
16156      icode = (fcode == IX86_BUILTIN_SHUFPS
16157	       ? CODE_FOR_sse_shufps
16158	       : CODE_FOR_sse2_shufpd);
16159      arg0 = TREE_VALUE (arglist);
16160      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16161      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16162      op0 = expand_normal (arg0);
16163      op1 = expand_normal (arg1);
16164      op2 = expand_normal (arg2);
16165      tmode = insn_data[icode].operand[0].mode;
16166      mode0 = insn_data[icode].operand[1].mode;
16167      mode1 = insn_data[icode].operand[2].mode;
16168      mode2 = insn_data[icode].operand[3].mode;
16169
16170      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16171	op0 = copy_to_mode_reg (mode0, op0);
16172      if ((optimize && !register_operand (op1, mode1))
16173	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16174	op1 = copy_to_mode_reg (mode1, op1);
16175      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16176	{
16177	  /* @@@ better error message */
16178	  error ("mask must be an immediate");
16179	  return gen_reg_rtx (tmode);
16180	}
16181      if (optimize || target == 0
16182	  || GET_MODE (target) != tmode
16183	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16184	target = gen_reg_rtx (tmode);
16185      pat = GEN_FCN (icode) (target, op0, op1, op2);
16186      if (! pat)
16187	return 0;
16188      emit_insn (pat);
16189      return target;
16190
16191    case IX86_BUILTIN_PSHUFW:
16192    case IX86_BUILTIN_PSHUFD:
16193    case IX86_BUILTIN_PSHUFHW:
16194    case IX86_BUILTIN_PSHUFLW:
16195      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16196	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16197	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16198	       : CODE_FOR_mmx_pshufw);
16199      arg0 = TREE_VALUE (arglist);
16200      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16201      op0 = expand_normal (arg0);
16202      op1 = expand_normal (arg1);
16203      tmode = insn_data[icode].operand[0].mode;
16204      mode1 = insn_data[icode].operand[1].mode;
16205      mode2 = insn_data[icode].operand[2].mode;
16206
16207      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16208	op0 = copy_to_mode_reg (mode1, op0);
16209      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16210	{
16211	  /* @@@ better error message */
16212	  error ("mask must be an immediate");
16213	  return const0_rtx;
16214	}
16215      if (target == 0
16216	  || GET_MODE (target) != tmode
16217	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16218	target = gen_reg_rtx (tmode);
16219      pat = GEN_FCN (icode) (target, op0, op1);
16220      if (! pat)
16221	return 0;
16222      emit_insn (pat);
16223      return target;
16224
16225    case IX86_BUILTIN_PSLLWI128:
16226      icode = CODE_FOR_ashlv8hi3;
16227      goto do_pshifti;
16228    case IX86_BUILTIN_PSLLDI128:
16229      icode = CODE_FOR_ashlv4si3;
16230      goto do_pshifti;
16231    case IX86_BUILTIN_PSLLQI128:
16232      icode = CODE_FOR_ashlv2di3;
16233      goto do_pshifti;
16234    case IX86_BUILTIN_PSRAWI128:
16235      icode = CODE_FOR_ashrv8hi3;
16236      goto do_pshifti;
16237    case IX86_BUILTIN_PSRADI128:
16238      icode = CODE_FOR_ashrv4si3;
16239      goto do_pshifti;
16240    case IX86_BUILTIN_PSRLWI128:
16241      icode = CODE_FOR_lshrv8hi3;
16242      goto do_pshifti;
16243    case IX86_BUILTIN_PSRLDI128:
16244      icode = CODE_FOR_lshrv4si3;
16245      goto do_pshifti;
16246    case IX86_BUILTIN_PSRLQI128:
16247      icode = CODE_FOR_lshrv2di3;
16248      goto do_pshifti;
16249    do_pshifti:
16250      arg0 = TREE_VALUE (arglist);
16251      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16252      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16253      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16254
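      /* The shift count must be a compile-time constant.  Counts outside
	 0..255 are canonicalized to 255 below; any count at least as large
	 as the element width yields the same hardware result (all zeros, or
	 all copies of the sign bit for arithmetic shifts), so the exact
	 out-of-range value is irrelevant.  */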
16255      if (GET_CODE (op1) != CONST_INT)
16256	{
16257	  error ("shift must be an immediate");
16258	  return const0_rtx;
16259	}
16260      if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
16261	op1 = GEN_INT (255);
16262
16263      tmode = insn_data[icode].operand[0].mode;
16264      mode1 = insn_data[icode].operand[1].mode;
16265      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16266	op0 = copy_to_reg (op0);
16267
16268      target = gen_reg_rtx (tmode);
16269      pat = GEN_FCN (icode) (target, op0, op1);
16270      if (!pat)
16271	return 0;
16272      emit_insn (pat);
16273      return target;
16274
16275    case IX86_BUILTIN_PSLLW128:
16276      icode = CODE_FOR_ashlv8hi3;
16277      goto do_pshift;
16278    case IX86_BUILTIN_PSLLD128:
16279      icode = CODE_FOR_ashlv4si3;
16280      goto do_pshift;
16281    case IX86_BUILTIN_PSLLQ128:
16282      icode = CODE_FOR_ashlv2di3;
16283      goto do_pshift;
16284    case IX86_BUILTIN_PSRAW128:
16285      icode = CODE_FOR_ashrv8hi3;
16286      goto do_pshift;
16287    case IX86_BUILTIN_PSRAD128:
16288      icode = CODE_FOR_ashrv4si3;
16289      goto do_pshift;
16290    case IX86_BUILTIN_PSRLW128:
16291      icode = CODE_FOR_lshrv8hi3;
16292      goto do_pshift;
16293    case IX86_BUILTIN_PSRLD128:
16294      icode = CODE_FOR_lshrv4si3;
16295      goto do_pshift;
16296    case IX86_BUILTIN_PSRLQ128:
16297      icode = CODE_FOR_lshrv2di3;
16298      goto do_pshift;
16299    do_pshift:
16300      arg0 = TREE_VALUE (arglist);
16301      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16302      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16303      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16304
16305      tmode = insn_data[icode].operand[0].mode;
16306      mode1 = insn_data[icode].operand[1].mode;
16307
16308      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16309	op0 = copy_to_reg (op0);
16310
16311      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
16312      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
16313	op1 = copy_to_reg (op1);
16314
16315      target = gen_reg_rtx (tmode);
16316      pat = GEN_FCN (icode) (target, op0, op1);
16317      if (!pat)
16318	return 0;
16319      emit_insn (pat);
16320      return target;
16321
16322    case IX86_BUILTIN_PSLLDQI128:
16323    case IX86_BUILTIN_PSRLDQI128:
16324      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16325	       : CODE_FOR_sse2_lshrti3);
16326      arg0 = TREE_VALUE (arglist);
16327      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16328      op0 = expand_normal (arg0);
16329      op1 = expand_normal (arg1);
16330      tmode = insn_data[icode].operand[0].mode;
16331      mode1 = insn_data[icode].operand[1].mode;
16332      mode2 = insn_data[icode].operand[2].mode;
16333
16334      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16335	{
16336	  op0 = copy_to_reg (op0);
16337	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16338	}
16339      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16340	{
16341	  error ("shift must be an immediate");
16342	  return const0_rtx;
16343	}
16344      target = gen_reg_rtx (V2DImode);
16345      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
16346			     op0, op1);
16347      if (! pat)
16348	return 0;
16349      emit_insn (pat);
16350      return target;
16351
16352    case IX86_BUILTIN_FEMMS:
16353      emit_insn (gen_mmx_femms ());
16354      return NULL_RTX;
16355
16356    case IX86_BUILTIN_PAVGUSB:
16357      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16358
16359    case IX86_BUILTIN_PF2ID:
16360      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16361
16362    case IX86_BUILTIN_PFACC:
16363      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16364
16365    case IX86_BUILTIN_PFADD:
16366     return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16367
16368    case IX86_BUILTIN_PFCMPEQ:
16369      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16370
16371    case IX86_BUILTIN_PFCMPGE:
16372      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16373
16374    case IX86_BUILTIN_PFCMPGT:
16375      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16376
16377    case IX86_BUILTIN_PFMAX:
16378      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16379
16380    case IX86_BUILTIN_PFMIN:
16381      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16382
16383    case IX86_BUILTIN_PFMUL:
16384      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16385
16386    case IX86_BUILTIN_PFRCP:
16387      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16388
16389    case IX86_BUILTIN_PFRCPIT1:
16390      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16391
16392    case IX86_BUILTIN_PFRCPIT2:
16393      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16394
16395    case IX86_BUILTIN_PFRSQIT1:
16396      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16397
16398    case IX86_BUILTIN_PFRSQRT:
16399      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16400
16401    case IX86_BUILTIN_PFSUB:
16402      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16403
16404    case IX86_BUILTIN_PFSUBR:
16405      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16406
16407    case IX86_BUILTIN_PI2FD:
16408      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16409
16410    case IX86_BUILTIN_PMULHRW:
16411      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16412
16413    case IX86_BUILTIN_PF2IW:
16414      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16415
16416    case IX86_BUILTIN_PFNACC:
16417      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16418
16419    case IX86_BUILTIN_PFPNACC:
16420      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16421
16422    case IX86_BUILTIN_PI2FW:
16423      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16424
16425    case IX86_BUILTIN_PSWAPDSI:
16426      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16427
16428    case IX86_BUILTIN_PSWAPDSF:
16429      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16430
16431    case IX86_BUILTIN_SQRTSD:
16432      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16433    case IX86_BUILTIN_LOADUPD:
16434      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16435    case IX86_BUILTIN_STOREUPD:
16436      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16437
16438    case IX86_BUILTIN_MFENCE:
16439	emit_insn (gen_sse2_mfence ());
16440	return 0;
16441    case IX86_BUILTIN_LFENCE:
16442	emit_insn (gen_sse2_lfence ());
16443	return 0;
16444
16445    case IX86_BUILTIN_CLFLUSH:
16446	arg0 = TREE_VALUE (arglist);
16447	op0 = expand_normal (arg0);
16448	icode = CODE_FOR_sse2_clflush;
16449	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16450	    op0 = copy_to_mode_reg (Pmode, op0);
16451
16452	emit_insn (gen_sse2_clflush (op0));
16453	return 0;
16454
16455    case IX86_BUILTIN_MOVNTPD:
16456      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16457    case IX86_BUILTIN_MOVNTDQ:
16458      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16459    case IX86_BUILTIN_MOVNTI:
16460      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16461
16462    case IX86_BUILTIN_LOADDQU:
16463      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16464    case IX86_BUILTIN_STOREDQU:
16465      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16466
16467    case IX86_BUILTIN_MONITOR:
16468      arg0 = TREE_VALUE (arglist);
16469      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16470      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16471      op0 = expand_normal (arg0);
16472      op1 = expand_normal (arg1);
16473      op2 = expand_normal (arg2);
16474      if (!REG_P (op0))
16475	op0 = copy_to_mode_reg (Pmode, op0);
16476      if (!REG_P (op1))
16477	op1 = copy_to_mode_reg (SImode, op1);
16478      if (!REG_P (op2))
16479	op2 = copy_to_mode_reg (SImode, op2);
16480      if (!TARGET_64BIT)
16481	emit_insn (gen_sse3_monitor (op0, op1, op2));
16482      else
16483	emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16484      return 0;
16485
16486    case IX86_BUILTIN_MWAIT:
16487      arg0 = TREE_VALUE (arglist);
16488      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16489      op0 = expand_normal (arg0);
16490      op1 = expand_normal (arg1);
16491      if (!REG_P (op0))
16492	op0 = copy_to_mode_reg (SImode, op0);
16493      if (!REG_P (op1))
16494	op1 = copy_to_mode_reg (SImode, op1);
16495      emit_insn (gen_sse3_mwait (op0, op1));
16496      return 0;
16497
16498    case IX86_BUILTIN_LDDQU:
16499      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16500				       target, 1);
16501
16502    case IX86_BUILTIN_VEC_INIT_V2SI:
16503    case IX86_BUILTIN_VEC_INIT_V4HI:
16504    case IX86_BUILTIN_VEC_INIT_V8QI:
16505      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16506
16507    case IX86_BUILTIN_VEC_EXT_V2DF:
16508    case IX86_BUILTIN_VEC_EXT_V2DI:
16509    case IX86_BUILTIN_VEC_EXT_V4SF:
16510    case IX86_BUILTIN_VEC_EXT_V4SI:
16511    case IX86_BUILTIN_VEC_EXT_V8HI:
16512    case IX86_BUILTIN_VEC_EXT_V16QI:
16513    case IX86_BUILTIN_VEC_EXT_V2SI:
16514    case IX86_BUILTIN_VEC_EXT_V4HI:
16515      return ix86_expand_vec_ext_builtin (arglist, target);
16516
16517    case IX86_BUILTIN_VEC_SET_V8HI:
16518    case IX86_BUILTIN_VEC_SET_V4HI:
16519      return ix86_expand_vec_set_builtin (arglist);
16520
16521    default:
16522      break;
16523    }
16524
16525  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16526    if (d->code == fcode)
16527      {
16528	/* Compares are treated specially.  */
16529	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16530	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16531	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
16532	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16533	  return ix86_expand_sse_compare (d, arglist, target);
16534
16535	return ix86_expand_binop_builtin (d->icode, arglist, target);
16536      }
16537
16538  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16539    if (d->code == fcode)
16540      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16541
16542  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16543    if (d->code == fcode)
16544      return ix86_expand_sse_comi (d, arglist, target);
16545
16546  gcc_unreachable ();
16547}
16548
16549/* Store OPERAND to memory after reload is completed.  This means
16550   that we can't easily use assign_stack_local.  */
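/* With a red zone (64-bit) the value is simply stored RED_ZONE_SIZE bytes
   below the stack pointer; otherwise push-style pre-decrement stores are
   emitted and ix86_free_from_memory below releases the space again.  */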
16551rtx
16552ix86_force_to_memory (enum machine_mode mode, rtx operand)
16553{
16554  rtx result;
16555
16556  gcc_assert (reload_completed);
16557  if (TARGET_RED_ZONE)
16558    {
16559      result = gen_rtx_MEM (mode,
16560			    gen_rtx_PLUS (Pmode,
16561					  stack_pointer_rtx,
16562					  GEN_INT (-RED_ZONE_SIZE)));
16563      emit_move_insn (result, operand);
16564    }
16565  else if (!TARGET_RED_ZONE && TARGET_64BIT)
16566    {
16567      switch (mode)
16568	{
16569	case HImode:
16570	case SImode:
16571	  operand = gen_lowpart (DImode, operand);
16572	  /* FALLTHRU */
16573	case DImode:
16574	  emit_insn (
16575		      gen_rtx_SET (VOIDmode,
16576				   gen_rtx_MEM (DImode,
16577						gen_rtx_PRE_DEC (DImode,
16578							stack_pointer_rtx)),
16579				   operand));
16580	  break;
16581	default:
16582	  gcc_unreachable ();
16583	}
16584      result = gen_rtx_MEM (mode, stack_pointer_rtx);
16585    }
16586  else
16587    {
16588      switch (mode)
16589	{
16590	case DImode:
16591	  {
16592	    rtx operands[2];
16593	    split_di (&operand, 1, operands, operands + 1);
16594	    emit_insn (
16595			gen_rtx_SET (VOIDmode,
16596				     gen_rtx_MEM (SImode,
16597						  gen_rtx_PRE_DEC (Pmode,
16598							stack_pointer_rtx)),
16599				     operands[1]));
16600	    emit_insn (
16601			gen_rtx_SET (VOIDmode,
16602				     gen_rtx_MEM (SImode,
16603						  gen_rtx_PRE_DEC (Pmode,
16604							stack_pointer_rtx)),
16605				     operands[0]));
16606	  }
16607	  break;
16608	case HImode:
16609	  /* Store HImodes as SImodes.  */
16610	  operand = gen_lowpart (SImode, operand);
16611	  /* FALLTHRU */
16612	case SImode:
16613	  emit_insn (
16614		      gen_rtx_SET (VOIDmode,
16615				   gen_rtx_MEM (GET_MODE (operand),
16616						gen_rtx_PRE_DEC (SImode,
16617							stack_pointer_rtx)),
16618				   operand));
16619	  break;
16620	default:
16621	  gcc_unreachable ();
16622	}
16623      result = gen_rtx_MEM (mode, stack_pointer_rtx);
16624    }
16625  return result;
16626}
16627
16628/* Free the operand from memory.  */
16629void
16630ix86_free_from_memory (enum machine_mode mode)
16631{
16632  if (!TARGET_RED_ZONE)
16633    {
16634      int size;
16635
16636      if (mode == DImode || TARGET_64BIT)
16637	size = 8;
16638      else
16639	size = 4;
16640      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
16641         to a pop or add instruction if registers are available.  */
16642      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16643			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16644					    GEN_INT (size))));
16645    }
16646}
16647
16648/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16649   QImode must go into class Q_REGS.
16650   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
16651   movdf to do mem-to-mem moves through integer regs.  */
16652enum reg_class
16653ix86_preferred_reload_class (rtx x, enum reg_class class)
16654{
16655  enum machine_mode mode = GET_MODE (x);
16656
16657  /* We're only allowed to return a subclass of CLASS.  Many of the
16658     following checks fail for NO_REGS, so eliminate that early.  */
16659  if (class == NO_REGS)
16660    return NO_REGS;
16661
16662  /* All classes can load zeros.  */
16663  if (x == CONST0_RTX (mode))
16664    return class;
16665
16666  /* Force constants into memory if we are loading a (nonzero) constant into
16667     an MMX or SSE register.  This is because there are no MMX/SSE instructions
16668     to load from a constant.  */
16669  if (CONSTANT_P (x)
16670      && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16671    return NO_REGS;
16672
16673  /* Prefer SSE regs only, if we can use them for math.  */
16674  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16675    return SSE_CLASS_P (class) ? class : NO_REGS;
16676
16677  /* Floating-point constants need more complex checks.  */
16678  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16679    {
16680      /* General regs can load everything.  */
16681      if (reg_class_subset_p (class, GENERAL_REGS))
16682        return class;
16683
16684      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
16685	 zero above.  We only want to wind up preferring 80387 registers if
16686	 we plan on doing computation with them.  */
16687      if (TARGET_80387
16688	  && standard_80387_constant_p (x))
16689	{
16690	  /* Limit class to non-sse.  */
16691	  if (class == FLOAT_SSE_REGS)
16692	    return FLOAT_REGS;
16693	  if (class == FP_TOP_SSE_REGS)
16694	    return FP_TOP_REG;
16695	  if (class == FP_SECOND_SSE_REGS)
16696	    return FP_SECOND_REG;
16697	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16698	    return class;
16699	}
16700
16701      return NO_REGS;
16702    }
16703
16704  /* Generally when we see PLUS here, it's the function invariant
16705     (plus soft-fp const_int), which can only be computed into general
16706     regs.  */
16707  if (GET_CODE (x) == PLUS)
16708    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16709
16710  /* QImode constants are easy to load, but non-constant QImode data
16711     must go into Q_REGS.  */
16712  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16713    {
16714      if (reg_class_subset_p (class, Q_REGS))
16715	return class;
16716      if (reg_class_subset_p (Q_REGS, class))
16717	return Q_REGS;
16718      return NO_REGS;
16719    }
16720
16721  return class;
16722}
16723
16724/* Discourage putting floating-point values in SSE registers unless
16725   SSE math is being used, and likewise for the 387 registers.  */
16726enum reg_class
16727ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16728{
16729  enum machine_mode mode = GET_MODE (x);
16730
16731  /* Restrict the output reload class to the register bank that we are doing
16732     math on.  If we would like not to return a subset of CLASS, reject this
16733     alternative: if reload cannot do this, it will still use its choice.  */
16734  mode = GET_MODE (x);
16735  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16736    return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16737
16738  if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16739    {
16740      if (class == FP_TOP_SSE_REGS)
16741	return FP_TOP_REG;
16742      else if (class == FP_SECOND_SSE_REGS)
16743	return FP_SECOND_REG;
16744      else
16745	return FLOAT_CLASS_P (class) ? class : NO_REGS;
16746    }
16747
16748  return class;
16749}
16750
16751/* If we are copying between general and FP registers, we need a memory
16752   location. The same is true for SSE and MMX registers.
16753
16754   The macro can't work reliably when one of the CLASSES is a class containing
16755   registers from multiple units (SSE, MMX, integer).  We avoid this by never
16756   combining those units in a single alternative in the machine description.
16757   Ensure that this constraint holds to avoid unexpected surprises.
16758
16759   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16760   enforce these sanity checks.  */
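/* For example, on a 32-bit target a DImode move between SSE_REGS and
   GENERAL_REGS is forced through memory, because direct SSE<->integer
   moves are restricted below to at most word size.  */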
16761
16762int
16763ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16764			      enum machine_mode mode, int strict)
16765{
16766  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16767      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16768      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16769      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16770      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16771      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16772    {
16773      gcc_assert (!strict);
16774      return true;
16775    }
16776
16777  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16778    return true;
16779
16780  /* ??? This is a lie.  We do have moves between mmx/general, and for
16781     mmx/sse2.  But by saying we need secondary memory we discourage the
16782     register allocator from using the mmx registers unless needed.  */
16783  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16784    return true;
16785
16786  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16787    {
16788      /* SSE1 doesn't have any direct moves from other classes.  */
16789      if (!TARGET_SSE2)
16790	return true;
16791
16792      /* If the target says that inter-unit moves are more expensive
16793	 than moving through memory, then don't generate them.  */
16794      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16795	return true;
16796
16797      /* Between SSE and general, we have moves no larger than word size.  */
16798      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16799	return true;
16800
16801      /* ??? For the cost of one register reformat penalty, we could use
16802	 the same instructions to move SFmode and DFmode data, but the
16803	 relevant move patterns don't support those alternatives.  */
16804      if (mode == SFmode || mode == DFmode)
16805	return true;
16806    }
16807
16808  return false;
16809}
16810
16811/* Return true if the registers in CLASS cannot represent the change from
16812   modes FROM to TO.  */
16813
16814bool
16815ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16816			       enum reg_class class)
16817{
16818  if (from == to)
16819    return false;
16820
16821  /* x87 registers can't do subreg at all, as all values are reformatted
16822     to extended precision.  */
16823  if (MAYBE_FLOAT_CLASS_P (class))
16824    return true;
16825
16826  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16827    {
16828      /* Vector registers do not support QI or HImode loads.  If we don't
16829	 disallow a change to these modes, reload will assume it's ok to
16830	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
16831	 the vec_dupv4hi pattern.  */
16832      if (GET_MODE_SIZE (from) < 4)
16833	return true;
16834
16835      /* Vector registers do not support subreg with nonzero offsets, which
16836	 are otherwise valid for integer registers.  Since we can't see
16837	 whether we have a nonzero offset from here, prohibit all
16838         nonparadoxical subregs changing size.  */
16839      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16840	return true;
16841    }
16842
16843  return false;
16844}
16845
16846/* Return the cost of moving data from a register in class CLASS1 to
16847   one in class CLASS2.
16848
16849   It is not required that the cost always equal 2 when FROM is the same as TO;
16850   on some machines it is expensive to move between registers if they are not
16851   general registers.  */
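/* For instance, with SSE2 and inter-unit moves enabled an SImode move
   between SSE_REGS and GENERAL_REGS costs ix86_cost->mmxsse_to_integer;
   when secondary memory is required instead, the cost is roughly one
   store plus one load, as computed below.  */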
16852
16853int
16854ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16855			 enum reg_class class2)
16856{
16857  /* In case we require secondary memory, compute cost of the store followed
16858     by load.  In order to avoid bad register allocation choices, we need
16859     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
16860
16861  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16862    {
16863      int cost = 1;
16864
16865      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16866		   MEMORY_MOVE_COST (mode, class1, 1));
16867      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16868		   MEMORY_MOVE_COST (mode, class2, 1));
16869
16870      /* When copying from a general purpose register we may emit multiple
16871         stores followed by a single load, causing a memory size mismatch
16872         stall.  Count this as an arbitrarily high cost of 20.  */
16873      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16874	cost += 20;
16875
16876      /* In the case of FP/MMX moves, the registers actually overlap, and we
16877	 have to switch modes in order to treat them differently.  */
16878      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16879          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16880	cost += 20;
16881
16882      return cost;
16883    }
16884
16885  /* Moves between SSE/MMX and integer unit are expensive.  */
16886  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16887      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16888    return ix86_cost->mmxsse_to_integer;
16889  if (MAYBE_FLOAT_CLASS_P (class1))
16890    return ix86_cost->fp_move;
16891  if (MAYBE_SSE_CLASS_P (class1))
16892    return ix86_cost->sse_move;
16893  if (MAYBE_MMX_CLASS_P (class1))
16894    return ix86_cost->mmx_move;
16895  return 2;
16896}
16897
16898/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
16899
16900bool
16901ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16902{
16903  /* Flags and only flags can only hold CCmode values.  */
16904  /* Flags, and only flags, can hold CCmode values.  */
16905    return GET_MODE_CLASS (mode) == MODE_CC;
16906  if (GET_MODE_CLASS (mode) == MODE_CC
16907      || GET_MODE_CLASS (mode) == MODE_RANDOM
16908      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16909    return 0;
16910  if (FP_REGNO_P (regno))
16911    return VALID_FP_MODE_P (mode);
16912  if (SSE_REGNO_P (regno))
16913    {
16914      /* We implement the move patterns for all vector modes into and
16915	 out of SSE registers, even when no operation instructions
16916	 are available.  */
16917      return (VALID_SSE_REG_MODE (mode)
16918	      || VALID_SSE2_REG_MODE (mode)
16919	      || VALID_MMX_REG_MODE (mode)
16920	      || VALID_MMX_REG_MODE_3DNOW (mode));
16921    }
16922  if (MMX_REGNO_P (regno))
16923    {
16924      /* We implement the move patterns for 3DNOW modes even in MMX mode,
16925	 so if the register is available at all, then we can move data of
16926	 the given mode into or out of it.  */
16927      return (VALID_MMX_REG_MODE (mode)
16928	      || VALID_MMX_REG_MODE_3DNOW (mode));
16929    }
16930
16931  if (mode == QImode)
16932    {
16933      /* Take care with QImode values - they can be in non-QI regs,
16934	 but then they do cause partial register stalls.  */
16935      if (regno < 4 || TARGET_64BIT)
16936	return 1;
16937      if (!TARGET_PARTIAL_REG_STALL)
16938	return 1;
16939      return reload_in_progress || reload_completed;
16940    }
16941  /* We handle both integers and floats in the general purpose registers.  */
16942  else if (VALID_INT_MODE_P (mode))
16943    return 1;
16944  else if (VALID_FP_MODE_P (mode))
16945    return 1;
16946  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
16947     on to use that value in smaller contexts, this can easily force a
16948     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
16949     supporting DImode, allow it.  */
16950  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16951    return 1;
16952
16953  return 0;
16954}
16955
16956/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
16957   tieable integer mode.  */
16958
16959static bool
16960ix86_tieable_integer_mode_p (enum machine_mode mode)
16961{
16962  switch (mode)
16963    {
16964    case HImode:
16965    case SImode:
16966      return true;
16967
16968    case QImode:
16969      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16970
16971    case DImode:
16972      return TARGET_64BIT;
16973
16974    default:
16975      return false;
16976    }
16977}
16978
16979/* Return true if MODE1 is accessible in a register that can hold MODE2
16980   without copying.  That is, all register classes that can hold MODE2
16981   can also hold MODE1.  */
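/* For example, HImode and SImode always tie with each other, DImode ties
   with them only on 64-bit targets, and SFmode ties with DFmode (every
   register that can hold DFmode can also hold SFmode) but not the other
   way around.  */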
16982
16983bool
16984ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16985{
16986  if (mode1 == mode2)
16987    return true;
16988
16989  if (ix86_tieable_integer_mode_p (mode1)
16990      && ix86_tieable_integer_mode_p (mode2))
16991    return true;
16992
16993  /* MODE2 being XFmode implies fp stack or general regs, which means we
16994     can tie any smaller floating point modes to it.  Note that we do not
16995     tie this with TFmode.  */
16996  if (mode2 == XFmode)
16997    return mode1 == SFmode || mode1 == DFmode;
16998
16999  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17000     that we can tie it with SFmode.  */
17001  if (mode2 == DFmode)
17002    return mode1 == SFmode;
17003
17004  /* If MODE2 is only appropriate for an SSE register, then tie with
17005     any other mode acceptable to SSE registers.  */
17006  if (GET_MODE_SIZE (mode2) == 16
17007      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17008    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17009
17010  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17011     with any other mode acceptable to MMX registers.  */
17012  if (GET_MODE_SIZE (mode2) == 8
17013      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17014    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17015
17016  return false;
17017}
17018
17019/* Return the cost of moving data of mode M between a
17020   register and memory.  A value of 2 is the default; this cost is
17021   relative to those in `REGISTER_MOVE_COST'.
17022
17023   If moving between registers and memory is more expensive than
17024   between two registers, you should define this macro to express the
17025   relative cost.
17026
17027   Also model the increased cost of moving QImode registers in
17028   non-Q_REGS classes.
17029 */
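/* For example, an SFmode load into an SSE register class returns
   ix86_cost->sse_load[0], while a QImode load into a non-Q register class
   is charged the movzbl_load cost rather than int_load[0].  */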
17030int
17031ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17032{
17033  if (FLOAT_CLASS_P (class))
17034    {
17035      int index;
17036      switch (mode)
17037	{
17038	  case SFmode:
17039	    index = 0;
17040	    break;
17041	  case DFmode:
17042	    index = 1;
17043	    break;
17044	  case XFmode:
17045	    index = 2;
17046	    break;
17047	  default:
17048	    return 100;
17049	}
17050      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17051    }
17052  if (SSE_CLASS_P (class))
17053    {
17054      int index;
17055      switch (GET_MODE_SIZE (mode))
17056	{
17057	  case 4:
17058	    index = 0;
17059	    break;
17060	  case 8:
17061	    index = 1;
17062	    break;
17063	  case 16:
17064	    index = 2;
17065	    break;
17066	  default:
17067	    return 100;
17068	}
17069      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17070    }
17071  if (MMX_CLASS_P (class))
17072    {
17073      int index;
17074      switch (GET_MODE_SIZE (mode))
17075	{
17076	  case 4:
17077	    index = 0;
17078	    break;
17079	  case 8:
17080	    index = 1;
17081	    break;
17082	  default:
17083	    return 100;
17084	}
17085      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17086    }
17087  switch (GET_MODE_SIZE (mode))
17088    {
17089      case 1:
17090	if (in)
17091	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17092		  : ix86_cost->movzbl_load);
17093	else
17094	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17095		  : ix86_cost->int_store[0] + 4);
17096	break;
17097      case 2:
17098	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17099      default:
17100	/* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
17101	if (mode == TFmode)
17102	  mode = XFmode;
17103	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17104		* (((int) GET_MODE_SIZE (mode)
17105		    + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17106    }
17107}
17108
17109/* Compute a (partial) cost for rtx X.  Return true if the complete
17110   cost has been computed, and false if subexpressions should be
17111   scanned.  In either case, *TOTAL contains the cost result.  */
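/* For example, (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8))
   matches a single lea and is costed as ix86_cost->lea plus the costs of
   its operands (see the PLUS case below).  */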
17112
17113static bool
17114ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17115{
17116  enum machine_mode mode = GET_MODE (x);
17117
17118  switch (code)
17119    {
17120    case CONST_INT:
17121    case CONST:
17122    case LABEL_REF:
17123    case SYMBOL_REF:
17124      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17125	*total = 3;
17126      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17127	*total = 2;
17128      else if (flag_pic && SYMBOLIC_CONST (x)
17129	       && (!TARGET_64BIT
17130		   || (GET_CODE (x) != LABEL_REF
17131		       && (GET_CODE (x) != SYMBOL_REF
17132		           || !SYMBOL_REF_LOCAL_P (x)))))
17133	*total = 1;
17134      else
17135	*total = 0;
17136      return true;
17137
17138    case CONST_DOUBLE:
17139      if (mode == VOIDmode)
17140	*total = 0;
17141      else
17142	switch (standard_80387_constant_p (x))
17143	  {
17144	  case 1: /* 0.0 */
17145	    *total = 1;
17146	    break;
17147	  default: /* Other constants */
17148	    *total = 2;
17149	    break;
17150	  case 0:
17151	  case -1:
17152	    /* Start with (MEM (SYMBOL_REF)), since that's where
17153	       it'll probably end up.  Add a penalty for size.  */
17154	    *total = (COSTS_N_INSNS (1)
17155		      + (flag_pic != 0 && !TARGET_64BIT)
17156		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17157	    break;
17158	  }
17159      return true;
17160
17161    case ZERO_EXTEND:
17162      /* The zero extension is often completely free on x86_64, so make
17163	 it as cheap as possible.  */
17164      if (TARGET_64BIT && mode == DImode
17165	  && GET_MODE (XEXP (x, 0)) == SImode)
17166	*total = 1;
17167      else if (TARGET_ZERO_EXTEND_WITH_AND)
17168	*total = ix86_cost->add;
17169      else
17170	*total = ix86_cost->movzx;
17171      return false;
17172
17173    case SIGN_EXTEND:
17174      *total = ix86_cost->movsx;
17175      return false;
17176
17177    case ASHIFT:
17178      if (GET_CODE (XEXP (x, 1)) == CONST_INT
17179	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17180	{
17181	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17182	  if (value == 1)
17183	    {
17184	      *total = ix86_cost->add;
17185	      return false;
17186	    }
17187	  if ((value == 2 || value == 3)
17188	      && ix86_cost->lea <= ix86_cost->shift_const)
17189	    {
17190	      *total = ix86_cost->lea;
17191	      return false;
17192	    }
17193	}
17194      /* FALLTHRU */
17195
17196    case ROTATE:
17197    case ASHIFTRT:
17198    case LSHIFTRT:
17199    case ROTATERT:
17200      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17201	{
17202	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17203	    {
17204	      if (INTVAL (XEXP (x, 1)) > 32)
17205		*total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17206	      else
17207		*total = ix86_cost->shift_const * 2;
17208	    }
17209	  else
17210	    {
17211	      if (GET_CODE (XEXP (x, 1)) == AND)
17212		*total = ix86_cost->shift_var * 2;
17213	      else
17214		*total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17215	    }
17216	}
17217      else
17218	{
17219	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17220	    *total = ix86_cost->shift_const;
17221	  else
17222	    *total = ix86_cost->shift_var;
17223	}
17224      return false;
17225
17226    case MULT:
17227      if (FLOAT_MODE_P (mode))
17228	{
17229	  *total = ix86_cost->fmul;
17230	  return false;
17231	}
17232      else
17233	{
17234	  rtx op0 = XEXP (x, 0);
17235	  rtx op1 = XEXP (x, 1);
17236	  int nbits;
17237	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17238	    {
17239	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17240	      for (nbits = 0; value != 0; value &= value - 1)
17241	        nbits++;
17242	    }
17243	  else
17244	    /* This is arbitrary.  */
17245	    nbits = 7;
17246
17247	  /* Compute costs correctly for widening multiplication.  */
17248	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17249	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17250	         == GET_MODE_SIZE (mode))
17251	    {
17252	      int is_mulwiden = 0;
17253	      enum machine_mode inner_mode = GET_MODE (op0);
17254
17255	      if (GET_CODE (op0) == GET_CODE (op1))
17256		is_mulwiden = 1, op1 = XEXP (op1, 0);
17257	      else if (GET_CODE (op1) == CONST_INT)
17258		{
17259		  if (GET_CODE (op0) == SIGN_EXTEND)
17260		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17261			          == INTVAL (op1);
17262		  else
17263		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17264	        }
17265
17266	      if (is_mulwiden)
17267	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17268	    }
17269
17270  	  *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17271		    + nbits * ix86_cost->mult_bit
17272	            + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17273
17274          return true;
17275	}
17276
17277    case DIV:
17278    case UDIV:
17279    case MOD:
17280    case UMOD:
17281      if (FLOAT_MODE_P (mode))
17282	*total = ix86_cost->fdiv;
17283      else
17284	*total = ix86_cost->divide[MODE_INDEX (mode)];
17285      return false;
17286
17287    case PLUS:
17288      if (FLOAT_MODE_P (mode))
17289	*total = ix86_cost->fadd;
17290      else if (GET_MODE_CLASS (mode) == MODE_INT
17291	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17292	{
17293	  if (GET_CODE (XEXP (x, 0)) == PLUS
17294	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17295	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17296	      && CONSTANT_P (XEXP (x, 1)))
17297	    {
17298	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17299	      if (val == 2 || val == 4 || val == 8)
17300		{
17301		  *total = ix86_cost->lea;
17302		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17303		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17304				      outer_code);
17305		  *total += rtx_cost (XEXP (x, 1), outer_code);
17306		  return true;
17307		}
17308	    }
17309	  else if (GET_CODE (XEXP (x, 0)) == MULT
17310		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17311	    {
17312	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17313	      if (val == 2 || val == 4 || val == 8)
17314		{
17315		  *total = ix86_cost->lea;
17316		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17317		  *total += rtx_cost (XEXP (x, 1), outer_code);
17318		  return true;
17319		}
17320	    }
17321	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
17322	    {
17323	      *total = ix86_cost->lea;
17324	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17325	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17326	      *total += rtx_cost (XEXP (x, 1), outer_code);
17327	      return true;
17328	    }
17329	}
17330      /* FALLTHRU */
17331
17332    case MINUS:
17333      if (FLOAT_MODE_P (mode))
17334	{
17335	  *total = ix86_cost->fadd;
17336	  return false;
17337	}
17338      /* FALLTHRU */
17339
17340    case AND:
17341    case IOR:
17342    case XOR:
17343      if (!TARGET_64BIT && mode == DImode)
17344	{
17345	  *total = (ix86_cost->add * 2
17346		    + (rtx_cost (XEXP (x, 0), outer_code)
17347		       << (GET_MODE (XEXP (x, 0)) != DImode))
17348		    + (rtx_cost (XEXP (x, 1), outer_code)
17349	               << (GET_MODE (XEXP (x, 1)) != DImode)));
17350	  return true;
17351	}
17352      /* FALLTHRU */
17353
17354    case NEG:
17355      if (FLOAT_MODE_P (mode))
17356	{
17357	  *total = ix86_cost->fchs;
17358	  return false;
17359	}
17360      /* FALLTHRU */
17361
17362    case NOT:
17363      if (!TARGET_64BIT && mode == DImode)
17364	*total = ix86_cost->add * 2;
17365      else
17366	*total = ix86_cost->add;
17367      return false;
17368
17369    case COMPARE:
17370      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17371	  && XEXP (XEXP (x, 0), 1) == const1_rtx
17372	  && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17373	  && XEXP (x, 1) == const0_rtx)
17374	{
17375	  /* This kind of construct is implemented using test[bwl].
17376	     Treat it as if we had an AND.  */
17377	  *total = (ix86_cost->add
17378		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17379		    + rtx_cost (const1_rtx, outer_code));
17380	  return true;
17381	}
17382      return false;
17383
17384    case FLOAT_EXTEND:
17385      if (!TARGET_SSE_MATH
17386	  || mode == XFmode
17387	  || (mode == DFmode && !TARGET_SSE2))
17388	/* For standard 80387 constants, raise the cost to prevent
17389	   compress_float_constant() from generating a load from memory.  */
17390	switch (standard_80387_constant_p (XEXP (x, 0)))
17391	  {
17392	  case -1:
17393	  case 0:
17394	    *total = 0;
17395	    break;
17396	  case 1: /* 0.0 */
17397	    *total = 1;
17398	    break;
17399	  default:
17400	    *total = (x86_ext_80387_constants & TUNEMASK
17401		      || optimize_size
17402		      ? 1 : 0);
17403	  }
17404      return false;
17405
17406    case ABS:
17407      if (FLOAT_MODE_P (mode))
17408	*total = ix86_cost->fabs;
17409      return false;
17410
17411    case SQRT:
17412      if (FLOAT_MODE_P (mode))
17413	*total = ix86_cost->fsqrt;
17414      return false;
17415
17416    case UNSPEC:
17417      if (XINT (x, 1) == UNSPEC_TP)
17418	*total = 0;
17419      return false;
17420
17421    default:
17422      return false;
17423    }
17424}
17425
17426#if TARGET_MACHO
17427
17428static int current_machopic_label_num;
17429
17430/* Given a symbol name and its associated stub, write out the
17431   definition of the stub.  */
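/* For a pure-PIC stub the output looks roughly like this (the stub, binder
   and lazy-pointer names are illustrative; the real ones come from the
   caller and from GEN_BINDER_NAME_FOR_STUB / GEN_SYMBOL_NAME_FOR_SYMBOL):

	L_foo$stub:
		.indirect_symbol _foo
		call	LPC$1
	LPC$1:	popl	%eax
		movl	L1$lz-LPC$1(%eax),%edx
		jmp	*%edx
	L_foo$stub_binder:
		lea	L1$lz-LPC$1(%eax),%eax
		pushl	%eax
		jmp	dyld_stub_binding_helper
	L1$lz:
		.indirect_symbol _foo
		.long	L_foo$stub_binder   */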
17432
17433void
17434machopic_output_stub (FILE *file, const char *symb, const char *stub)
17435{
17436  unsigned int length;
17437  char *binder_name, *symbol_name, lazy_ptr_name[32];
17438  int label = ++current_machopic_label_num;
17439
17440  /* For 64-bit we shouldn't get here.  */
17441  gcc_assert (!TARGET_64BIT);
17442
17443  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
17444  symb = (*targetm.strip_name_encoding) (symb);
17445
17446  length = strlen (stub);
17447  binder_name = alloca (length + 32);
17448  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17449
17450  length = strlen (symb);
17451  symbol_name = alloca (length + 32);
17452  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17453
17454  sprintf (lazy_ptr_name, "L%d$lz", label);
17455
17456  if (MACHOPIC_PURE)
17457    switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17458  else
17459    switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17460
17461  fprintf (file, "%s:\n", stub);
17462  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17463
17464  if (MACHOPIC_PURE)
17465    {
17466      fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17467      fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17468      fprintf (file, "\tjmp\t*%%edx\n");
17469    }
17470  else
17471    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17472
17473  fprintf (file, "%s:\n", binder_name);
17474
17475  if (MACHOPIC_PURE)
17476    {
17477      fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17478      fprintf (file, "\tpushl\t%%eax\n");
17479    }
17480  else
17481    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17482
17483  fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17484
17485  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17486  fprintf (file, "%s:\n", lazy_ptr_name);
17487  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17488  fprintf (file, "\t.long %s\n", binder_name);
17489}
17490
17491void
17492darwin_x86_file_end (void)
17493{
17494  darwin_file_end ();
17495  ix86_file_end ();
17496}
17497#endif /* TARGET_MACHO */
17498
17499/* Order the registers for the register allocator.  */
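/* The resulting order is: call-clobbered general registers, call-saved
   general registers, the x87 stack registers (before the SSE registers
   when the x87 is used for FP math, after them otherwise), the SSE and
   REX SSE registers, and finally the MMX registers.  */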
17500
17501void
17502x86_order_regs_for_local_alloc (void)
17503{
17504   int pos = 0;
17505   int i;
17506
17507   /* First allocate the local general purpose registers.  */
17508   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17509     if (GENERAL_REGNO_P (i) && call_used_regs[i])
17510	reg_alloc_order [pos++] = i;
17511
17512   /* Global general purpose registers.  */
17513   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17514     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17515	reg_alloc_order [pos++] = i;
17516
17517   /* x87 registers come first in case we are doing FP math
17518      using them.  */
17519   if (!TARGET_SSE_MATH)
17520     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17521       reg_alloc_order [pos++] = i;
17522
17523   /* SSE registers.  */
17524   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17525     reg_alloc_order [pos++] = i;
17526   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17527     reg_alloc_order [pos++] = i;
17528
17529   /* x87 registers.  */
17530   if (TARGET_SSE_MATH)
17531     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17532       reg_alloc_order [pos++] = i;
17533
17534   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17535     reg_alloc_order [pos++] = i;
17536
17537   /* Initialize the rest of the array, as we do not allocate some
17538      registers at all.  */
17539   while (pos < FIRST_PSEUDO_REGISTER)
17540     reg_alloc_order [pos++] = 0;
17541}
17542
17543/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17544   struct attribute_spec.handler.  */
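/* For example,

     struct s { char c; int i; } __attribute__ ((ms_struct));

   requests the Microsoft struct/bitfield layout for struct s; combining
   ms_struct and gcc_struct on the same type is diagnosed below.  */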
17545static tree
17546ix86_handle_struct_attribute (tree *node, tree name,
17547			      tree args ATTRIBUTE_UNUSED,
17548			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17549{
17550  tree *type = NULL;
17551  if (DECL_P (*node))
17552    {
17553      if (TREE_CODE (*node) == TYPE_DECL)
17554	type = &TREE_TYPE (*node);
17555    }
17556  else
17557    type = node;
17558
17559  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17560		 || TREE_CODE (*type) == UNION_TYPE)))
17561    {
17562      warning (OPT_Wattributes, "%qs attribute ignored",
17563	       IDENTIFIER_POINTER (name));
17564      *no_add_attrs = true;
17565    }
17566
17567  else if ((is_attribute_p ("ms_struct", name)
17568	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17569	   || ((is_attribute_p ("gcc_struct", name)
17570		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17571    {
17572      warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17573               IDENTIFIER_POINTER (name));
17574      *no_add_attrs = true;
17575    }
17576
17577  return NULL_TREE;
17578}
17579
17580static bool
17581ix86_ms_bitfield_layout_p (tree record_type)
17582{
17583  return (TARGET_MS_BITFIELD_LAYOUT &&
17584	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17585    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17586}
17587
17588/* Returns an expression indicating where the this parameter is
17589   located on entry to the FUNCTION.  */
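/* In summary: on 64-bit targets THIS lives in %rdi (or %rsi when a hidden
   aggregate-return pointer occupies the first register); on 32-bit targets
   it is in %ecx for fastcall functions, in %eax for other regparm functions
   with a fixed argument list, and otherwise on the stack just above the
   return address (one slot higher when a hidden aggregate-return pointer
   is also passed).  */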
17590
17591static rtx
17592x86_this_parameter (tree function)
17593{
17594  tree type = TREE_TYPE (function);
17595
17596  if (TARGET_64BIT)
17597    {
17598      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17599      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17600    }
17601
17602  if (ix86_function_regparm (type, function) > 0)
17603    {
17604      tree parm;
17605
17606      parm = TYPE_ARG_TYPES (type);
17607      /* Figure out whether or not the function has a variable number of
17608	 arguments.  */
17609      for (; parm; parm = TREE_CHAIN (parm))
17610	if (TREE_VALUE (parm) == void_type_node)
17611	  break;
17612      /* If not, the this parameter is in the first argument.  */
17613      if (parm)
17614	{
17615	  int regno = 0;
17616	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17617	    regno = 2;
17618	  return gen_rtx_REG (SImode, regno);
17619	}
17620    }
17621
17622  if (aggregate_value_p (TREE_TYPE (type), type))
17623    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17624  else
17625    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17626}
17627
17628/* Determine whether x86_output_mi_thunk can succeed.  */
17629
17630static bool
17631x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17632			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17633			 HOST_WIDE_INT vcall_offset, tree function)
17634{
17635  /* 64-bit can handle anything.  */
17636  if (TARGET_64BIT)
17637    return true;
17638
17639  /* For 32-bit, everything's fine if we have one free register.  */
17640  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17641    return true;
17642
17643  /* Need a free register for vcall_offset.  */
17644  if (vcall_offset)
17645    return false;
17646
17647  /* Need a free register for GOT references.  */
17648  if (flag_pic && !(*targetm.binds_local_p) (function))
17649    return false;
17650
17651  /* Otherwise ok.  */
17652  return true;
17653}
17654
17655/* Output the assembler code for a thunk function.  THUNK_DECL is the
17656   declaration for the thunk function itself, FUNCTION is the decl for
17657   the target function.  DELTA is an immediate constant offset to be
17658   added to THIS.  If VCALL_OFFSET is nonzero, the word at
17659   *(*this + vcall_offset) should be added to THIS.  */
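/* In the simplest 32-bit case (THIS on the stack, no vcall offset, a
   locally bound target) the emitted code is just

	addl	$DELTA, 4(%esp)
	jmp	target

   the other paths below load THIS into a scratch register first and/or
   indirect through the GOT or a Mach-O stub.  */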
17660
17661static void
17662x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17663		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17664		     HOST_WIDE_INT vcall_offset, tree function)
17665{
17666  rtx xops[3];
17667  rtx this = x86_this_parameter (function);
17668  rtx this_reg, tmp;
17669
17670  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
17671     pull it in now and let DELTA benefit.  */
17672  if (REG_P (this))
17673    this_reg = this;
17674  else if (vcall_offset)
17675    {
17676      /* Put the this parameter into %eax.  */
17677      xops[0] = this;
17678      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17679      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17680    }
17681  else
17682    this_reg = NULL_RTX;
17683
17684  /* Adjust the this parameter by a fixed constant.  */
17685  if (delta)
17686    {
17687      xops[0] = GEN_INT (delta);
17688      xops[1] = this_reg ? this_reg : this;
17689      if (TARGET_64BIT)
17690	{
17691	  if (!x86_64_general_operand (xops[0], DImode))
17692	    {
17693	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17694	      xops[1] = tmp;
17695	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17696	      xops[0] = tmp;
17697	      xops[1] = this;
17698	    }
17699	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17700	}
17701      else
17702	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17703    }
17704
17705  /* Adjust the this parameter by a value stored in the vtable.  */
17706  if (vcall_offset)
17707    {
17708      if (TARGET_64BIT)
17709	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17710      else
17711	{
17712	  int tmp_regno = 2 /* ECX */;
17713	  if (lookup_attribute ("fastcall",
17714	      TYPE_ATTRIBUTES (TREE_TYPE (function))))
17715	    tmp_regno = 0 /* EAX */;
17716	  tmp = gen_rtx_REG (SImode, tmp_regno);
17717	}
17718
17719      xops[0] = gen_rtx_MEM (Pmode, this_reg);
17720      xops[1] = tmp;
17721      if (TARGET_64BIT)
17722	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17723      else
17724	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17725
17726      /* Adjust the this parameter.  */
17727      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17728      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17729	{
17730	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17731	  xops[0] = GEN_INT (vcall_offset);
17732	  xops[1] = tmp2;
17733	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17734	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17735	}
17736      xops[1] = this_reg;
17737      if (TARGET_64BIT)
17738	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17739      else
17740	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17741    }
17742
17743  /* If necessary, drop THIS back to its stack slot.  */
17744  if (this_reg && this_reg != this)
17745    {
17746      xops[0] = this_reg;
17747      xops[1] = this;
17748      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17749    }
17750
17751  xops[0] = XEXP (DECL_RTL (function), 0);
17752  if (TARGET_64BIT)
17753    {
17754      if (!flag_pic || (*targetm.binds_local_p) (function))
17755	output_asm_insn ("jmp\t%P0", xops);
17756      else
17757	{
17758	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17759	  tmp = gen_rtx_CONST (Pmode, tmp);
17760	  tmp = gen_rtx_MEM (QImode, tmp);
17761	  xops[0] = tmp;
17762	  output_asm_insn ("jmp\t%A0", xops);
17763	}
17764    }
17765  else
17766    {
17767      if (!flag_pic || (*targetm.binds_local_p) (function))
17768	output_asm_insn ("jmp\t%P0", xops);
17769      else
17770#if TARGET_MACHO
17771	if (TARGET_MACHO)
17772	  {
17773	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
17774	    tmp = (gen_rtx_SYMBOL_REF
17775		   (Pmode,
17776		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17777	    tmp = gen_rtx_MEM (QImode, tmp);
17778	    xops[0] = tmp;
17779	    output_asm_insn ("jmp\t%0", xops);
17780	  }
17781	else
17782#endif /* TARGET_MACHO */
17783	{
17784	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17785	  output_set_got (tmp, NULL_RTX);
17786
17787	  xops[1] = tmp;
17788	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17789	  output_asm_insn ("jmp\t{*}%1", xops);
17790	}
17791    }
17792}
17793
17794static void
17795x86_file_start (void)
17796{
17797  default_file_start ();
17798#if TARGET_MACHO
17799  darwin_file_start ();
17800#endif
17801  if (X86_FILE_START_VERSION_DIRECTIVE)
17802    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17803  if (X86_FILE_START_FLTUSED)
17804    fputs ("\t.global\t__fltused\n", asm_out_file);
17805  if (ix86_asm_dialect == ASM_INTEL)
17806    fputs ("\t.intel_syntax\n", asm_out_file);
17807}
17808
17809int
17810x86_field_alignment (tree field, int computed)
17811{
17812  enum machine_mode mode;
17813  tree type = TREE_TYPE (field);
17814
17815  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17816    return computed;
17817  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17818		    ? get_inner_array_type (type) : type);
17819  if (mode == DFmode || mode == DCmode
17820      || GET_MODE_CLASS (mode) == MODE_INT
17821      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17822    return MIN (32, computed);
17823  return computed;
17824}
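
/* For example, a structure field of type double, whose natural alignment is
   64 bits, is capped at 32 bits by the test above unless -malign-double is
   in effect, matching the traditional ia32 struct layout.  */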
17825
17826/* Output assembler code to FILE to increment profiler label # LABELNO
17827   for profiling a function entry.  */
17828void
17829x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17830{
17831  if (TARGET_64BIT)
17832    if (flag_pic)
17833      {
17834#ifndef NO_PROFILE_COUNTERS
17835	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17836#endif
17837	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17838      }
17839    else
17840      {
17841#ifndef NO_PROFILE_COUNTERS
17842	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17843#endif
17844	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17845      }
17846  else if (flag_pic)
17847    {
17848#ifndef NO_PROFILE_COUNTERS
17849      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17850	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17851#endif
17852      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17853    }
17854  else
17855    {
17856#ifndef NO_PROFILE_COUNTERS
17857      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17858	       PROFILE_COUNT_REGISTER);
17859#endif
17860      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17861    }
17862}
17863
17864/* We don't have exact information about the insn sizes, but we can quite
17865   safely assume that we know about all 1 byte insns and about memory
17866   address sizes.  This is enough to eliminate unnecessary padding in
17867   99% of cases.  */
17868
17869static int
17870min_insn_size (rtx insn)
17871{
17872  int l = 0;
17873
17874  if (!INSN_P (insn) || !active_insn_p (insn))
17875    return 0;
17876
17877  /* Discard alignments we've emitted ourselves and jump table bodies.  */
17878  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17879      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17880    return 0;
17881  if (GET_CODE (insn) == JUMP_INSN
17882      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17883	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17884    return 0;
17885
17886  /* Important case - direct calls are always 5 bytes.
17887     It is common to have many calls in a row.  */
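  /* (A direct near call encodes as the 0xE8 opcode followed by a 32-bit
     displacement, hence the fixed 5 byte estimate.)  */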
17888  if (GET_CODE (insn) == CALL_INSN
17889      && symbolic_reference_mentioned_p (PATTERN (insn))
17890      && !SIBLING_CALL_P (insn))
17891    return 5;
17892  if (get_attr_length (insn) <= 1)
17893    return 1;
17894
17895  /* For normal instructions we can rely on the address sizes and on the
17896     presence of a symbolic reference to require 4 bytes of encoding.
17897     This is not the case for jumps, where references are PC relative.  */
17898  if (GET_CODE (insn) != JUMP_INSN)
17899    {
17900      l = get_attr_length_address (insn);
17901      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17902	l = 4;
17903    }
17904  if (l)
17905    return 1+l;
17906  else
17907    return 2;
17908}
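
/* For illustration of the estimate above: an insn whose pattern mentions a
   symbolic reference is counted as 1 + 4 = 5 bytes (one byte assumed for the
   opcode, four for the address), while a simple register-to-register insn
   falls through to the default estimate of 2 bytes.  */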
17909
17910/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
17911   16 byte window.  */
17912
17913static void
17914ix86_avoid_jump_misspredicts (void)
17915{
17916  rtx insn, start = get_insns ();
17917  int nbytes = 0, njumps = 0;
17918  int isjump = 0;
17919
17920  /* Look for all minimal intervals of instructions containing 4 jumps.
17921     The intervals are bounded by START and INSN.  NBYTES is the total
17922     size of instructions in the interval including INSN and not including
17923     START.  When NBYTES is smaller than 16 bytes, it is possible that the
17924     end of START and the end of INSN land in the same 16 byte page.
17925
17926     The smallest page offset at which INSN can start occurs when START ends
17927     at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).  We add
17928     a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN).
17929     */
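  /* For illustration, plugging numbers into the computation done below: with
     four jumps packed into NBYTES == 12 and a 2 byte final jump, the pad
     requested in front of that jump is 15 - 12 + 2 = 5 bytes.  */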
17930  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17931    {
17932
17933      nbytes += min_insn_size (insn);
17934      if (dump_file)
17935        fprintf (dump_file, "Insn %i estimated to %i bytes\n",
17936		INSN_UID (insn), min_insn_size (insn));
17937      if ((GET_CODE (insn) == JUMP_INSN
17938	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
17939	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17940	  || GET_CODE (insn) == CALL_INSN)
17941	njumps++;
17942      else
17943	continue;
17944
17945      while (njumps > 3)
17946	{
17947	  start = NEXT_INSN (start);
17948	  if ((GET_CODE (start) == JUMP_INSN
17949	       && GET_CODE (PATTERN (start)) != ADDR_VEC
17950	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17951	      || GET_CODE (start) == CALL_INSN)
17952	    njumps--, isjump = 1;
17953	  else
17954	    isjump = 0;
17955	  nbytes -= min_insn_size (start);
17956	}
17957      gcc_assert (njumps >= 0);
17958      if (dump_file)
17959        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17960		INSN_UID (start), INSN_UID (insn), nbytes);
17961
17962      if (njumps == 3 && isjump && nbytes < 16)
17963	{
17964	  int padsize = 15 - nbytes + min_insn_size (insn);
17965
17966	  if (dump_file)
17967	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17968		     INSN_UID (insn), padsize);
17969          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
17970	}
17971    }
17972}
17973
17974/* The AMD Athlon works faster when RET is not the destination of a
17975   conditional jump or directly preceded by another jump instruction.  We
17976   avoid the penalty by inserting a NOP just before the RET instruction in
17977   such cases.  */
17978static void
17979ix86_pad_returns (void)
17980{
17981  edge e;
17982  edge_iterator ei;
17983
17984  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17985    {
17986      basic_block bb = e->src;
17987      rtx ret = BB_END (bb);
17988      rtx prev;
17989      bool replace = false;
17990
17991      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17992	  || !maybe_hot_bb_p (bb))
17993	continue;
17994      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17995	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17996	  break;
17997      if (prev && GET_CODE (prev) == CODE_LABEL)
17998	{
17999	  edge e;
18000	  edge_iterator ei;
18001
18002	  FOR_EACH_EDGE (e, ei, bb->preds)
18003	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
18004		&& !(e->flags & EDGE_FALLTHRU))
18005	      replace = true;
18006	}
18007      if (!replace)
18008	{
18009	  prev = prev_active_insn (ret);
18010	  if (prev
18011	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18012		  || GET_CODE (prev) == CALL_INSN))
18013	    replace = true;
18014	  /* Empty functions suffer a branch mispredict even when the jump
18015	     destination is not visible to us.  */
18016	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18017	    replace = true;
18018	}
18019      if (replace)
18020	{
18021	  emit_insn_before (gen_return_internal_long (), ret);
18022	  delete_insn (ret);
18023	}
18024    }
18025}
18026
18027/* Implement machine specific optimizations.  We implement padding of returns
18028   for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
18029static void
18030ix86_reorg (void)
18031{
18032  if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18033    ix86_pad_returns ();
18034  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18035    ix86_avoid_jump_misspredicts ();
18036}
18037
18038/* Return nonzero when a QImode register that must be represented via a REX
18039   prefix is used.  */
18040bool
18041x86_extended_QIreg_mentioned_p (rtx insn)
18042{
18043  int i;
18044  extract_insn_cached (insn);
18045  for (i = 0; i < recog_data.n_operands; i++)
18046    if (REG_P (recog_data.operand[i])
18047	&& REGNO (recog_data.operand[i]) >= 4)
18048       return true;
18049  return false;
18050}
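
/* The REGNO >= 4 check above reflects x86-64 encoding: the byte variants of
   %si, %di, %bp and %sp (hard registers 4 through 7), as well as the REX
   registers, can only be accessed as QImode registers when a REX prefix is
   emitted.  */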
18051
18052/* Return nonzero when P points to a register encoded via a REX prefix.
18053   Called via for_each_rtx.  */
18054static int
18055extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18056{
18057   unsigned int regno;
18058   if (!REG_P (*p))
18059     return 0;
18060   regno = REGNO (*p);
18061   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18062}
18063
18064/* Return true when INSN mentions a register that must be encoded using a
18065   REX prefix.  */
18066bool
18067x86_extended_reg_mentioned_p (rtx insn)
18068{
18069  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18070}
18071
18072/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
18073   optabs would emit if we didn't have TFmode patterns.  */
18074
18075void
18076x86_emit_floatuns (rtx operands[2])
18077{
18078  rtx neglab, donelab, i0, i1, f0, in, out;
18079  enum machine_mode mode, inmode;
18080
18081  inmode = GET_MODE (operands[1]);
18082  gcc_assert (inmode == SImode || inmode == DImode);
18083
18084  out = operands[0];
18085  in = force_reg (inmode, operands[1]);
18086  mode = GET_MODE (out);
18087  neglab = gen_label_rtx ();
18088  donelab = gen_label_rtx ();
18089  i1 = gen_reg_rtx (Pmode);
18090  f0 = gen_reg_rtx (mode);
18091
18092  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18093
18094  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18095  emit_jump_insn (gen_jump (donelab));
18096  emit_barrier ();
18097
18098  emit_label (neglab);
18099
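  /* The value has its sign bit set when treated as signed, so it cannot be
     converted directly.  Halve it instead, OR-ing the discarded low bit back
     in so the final rounding is unaffected, convert the halved value, and
     then double the result below.  */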
18100  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18101  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18102  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18103  expand_float (f0, i0, 0);
18104  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18105
18106  emit_label (donelab);
18107}
18108
18109/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18110   with all elements equal to VAL.  Return true if successful.  */
18111
18112static bool
18113ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18114				   rtx target, rtx val)
18115{
18116  enum machine_mode smode, wsmode, wvmode;
18117  rtx x;
18118
18119  switch (mode)
18120    {
18121    case V2SImode:
18122    case V2SFmode:
18123      if (!mmx_ok)
18124	return false;
18125      /* FALLTHRU */
18126
18127    case V2DFmode:
18128    case V2DImode:
18129    case V4SFmode:
18130    case V4SImode:
18131      val = force_reg (GET_MODE_INNER (mode), val);
18132      x = gen_rtx_VEC_DUPLICATE (mode, val);
18133      emit_insn (gen_rtx_SET (VOIDmode, target, x));
18134      return true;
18135
18136    case V4HImode:
18137      if (!mmx_ok)
18138	return false;
18139      if (TARGET_SSE || TARGET_3DNOW_A)
18140	{
18141	  val = gen_lowpart (SImode, val);
18142	  x = gen_rtx_TRUNCATE (HImode, val);
18143	  x = gen_rtx_VEC_DUPLICATE (mode, x);
18144	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
18145	  return true;
18146	}
18147      else
18148	{
18149	  smode = HImode;
18150	  wsmode = SImode;
18151	  wvmode = V2SImode;
18152	  goto widen;
18153	}
18154
18155    case V8QImode:
18156      if (!mmx_ok)
18157	return false;
18158      smode = QImode;
18159      wsmode = HImode;
18160      wvmode = V4HImode;
18161      goto widen;
18162    case V8HImode:
18163      if (TARGET_SSE2)
18164	{
18165	  rtx tmp1, tmp2;
18166	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
18167	  tmp1 = gen_reg_rtx (SImode);
18168	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
18169	  /* Insert the SImode value as low element of V4SImode vector. */
18170	  tmp2 = gen_reg_rtx (V4SImode);
18171	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18172				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18173				    CONST0_RTX (V4SImode),
18174				    const1_rtx);
18175	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18176	  /* Cast the V4SImode vector back to a V8HImode vector.  */
18177	  tmp1 = gen_reg_rtx (V8HImode);
18178	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18179	  /* Duplicate the low short through the whole low SImode word.  */
18180	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18181	  /* Cast the V8HImode vector back to a V4SImode vector.  */
18182	  tmp2 = gen_reg_rtx (V4SImode);
18183	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18184	  /* Replicate the low element of the V4SImode vector.  */
18185	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18186	  /* Cast the V4SImode vector back to V8HImode, and store in target.  */
18187	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18188	  return true;
18189	}
18190      smode = HImode;
18191      wsmode = SImode;
18192      wvmode = V4SImode;
18193      goto widen;
18194    case V16QImode:
18195      if (TARGET_SSE2)
18196	{
18197	  rtx tmp1, tmp2;
18198	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
18199	  tmp1 = gen_reg_rtx (SImode);
18200	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
18201	  /* Insert the SImode value as low element of V4SImode vector. */
18202	  tmp2 = gen_reg_rtx (V4SImode);
18203	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18204				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18205				    CONST0_RTX (V4SImode),
18206				    const1_rtx);
18207	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18208	  /* Cast the V4SImode vector back to a V16QImode vector.  */
18209	  tmp1 = gen_reg_rtx (V16QImode);
18210	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18211	  /* Duplicate the low byte through the whole low SImode word.  */
18212	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18213	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18214	  /* Cast the V16QImode vector back to a V4SImode vector.  */
18215	  tmp2 = gen_reg_rtx (V4SImode);
18216	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18217	  /* Replicate the low element of the V4SImode vector.  */
18218	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18219	  /* Cast the V4SImode vector back to V16QImode, and store in target.  */
18220	  emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18221	  return true;
18222	}
18223      smode = QImode;
18224      wsmode = HImode;
18225      wvmode = V8HImode;
18226      goto widen;
18227    widen:
18228      /* Replicate the value once into the next wider mode and recurse.  */
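      /* For example, a QImode value B becomes the HImode value (B << 8) | B,
	 and the recursive call below then spreads that half word across the
	 corresponding wider vector mode.  */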
18229      val = convert_modes (wsmode, smode, val, true);
18230      x = expand_simple_binop (wsmode, ASHIFT, val,
18231			       GEN_INT (GET_MODE_BITSIZE (smode)),
18232			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
18233      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18234
18235      x = gen_reg_rtx (wvmode);
18236      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18237	gcc_unreachable ();
18238      emit_move_insn (target, gen_lowpart (mode, x));
18239      return true;
18240
18241    default:
18242      return false;
18243    }
18244}
18245
18246/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18247   whose ONE_VAR element is VAR, and other elements are zero.  Return true
18248   if successful.  */
18249
18250static bool
18251ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18252				     rtx target, rtx var, int one_var)
18253{
18254  enum machine_mode vsimode;
18255  rtx new_target;
18256  rtx x, tmp;
18257
18258  switch (mode)
18259    {
18260    case V2SFmode:
18261    case V2SImode:
18262      if (!mmx_ok)
18263	return false;
18264      /* FALLTHRU */
18265
18266    case V2DFmode:
18267    case V2DImode:
18268      if (one_var != 0)
18269	return false;
18270      var = force_reg (GET_MODE_INNER (mode), var);
18271      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18272      emit_insn (gen_rtx_SET (VOIDmode, target, x));
18273      return true;
18274
18275    case V4SFmode:
18276    case V4SImode:
18277      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18278	new_target = gen_reg_rtx (mode);
18279      else
18280	new_target = target;
18281      var = force_reg (GET_MODE_INNER (mode), var);
18282      x = gen_rtx_VEC_DUPLICATE (mode, var);
18283      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18284      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18285      if (one_var != 0)
18286	{
18287	  /* We need to shuffle the value to the correct position, so
18288	     create a new pseudo to store the intermediate result.  */
18289
18290	  /* With SSE2, we can use the integer shuffle insns.  */
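	  /* The shuffles below send old element 1 (a zero) to position 0 and
	     the just-merged element 0 (the variable) to position ONE_VAR;
	     every other lane also receives element 1, i.e. zero.  */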
18291	  if (mode != V4SFmode && TARGET_SSE2)
18292	    {
18293	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18294					    GEN_INT (1),
18295					    GEN_INT (one_var == 1 ? 0 : 1),
18296					    GEN_INT (one_var == 2 ? 0 : 1),
18297					    GEN_INT (one_var == 3 ? 0 : 1)));
18298	      if (target != new_target)
18299		emit_move_insn (target, new_target);
18300	      return true;
18301	    }
18302
18303	  /* Otherwise convert the intermediate result to V4SFmode and
18304	     use the SSE1 shuffle instructions.  */
18305	  if (mode != V4SFmode)
18306	    {
18307	      tmp = gen_reg_rtx (V4SFmode);
18308	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18309	    }
18310	  else
18311	    tmp = new_target;
18312
18313	  emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18314				       GEN_INT (1),
18315				       GEN_INT (one_var == 1 ? 0 : 1),
18316				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
18317				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18318
18319	  if (mode != V4SFmode)
18320	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18321	  else if (tmp != target)
18322	    emit_move_insn (target, tmp);
18323	}
18324      else if (target != new_target)
18325	emit_move_insn (target, new_target);
18326      return true;
18327
18328    case V8HImode:
18329    case V16QImode:
18330      vsimode = V4SImode;
18331      goto widen;
18332    case V4HImode:
18333    case V8QImode:
18334      if (!mmx_ok)
18335	return false;
18336      vsimode = V2SImode;
18337      goto widen;
18338    widen:
18339      if (one_var != 0)
18340	return false;
18341
18342      /* Zero extend the variable element to SImode and recurse.  */
18343      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18344
18345      x = gen_reg_rtx (vsimode);
18346      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18347						var, one_var))
18348	gcc_unreachable ();
18349
18350      emit_move_insn (target, gen_lowpart (mode, x));
18351      return true;
18352
18353    default:
18354      return false;
18355    }
18356}
18357
18358/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18359   consisting of the values in VALS.  It is known that all elements
18360   except ONE_VAR are constants.  Return true if successful.  */
18361
18362static bool
18363ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18364				 rtx target, rtx vals, int one_var)
18365{
18366  rtx var = XVECEXP (vals, 0, one_var);
18367  enum machine_mode wmode;
18368  rtx const_vec, x;
18369
18370  const_vec = copy_rtx (vals);
18371  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18372  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18373
18374  switch (mode)
18375    {
18376    case V2DFmode:
18377    case V2DImode:
18378    case V2SFmode:
18379    case V2SImode:
18380      /* For the two element vectors, it's just as easy to use
18381	 the general case.  */
18382      return false;
18383
18384    case V4SFmode:
18385    case V4SImode:
18386    case V8HImode:
18387    case V4HImode:
18388      break;
18389
18390    case V16QImode:
18391      wmode = V8HImode;
18392      goto widen;
18393    case V8QImode:
18394      wmode = V4HImode;
18395      goto widen;
18396    widen:
18397      /* There's no way to set one QImode entry easily.  Combine
18398	 the variable value with its adjacent constant value, and
18399	 promote to an HImode set.  */
18400      x = XVECEXP (vals, 0, one_var ^ 1);
18401      if (one_var & 1)
18402	{
18403	  var = convert_modes (HImode, QImode, var, true);
18404	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18405				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
18406	  x = GEN_INT (INTVAL (x) & 0xff);
18407	}
18408      else
18409	{
18410	  var = convert_modes (HImode, QImode, var, true);
18411	  x = gen_int_mode (INTVAL (x) << 8, HImode);
18412	}
18413      if (x != const0_rtx)
18414	var = expand_simple_binop (HImode, IOR, var, x, var,
18415				   1, OPTAB_LIB_WIDEN);
18416
18417      x = gen_reg_rtx (wmode);
18418      emit_move_insn (x, gen_lowpart (wmode, const_vec));
18419      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18420
18421      emit_move_insn (target, gen_lowpart (mode, x));
18422      return true;
18423
18424    default:
18425      return false;
18426    }
18427
18428  emit_move_insn (target, const_vec);
18429  ix86_expand_vector_set (mmx_ok, target, var, one_var);
18430  return true;
18431}
18432
18433/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
18434   all values variable, and none identical.  */
18435
18436static void
18437ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18438				 rtx target, rtx vals)
18439{
18440  enum machine_mode half_mode = GET_MODE_INNER (mode);
18441  rtx op0 = NULL, op1 = NULL;
18442  bool use_vec_concat = false;
18443
18444  switch (mode)
18445    {
18446    case V2SFmode:
18447    case V2SImode:
18448      if (!mmx_ok && !TARGET_SSE)
18449	break;
18450      /* FALLTHRU */
18451
18452    case V2DFmode:
18453    case V2DImode:
18454      /* For the two element vectors, we always implement VEC_CONCAT.  */
18455      op0 = XVECEXP (vals, 0, 0);
18456      op1 = XVECEXP (vals, 0, 1);
18457      use_vec_concat = true;
18458      break;
18459
18460    case V4SFmode:
18461      half_mode = V2SFmode;
18462      goto half;
18463    case V4SImode:
18464      half_mode = V2SImode;
18465      goto half;
18466    half:
18467      {
18468	rtvec v;
18469
18470	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
18471	   Recurse to load the two halves.  */
18472
18473	op0 = gen_reg_rtx (half_mode);
18474	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18475	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18476
18477	op1 = gen_reg_rtx (half_mode);
18478	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18479	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18480
18481	use_vec_concat = true;
18482      }
18483      break;
18484
18485    case V8HImode:
18486    case V16QImode:
18487    case V4HImode:
18488    case V8QImode:
18489      break;
18490
18491    default:
18492      gcc_unreachable ();
18493    }
18494
18495  if (use_vec_concat)
18496    {
18497      if (!register_operand (op0, half_mode))
18498	op0 = force_reg (half_mode, op0);
18499      if (!register_operand (op1, half_mode))
18500	op1 = force_reg (half_mode, op1);
18501
18502      emit_insn (gen_rtx_SET (VOIDmode, target,
18503			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
18504    }
18505  else
18506    {
18507      int i, j, n_elts, n_words, n_elt_per_word;
18508      enum machine_mode inner_mode;
18509      rtx words[4], shift;
18510
18511      inner_mode = GET_MODE_INNER (mode);
18512      n_elts = GET_MODE_NUNITS (mode);
18513      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18514      n_elt_per_word = n_elts / n_words;
18515      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18516
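      /* Each word-sized chunk is assembled in an integer register by shifting
	 in elements from the most significant end; e.g. for V8HImode on a
	 32-bit target the first chunk becomes (elt1 << 16) | elt0.  */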
18517      for (i = 0; i < n_words; ++i)
18518	{
18519	  rtx word = NULL_RTX;
18520
18521	  for (j = 0; j < n_elt_per_word; ++j)
18522	    {
18523	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18524	      elt = convert_modes (word_mode, inner_mode, elt, true);
18525
18526	      if (j == 0)
18527		word = elt;
18528	      else
18529		{
18530		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18531					      word, 1, OPTAB_LIB_WIDEN);
18532		  word = expand_simple_binop (word_mode, IOR, word, elt,
18533					      word, 1, OPTAB_LIB_WIDEN);
18534		}
18535	    }
18536
18537	  words[i] = word;
18538	}
18539
18540      if (n_words == 1)
18541	emit_move_insn (target, gen_lowpart (mode, words[0]));
18542      else if (n_words == 2)
18543	{
18544	  rtx tmp = gen_reg_rtx (mode);
18545	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18546	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18547	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18548	  emit_move_insn (target, tmp);
18549	}
18550      else if (n_words == 4)
18551	{
18552	  rtx tmp = gen_reg_rtx (V4SImode);
18553	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18554	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18555	  emit_move_insn (target, gen_lowpart (mode, tmp));
18556	}
18557      else
18558	gcc_unreachable ();
18559    }
18560}
18561
18562/* Initialize vector TARGET via VALS.  Suppress the use of MMX
18563   instructions unless MMX_OK is true.  */
18564
18565void
18566ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18567{
18568  enum machine_mode mode = GET_MODE (target);
18569  enum machine_mode inner_mode = GET_MODE_INNER (mode);
18570  int n_elts = GET_MODE_NUNITS (mode);
18571  int n_var = 0, one_var = -1;
18572  bool all_same = true, all_const_zero = true;
18573  int i;
18574  rtx x;
18575
18576  for (i = 0; i < n_elts; ++i)
18577    {
18578      x = XVECEXP (vals, 0, i);
18579      if (!CONSTANT_P (x))
18580	n_var++, one_var = i;
18581      else if (x != CONST0_RTX (inner_mode))
18582	all_const_zero = false;
18583      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18584	all_same = false;
18585    }
18586
18587  /* Constants are best loaded from the constant pool.  */
18588  if (n_var == 0)
18589    {
18590      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18591      return;
18592    }
18593
18594  /* If all values are identical, broadcast the value.  */
18595  if (all_same
18596      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18597					    XVECEXP (vals, 0, 0)))
18598    return;
18599
18600  /* Values where only one field is non-constant are best loaded from
18601     the pool and overwritten via move later.  */
18602  if (n_var == 1)
18603    {
18604      if (all_const_zero
18605	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18606						  XVECEXP (vals, 0, one_var),
18607						  one_var))
18608	return;
18609
18610      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18611	return;
18612    }
18613
18614  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18615}
18616
18617void
18618ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18619{
18620  enum machine_mode mode = GET_MODE (target);
18621  enum machine_mode inner_mode = GET_MODE_INNER (mode);
18622  bool use_vec_merge = false;
18623  rtx tmp;
18624
18625  switch (mode)
18626    {
18627    case V2SFmode:
18628    case V2SImode:
18629      if (mmx_ok)
18630	{
18631	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18632	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18633	  if (elt == 0)
18634	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18635	  else
18636	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18637	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18638	  return;
18639	}
18640      break;
18641
18642    case V2DFmode:
18643    case V2DImode:
18644      {
18645	rtx op0, op1;
18646
18647	/* For the two element vectors, we implement a VEC_CONCAT with
18648	   the extraction of the other element.  */
18649
18650	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18651	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18652
18653	if (elt == 0)
18654	  op0 = val, op1 = tmp;
18655	else
18656	  op0 = tmp, op1 = val;
18657
18658	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18659	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18660      }
18661      return;
18662
18663    case V4SFmode:
18664      switch (elt)
18665	{
18666	case 0:
18667	  use_vec_merge = true;
18668	  break;
18669
18670	case 1:
18671	  /* tmp = target = A B C D */
18672	  tmp = copy_to_reg (target);
18673	  /* target = A A B B */
18674	  emit_insn (gen_sse_unpcklps (target, target, target));
18675	  /* target = X A B B */
18676	  ix86_expand_vector_set (false, target, val, 0);
18677	  /* target = A X C D  */
18678	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
18679				       GEN_INT (1), GEN_INT (0),
18680				       GEN_INT (2+4), GEN_INT (3+4)));
18681	  return;
18682
18683	case 2:
18684	  /* tmp = target = A B C D */
18685	  tmp = copy_to_reg (target);
18686	  /* tmp = X B C D */
18687	  ix86_expand_vector_set (false, tmp, val, 0);
18688	  /* target = A B X D */
18689	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
18690				       GEN_INT (0), GEN_INT (1),
18691				       GEN_INT (0+4), GEN_INT (3+4)));
18692	  return;
18693
18694	case 3:
18695	  /* tmp = target = A B C D */
18696	  tmp = copy_to_reg (target);
18697	  /* tmp = X B C D */
18698	  ix86_expand_vector_set (false, tmp, val, 0);
18699	  /* target = A B C X */
18700	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
18701				       GEN_INT (0), GEN_INT (1),
18702				       GEN_INT (2+4), GEN_INT (0+4)));
18703	  return;
18704
18705	default:
18706	  gcc_unreachable ();
18707	}
18708      break;
18709
18710    case V4SImode:
18711      /* Element 0 handled by vec_merge below.  */
18712      if (elt == 0)
18713	{
18714	  use_vec_merge = true;
18715	  break;
18716	}
18717
18718      if (TARGET_SSE2)
18719	{
18720	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
18721	     store into element 0, then shuffle them back.  */
18722
18723	  rtx order[4];
18724
18725	  order[0] = GEN_INT (elt);
18726	  order[1] = const1_rtx;
18727	  order[2] = const2_rtx;
18728	  order[3] = GEN_INT (3);
18729	  order[elt] = const0_rtx;
18730
18731	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18732					order[1], order[2], order[3]));
18733
18734	  ix86_expand_vector_set (false, target, val, 0);
18735
18736	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18737					order[1], order[2], order[3]));
18738	}
18739      else
18740	{
18741	  /* For SSE1, we have to reuse the V4SF code.  */
18742	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18743				  gen_lowpart (SFmode, val), elt);
18744	}
18745      return;
18746
18747    case V8HImode:
18748      use_vec_merge = TARGET_SSE2;
18749      break;
18750    case V4HImode:
18751      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18752      break;
18753
18754    case V16QImode:
18755    case V8QImode:
18756    default:
18757      break;
18758    }
18759
18760  if (use_vec_merge)
18761    {
18762      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18763      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18764      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18765    }
18766  else
18767    {
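      /* No single-insn pattern applies: spill the vector to a stack
	 temporary, store VAL into the selected element's slot in memory,
	 and reload the whole vector.  */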
18768      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18769
18770      emit_move_insn (mem, target);
18771
18772      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18773      emit_move_insn (tmp, val);
18774
18775      emit_move_insn (target, mem);
18776    }
18777}
18778
18779void
18780ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18781{
18782  enum machine_mode mode = GET_MODE (vec);
18783  enum machine_mode inner_mode = GET_MODE_INNER (mode);
18784  bool use_vec_extr = false;
18785  rtx tmp;
18786
18787  switch (mode)
18788    {
18789    case V2SImode:
18790    case V2SFmode:
18791      if (!mmx_ok)
18792	break;
18793      /* FALLTHRU */
18794
18795    case V2DFmode:
18796    case V2DImode:
18797      use_vec_extr = true;
18798      break;
18799
18800    case V4SFmode:
18801      switch (elt)
18802	{
18803	case 0:
18804	  tmp = vec;
18805	  break;
18806
18807	case 1:
18808	case 3:
18809	  tmp = gen_reg_rtx (mode);
18810	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18811				       GEN_INT (elt), GEN_INT (elt),
18812				       GEN_INT (elt+4), GEN_INT (elt+4)));
18813	  break;
18814
18815	case 2:
18816	  tmp = gen_reg_rtx (mode);
18817	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18818	  break;
18819
18820	default:
18821	  gcc_unreachable ();
18822	}
18823      vec = tmp;
18824      use_vec_extr = true;
18825      elt = 0;
18826      break;
18827
18828    case V4SImode:
18829      if (TARGET_SSE2)
18830	{
18831	  switch (elt)
18832	    {
18833	    case 0:
18834	      tmp = vec;
18835	      break;
18836
18837	    case 1:
18838	    case 3:
18839	      tmp = gen_reg_rtx (mode);
18840	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18841					    GEN_INT (elt), GEN_INT (elt),
18842					    GEN_INT (elt), GEN_INT (elt)));
18843	      break;
18844
18845	    case 2:
18846	      tmp = gen_reg_rtx (mode);
18847	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18848	      break;
18849
18850	    default:
18851	      gcc_unreachable ();
18852	    }
18853	  vec = tmp;
18854	  use_vec_extr = true;
18855	  elt = 0;
18856	}
18857      else
18858	{
18859	  /* For SSE1, we have to reuse the V4SF code.  */
18860	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18861				      gen_lowpart (V4SFmode, vec), elt);
18862	  return;
18863	}
18864      break;
18865
18866    case V8HImode:
18867      use_vec_extr = TARGET_SSE2;
18868      break;
18869    case V4HImode:
18870      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18871      break;
18872
18873    case V16QImode:
18874    case V8QImode:
18875      /* ??? Could extract the appropriate HImode element and shift.  */
18876    default:
18877      break;
18878    }
18879
18880  if (use_vec_extr)
18881    {
18882      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18883      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18884
18885      /* Let the rtl optimizers know about the zero extension performed.  */
18886      if (inner_mode == HImode)
18887	{
18888	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18889	  target = gen_lowpart (SImode, target);
18890	}
18891
18892      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18893    }
18894  else
18895    {
18896      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18897
18898      emit_move_insn (mem, vec);
18899
18900      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18901      emit_move_insn (target, tmp);
18902    }
18903}
18904
18905/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
18906   pattern to reduce; DEST is the destination; IN is the input vector.  */
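
/* For example, with IN = { a, b, c, d } and FN the addition pattern, the
   sequence below forms tmp1 = { c, d, c, d }, tmp2 = { a+c, b+d, ... },
   broadcasts element 1 of tmp2 into tmp3, and leaves (a+c) + (b+d) in
   element 0 of DEST.  */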
18907
18908void
18909ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18910{
18911  rtx tmp1, tmp2, tmp3;
18912
18913  tmp1 = gen_reg_rtx (V4SFmode);
18914  tmp2 = gen_reg_rtx (V4SFmode);
18915  tmp3 = gen_reg_rtx (V4SFmode);
18916
18917  emit_insn (gen_sse_movhlps (tmp1, in, in));
18918  emit_insn (fn (tmp2, tmp1, in));
18919
18920  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18921			       GEN_INT (1), GEN_INT (1),
18922			       GEN_INT (1+4), GEN_INT (1+4)));
18923  emit_insn (fn (dest, tmp2, tmp3));
18924}
18925
18926/* Target hook for scalar_mode_supported_p.  */
18927static bool
18928ix86_scalar_mode_supported_p (enum machine_mode mode)
18929{
18930  if (DECIMAL_FLOAT_MODE_P (mode))
18931    return true;
18932  else
18933    return default_scalar_mode_supported_p (mode);
18934}
18935
18936/* Implements target hook vector_mode_supported_p.  */
18937static bool
18938ix86_vector_mode_supported_p (enum machine_mode mode)
18939{
18940  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18941    return true;
18942  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18943    return true;
18944  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18945    return true;
18946  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18947    return true;
18948  return false;
18949}
18950
18951/* Worker function for TARGET_MD_ASM_CLOBBERS.
18952
18953   We do this in the new i386 backend to maintain source compatibility
18954   with the old cc0-based compiler.  */
18955
18956static tree
18957ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18958		      tree inputs ATTRIBUTE_UNUSED,
18959		      tree clobbers)
18960{
18961  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18962			clobbers);
18963  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18964			clobbers);
18965  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18966			clobbers);
18967  return clobbers;
18968}
18969
18970/* Return true if this goes in large data/bss.  */
18971
18972static bool
18973ix86_in_large_data_p (tree exp)
18974{
18975  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18976    return false;
18977
18978  /* Functions are never large data.  */
18979  if (TREE_CODE (exp) == FUNCTION_DECL)
18980    return false;
18981
18982  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18983    {
18984      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18985      if (strcmp (section, ".ldata") == 0
18986	  || strcmp (section, ".lbss") == 0)
18987	return true;
18988      return false;
18989    }
18990  else
18991    {
18992      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18993
18994      /* If this is an incomplete type with size 0, then we can't put it
18995	 in data because it might be too big when completed.  */
18996      if (!size || size > ix86_section_threshold)
18997	return true;
18998    }
18999
19000  return false;
19001}

19002static void
19003ix86_encode_section_info (tree decl, rtx rtl, int first)
19004{
19005  default_encode_section_info (decl, rtl, first);
19006
19007  if (TREE_CODE (decl) == VAR_DECL
19008      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19009      && ix86_in_large_data_p (decl))
19010    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19011}
19012
19013/* Worker function for REVERSE_CONDITION.  */
19014
19015enum rtx_code
19016ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19017{
19018  return (mode != CCFPmode && mode != CCFPUmode
19019	  ? reverse_condition (code)
19020	  : reverse_condition_maybe_unordered (code));
19021}
19022
19023/* Output code to perform an x87 FP register move, from OPERANDS[1]
19024   to OPERANDS[0].  */
19025
19026const char *
19027output_387_reg_move (rtx insn, rtx *operands)
19028{
19029  if (REG_P (operands[1])
19030      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19031    {
19032      if (REGNO (operands[0]) == FIRST_STACK_REG)
19033	return output_387_ffreep (operands, 0);
19034      return "fstp\t%y0";
19035    }
19036  if (STACK_TOP_P (operands[0]))
19037    return "fld%z1\t%y1";
19038  return "fst\t%y0";
19039}
19040
19041/* Output code to perform a conditional jump to LABEL, if C2 flag in
19042   FP status register is set.  */
19043
19044void
19045ix86_emit_fp_unordered_jump (rtx label)
19046{
19047  rtx reg = gen_reg_rtx (HImode);
19048  rtx temp;
19049
19050  emit_insn (gen_x86_fnstsw_1 (reg));
19051
19052  if (TARGET_USE_SAHF)
19053    {
19054      emit_insn (gen_x86_sahf_1 (reg));
19055
19056      temp = gen_rtx_REG (CCmode, FLAGS_REG);
19057      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19058    }
19059  else
19060    {
19061      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
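      /* C2 is bit 10 of the FP status word, i.e. bit 2 -- mask 0x04 -- of
	 the high byte examined by the extended test above.  */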
19062
19063      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19064      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19065    }
19066
19067  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19068			      gen_rtx_LABEL_REF (VOIDmode, label),
19069			      pc_rtx);
19070  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19071  emit_jump_insn (temp);
19072}
19073
19074/* Output code to perform a log1p XFmode calculation.  */
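
/* A note on the magic threshold used below: it is 1 - sqrt(2)/2, the bound
   within which the x87 fyl2xp1 instruction is specified to be accurate, so
   inputs with |op1| below it use fyl2xp1 directly, while larger values fall
   back to fyl2x on 1 + op1; both paths scale by the fldln2 constant to turn
   log2 into a natural logarithm.  */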
19075
19076void ix86_emit_i387_log1p (rtx op0, rtx op1)
19077{
19078  rtx label1 = gen_label_rtx ();
19079  rtx label2 = gen_label_rtx ();
19080
19081  rtx tmp = gen_reg_rtx (XFmode);
19082  rtx tmp2 = gen_reg_rtx (XFmode);
19083
19084  emit_insn (gen_absxf2 (tmp, op1));
19085  emit_insn (gen_cmpxf (tmp,
19086    CONST_DOUBLE_FROM_REAL_VALUE (
19087       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19088       XFmode)));
19089  emit_jump_insn (gen_bge (label1));
19090
19091  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19092  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19093  emit_jump (label2);
19094
19095  emit_label (label1);
19096  emit_move_insn (tmp, CONST1_RTX (XFmode));
19097  emit_insn (gen_addxf3 (tmp, op1, tmp));
19098  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19099  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19100
19101  emit_label (label2);
19102}
19103
19104/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
19105
19106static void
19107i386_solaris_elf_named_section (const char *name, unsigned int flags,
19108				tree decl)
19109{
19110  /* With Binutils 2.15, the "@unwind" marker must be specified on
19111     every occurrence of the ".eh_frame" section, not just the first
19112     one.  */
19113  if (TARGET_64BIT
19114      && strcmp (name, ".eh_frame") == 0)
19115    {
19116      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19117	       flags & SECTION_WRITE ? "aw" : "a");
19118      return;
19119    }
19120  default_elf_asm_named_section (name, flags, decl);
19121}
19122
19123/* Return the mangling of TYPE if it is an extended fundamental type.  */
19124
19125static const char *
19126ix86_mangle_fundamental_type (tree type)
19127{
19128  switch (TYPE_MODE (type))
19129    {
19130    case TFmode:
19131      /* __float128 is "g".  */
19132      return "g";
19133    case XFmode:
19134      /* "long double" or __float80 is "e".  */
19135      return "e";
19136    default:
19137      return NULL;
19138    }
19139}
19140
19141/* For 32-bit code we can avoid PIC register setup by using the
19142   __stack_chk_fail_local hidden function instead of calling
19143   __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
19144   register, so it is better to call __stack_chk_fail directly.  */
19145
19146static tree
19147ix86_stack_protect_fail (void)
19148{
19149  return TARGET_64BIT
19150	 ? default_external_stack_protect_fail ()
19151	 : default_hidden_stack_protect_fail ();
19152}
19153
19154/* Select a format to encode pointers in exception handling data.  CODE
19155   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
19156   true if the symbol may be affected by dynamic relocations.
19157
19158   ??? All x86 object file formats are capable of representing this.
19159   After all, the relocation needed is the same as for the call insn.
19160   Whether or not a particular assembler allows us to enter such, I
19161   guess we'll have to see.  */
19162int
19163asm_preferred_eh_data_format (int code, int global)
19164{
19165  if (flag_pic)
19166    {
19167      int type = DW_EH_PE_sdata8;
19168      if (!TARGET_64BIT
19169	  || ix86_cmodel == CM_SMALL_PIC
19170	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19171	type = DW_EH_PE_sdata4;
19172      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19173    }
19174  if (ix86_cmodel == CM_SMALL
19175      || (ix86_cmodel == CM_MEDIUM && code))
19176    return DW_EH_PE_udata4;
19177  return DW_EH_PE_absptr;
19178}
19179
19180#include "gt-i386.h"
19181