/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */

/* $FreeBSD: stable/9/contrib/gcc/config/i386/i386.c 219711 2011-03-17 09:44:33Z mm $ */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"
#include "dwarf2.h"
#include "tm-constrs.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
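
/* For example, MODE_INDEX (SImode) evaluates to 2 and selects the SImode
   entry of the per-mode multiply and divide cost arrays in the
   processor_costs tables below; any mode other than QI/HI/SI/DImode falls
   through to the trailing "other" entry (index 4).  */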

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
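
/* Illustration: under that assumption COSTS_N_BYTES (2) == COSTS_N_INSNS (1)
   == 4, so the 2-byte add in size_cost below carries the same weight as a
   one-insn add does in the speed-tuned tables.  */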

static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/*                               HI */
   COSTS_N_BYTES (3),			/*                               SI */
   COSTS_N_BYTES (3),			/*                               DI */
   COSTS_N_BYTES (5)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/*                          HI */
   COSTS_N_BYTES (3),			/*                          SI */
   COSTS_N_BYTES (3),			/*                          DI */
   COSTS_N_BYTES (5)},			/*                       other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/*                               HI */
   COSTS_N_INSNS (6),			/*                               SI */
   COSTS_N_INSNS (6),			/*                               DI */
   COSTS_N_INSNS (6)},			/*                               other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*                          HI */
   COSTS_N_INSNS (23),			/*                          SI */
   COSTS_N_INSNS (23),			/*                          DI */
   COSTS_N_INSNS (23)},			/*                          other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/*                               HI */
   COSTS_N_INSNS (12),			/*                               SI */
   COSTS_N_INSNS (12),			/*                               DI */
   COSTS_N_INSNS (12)},			/*                               other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/*                          HI */
   COSTS_N_INSNS (40),			/*                          SI */
   COSTS_N_INSNS (40),			/*                          DI */
   COSTS_N_INSNS (40)},			/*                          other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/*                               HI */
   COSTS_N_INSNS (11),			/*                               SI */
   COSTS_N_INSNS (11),			/*                               DI */
   COSTS_N_INSNS (11)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/*                          HI */
   COSTS_N_INSNS (25),			/*                          SI */
   COSTS_N_INSNS (25),			/*                          DI */
   COSTS_N_INSNS (25)},			/*                          other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (4),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (4)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/*                          HI */
   COSTS_N_INSNS (17),			/*                          SI */
   COSTS_N_INSNS (17),			/*                          DI */
   COSTS_N_INSNS (17)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (7),			/*                               SI */
   COSTS_N_INSNS (7),			/*                               DI */
   COSTS_N_INSNS (7)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*                          HI */
   COSTS_N_INSNS (39),			/*                          SI */
   COSTS_N_INSNS (39),			/*                          DI */
   COSTS_N_INSNS (39)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (3),			/*                               DI */
   COSTS_N_INSNS (3)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/*                          HI */
   COSTS_N_INSNS (18),			/*                          SI */
   COSTS_N_INSNS (18),			/*                          DI */
   COSTS_N_INSNS (18)},			/*                          other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/*                               HI */
   COSTS_N_INSNS (5),			/*                               SI */
   COSTS_N_INSNS (5),			/*                               DI */
   COSTS_N_INSNS (5)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*                          HI */
   COSTS_N_INSNS (42),			/*                          SI */
   COSTS_N_INSNS (74),			/*                          DI */
   COSTS_N_INSNS (74)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (5)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*                          HI */
   COSTS_N_INSNS (42),			/*                          SI */
   COSTS_N_INSNS (74),			/*                          DI */
   COSTS_N_INSNS (74)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/*                               HI */
   COSTS_N_INSNS (15),			/*                               SI */
   COSTS_N_INSNS (15),			/*                               DI */
   COSTS_N_INSNS (15)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/*                          HI */
   COSTS_N_INSNS (56),			/*                          SI */
   COSTS_N_INSNS (56),			/*                          DI */
   COSTS_N_INSNS (56)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/*                               HI */
   COSTS_N_INSNS (10),			/*                               SI */
   COSTS_N_INSNS (10),			/*                               DI */
   COSTS_N_INSNS (10)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/*                          HI */
   COSTS_N_INSNS (66),			/*                          SI */
   COSTS_N_INSNS (66),			/*                          DI */
   COSTS_N_INSNS (66)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (3),			/*                               DI */
   COSTS_N_INSNS (3)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),			/*                          HI */
   COSTS_N_INSNS (22),			/*                          SI */
   COSTS_N_INSNS (22),			/*                          DI */
   COSTS_N_INSNS (22)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  16,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {6, 6, 6},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {6, 6},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {6, 6, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),			/* cost of FSQRT instruction.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (2)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*                          HI */
   COSTS_N_INSNS (42),			/*                          SI */
   COSTS_N_INSNS (74),			/*                          DI */
   COSTS_N_INSNS (74)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
};

/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (2)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*                          HI */
   COSTS_N_INSNS (42),			/*                          SI */
   COSTS_N_INSNS (74),			/*                          DI */
   COSTS_N_INSNS (74)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_GEODE  (1<<PROCESSOR_GEODE)
#define m_K6_GEODE  (m_K6 | m_GEODE)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
#define m_K8  (1<<PROCESSOR_K8)
#define m_ATHLON_K8  (m_K8 | m_ATHLON)
#define m_NOCONA  (1<<PROCESSOR_NOCONA)
#define m_CORE2  (1<<PROCESSOR_CORE2)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
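
/* Each x86_* tuning flag below is a mask over these per-processor bits.
   Illustrative example: (x86_use_leave & m_K8) != 0 means the "leave"
   instruction is considered profitable when tuning for K8; the actual
   tests are expected to live in the corresponding TARGET_* macros in
   i386.h, which check the mask against the bit for ix86_tune.  */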

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */

/* Using leave does not affect Nocona SPEC2000 results negatively, so enabling it for
   Generic64 seems like a good code size tradeoff.  We can't enable it for 32bit
   generic because it does not work well with PPro based chips.  */
const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Branch hints were put in P4 based on simulation results.  But
   after P4 was made, no performance benefit was observed with
   branch hints.  They also increase code size.  As a result,
   icc never generates branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on the Generic32
   compilation setting as well.  However, in the current implementation
   partial register stalls are not eliminated very well - they can
   be introduced via subregs synthesized by combine and can happen
   in caller/callee saving sequences.
   Because this option pays back little on PPro based chips and conflicts
   with the partial reg. dependencies used by Athlon/P4 based chips, it is better
   to leave it off for generic32 for now.  */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls.  Just like
   x86_partial_reg_stall, this option might be considered for Generic32
   if our scheme for avoiding partial stalls were more effective.  */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* In the Generic model we have a conflict here between PPro/Pentium4 based chips
   that treat 128bit SSE registers as single units and K8 based chips that
   divide SSE registers into two 64bit halves.
   x86_sse_partial_reg_dependency promotes all store destinations to 128bit
   to allow register renaming on 128bit SSE units, but usually results in one
   extra microop on 64bit SSE units.  Experimental results show that disabling
   this option on P4 brings over 20% SPECfp regression, while enabling it on
   K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
   of moves.  */
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in the proper format, leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);

/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers, which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;

const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_CORE2 | m_PPRO | m_GENERIC32;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC;
const int x86_use_bt = m_ATHLON_K8;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Compare and exchange 8 bytes was added for pentium.  */
const int x86_cmpxchg8b = ~(m_386 | m_486);
/* Compare and exchange 16 bytes was added for nocona.  */
const int x86_cmpxchg16b = m_NOCONA | m_CORE2;
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;
const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;

/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
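
/* Illustration (assuming the 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8 from i386.h):
   6*8 + 8*16 == 176 bytes, the varargs register save area.  */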

/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};

/* Code model option.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialects.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

static bool ix86_handle_option (size_t, const char *, int);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependent (rtx, rtx, enum attr_type);
static int ix86_agi_dependent (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_scalar_mode_supported_p (enum machine_mode);
static bool ix86_vector_mode_supported_p (enum machine_mode);

static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1258static int ix86_value_regno (enum machine_mode, tree, tree);
1259static bool contains_128bit_aligned_vector_p (tree);
1260static rtx ix86_struct_value_rtx (tree, int);
1261static bool ix86_ms_bitfield_layout_p (tree);
1262static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1263static int extended_reg_mentioned_1 (rtx *, void *);
1264static bool ix86_rtx_costs (rtx, int, int, int *);
1265static int min_insn_size (rtx);
1266static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1267static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1268static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1269				    tree, bool);
1270static void ix86_init_builtins (void);
1271static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1272static const char *ix86_mangle_fundamental_type (tree);
1273static tree ix86_stack_protect_fail (void);
1274static rtx ix86_internal_arg_pointer (void);
1275static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1276
1277/* This function is only used on Solaris.  */
1278static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1279  ATTRIBUTE_UNUSED;
1280
1281/* Register class used for passing a given 64-bit part of the argument.
1282   These represent classes as documented by the psABI, with the exception of
1283   the SSESF and SSEDF classes, which are basically the SSE class except that
1284   gcc uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1285
1286   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1287   whenever the upper half of the part contains only padding.
1288 */
1289enum x86_64_reg_class
1290  {
1291    X86_64_NO_CLASS,
1292    X86_64_INTEGER_CLASS,
1293    X86_64_INTEGERSI_CLASS,
1294    X86_64_SSE_CLASS,
1295    X86_64_SSESF_CLASS,
1296    X86_64_SSEDF_CLASS,
1297    X86_64_SSEUP_CLASS,
1298    X86_64_X87_CLASS,
1299    X86_64_X87UP_CLASS,
1300    X86_64_COMPLEX_X87_CLASS,
1301    X86_64_MEMORY_CLASS
1302  };
1303static const char * const x86_64_reg_class_name[] = {
1304  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1305  "sseup", "x87", "x87up", "cplx87", "no"
1306};
1307
1308#define MAX_CLASSES 4
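/* Illustrative sketch of how these classes are used (assumed psABI
   behaviour, not code from this file): a 16-byte aggregate such as

       struct s { double d; long l; };

   is split into two 64-bit parts; the first is classified SSEDF (an SSE
   part moved in DFmode) and the second INTEGER, so a call passes the
   double in an SSE register and the long in a general-purpose register.
   MAX_CLASSES bounds how many such 64-bit parts one argument may span.  */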
1309
1310/* Table of constants used by fldpi, fldln2, etc.  */
1311static REAL_VALUE_TYPE ext_80387_constants_table [5];
1312static bool ext_80387_constants_init = 0;
1313static void init_ext_80387_constants (void);
1314static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1315static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1316static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1317static section *x86_64_elf_select_section (tree decl, int reloc,
1318					   unsigned HOST_WIDE_INT align)
1319					     ATTRIBUTE_UNUSED;
1320
1321/* Initialize the GCC target structure.  */
1322#undef TARGET_ATTRIBUTE_TABLE
1323#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1324#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1325#  undef TARGET_MERGE_DECL_ATTRIBUTES
1326#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1327#endif
1328
1329#undef TARGET_COMP_TYPE_ATTRIBUTES
1330#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1331
1332#undef TARGET_INIT_BUILTINS
1333#define TARGET_INIT_BUILTINS ix86_init_builtins
1334#undef TARGET_EXPAND_BUILTIN
1335#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1336
1337#undef TARGET_ASM_FUNCTION_EPILOGUE
1338#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1339
1340#undef TARGET_ENCODE_SECTION_INFO
1341#ifndef SUBTARGET_ENCODE_SECTION_INFO
1342#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1343#else
1344#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1345#endif
1346
1347#undef TARGET_ASM_OPEN_PAREN
1348#define TARGET_ASM_OPEN_PAREN ""
1349#undef TARGET_ASM_CLOSE_PAREN
1350#define TARGET_ASM_CLOSE_PAREN ""
1351
1352#undef TARGET_ASM_ALIGNED_HI_OP
1353#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1354#undef TARGET_ASM_ALIGNED_SI_OP
1355#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1356#ifdef ASM_QUAD
1357#undef TARGET_ASM_ALIGNED_DI_OP
1358#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1359#endif
1360
1361#undef TARGET_ASM_UNALIGNED_HI_OP
1362#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1363#undef TARGET_ASM_UNALIGNED_SI_OP
1364#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1365#undef TARGET_ASM_UNALIGNED_DI_OP
1366#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1367
1368#undef TARGET_SCHED_ADJUST_COST
1369#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1370#undef TARGET_SCHED_ISSUE_RATE
1371#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1372#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1373#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1374  ia32_multipass_dfa_lookahead
1375
1376#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1377#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1378
1379#ifdef HAVE_AS_TLS
1380#undef TARGET_HAVE_TLS
1381#define TARGET_HAVE_TLS true
1382#endif
1383#undef TARGET_CANNOT_FORCE_CONST_MEM
1384#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1385#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1386#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1387
1388#undef TARGET_DELEGITIMIZE_ADDRESS
1389#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1390
1391#undef TARGET_MS_BITFIELD_LAYOUT_P
1392#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1393
1394#if TARGET_MACHO
1395#undef TARGET_BINDS_LOCAL_P
1396#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1397#endif
1398
1399#undef TARGET_ASM_OUTPUT_MI_THUNK
1400#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1401#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1402#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1403
1404#undef TARGET_ASM_FILE_START
1405#define TARGET_ASM_FILE_START x86_file_start
1406
1407#undef TARGET_DEFAULT_TARGET_FLAGS
1408#define TARGET_DEFAULT_TARGET_FLAGS	\
1409  (TARGET_DEFAULT			\
1410   | TARGET_64BIT_DEFAULT		\
1411   | TARGET_SUBTARGET_DEFAULT		\
1412   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1413
1414#undef TARGET_HANDLE_OPTION
1415#define TARGET_HANDLE_OPTION ix86_handle_option
1416
1417#undef TARGET_RTX_COSTS
1418#define TARGET_RTX_COSTS ix86_rtx_costs
1419#undef TARGET_ADDRESS_COST
1420#define TARGET_ADDRESS_COST ix86_address_cost
1421
1422#undef TARGET_FIXED_CONDITION_CODE_REGS
1423#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1424#undef TARGET_CC_MODES_COMPATIBLE
1425#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1426
1427#undef TARGET_MACHINE_DEPENDENT_REORG
1428#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1429
1430#undef TARGET_BUILD_BUILTIN_VA_LIST
1431#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1432
1433#undef TARGET_MD_ASM_CLOBBERS
1434#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1435
1436#undef TARGET_PROMOTE_PROTOTYPES
1437#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1438#undef TARGET_STRUCT_VALUE_RTX
1439#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1440#undef TARGET_SETUP_INCOMING_VARARGS
1441#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1442#undef TARGET_MUST_PASS_IN_STACK
1443#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1444#undef TARGET_PASS_BY_REFERENCE
1445#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1446#undef TARGET_INTERNAL_ARG_POINTER
1447#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1448#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1449#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1450
1451#undef TARGET_GIMPLIFY_VA_ARG_EXPR
1452#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1453
1454#undef TARGET_SCALAR_MODE_SUPPORTED_P
1455#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1456
1457#undef TARGET_VECTOR_MODE_SUPPORTED_P
1458#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1459
1460#ifdef HAVE_AS_TLS
1461#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1462#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1463#endif
1464
1465#ifdef SUBTARGET_INSERT_ATTRIBUTES
1466#undef TARGET_INSERT_ATTRIBUTES
1467#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1468#endif
1469
1470#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1471#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1472
1473#undef TARGET_STACK_PROTECT_FAIL
1474#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1475
1476#undef TARGET_FUNCTION_VALUE
1477#define TARGET_FUNCTION_VALUE ix86_function_value
1478
1479struct gcc_target targetm = TARGET_INITIALIZER;
1480
1481
1482/* The svr4 ABI for the i386 says that records and unions are returned
1483   in memory.  */
1484#ifndef DEFAULT_PCC_STRUCT_RETURN
1485#define DEFAULT_PCC_STRUCT_RETURN 1
1486#endif
1487
1488/* Implement TARGET_HANDLE_OPTION.  */
1489
1490static bool
1491ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1492{
1493  switch (code)
1494    {
1495    case OPT_m3dnow:
1496      if (!value)
1497	{
1498	  target_flags &= ~MASK_3DNOW_A;
1499	  target_flags_explicit |= MASK_3DNOW_A;
1500	}
1501      return true;
1502
1503    case OPT_mmmx:
1504      if (!value)
1505	{
1506	  target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1507	  target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1508	}
1509      return true;
1510
1511    case OPT_msse:
1512      if (!value)
1513	{
1514	  target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3);
1515	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3;
1516	}
1517      return true;
1518
1519    case OPT_msse2:
1520      if (!value)
1521	{
1522	  target_flags &= ~(MASK_SSE3 | MASK_SSSE3);
1523	  target_flags_explicit |= MASK_SSE3 | MASK_SSSE3;
1524	}
1525      return true;
1526
1527    case OPT_msse3:
1528      if (!value)
1529	{
1530	  target_flags &= ~MASK_SSSE3;
1531	  target_flags_explicit |= MASK_SSSE3;
1532	}
1533      return true;
1534
1535    default:
1536      return true;
1537    }
1538}
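/* For illustration, the cascading above means that explicitly disabling a
   lower ISA level also switches off everything built on top of it, e.g.

       gcc -mssse3 -mno-sse2 ...

   ends up with MASK_SSE3 and MASK_SSSE3 cleared as well, while a bare
   -mno-ssse3 leaves SSE, SSE2 and SSE3 untouched.  */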
1539
1540/* Sometimes certain combinations of command options do not make
1541   sense on a particular target machine.  You can define a macro
1542   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
1543   defined, is executed once just after all the command options have
1544   been parsed.
1545
1546   Don't use this macro to turn on various extra optimizations for
1547   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
1548
1549void
1550override_options (void)
1551{
1552  int i;
1553  int ix86_tune_defaulted = 0;
1554
1555  /* Comes from final.c -- no real reason to change it.  */
1556#define MAX_CODE_ALIGN 16
1557
1558  static struct ptt
1559    {
1560      const struct processor_costs *cost;	/* Processor costs */
1561      const int target_enable;			/* Target flags to enable.  */
1562      const int target_disable;			/* Target flags to disable.  */
1563      const int align_loop;			/* Default alignments.  */
1564      const int align_loop_max_skip;
1565      const int align_jump;
1566      const int align_jump_max_skip;
1567      const int align_func;
1568    }
1569  const processor_target_table[PROCESSOR_max] =
1570    {
1571      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1572      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1573      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1574      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1575      {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1576      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1577      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1578      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1579      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1580      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1581      {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1582      {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1583      {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1584    };
1585
1586  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1587  static struct pta
1588    {
1589      const char *const name;		/* processor name or nickname.  */
1590      const enum processor_type processor;
1591      const enum pta_flags
1592	{
1593	  PTA_SSE = 1,
1594	  PTA_SSE2 = 2,
1595	  PTA_SSE3 = 4,
1596	  PTA_MMX = 8,
1597	  PTA_PREFETCH_SSE = 16,
1598	  PTA_3DNOW = 32,
1599	  PTA_3DNOW_A = 64,
1600	  PTA_64BIT = 128,
1601	  PTA_SSSE3 = 256
1602	} flags;
1603    }
1604  const processor_alias_table[] =
1605    {
1606      {"i386", PROCESSOR_I386, 0},
1607      {"i486", PROCESSOR_I486, 0},
1608      {"i586", PROCESSOR_PENTIUM, 0},
1609      {"pentium", PROCESSOR_PENTIUM, 0},
1610      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1611      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1612      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1613      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1614      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1615      {"i686", PROCESSOR_PENTIUMPRO, 0},
1616      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1617      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1618      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1619      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1620      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1621      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1622				       | PTA_MMX | PTA_PREFETCH_SSE},
1623      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1624				        | PTA_MMX | PTA_PREFETCH_SSE},
1625      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1626				        | PTA_MMX | PTA_PREFETCH_SSE},
1627      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1628				        | PTA_MMX | PTA_PREFETCH_SSE},
1629      {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1630                                        | PTA_64BIT | PTA_MMX
1631                                        | PTA_PREFETCH_SSE},
1632      {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1633				   | PTA_3DNOW_A},
1634      {"k6", PROCESSOR_K6, PTA_MMX},
1635      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1636      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1637      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1638				   | PTA_3DNOW_A},
1639      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1640					 | PTA_3DNOW | PTA_3DNOW_A},
1641      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1642				    | PTA_3DNOW_A | PTA_SSE},
1643      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1644				      | PTA_3DNOW_A | PTA_SSE},
1645      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1646				      | PTA_3DNOW_A | PTA_SSE},
1647      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1648			       | PTA_SSE | PTA_SSE2 },
1649      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1650				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1651      {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1652				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1653				      | PTA_SSE3 },
1654      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1655				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1656      {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1657				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1658				      | PTA_SSE3 },
1659      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1660				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1661      {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1662				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1663				      | PTA_SSE3 },
1664      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1665				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1666      {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
1667      {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
1668    };
1669
1670  int const pta_size = ARRAY_SIZE (processor_alias_table);
1671
1672#ifdef SUBTARGET_OVERRIDE_OPTIONS
1673  SUBTARGET_OVERRIDE_OPTIONS;
1674#endif
1675
1676#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1677  SUBSUBTARGET_OVERRIDE_OPTIONS;
1678#endif
1679
1680  /* -fPIC is the default for 64-bit Mach-O (Darwin); force it on here.  */
1681  if (TARGET_MACHO && TARGET_64BIT)
1682    flag_pic = 2;
1683
1684  /* Set the default values for switches whose default depends on TARGET_64BIT
1685     in case they weren't overwritten by command line options.  */
1686  if (TARGET_64BIT)
1687    {
1688      /* Mach-O doesn't support omitting the frame pointer for now.  */
1689      if (flag_omit_frame_pointer == 2)
1690	flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1691      if (flag_asynchronous_unwind_tables == 2)
1692	flag_asynchronous_unwind_tables = 1;
1693      if (flag_pcc_struct_return == 2)
1694	flag_pcc_struct_return = 0;
1695    }
1696  else
1697    {
1698      if (flag_omit_frame_pointer == 2)
1699	flag_omit_frame_pointer = 0;
1700      if (flag_asynchronous_unwind_tables == 2)
1701	flag_asynchronous_unwind_tables = 0;
1702      if (flag_pcc_struct_return == 2)
1703	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1704    }
1705
1706  /* Need to check -mtune=generic first.  */
1707  if (ix86_tune_string)
1708    {
1709      if (!strcmp (ix86_tune_string, "generic")
1710	  || !strcmp (ix86_tune_string, "i686")
1711	  /* As special support for cross compilers we read -mtune=native
1712	     as -mtune=generic.  With native compilers we won't see the
1713	     -mtune=native, as it was changed by the driver.  */
1714	  || !strcmp (ix86_tune_string, "native"))
1715	{
1716	  if (TARGET_64BIT)
1717	    ix86_tune_string = "generic64";
1718	  else
1719	    ix86_tune_string = "generic32";
1720	}
1721      else if (!strncmp (ix86_tune_string, "generic", 7))
1722	error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1723    }
1724  else
1725    {
1726      if (ix86_arch_string)
1727	ix86_tune_string = ix86_arch_string;
1728      if (!ix86_tune_string)
1729	{
1730	  ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1731	  ix86_tune_defaulted = 1;
1732	}
1733
1734      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
1735	 need to use a sensible tune option.  */
1736      if (!strcmp (ix86_tune_string, "generic")
1737	  || !strcmp (ix86_tune_string, "x86-64")
1738	  || !strcmp (ix86_tune_string, "i686"))
1739	{
1740	  if (TARGET_64BIT)
1741	    ix86_tune_string = "generic64";
1742	  else
1743	    ix86_tune_string = "generic32";
1744	}
1745    }
1746  if (!strcmp (ix86_tune_string, "x86-64"))
1747    warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated.  Use -mtune=k8 or "
1748	     "-mtune=generic instead as appropriate.");
1749
1750  if (!ix86_arch_string)
1751    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1752  if (!strcmp (ix86_arch_string, "generic"))
1753    error ("generic CPU can be used only for -mtune= switch");
1754  if (!strncmp (ix86_arch_string, "generic", 7))
1755    error ("bad value (%s) for -march= switch", ix86_arch_string);
1756
1757  if (ix86_cmodel_string != 0)
1758    {
1759      if (!strcmp (ix86_cmodel_string, "small"))
1760	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1761      else if (!strcmp (ix86_cmodel_string, "medium"))
1762	ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1763      else if (flag_pic)
1764	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1765      else if (!strcmp (ix86_cmodel_string, "32"))
1766	ix86_cmodel = CM_32;
1767      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1768	ix86_cmodel = CM_KERNEL;
1769      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1770	ix86_cmodel = CM_LARGE;
1771      else
1772	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1773    }
1774  else
1775    {
1776      ix86_cmodel = CM_32;
1777      if (TARGET_64BIT)
1778	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1779    }
1780  if (ix86_asm_string != 0)
1781    {
1782      if (! TARGET_MACHO
1783	  && !strcmp (ix86_asm_string, "intel"))
1784	ix86_asm_dialect = ASM_INTEL;
1785      else if (!strcmp (ix86_asm_string, "att"))
1786	ix86_asm_dialect = ASM_ATT;
1787      else
1788	error ("bad value (%s) for -masm= switch", ix86_asm_string);
1789    }
1790  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1791    error ("code model %qs not supported in the %s bit mode",
1792	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1793  if (ix86_cmodel == CM_LARGE)
1794    sorry ("code model %<large%> not supported yet");
1795  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1796    sorry ("%i-bit mode not compiled in",
1797	   (target_flags & MASK_64BIT) ? 64 : 32);
1798
1799  for (i = 0; i < pta_size; i++)
1800    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1801      {
1802	ix86_arch = processor_alias_table[i].processor;
1803	/* Default cpu tuning to the architecture.  */
1804	ix86_tune = ix86_arch;
1805	if (processor_alias_table[i].flags & PTA_MMX
1806	    && !(target_flags_explicit & MASK_MMX))
1807	  target_flags |= MASK_MMX;
1808	if (processor_alias_table[i].flags & PTA_3DNOW
1809	    && !(target_flags_explicit & MASK_3DNOW))
1810	  target_flags |= MASK_3DNOW;
1811	if (processor_alias_table[i].flags & PTA_3DNOW_A
1812	    && !(target_flags_explicit & MASK_3DNOW_A))
1813	  target_flags |= MASK_3DNOW_A;
1814	if (processor_alias_table[i].flags & PTA_SSE
1815	    && !(target_flags_explicit & MASK_SSE))
1816	  target_flags |= MASK_SSE;
1817	if (processor_alias_table[i].flags & PTA_SSE2
1818	    && !(target_flags_explicit & MASK_SSE2))
1819	  target_flags |= MASK_SSE2;
1820	if (processor_alias_table[i].flags & PTA_SSE3
1821	    && !(target_flags_explicit & MASK_SSE3))
1822	  target_flags |= MASK_SSE3;
1823	if (processor_alias_table[i].flags & PTA_SSSE3
1824	    && !(target_flags_explicit & MASK_SSSE3))
1825	  target_flags |= MASK_SSSE3;
1826	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1827	  x86_prefetch_sse = true;
1828	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1829	  error ("CPU you selected does not support x86-64 "
1830		 "instruction set");
1831	break;
1832      }
1833
1834  if (i == pta_size)
1835    error ("bad value (%s) for -march= switch", ix86_arch_string);
1836
1837  for (i = 0; i < pta_size; i++)
1838    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1839      {
1840	ix86_tune = processor_alias_table[i].processor;
1841	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1842	  {
1843	    if (ix86_tune_defaulted)
1844	      {
1845		ix86_tune_string = "x86-64";
1846		for (i = 0; i < pta_size; i++)
1847		  if (! strcmp (ix86_tune_string,
1848				processor_alias_table[i].name))
1849		    break;
1850		ix86_tune = processor_alias_table[i].processor;
1851	      }
1852	    else
1853	      error ("CPU you selected does not support x86-64 "
1854		     "instruction set");
1855	  }
1856        /* Intel CPUs have always interpreted SSE prefetch instructions as
1857	   NOPs; so, we can enable SSE prefetch instructions even when
1858	   -mtune (rather than -march) points us to a processor that has them.
1859	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1860	   higher processors.  */
1861	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1862	  x86_prefetch_sse = true;
1863	break;
1864      }
1865  if (i == pta_size)
1866    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1867
1868  if (optimize_size)
1869    ix86_cost = &size_cost;
1870  else
1871    ix86_cost = processor_target_table[ix86_tune].cost;
1872  target_flags |= processor_target_table[ix86_tune].target_enable;
1873  target_flags &= ~processor_target_table[ix86_tune].target_disable;
1874
1875  /* Arrange to set up i386_stack_locals for all functions.  */
1876  init_machine_status = ix86_init_machine_status;
1877
1878  /* Validate -mregparm= value.  */
1879  if (ix86_regparm_string)
1880    {
1881      i = atoi (ix86_regparm_string);
1882      if (i < 0 || i > REGPARM_MAX)
1883	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1884      else
1885	ix86_regparm = i;
1886    }
1887  else
1888   if (TARGET_64BIT)
1889     ix86_regparm = REGPARM_MAX;
1890
1891  /* If the user has provided any of the -malign-* options,
1892     warn and use that value only if -falign-* is not set.
1893     Remove this code in GCC 3.2 or later.  */
1894  if (ix86_align_loops_string)
1895    {
1896      warning (0, "-malign-loops is obsolete, use -falign-loops");
1897      if (align_loops == 0)
1898	{
1899	  i = atoi (ix86_align_loops_string);
1900	  if (i < 0 || i > MAX_CODE_ALIGN)
1901	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1902	  else
1903	    align_loops = 1 << i;
1904	}
1905    }
1906
1907  if (ix86_align_jumps_string)
1908    {
1909      warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1910      if (align_jumps == 0)
1911	{
1912	  i = atoi (ix86_align_jumps_string);
1913	  if (i < 0 || i > MAX_CODE_ALIGN)
1914	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1915	  else
1916	    align_jumps = 1 << i;
1917	}
1918    }
1919
1920  if (ix86_align_funcs_string)
1921    {
1922      warning (0, "-malign-functions is obsolete, use -falign-functions");
1923      if (align_functions == 0)
1924	{
1925	  i = atoi (ix86_align_funcs_string);
1926	  if (i < 0 || i > MAX_CODE_ALIGN)
1927	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1928	  else
1929	    align_functions = 1 << i;
1930	}
1931    }
1932
1933  /* Default align_* from the processor table.  */
1934  if (align_loops == 0)
1935    {
1936      align_loops = processor_target_table[ix86_tune].align_loop;
1937      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1938    }
1939  if (align_jumps == 0)
1940    {
1941      align_jumps = processor_target_table[ix86_tune].align_jump;
1942      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1943    }
1944  if (align_functions == 0)
1945    {
1946      align_functions = processor_target_table[ix86_tune].align_func;
1947    }
1948
1949  /* Validate -mbranch-cost= value, or provide default.  */
1950  ix86_branch_cost = ix86_cost->branch_cost;
1951  if (ix86_branch_cost_string)
1952    {
1953      i = atoi (ix86_branch_cost_string);
1954      if (i < 0 || i > 5)
1955	error ("-mbranch-cost=%d is not between 0 and 5", i);
1956      else
1957	ix86_branch_cost = i;
1958    }
1959  if (ix86_section_threshold_string)
1960    {
1961      i = atoi (ix86_section_threshold_string);
1962      if (i < 0)
1963	error ("-mlarge-data-threshold=%d is negative", i);
1964      else
1965	ix86_section_threshold = i;
1966    }
1967
1968  if (ix86_tls_dialect_string)
1969    {
1970      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1971	ix86_tls_dialect = TLS_DIALECT_GNU;
1972      else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1973	ix86_tls_dialect = TLS_DIALECT_GNU2;
1974      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1975	ix86_tls_dialect = TLS_DIALECT_SUN;
1976      else
1977	error ("bad value (%s) for -mtls-dialect= switch",
1978	       ix86_tls_dialect_string);
1979    }
1980
1981  /* Keep nonleaf frame pointers.  */
1982  if (flag_omit_frame_pointer)
1983    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1984  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1985    flag_omit_frame_pointer = 1;
1986
1987  /* If we're doing fast math, we don't care about comparison order
1988     wrt NaNs.  This lets us use a shorter comparison sequence.  */
1989  if (flag_finite_math_only)
1990    target_flags &= ~MASK_IEEE_FP;
1991
1992  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1993     since the insns won't need emulation.  */
1994  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1995    target_flags &= ~MASK_NO_FANCY_MATH_387;
1996
1997  /* Likewise, if the target doesn't have a 387, or we've specified
1998     software floating point, don't use 387 inline intrinsics.  */
1999  if (!TARGET_80387)
2000    target_flags |= MASK_NO_FANCY_MATH_387;
2001
2002  /* Turn on SSE3 builtins for -mssse3.  */
2003  if (TARGET_SSSE3)
2004    target_flags |= MASK_SSE3;
2005
2006  /* Turn on SSE2 builtins for -msse3.  */
2007  if (TARGET_SSE3)
2008    target_flags |= MASK_SSE2;
2009
2010  /* Turn on SSE builtins for -msse2.  */
2011  if (TARGET_SSE2)
2012    target_flags |= MASK_SSE;
2013
2014  /* Turn on MMX builtins for -msse.  */
2015  if (TARGET_SSE)
2016    {
2017      target_flags |= MASK_MMX & ~target_flags_explicit;
2018      x86_prefetch_sse = true;
2019    }
2020
2021  /* Turn on MMX builtins for 3Dnow.  */
2022  if (TARGET_3DNOW)
2023    target_flags |= MASK_MMX;
2024
2025  if (TARGET_64BIT)
2026    {
2027      if (TARGET_ALIGN_DOUBLE)
2028	error ("-malign-double makes no sense in the 64bit mode");
2029      if (TARGET_RTD)
2030	error ("-mrtd calling convention not supported in the 64bit mode");
2031
2032      /* Enable by default the SSE and MMX builtins.  Do allow the user to
2033	 explicitly disable any of these.  In particular, disabling SSE and
2034	 MMX for kernel code is extremely useful.  */
2035      target_flags
2036	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2037	    & ~target_flags_explicit);
2038     }
2039  else
2040    {
2041      /* The i386 ABI does not specify a red zone.  It still makes sense to use
2042         one when the programmer takes care to keep the stack from being clobbered.  */
2043      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2044        target_flags |= MASK_NO_RED_ZONE;
2045    }
2046
2047  /* Validate -mpreferred-stack-boundary= value, or provide default.
2048     The default of 128 bits is for Pentium III's SSE __m128.  We can't
2049     lower it just because of optimize_size, since then object files
2050     compiled with -Os and -On could not be mixed.  */
2051  ix86_preferred_stack_boundary = 128;
2052  if (ix86_preferred_stack_boundary_string)
2053    {
2054      i = atoi (ix86_preferred_stack_boundary_string);
2055      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2056	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2057	       TARGET_64BIT ? 4 : 2);
2058      else
2059	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2060    }
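  /* For example, -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
     = 128 bits, i.e. the stack stays 16-byte aligned, matching the default
     chosen above.  */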
2061
2062  /* Accept -msseregparm only if at least SSE support is enabled.  */
2063  if (TARGET_SSEREGPARM
2064      && ! TARGET_SSE)
2065    error ("-msseregparm used without SSE enabled");
2066
2067  ix86_fpmath = TARGET_FPMATH_DEFAULT;
2068
2069  if (ix86_fpmath_string != 0)
2070    {
2071      if (! strcmp (ix86_fpmath_string, "387"))
2072	ix86_fpmath = FPMATH_387;
2073      else if (! strcmp (ix86_fpmath_string, "sse"))
2074	{
2075	  if (!TARGET_SSE)
2076	    {
2077	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
2078	      ix86_fpmath = FPMATH_387;
2079	    }
2080	  else
2081	    ix86_fpmath = FPMATH_SSE;
2082	}
2083      else if (! strcmp (ix86_fpmath_string, "387,sse")
2084	       || ! strcmp (ix86_fpmath_string, "sse,387"))
2085	{
2086	  if (!TARGET_SSE)
2087	    {
2088	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
2089	      ix86_fpmath = FPMATH_387;
2090	    }
2091	  else if (!TARGET_80387)
2092	    {
2093	      warning (0, "387 instruction set disabled, using SSE arithmetics");
2094	      ix86_fpmath = FPMATH_SSE;
2095	    }
2096	  else
2097	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
2098	}
2099      else
2100	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2101    }
2102
2103  /* If the i387 is disabled, then do not return values in it. */
2104  if (!TARGET_80387)
2105    target_flags &= ~MASK_FLOAT_RETURNS;
2106
2107  if ((x86_accumulate_outgoing_args & TUNEMASK)
2108      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2109      && !optimize_size)
2110    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2111
2112  /* ??? Unwind info is not correct around the CFG unless either a frame
2113     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
2114     unwind info generation to be aware of the CFG and propagating states
2115     around edges.  */
2116  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2117       || flag_exceptions || flag_non_call_exceptions)
2118      && flag_omit_frame_pointer
2119      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2120    {
2121      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2122	warning (0, "unwind tables currently require either a frame pointer "
2123		 "or -maccumulate-outgoing-args for correctness");
2124      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2125    }
2126
2127  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
2128  {
2129    char *p;
2130    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2131    p = strchr (internal_label_prefix, 'X');
2132    internal_label_prefix_len = p - internal_label_prefix;
2133    *p = '\0';
2134  }
2135
2136  /* When no scheduling description is available, disable the scheduler
2137     passes so they won't slow down compilation and make x87 code slower.  */
2138  if (!TARGET_SCHEDULE)
2139    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2140}
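/* A worked example of the option handling above (illustrative only):
   "-march=k8" matches the "k8" entry of processor_alias_table, so ix86_arch
   becomes PROCESSOR_K8, ix86_tune defaults to the same value, and MASK_MMX,
   MASK_3DNOW, MASK_3DNOW_A, MASK_SSE and MASK_SSE2 are enabled unless the
   user set them explicitly.  Adding "-mtune=generic" then only changes
   ix86_tune (to generic32 or generic64), leaving the -march derived
   instruction set flags alone.  */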
2141
2142/* Switch to the appropriate section for output of DECL.
2143   DECL is either a `VAR_DECL' node or a constant of some sort.
2144   RELOC indicates whether forming the initial value of DECL requires
2145   link-time relocations.  */
2146
2147static section *
2148x86_64_elf_select_section (tree decl, int reloc,
2149			   unsigned HOST_WIDE_INT align)
2150{
2151  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2152      && ix86_in_large_data_p (decl))
2153    {
2154      const char *sname = NULL;
2155      unsigned int flags = SECTION_WRITE;
2156      switch (categorize_decl_for_section (decl, reloc))
2157	{
2158	case SECCAT_DATA:
2159	  sname = ".ldata";
2160	  break;
2161	case SECCAT_DATA_REL:
2162	  sname = ".ldata.rel";
2163	  break;
2164	case SECCAT_DATA_REL_LOCAL:
2165	  sname = ".ldata.rel.local";
2166	  break;
2167	case SECCAT_DATA_REL_RO:
2168	  sname = ".ldata.rel.ro";
2169	  break;
2170	case SECCAT_DATA_REL_RO_LOCAL:
2171	  sname = ".ldata.rel.ro.local";
2172	  break;
2173	case SECCAT_BSS:
2174	  sname = ".lbss";
2175	  flags |= SECTION_BSS;
2176	  break;
2177	case SECCAT_RODATA:
2178	case SECCAT_RODATA_MERGE_STR:
2179	case SECCAT_RODATA_MERGE_STR_INIT:
2180	case SECCAT_RODATA_MERGE_CONST:
2181	  sname = ".lrodata";
2182	  flags = 0;
2183	  break;
2184	case SECCAT_SRODATA:
2185	case SECCAT_SDATA:
2186	case SECCAT_SBSS:
2187	  gcc_unreachable ();
2188	case SECCAT_TEXT:
2189	case SECCAT_TDATA:
2190	case SECCAT_TBSS:
2191	  /* We don't split these for the medium model.  Place them into
2192	     the default sections and hope for the best.  */
2193	  break;
2194	}
2195      if (sname)
2196	{
2197	  /* We might get called with string constants, but get_named_section
2198	     doesn't like them as they are not DECLs.  Also, we need to set
2199	     flags in that case.  */
2200	  if (!DECL_P (decl))
2201	    return get_section (sname, flags, NULL);
2202	  return get_named_section (decl, sname, reloc);
2203	}
2204    }
2205  return default_elf_select_section (decl, reloc, align);
2206}
2207
2208/* Build up a unique section name, expressed as a
2209   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2210   RELOC indicates whether the initial value of EXP requires
2211   link-time relocations.  */
2212
2213static void
2214x86_64_elf_unique_section (tree decl, int reloc)
2215{
2216  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2217      && ix86_in_large_data_p (decl))
2218    {
2219      const char *prefix = NULL;
2220      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
2221      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2222
2223      switch (categorize_decl_for_section (decl, reloc))
2224	{
2225	case SECCAT_DATA:
2226	case SECCAT_DATA_REL:
2227	case SECCAT_DATA_REL_LOCAL:
2228	case SECCAT_DATA_REL_RO:
2229	case SECCAT_DATA_REL_RO_LOCAL:
2230          prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2231	  break;
2232	case SECCAT_BSS:
2233          prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2234	  break;
2235	case SECCAT_RODATA:
2236	case SECCAT_RODATA_MERGE_STR:
2237	case SECCAT_RODATA_MERGE_STR_INIT:
2238	case SECCAT_RODATA_MERGE_CONST:
2239          prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2240	  break;
2241	case SECCAT_SRODATA:
2242	case SECCAT_SDATA:
2243	case SECCAT_SBSS:
2244	  gcc_unreachable ();
2245	case SECCAT_TEXT:
2246	case SECCAT_TDATA:
2247	case SECCAT_TBSS:
2248	  /* We don't split these for the medium model.  Place them into
2249	     the default sections and hope for the best.  */
2250	  break;
2251	}
2252      if (prefix)
2253	{
2254	  const char *name;
2255	  size_t nlen, plen;
2256	  char *string;
2257	  plen = strlen (prefix);
2258
2259	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2260	  name = targetm.strip_name_encoding (name);
2261	  nlen = strlen (name);
2262
2263	  string = alloca (nlen + plen + 1);
2264	  memcpy (string, prefix, plen);
2265	  memcpy (string + plen, name, nlen + 1);
2266
2267	  DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2268	  return;
2269	}
2270    }
2271  default_unique_section (decl, reloc);
2272}
2273
2274#ifdef COMMON_ASM_OP
2275/* This says how to output assembler code to declare an
2276   uninitialized external linkage data object.
2277
2278   For medium model x86-64 we need to use the .largecomm directive for
2279   large objects.  */
2280void
2281x86_elf_aligned_common (FILE *file,
2282			const char *name, unsigned HOST_WIDE_INT size,
2283			int align)
2284{
2285  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2286      && size > (unsigned int)ix86_section_threshold)
2287    fprintf (file, ".largecomm\t");
2288  else
2289    fprintf (file, "%s", COMMON_ASM_OP);
2290  assemble_name (file, name);
2291  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2292	   size, align / BITS_PER_UNIT);
2293}
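/* For instance, a 128 KiB zero-initialized global compiled with
   -mcmodel=medium and the default -mlarge-data-threshold makes the function
   above emit something like

       .largecomm	big_buf,131072,32

   (big_buf is a made-up symbol name; the last operand is the alignment in
   bytes), whereas smaller objects fall back to the ordinary COMMON_ASM_OP
   form.  */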
2294
2295/* Utility function for targets to use in implementing
2296   ASM_OUTPUT_ALIGNED_BSS.  */
2297
2298void
2299x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2300			const char *name, unsigned HOST_WIDE_INT size,
2301			int align)
2302{
2303  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2304      && size > (unsigned int)ix86_section_threshold)
2305    switch_to_section (get_named_section (decl, ".lbss", 0));
2306  else
2307    switch_to_section (bss_section);
2308  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2309#ifdef ASM_DECLARE_OBJECT_NAME
2310  last_assemble_variable_decl = decl;
2311  ASM_DECLARE_OBJECT_NAME (file, name, decl);
2312#else
2313  /* The standard thing is just to output a label for the object.  */
2314  ASM_OUTPUT_LABEL (file, name);
2315#endif /* ASM_DECLARE_OBJECT_NAME */
2316  ASM_OUTPUT_SKIP (file, size ? size : 1);
2317}
2318#endif
2319
2320void
2321optimization_options (int level, int size ATTRIBUTE_UNUSED)
2322{
2323  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
2324     make the problem with not enough registers even worse.  */
2325#ifdef INSN_SCHEDULING
2326  if (level > 1)
2327    flag_schedule_insns = 0;
2328#endif
2329
2330  if (TARGET_MACHO)
2331    /* The Darwin libraries never set errno, so we might as well
2332       avoid calling them when that's the only reason we would.  */
2333    flag_errno_math = 0;
2334
2335  /* The default values of these switches depend on TARGET_64BIT,
2336     which is not known at this moment.  Mark these values with 2 and
2337     let the user override them.  In case there is no command line option
2338     specifying them, we will set the defaults in override_options.  */
2339  if (optimize >= 1)
2340    flag_omit_frame_pointer = 2;
2341  flag_pcc_struct_return = 2;
2342  flag_asynchronous_unwind_tables = 2;
2343#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2344  SUBTARGET_OPTIMIZATION_OPTIONS;
2345#endif
2346}
2347
2348/* Table of valid machine attributes.  */
2349const struct attribute_spec ix86_attribute_table[] =
2350{
2351  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2352  /* Stdcall attribute says callee is responsible for popping arguments
2353     if they are not variable.  */
2354  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2355  /* Fastcall attribute says callee is responsible for popping arguments
2356     if they are not variable.  */
2357  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2358  /* Cdecl attribute says the callee is a normal C declaration */
2359  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2360  /* Regparm attribute specifies how many integer arguments are to be
2361     passed in registers.  */
2362  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
2363  /* Sseregparm attribute says we are using x86_64 calling conventions
2364     for FP arguments.  */
2365  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2366  /* force_align_arg_pointer says this function realigns the stack at entry.  */
2367  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2368    false, true,  true, ix86_handle_cconv_attribute },
2369#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2370  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2371  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2372  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
2373#endif
2374  { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
2375  { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
2376#ifdef SUBTARGET_ATTRIBUTE_TABLE
2377  SUBTARGET_ATTRIBUTE_TABLE,
2378#endif
2379  { NULL,        0, 0, false, false, false, NULL }
2380};
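/* Sketch of how the calling convention attributes above appear in user
   code (illustrative declarations only, not part of this file):

       int __attribute__((stdcall))    s (int a, int b);   callee pops args
       int __attribute__((fastcall))   f (int a, int b);   a, b in ecx, edx
       int __attribute__((regparm(3))) r (int a, int b, int c);
       struct m { char c; int i; } __attribute__((ms_struct));

   The handlers named in the table diagnose invalid combinations, e.g.
   fastcall together with regparm.  */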
2381
2382/* Decide whether we can make a sibling call to a function.  DECL is the
2383   declaration of the function being targeted by the call and EXP is the
2384   CALL_EXPR representing the call.  */
2385
2386static bool
2387ix86_function_ok_for_sibcall (tree decl, tree exp)
2388{
2389  tree func;
2390  rtx a, b;
2391
2392  /* If we are generating position-independent code, we cannot sibcall
2393     optimize any indirect call, or a direct call to a global function,
2394     as the PLT requires %ebx be live.  */
2395  if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2396    return false;
2397
2398  if (decl)
2399    func = decl;
2400  else
2401    {
2402      func = TREE_TYPE (TREE_OPERAND (exp, 0));
2403      if (POINTER_TYPE_P (func))
2404        func = TREE_TYPE (func);
2405    }
2406
2407  /* Check that the return value locations are the same.  For example,
2408     if we are returning floats on the 80387 register stack, we cannot
2409     make a sibcall from a function that doesn't return a float to a
2410     function that does or, conversely, from a function that does return
2411     a float to a function that doesn't; the necessary stack adjustment
2412     would not be executed.  This is also the place we notice
2413     differences in the return value ABI.  Note that it is ok for one
2414     of the functions to have void return type as long as the return
2415     value of the other is passed in a register.  */
2416  a = ix86_function_value (TREE_TYPE (exp), func, false);
2417  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2418			   cfun->decl, false);
2419  if (STACK_REG_P (a) || STACK_REG_P (b))
2420    {
2421      if (!rtx_equal_p (a, b))
2422	return false;
2423    }
2424  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2425    ;
2426  else if (!rtx_equal_p (a, b))
2427    return false;
2428
2429  /* If this call is indirect, we'll need to be able to use a call-clobbered
2430     register for the address of the target function.  Make sure that all
2431     such registers are not used for passing parameters.  */
2432  if (!decl && !TARGET_64BIT)
2433    {
2434      tree type;
2435
2436      /* We're looking at the CALL_EXPR, we need the type of the function.  */
2437      type = TREE_OPERAND (exp, 0);		/* pointer expression */
2438      type = TREE_TYPE (type);			/* pointer type */
2439      type = TREE_TYPE (type);			/* function type */
2440
2441      if (ix86_function_regparm (type, NULL) >= 3)
2442	{
2443	  /* ??? Need to count the actual number of registers to be used,
2444	     not the possible number of registers.  Fix later.  */
2445	  return false;
2446	}
2447    }
2448
2449#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2450  /* Dllimport'd functions are also called indirectly.  */
2451  if (decl && DECL_DLLIMPORT_P (decl)
2452      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2453    return false;
2454#endif
2455
2456  /* If we force-aligned the stack, then sibcalling would unalign the
2457     stack, which may break the called function.  */
2458  if (cfun->machine->force_align_arg_pointer)
2459    return false;
2460
2461  /* Otherwise okay.  That also includes certain types of indirect calls.  */
2462  return true;
2463}
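/* Illustrative only: in 32-bit PIC code a tail call such as

       extern void g (void);
       void f (void) { g (); }

   is not turned into a sibcall unless g binds locally, because the PLT call
   sequence needs %ebx to hold the GOT pointer, which is exactly what the
   first test above rejects.  */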
2464
2465/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2466   calling convention attributes;
2467   arguments as in struct attribute_spec.handler.  */
2468
2469static tree
2470ix86_handle_cconv_attribute (tree *node, tree name,
2471				   tree args,
2472				   int flags ATTRIBUTE_UNUSED,
2473				   bool *no_add_attrs)
2474{
2475  if (TREE_CODE (*node) != FUNCTION_TYPE
2476      && TREE_CODE (*node) != METHOD_TYPE
2477      && TREE_CODE (*node) != FIELD_DECL
2478      && TREE_CODE (*node) != TYPE_DECL)
2479    {
2480      warning (OPT_Wattributes, "%qs attribute only applies to functions",
2481	       IDENTIFIER_POINTER (name));
2482      *no_add_attrs = true;
2483      return NULL_TREE;
2484    }
2485
2486  /* Can combine regparm with all attributes but fastcall.  */
2487  if (is_attribute_p ("regparm", name))
2488    {
2489      tree cst;
2490
2491      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2492        {
2493	  error ("fastcall and regparm attributes are not compatible");
2494	}
2495
2496      cst = TREE_VALUE (args);
2497      if (TREE_CODE (cst) != INTEGER_CST)
2498	{
2499	  warning (OPT_Wattributes,
2500		   "%qs attribute requires an integer constant argument",
2501		   IDENTIFIER_POINTER (name));
2502	  *no_add_attrs = true;
2503	}
2504      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2505	{
2506	  warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2507		   IDENTIFIER_POINTER (name), REGPARM_MAX);
2508	  *no_add_attrs = true;
2509	}
2510
2511      if (!TARGET_64BIT
2512	  && lookup_attribute (ix86_force_align_arg_pointer_string,
2513			       TYPE_ATTRIBUTES (*node))
2514	  && compare_tree_int (cst, REGPARM_MAX-1) > 0)
2515	{
2516	  error ("%s functions limited to %d register parameters",
2517		 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2518	}
2519
2520      return NULL_TREE;
2521    }
2522
2523  if (TARGET_64BIT)
2524    {
2525      warning (OPT_Wattributes, "%qs attribute ignored",
2526	       IDENTIFIER_POINTER (name));
2527      *no_add_attrs = true;
2528      return NULL_TREE;
2529    }
2530
2531  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
2532  if (is_attribute_p ("fastcall", name))
2533    {
2534      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2535        {
2536	  error ("fastcall and cdecl attributes are not compatible");
2537	}
2538      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2539        {
2540	  error ("fastcall and stdcall attributes are not compatible");
2541	}
2542      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2543        {
2544	  error ("fastcall and regparm attributes are not compatible");
2545	}
2546    }
2547
2548  /* Can combine stdcall with fastcall (redundant), regparm and
2549     sseregparm.  */
2550  else if (is_attribute_p ("stdcall", name))
2551    {
2552      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2553        {
2554	  error ("stdcall and cdecl attributes are not compatible");
2555	}
2556      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2557        {
2558	  error ("stdcall and fastcall attributes are not compatible");
2559	}
2560    }
2561
2562  /* Can combine cdecl with regparm and sseregparm.  */
2563  else if (is_attribute_p ("cdecl", name))
2564    {
2565      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2566        {
2567	  error ("stdcall and cdecl attributes are not compatible");
2568	}
2569      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2570        {
2571	  error ("fastcall and cdecl attributes are not compatible");
2572	}
2573    }
2574
2575  /* Can combine sseregparm with all attributes.  */
2576
2577  return NULL_TREE;
2578}
2579
2580/* Return 0 if the attributes for two types are incompatible, 1 if they
2581   are compatible, and 2 if they are nearly compatible (which causes a
2582   warning to be generated).  */
2583
2584static int
2585ix86_comp_type_attributes (tree type1, tree type2)
2586{
2587  /* Check for mismatch of non-default calling convention.  */
2588  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2589
2590  if (TREE_CODE (type1) != FUNCTION_TYPE)
2591    return 1;
2592
2593  /* Check for mismatched fastcall/regparm types.  */
2594  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2595       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2596      || (ix86_function_regparm (type1, NULL)
2597	  != ix86_function_regparm (type2, NULL)))
2598    return 0;
2599
2600  /* Check for mismatched sseregparm types.  */
2601  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2602      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2603    return 0;
2604
2605  /* Check for mismatched return types (cdecl vs stdcall).  */
2606  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2607      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2608    return 0;
2609
2610  return 1;
2611}
2612
2613/* Return the regparm value for a function with the indicated TYPE and DECL.
2614   DECL may be NULL when calling function indirectly
2615   or considering a libcall.  */
2616
2617static int
2618ix86_function_regparm (tree type, tree decl)
2619{
2620  tree attr;
2621  int regparm = ix86_regparm;
2622  bool user_convention = false;
2623
2624  if (!TARGET_64BIT)
2625    {
2626      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2627      if (attr)
2628	{
2629	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2630	  user_convention = true;
2631	}
2632
2633      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2634	{
2635	  regparm = 2;
2636	  user_convention = true;
2637	}
2638
2639      /* Use register calling convention for local functions when possible.  */
2640      if (!TARGET_64BIT && !user_convention && decl
2641	  && flag_unit_at_a_time && !profile_flag)
2642	{
2643	  struct cgraph_local_info *i = cgraph_local_info (decl);
2644	  if (i && i->local)
2645	    {
2646	      int local_regparm, globals = 0, regno;
2647
2648	      /* Make sure no regparm register is taken by a global register
2649		 variable.  */
2650	      for (local_regparm = 0; local_regparm < 3; local_regparm++)
2651		if (global_regs[local_regparm])
2652		  break;
2653	      /* We can't use regparm(3) for nested functions as these use
2654		 the static chain pointer in the third argument.  */
2655	      if (local_regparm == 3
2656		  && decl_function_context (decl)
2657		  && !DECL_NO_STATIC_CHAIN (decl))
2658		local_regparm = 2;
2659	      /* If the function realigns its stack pointer, the
2660		 prologue will clobber %ecx.  If we've already
2661		 generated code for the callee, the callee
2662		 DECL_STRUCT_FUNCTION is gone, so we fall back to
2663		 scanning the attributes for the self-realigning
2664		 property.  */
2665	      if ((DECL_STRUCT_FUNCTION (decl)
2666		   && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2667		  || (!DECL_STRUCT_FUNCTION (decl)
2668		      && lookup_attribute (ix86_force_align_arg_pointer_string,
2669					   TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2670		local_regparm = 2;
2671	      /* Each global register variable increases register pressure,
2672		 so the more global reg vars there are, the less useful the
2673		 regparm optimization becomes, unless the user requested it explicitly.  */
2674	      for (regno = 0; regno < 6; regno++)
2675		if (global_regs[regno])
2676		  globals++;
2677	      local_regparm
2678		= globals < local_regparm ? local_regparm - globals : 0;
2679
2680	      if (local_regparm > regparm)
2681		regparm = local_regparm;
2682	    }
2683	}
2684    }
2685  return regparm;
2686}
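
/* Illustrative sketch: assuming -m32 and unit-at-a-time compilation, a
   purely local function such as

     static int sum3 (int a, int b, int c) { return a + b + c; }

   that is only ever called directly may be promoted by the code above to
   regparm 3 (A, B and C in %eax, %edx and %ecx), while a function declared
   with __attribute__((fastcall)) stays pinned to two register arguments.
   The exact outcome also depends on global register variables and stack
   realignment, as handled above.  */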
2687
2688/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2689   DFmode (2) arguments in SSE registers for a function with the
2690   indicated TYPE and DECL.  DECL may be NULL when calling function
2691   indirectly or considering a libcall.  Otherwise return 0.  */
2692
2693static int
2694ix86_function_sseregparm (tree type, tree decl)
2695{
2696  /* Use SSE registers to pass SFmode and DFmode arguments if requested
2697     by the sseregparm attribute.  */
2698  if (TARGET_SSEREGPARM
2699      || (type
2700	  && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2701    {
2702      if (!TARGET_SSE)
2703	{
2704	  if (decl)
2705	    error ("Calling %qD with attribute sseregparm without "
2706		   "SSE/SSE2 enabled", decl);
2707	  else
2708	    error ("Calling %qT with attribute sseregparm without "
2709		   "SSE/SSE2 enabled", type);
2710	  return 0;
2711	}
2712
2713      return 2;
2714    }
2715
2716  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2717     (and DFmode for SSE2) arguments in SSE registers,
2718     even for 32-bit targets.  */
2719  if (!TARGET_64BIT && decl
2720      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2721    {
2722      struct cgraph_local_info *i = cgraph_local_info (decl);
2723      if (i && i->local)
2724	return TARGET_SSE2 ? 2 : 1;
2725    }
2726
2727  return 0;
2728}
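
/* Illustrative sketch: with -m32 -msse2, a declaration such as

     double dot (double x, double y) __attribute__((sseregparm));

   makes the function above return 2, so the SFmode/DFmode arguments are
   passed in SSE registers (typically %xmm0 and %xmm1) instead of on the
   stack.  Without SSE enabled the attribute is diagnosed as an error, as
   coded above.  */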
2729
2730/* Return true if EAX is live at the start of the function.  Used by
2731   ix86_expand_prologue to determine if we need special help before
2732   calling allocate_stack_worker.  */
2733
2734static bool
2735ix86_eax_live_at_start_p (void)
2736{
2737  /* Cheat.  Don't bother working forward from ix86_function_regparm
2738     to the function type to whether an actual argument is located in
2739     eax.  Instead just look at cfg info, which is still close enough
2740     to correct at this point.  This gives false positives for broken
2741     functions that might use uninitialized data that happens to be
2742     allocated in eax, but who cares?  */
2743  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2744}
2745
2746/* Value is the number of bytes of arguments automatically
2747   popped when returning from a subroutine call.
2748   FUNDECL is the declaration node of the function (as a tree),
2749   FUNTYPE is the data type of the function (as a tree),
2750   or for a library call it is an identifier node for the subroutine name.
2751   SIZE is the number of bytes of arguments passed on the stack.
2752
2753   On the 80386, the RTD insn may be used to pop them if the number
2754     of args is fixed, but if the number is variable then the caller
2755     must pop them all.  RTD can't be used for library calls now
2756     because the library is compiled with the Unix compiler.
2757   Use of RTD is a selectable option, since it is incompatible with
2758   standard Unix calling sequences.  If the option is not selected,
2759   the caller must always pop the args.
2760
2761   The attribute stdcall is equivalent to RTD on a per module basis.  */
2762
2763int
2764ix86_return_pops_args (tree fundecl, tree funtype, int size)
2765{
2766  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2767
2768  /* Cdecl functions override -mrtd, and never pop the stack.  */
2769  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2770
2771    /* Stdcall and fastcall functions will pop the stack if not
2772       variable args.  */
2773    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2774        || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2775      rtd = 1;
2776
2777    if (rtd
2778        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2779	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2780		== void_type_node)))
2781      return size;
2782  }
2783
2784  /* Lose any fake structure return argument if it is passed on the stack.  */
2785  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2786      && !TARGET_64BIT
2787      && !KEEP_AGGREGATE_RETURN_POINTER)
2788    {
2789      int nregs = ix86_function_regparm (funtype, fundecl);
2790
2791      if (!nregs)
2792	return GET_MODE_SIZE (Pmode);
2793    }
2794
2795  return 0;
2796}
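
/* Illustrative sketch: for a prototype such as

     int __attribute__((stdcall)) f (int a, int b);

   the function above returns 8, so F is emitted with a "ret $8" and the
   caller leaves the stack alone, whereas a cdecl function with the same
   arguments yields 0 and the caller pops.  Varargs functions and the
   hidden aggregate-return pointer are handled separately, as described
   above.  */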
2797
2798/* Argument support functions.  */
2799
2800/* Return true when register may be used to pass function parameters.  */
2801bool
2802ix86_function_arg_regno_p (int regno)
2803{
2804  int i;
2805  if (!TARGET_64BIT)
2806    {
2807      if (TARGET_MACHO)
2808        return (regno < REGPARM_MAX
2809                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2810      else
2811        return (regno < REGPARM_MAX
2812	        || (TARGET_MMX && MMX_REGNO_P (regno)
2813	  	    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2814	        || (TARGET_SSE && SSE_REGNO_P (regno)
2815		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2816    }
2817
2818  if (TARGET_MACHO)
2819    {
2820      if (SSE_REGNO_P (regno) && TARGET_SSE)
2821        return true;
2822    }
2823  else
2824    {
2825      if (TARGET_SSE && SSE_REGNO_P (regno)
2826          && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2827        return true;
2828    }
2829  /* RAX is used as hidden argument to va_arg functions.  */
2830  if (!regno)
2831    return true;
2832  for (i = 0; i < REGPARM_MAX; i++)
2833    if (regno == x86_64_int_parameter_registers[i])
2834      return true;
2835  return false;
2836}
2837
2838/* Return if we do not know how to pass TYPE solely in registers.  */
2839
2840static bool
2841ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2842{
2843  if (must_pass_in_stack_var_size_or_pad (mode, type))
2844    return true;
2845
2846  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
2847     The layout_type routine is crafty and tries to trick us into passing
2848     currently unsupported vector types on the stack by using TImode.  */
2849  return (!TARGET_64BIT && mode == TImode
2850	  && type && TREE_CODE (type) != VECTOR_TYPE);
2851}
2852
2853/* Initialize a variable CUM of type CUMULATIVE_ARGS
2854   for a call to a function whose data type is FNTYPE.
2855   For a library call, FNTYPE is 0.  */
2856
2857void
2858init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
2859		      tree fntype,	/* tree ptr for function decl */
2860		      rtx libname,	/* SYMBOL_REF of library name or 0 */
2861		      tree fndecl)
2862{
2863  static CUMULATIVE_ARGS zero_cum;
2864  tree param, next_param;
2865
2866  if (TARGET_DEBUG_ARG)
2867    {
2868      fprintf (stderr, "\ninit_cumulative_args (");
2869      if (fntype)
2870	fprintf (stderr, "fntype code = %s, ret code = %s",
2871		 tree_code_name[(int) TREE_CODE (fntype)],
2872		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2873      else
2874	fprintf (stderr, "no fntype");
2875
2876      if (libname)
2877	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2878    }
2879
2880  *cum = zero_cum;
2881
2882  /* Set up the number of registers to use for passing arguments.  */
2883  cum->nregs = ix86_regparm;
2884  if (TARGET_SSE)
2885    cum->sse_nregs = SSE_REGPARM_MAX;
2886  if (TARGET_MMX)
2887    cum->mmx_nregs = MMX_REGPARM_MAX;
2888  cum->warn_sse = true;
2889  cum->warn_mmx = true;
2890  cum->maybe_vaarg = false;
2891
2892  /* Use ecx and edx registers if function has fastcall attribute,
2893     else look for regparm information.  */
2894  if (fntype && !TARGET_64BIT)
2895    {
2896      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2897	{
2898	  cum->nregs = 2;
2899	  cum->fastcall = 1;
2900	}
2901      else
2902	cum->nregs = ix86_function_regparm (fntype, fndecl);
2903    }
2904
2905  /* Set up the number of SSE registers used for passing SFmode
2906     and DFmode arguments.  Warn for mismatching ABI.  */
2907  cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2908
2909  /* Determine if this function has variable arguments.  This is
2910     indicated by the last argument being 'void_type_node' if there
2911     are no variable arguments.  If there are variable arguments, then
2912     we won't pass anything in registers in 32-bit mode.  */
2913
2914  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2915    {
2916      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2917	   param != 0; param = next_param)
2918	{
2919	  next_param = TREE_CHAIN (param);
2920	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2921	    {
2922	      if (!TARGET_64BIT)
2923		{
2924		  cum->nregs = 0;
2925		  cum->sse_nregs = 0;
2926		  cum->mmx_nregs = 0;
2927		  cum->warn_sse = 0;
2928		  cum->warn_mmx = 0;
2929		  cum->fastcall = 0;
2930		  cum->float_in_sse = 0;
2931		}
2932	      cum->maybe_vaarg = true;
2933	    }
2934	}
2935    }
2936  if ((!fntype && !libname)
2937      || (fntype && !TYPE_ARG_TYPES (fntype)))
2938    cum->maybe_vaarg = true;
2939
2940  if (TARGET_DEBUG_ARG)
2941    fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2942
2943  return;
2944}
2945
2946/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
2947   But in the case of vector types, it is some vector mode.
2948
2949   When we have only some of our vector isa extensions enabled, then there
2950   are some modes for which vector_mode_supported_p is false.  For these
2951   modes, the generic vector support in gcc will choose some non-vector mode
2952   in order to implement the type.  By computing the natural mode, we'll
2953   select the proper ABI location for the operand and not depend on whatever
2954   the middle-end decides to do with these vector types.  */
2955
2956static enum machine_mode
2957type_natural_mode (tree type)
2958{
2959  enum machine_mode mode = TYPE_MODE (type);
2960
2961  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2962    {
2963      HOST_WIDE_INT size = int_size_in_bytes (type);
2964      if ((size == 8 || size == 16)
2965	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
2966	  && TYPE_VECTOR_SUBPARTS (type) > 1)
2967	{
2968	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2969
2970	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2971	    mode = MIN_MODE_VECTOR_FLOAT;
2972	  else
2973	    mode = MIN_MODE_VECTOR_INT;
2974
2975	  /* Get the mode which has this inner mode and number of units.  */
2976	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2977	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2978		&& GET_MODE_INNER (mode) == innermode)
2979	      return mode;
2980
2981	  gcc_unreachable ();
2982	}
2983    }
2984
2985  return mode;
2986}
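
/* Illustrative sketch: for

     typedef float v4sf __attribute__((vector_size (16)));

   the natural mode computed above is V4SFmode even when SSE is disabled
   and the middle-end would otherwise fall back to a non-vector mode, so
   the ABI location of such an argument does not depend on which ISA
   extensions happen to be enabled.  */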
2987
2988/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
2989   this may not agree with the mode that the type system has chosen for the
2990   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
2991   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
2992
2993static rtx
2994gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2995		     unsigned int regno)
2996{
2997  rtx tmp;
2998
2999  if (orig_mode != BLKmode)
3000    tmp = gen_rtx_REG (orig_mode, regno);
3001  else
3002    {
3003      tmp = gen_rtx_REG (mode, regno);
3004      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3005      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3006    }
3007
3008  return tmp;
3009}
3010
3011/* x86-64 register passing implementation.  See the x86-64 ABI for details.
3012   The goal of this code is to classify each eightbyte of an incoming argument
3013   by register class and assign registers accordingly.  */
3014
3015/* Return the union class of CLASS1 and CLASS2.
3016   See the x86-64 PS ABI for details.  */
3017
3018static enum x86_64_reg_class
3019merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3020{
3021  /* Rule #1: If both classes are equal, this is the resulting class.  */
3022  if (class1 == class2)
3023    return class1;
3024
3025  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3026     the other class.  */
3027  if (class1 == X86_64_NO_CLASS)
3028    return class2;
3029  if (class2 == X86_64_NO_CLASS)
3030    return class1;
3031
3032  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
3033  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3034    return X86_64_MEMORY_CLASS;
3035
3036  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
3037  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3038      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3039    return X86_64_INTEGERSI_CLASS;
3040  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3041      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3042    return X86_64_INTEGER_CLASS;
3043
3044  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3045     MEMORY is used.  */
3046  if (class1 == X86_64_X87_CLASS
3047      || class1 == X86_64_X87UP_CLASS
3048      || class1 == X86_64_COMPLEX_X87_CLASS
3049      || class2 == X86_64_X87_CLASS
3050      || class2 == X86_64_X87UP_CLASS
3051      || class2 == X86_64_COMPLEX_X87_CLASS)
3052    return X86_64_MEMORY_CLASS;
3053
3054  /* Rule #6: Otherwise class SSE is used.  */
3055  return X86_64_SSE_CLASS;
3056}
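
/* Illustrative spot checks of the rules above: merging
   X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS gives
   X86_64_INTEGERSI_CLASS (rule #4), merging X86_64_SSE_CLASS with
   X86_64_X87_CLASS gives X86_64_MEMORY_CLASS (rule #5), and merging
   X86_64_SSESF_CLASS with X86_64_SSEDF_CLASS falls through to
   X86_64_SSE_CLASS (rule #6).  */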
3057
3058/* Classify the argument of type TYPE and mode MODE.
3059   CLASSES will be filled by the register class used to pass each word
3060   of the operand.  The number of words is returned.  In case the parameter
3061   should be passed in memory, 0 is returned. As a special case for zero
3062   sized containers, classes[0] will be NO_CLASS and 1 is returned.
3063
3064   BIT_OFFSET is used internally for handling records and specifies the
3065   offset in bits modulo 256 to avoid overflow cases.
3066
3067   See the x86-64 PS ABI for details.
3068*/
3069
3070static int
3071classify_argument (enum machine_mode mode, tree type,
3072		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3073{
3074  HOST_WIDE_INT bytes =
3075    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3076  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3077
3078  /* Variable sized entities are always passed/returned in memory.  */
3079  if (bytes < 0)
3080    return 0;
3081
3082  if (mode != VOIDmode
3083      && targetm.calls.must_pass_in_stack (mode, type))
3084    return 0;
3085
3086  if (type && AGGREGATE_TYPE_P (type))
3087    {
3088      int i;
3089      tree field;
3090      enum x86_64_reg_class subclasses[MAX_CLASSES];
3091
3092      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
3093      if (bytes > 16)
3094	return 0;
3095
3096      for (i = 0; i < words; i++)
3097	classes[i] = X86_64_NO_CLASS;
3098
3099      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
3100	 signal the memory class, so handle this as a special case.  */
3101      if (!words)
3102	{
3103	  classes[0] = X86_64_NO_CLASS;
3104	  return 1;
3105	}
3106
3107      /* Classify each field of record and merge classes.  */
3108      switch (TREE_CODE (type))
3109	{
3110	case RECORD_TYPE:
3111	  /* For classes, first merge in the fields of the base classes.  */
3112	  if (TYPE_BINFO (type))
3113	    {
3114	      tree binfo, base_binfo;
3115	      int basenum;
3116
3117	      for (binfo = TYPE_BINFO (type), basenum = 0;
3118		   BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
3119		{
3120		   int num;
3121		   int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
3122		   tree type = BINFO_TYPE (base_binfo);
3123
3124		   num = classify_argument (TYPE_MODE (type),
3125					    type, subclasses,
3126					    (offset + bit_offset) % 256);
3127		   if (!num)
3128		     return 0;
3129		   for (i = 0; i < num; i++)
3130		     {
3131		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
3132		       classes[i + pos] =
3133			 merge_classes (subclasses[i], classes[i + pos]);
3134		     }
3135		}
3136	    }
3137	  /* And now merge the fields of structure.  */
3138	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3139	    {
3140	      if (TREE_CODE (field) == FIELD_DECL)
3141		{
3142		  int num;
3143
3144		  if (TREE_TYPE (field) == error_mark_node)
3145		    continue;
3146
3147		  /* Bitfields are always classified as integer.  Handle them
3148		     early, since later code would consider them to be
3149		     misaligned integers.  */
3150		  if (DECL_BIT_FIELD (field))
3151		    {
3152		      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3153			   i < ((int_bit_position (field) + (bit_offset % 64))
3154			        + tree_low_cst (DECL_SIZE (field), 0)
3155				+ 63) / 8 / 8; i++)
3156			classes[i] =
3157			  merge_classes (X86_64_INTEGER_CLASS,
3158					 classes[i]);
3159		    }
3160		  else
3161		    {
3162		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3163					       TREE_TYPE (field), subclasses,
3164					       (int_bit_position (field)
3165						+ bit_offset) % 256);
3166		      if (!num)
3167			return 0;
3168		      for (i = 0; i < num; i++)
3169			{
3170			  int pos =
3171			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3172			  classes[i + pos] =
3173			    merge_classes (subclasses[i], classes[i + pos]);
3174			}
3175		    }
3176		}
3177	    }
3178	  break;
3179
3180	case ARRAY_TYPE:
3181	  /* Arrays are handled as small records.  */
3182	  {
3183	    int num;
3184	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3185				     TREE_TYPE (type), subclasses, bit_offset);
3186	    if (!num)
3187	      return 0;
3188
3189	    /* The partial classes are now full classes.  */
3190	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3191	      subclasses[0] = X86_64_SSE_CLASS;
3192	    if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3193	      subclasses[0] = X86_64_INTEGER_CLASS;
3194
3195	    for (i = 0; i < words; i++)
3196	      classes[i] = subclasses[i % num];
3197
3198	    break;
3199	  }
3200	case UNION_TYPE:
3201	case QUAL_UNION_TYPE:
3202	  /* Unions are similar to RECORD_TYPE but the offset is
3203	     always 0.  */
3204
3205	  /* Unions are not derived.  */
3206	  gcc_assert (!TYPE_BINFO (type)
3207		      || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3208	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3209	    {
3210	      if (TREE_CODE (field) == FIELD_DECL)
3211		{
3212		  int num;
3213
3214		  if (TREE_TYPE (field) == error_mark_node)
3215		    continue;
3216
3217		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3218					   TREE_TYPE (field), subclasses,
3219					   bit_offset);
3220		  if (!num)
3221		    return 0;
3222		  for (i = 0; i < num; i++)
3223		    classes[i] = merge_classes (subclasses[i], classes[i]);
3224		}
3225	    }
3226	  break;
3227
3228	default:
3229	  gcc_unreachable ();
3230	}
3231
3232      /* Final merger cleanup.  */
3233      for (i = 0; i < words; i++)
3234	{
3235	  /* If one class is MEMORY, everything should be passed in
3236	     memory.  */
3237	  if (classes[i] == X86_64_MEMORY_CLASS)
3238	    return 0;
3239
3240	  /* The X86_64_SSEUP_CLASS should always be preceded by
3241	     X86_64_SSE_CLASS.  */
3242	  if (classes[i] == X86_64_SSEUP_CLASS
3243	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3244	    classes[i] = X86_64_SSE_CLASS;
3245
3246	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
3247	  if (classes[i] == X86_64_X87UP_CLASS
3248	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3249	    classes[i] = X86_64_SSE_CLASS;
3250	}
3251      return words;
3252    }
3253
3254  /* Compute the alignment needed.  We align all types to their natural
3255     boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
3256  if (mode != VOIDmode && mode != BLKmode)
3257    {
3258      int mode_alignment = GET_MODE_BITSIZE (mode);
3259
3260      if (mode == XFmode)
3261	mode_alignment = 128;
3262      else if (mode == XCmode)
3263	mode_alignment = 256;
3264      if (COMPLEX_MODE_P (mode))
3265	mode_alignment /= 2;
3266      /* Misaligned fields are always returned in memory.  */
3267      if (bit_offset % mode_alignment)
3268	return 0;
3269    }
3270
3271  /* For V1xx modes, just use the base mode.  */
3272  if (VECTOR_MODE_P (mode)
3273      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3274    mode = GET_MODE_INNER (mode);
3275
3276  /* Classification of atomic types.  */
3277  switch (mode)
3278    {
3279    case SDmode:
3280    case DDmode:
3281      classes[0] = X86_64_SSE_CLASS;
3282      return 1;
3283    case TDmode:
3284      classes[0] = X86_64_SSE_CLASS;
3285      classes[1] = X86_64_SSEUP_CLASS;
3286      return 2;
3287    case DImode:
3288    case SImode:
3289    case HImode:
3290    case QImode:
3291    case CSImode:
3292    case CHImode:
3293    case CQImode:
3294      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3295	classes[0] = X86_64_INTEGERSI_CLASS;
3296      else
3297	classes[0] = X86_64_INTEGER_CLASS;
3298      return 1;
3299    case CDImode:
3300    case TImode:
3301      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3302      return 2;
3303    case CTImode:
3304      return 0;
3305    case SFmode:
3306      if (!(bit_offset % 64))
3307	classes[0] = X86_64_SSESF_CLASS;
3308      else
3309	classes[0] = X86_64_SSE_CLASS;
3310      return 1;
3311    case DFmode:
3312      classes[0] = X86_64_SSEDF_CLASS;
3313      return 1;
3314    case XFmode:
3315      classes[0] = X86_64_X87_CLASS;
3316      classes[1] = X86_64_X87UP_CLASS;
3317      return 2;
3318    case TFmode:
3319      classes[0] = X86_64_SSE_CLASS;
3320      classes[1] = X86_64_SSEUP_CLASS;
3321      return 2;
3322    case SCmode:
3323      classes[0] = X86_64_SSE_CLASS;
3324      return 1;
3325    case DCmode:
3326      classes[0] = X86_64_SSEDF_CLASS;
3327      classes[1] = X86_64_SSEDF_CLASS;
3328      return 2;
3329    case XCmode:
3330      classes[0] = X86_64_COMPLEX_X87_CLASS;
3331      return 1;
3332    case TCmode:
3333      /* This mode is larger than 16 bytes.  */
3334      return 0;
3335    case V4SFmode:
3336    case V4SImode:
3337    case V16QImode:
3338    case V8HImode:
3339    case V2DFmode:
3340    case V2DImode:
3341      classes[0] = X86_64_SSE_CLASS;
3342      classes[1] = X86_64_SSEUP_CLASS;
3343      return 2;
3344    case V2SFmode:
3345    case V2SImode:
3346    case V4HImode:
3347    case V8QImode:
3348      classes[0] = X86_64_SSE_CLASS;
3349      return 1;
3350    case BLKmode:
3351    case VOIDmode:
3352      return 0;
3353    default:
3354      gcc_assert (VECTOR_MODE_P (mode));
3355
3356      if (bytes > 16)
3357	return 0;
3358
3359      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3360
3361      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3362	classes[0] = X86_64_INTEGERSI_CLASS;
3363      else
3364	classes[0] = X86_64_INTEGER_CLASS;
3365      classes[1] = X86_64_INTEGER_CLASS;
3366      return 1 + (bytes > 8);
3367    }
3368}
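
/* Worked example (sketch): on x86-64 the 16-byte aggregate

     struct s { double d; int a; int b; };

   classifies as two eightbytes, X86_64_SSEDF_CLASS for the double and
   X86_64_INTEGER_CLASS for the two ints merged together, so it can be
   passed in one SSE and one integer register.  Growing the struct past
   16 bytes makes classify_argument return 0 and the value goes to
   memory.  Bit-fields and C++ base classes follow the special cases
   handled above.  */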
3369
3370/* Examine the argument and return the number of registers required in each
3371   class.  Return 0 iff the parameter should be passed in memory.  */
3372static int
3373examine_argument (enum machine_mode mode, tree type, int in_return,
3374		  int *int_nregs, int *sse_nregs)
3375{
3376  enum x86_64_reg_class class[MAX_CLASSES];
3377  int n = classify_argument (mode, type, class, 0);
3378
3379  *int_nregs = 0;
3380  *sse_nregs = 0;
3381  if (!n)
3382    return 0;
3383  for (n--; n >= 0; n--)
3384    switch (class[n])
3385      {
3386      case X86_64_INTEGER_CLASS:
3387      case X86_64_INTEGERSI_CLASS:
3388	(*int_nregs)++;
3389	break;
3390      case X86_64_SSE_CLASS:
3391      case X86_64_SSESF_CLASS:
3392      case X86_64_SSEDF_CLASS:
3393	(*sse_nregs)++;
3394	break;
3395      case X86_64_NO_CLASS:
3396      case X86_64_SSEUP_CLASS:
3397	break;
3398      case X86_64_X87_CLASS:
3399      case X86_64_X87UP_CLASS:
3400	if (!in_return)
3401	  return 0;
3402	break;
3403      case X86_64_COMPLEX_X87_CLASS:
3404	return in_return ? 2 : 0;
3405      case X86_64_MEMORY_CLASS:
3406	gcc_unreachable ();
3407      }
3408  return 1;
3409}
3410
3411/* Construct container for the argument used by GCC interface.  See
3412   FUNCTION_ARG for the detailed description.  */
3413
3414static rtx
3415construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3416		     tree type, int in_return, int nintregs, int nsseregs,
3417		     const int *intreg, int sse_regno)
3418{
3419  /* The following variables hold the static issued_error state.  */
3420  static bool issued_sse_arg_error;
3421  static bool issued_sse_ret_error;
3422  static bool issued_x87_ret_error;
3423
3424  enum machine_mode tmpmode;
3425  int bytes =
3426    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3427  enum x86_64_reg_class class[MAX_CLASSES];
3428  int n;
3429  int i;
3430  int nexps = 0;
3431  int needed_sseregs, needed_intregs;
3432  rtx exp[MAX_CLASSES];
3433  rtx ret;
3434
3435  n = classify_argument (mode, type, class, 0);
3436  if (TARGET_DEBUG_ARG)
3437    {
3438      if (!n)
3439	fprintf (stderr, "Memory class\n");
3440      else
3441	{
3442	  fprintf (stderr, "Classes:");
3443	  for (i = 0; i < n; i++)
3444	    {
3445	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3446	    }
3447	   fprintf (stderr, "\n");
3448	}
3449    }
3450  if (!n)
3451    return NULL;
3452  if (!examine_argument (mode, type, in_return, &needed_intregs,
3453			 &needed_sseregs))
3454    return NULL;
3455  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3456    return NULL;
3457
3458  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
3459     some less clueful developer tries to use floating-point anyway.  */
3460  if (needed_sseregs && !TARGET_SSE)
3461    {
3462      if (in_return)
3463	{
3464	  if (!issued_sse_ret_error)
3465	    {
3466	      error ("SSE register return with SSE disabled");
3467	      issued_sse_ret_error = true;
3468	    }
3469	}
3470      else if (!issued_sse_arg_error)
3471	{
3472	  error ("SSE register argument with SSE disabled");
3473	  issued_sse_arg_error = true;
3474	}
3475      return NULL;
3476    }
3477
3478  /* Likewise, error if the ABI requires us to return values in the
3479     x87 registers and the user specified -mno-80387.  */
3480  if (!TARGET_80387 && in_return)
3481    for (i = 0; i < n; i++)
3482      if (class[i] == X86_64_X87_CLASS
3483	  || class[i] == X86_64_X87UP_CLASS
3484	  || class[i] == X86_64_COMPLEX_X87_CLASS)
3485	{
3486	  if (!issued_x87_ret_error)
3487	    {
3488	      error ("x87 register return with x87 disabled");
3489	      issued_x87_ret_error = true;
3490	    }
3491	  return NULL;
3492	}
3493
3494  /* First construct simple cases.  Avoid SCmode, since we want to use
3495     single register to pass this type.  */
3496  if (n == 1 && mode != SCmode)
3497    switch (class[0])
3498      {
3499      case X86_64_INTEGER_CLASS:
3500      case X86_64_INTEGERSI_CLASS:
3501	return gen_rtx_REG (mode, intreg[0]);
3502      case X86_64_SSE_CLASS:
3503      case X86_64_SSESF_CLASS:
3504      case X86_64_SSEDF_CLASS:
3505	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3506      case X86_64_X87_CLASS:
3507      case X86_64_COMPLEX_X87_CLASS:
3508	return gen_rtx_REG (mode, FIRST_STACK_REG);
3509      case X86_64_NO_CLASS:
3510	/* Zero sized array, struct or class.  */
3511	return NULL;
3512      default:
3513	gcc_unreachable ();
3514      }
3515  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3516      && mode != BLKmode)
3517    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3518  if (n == 2
3519      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3520    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3521  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3522      && class[1] == X86_64_INTEGER_CLASS
3523      && (mode == CDImode || mode == TImode || mode == TFmode)
3524      && intreg[0] + 1 == intreg[1])
3525    return gen_rtx_REG (mode, intreg[0]);
3526
3527  /* Otherwise figure out the entries of the PARALLEL.  */
3528  for (i = 0; i < n; i++)
3529    {
3530      switch (class[i])
3531        {
3532	  case X86_64_NO_CLASS:
3533	    break;
3534	  case X86_64_INTEGER_CLASS:
3535	  case X86_64_INTEGERSI_CLASS:
3536	    /* Merge TImodes on aligned occasions here too.  */
3537	    if (i * 8 + 8 > bytes)
3538	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3539	    else if (class[i] == X86_64_INTEGERSI_CLASS)
3540	      tmpmode = SImode;
3541	    else
3542	      tmpmode = DImode;
3543	    /* We've requested 24 bytes for which we don't have a mode.  Use DImode.  */
3544	    if (tmpmode == BLKmode)
3545	      tmpmode = DImode;
3546	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3547					       gen_rtx_REG (tmpmode, *intreg),
3548					       GEN_INT (i*8));
3549	    intreg++;
3550	    break;
3551	  case X86_64_SSESF_CLASS:
3552	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3553					       gen_rtx_REG (SFmode,
3554							    SSE_REGNO (sse_regno)),
3555					       GEN_INT (i*8));
3556	    sse_regno++;
3557	    break;
3558	  case X86_64_SSEDF_CLASS:
3559	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3560					       gen_rtx_REG (DFmode,
3561							    SSE_REGNO (sse_regno)),
3562					       GEN_INT (i*8));
3563	    sse_regno++;
3564	    break;
3565	  case X86_64_SSE_CLASS:
3566	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3567	      tmpmode = TImode;
3568	    else
3569	      tmpmode = DImode;
3570	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3571					       gen_rtx_REG (tmpmode,
3572							    SSE_REGNO (sse_regno)),
3573					       GEN_INT (i*8));
3574	    if (tmpmode == TImode)
3575	      i++;
3576	    sse_regno++;
3577	    break;
3578	  default:
3579	    gcc_unreachable ();
3580	}
3581    }
3582
3583  /* Empty aligned struct, union or class.  */
3584  if (nexps == 0)
3585    return NULL;
3586
3587  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3588  for (i = 0; i < nexps; i++)
3589    XVECEXP (ret, 0, i) = exp [i];
3590  return ret;
3591}
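
/* Illustrative sketch: for the struct { double d; int a; int b; }
   example used for classify_argument, the classes are SSEDF followed by
   INTEGER, so (with no preceding arguments) the PARALLEL built above is
   roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the first eightbyte travels in %xmm0 and the second in %rdi.
   The actual register numbers depend on how many arguments precede this
   one; the RTL shown is a sketch rather than literal dump output.  */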
3592
3593/* Update the data in CUM to advance over an argument
3594   of mode MODE and data type TYPE.
3595   (TYPE is null for libcalls where that information may not be available.)  */
3596
3597void
3598function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3599		      tree type, int named)
3600{
3601  int bytes =
3602    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3603  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3604
3605  if (type)
3606    mode = type_natural_mode (type);
3607
3608  if (TARGET_DEBUG_ARG)
3609    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3610	     "mode=%s, named=%d)\n\n",
3611	     words, cum->words, cum->nregs, cum->sse_nregs,
3612	     GET_MODE_NAME (mode), named);
3613
3614  if (TARGET_64BIT)
3615    {
3616      int int_nregs, sse_nregs;
3617      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3618	cum->words += words;
3619      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3620	{
3621	  cum->nregs -= int_nregs;
3622	  cum->sse_nregs -= sse_nregs;
3623	  cum->regno += int_nregs;
3624	  cum->sse_regno += sse_nregs;
3625	}
3626      else
3627	cum->words += words;
3628    }
3629  else
3630    {
3631      switch (mode)
3632	{
3633	default:
3634	  break;
3635
3636	case BLKmode:
3637	  if (bytes < 0)
3638	    break;
3639	  /* FALLTHRU */
3640
3641	case DImode:
3642	case SImode:
3643	case HImode:
3644	case QImode:
3645	  cum->words += words;
3646	  cum->nregs -= words;
3647	  cum->regno += words;
3648
3649	  if (cum->nregs <= 0)
3650	    {
3651	      cum->nregs = 0;
3652	      cum->regno = 0;
3653	    }
3654	  break;
3655
3656	case DFmode:
3657	  if (cum->float_in_sse < 2)
3658	    break;
3659	case SFmode:
3660	  if (cum->float_in_sse < 1)
3661	    break;
3662	  /* FALLTHRU */
3663
3664	case TImode:
3665	case V16QImode:
3666	case V8HImode:
3667	case V4SImode:
3668	case V2DImode:
3669	case V4SFmode:
3670	case V2DFmode:
3671	  if (!type || !AGGREGATE_TYPE_P (type))
3672	    {
3673	      cum->sse_words += words;
3674	      cum->sse_nregs -= 1;
3675	      cum->sse_regno += 1;
3676	      if (cum->sse_nregs <= 0)
3677		{
3678		  cum->sse_nregs = 0;
3679		  cum->sse_regno = 0;
3680		}
3681	    }
3682	  break;
3683
3684	case V8QImode:
3685	case V4HImode:
3686	case V2SImode:
3687	case V2SFmode:
3688	  if (!type || !AGGREGATE_TYPE_P (type))
3689	    {
3690	      cum->mmx_words += words;
3691	      cum->mmx_nregs -= 1;
3692	      cum->mmx_regno += 1;
3693	      if (cum->mmx_nregs <= 0)
3694		{
3695		  cum->mmx_nregs = 0;
3696		  cum->mmx_regno = 0;
3697		}
3698	    }
3699	  break;
3700	}
3701    }
3702}
3703
3704/* Define where to put the arguments to a function.
3705   Value is zero to push the argument on the stack,
3706   or a hard register in which to store the argument.
3707
3708   MODE is the argument's machine mode.
3709   TYPE is the data type of the argument (as a tree).
3710    This is null for libcalls where that information may
3711    not be available.
3712   CUM is a variable of type CUMULATIVE_ARGS which gives info about
3713    the preceding args and about the function being called.
3714   NAMED is nonzero if this argument is a named parameter
3715    (otherwise it is an extra parameter matching an ellipsis).  */
3716
3717rtx
3718function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3719	      tree type, int named)
3720{
3721  enum machine_mode mode = orig_mode;
3722  rtx ret = NULL_RTX;
3723  int bytes =
3724    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3725  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3726  static bool warnedsse, warnedmmx;
3727
3728  /* To simplify the code below, represent vector types with a vector mode
3729     even if MMX/SSE are not active.  */
3730  if (type && TREE_CODE (type) == VECTOR_TYPE)
3731    mode = type_natural_mode (type);
3732
3733  /* Handle a hidden AL argument containing the number of SSE registers
3734     used by a varargs x86-64 call.  For the i386 ABI just return
3735     constm1_rtx to avoid any AL settings.  */
3736  if (mode == VOIDmode)
3737    {
3738      if (TARGET_64BIT)
3739	return GEN_INT (cum->maybe_vaarg
3740			? (cum->sse_nregs < 0
3741			   ? SSE_REGPARM_MAX
3742			   : cum->sse_regno)
3743			: -1);
3744      else
3745	return constm1_rtx;
3746    }
3747  if (TARGET_64BIT)
3748    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3749			       cum->sse_nregs,
3750			       &x86_64_int_parameter_registers [cum->regno],
3751			       cum->sse_regno);
3752  else
3753    switch (mode)
3754      {
3755	/* For now, pass fp/complex values on the stack.  */
3756      default:
3757	break;
3758
3759      case BLKmode:
3760	if (bytes < 0)
3761	  break;
3762	/* FALLTHRU */
3763      case DImode:
3764      case SImode:
3765      case HImode:
3766      case QImode:
3767	if (words <= cum->nregs)
3768	  {
3769	    int regno = cum->regno;
3770
3771	    /* Fastcall allocates the first two DWORD (SImode) or
3772	       smaller arguments to ECX and EDX.  */
3773	    if (cum->fastcall)
3774	      {
3775	        if (mode == BLKmode || mode == DImode)
3776	          break;
3777
3778	        /* ECX not EAX is the first allocated register.  */
3779	        if (regno == 0)
3780		  regno = 2;
3781	      }
3782	    ret = gen_rtx_REG (mode, regno);
3783	  }
3784	break;
3785      case DFmode:
3786	if (cum->float_in_sse < 2)
3787	  break;
3788      case SFmode:
3789	if (cum->float_in_sse < 1)
3790	  break;
3791	/* FALLTHRU */
3792      case TImode:
3793      case V16QImode:
3794      case V8HImode:
3795      case V4SImode:
3796      case V2DImode:
3797      case V4SFmode:
3798      case V2DFmode:
3799	if (!type || !AGGREGATE_TYPE_P (type))
3800	  {
3801	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3802	      {
3803		warnedsse = true;
3804		warning (0, "SSE vector argument without SSE enabled "
3805			 "changes the ABI");
3806	      }
3807	    if (cum->sse_nregs)
3808	      ret = gen_reg_or_parallel (mode, orig_mode,
3809					 cum->sse_regno + FIRST_SSE_REG);
3810	  }
3811	break;
3812      case V8QImode:
3813      case V4HImode:
3814      case V2SImode:
3815      case V2SFmode:
3816	if (!type || !AGGREGATE_TYPE_P (type))
3817	  {
3818	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3819	      {
3820		warnedmmx = true;
3821		warning (0, "MMX vector argument without MMX enabled "
3822			 "changes the ABI");
3823	      }
3824	    if (cum->mmx_nregs)
3825	      ret = gen_reg_or_parallel (mode, orig_mode,
3826					 cum->mmx_regno + FIRST_MMX_REG);
3827	  }
3828	break;
3829      }
3830
3831  if (TARGET_DEBUG_ARG)
3832    {
3833      fprintf (stderr,
3834	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3835	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3836
3837      if (ret)
3838	print_simple_rtl (stderr, ret);
3839      else
3840	fprintf (stderr, ", stack");
3841
3842      fprintf (stderr, " )\n");
3843    }
3844
3845  return ret;
3846}
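
/* Illustrative sketch (32-bit): for a fastcall function taking
   (int a, int b, int c), the code above hands out %ecx for A and %edx
   for B (ECX replaces EAX as the first register), and C falls through
   to the stack once cum->nregs is exhausted.  DImode and BLKmode
   arguments are never put in the fastcall registers, as coded above.  */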
3847
3848/* A C expression that indicates when an argument must be passed by
3849   reference.  If nonzero for an argument, a copy of that argument is
3850   made in memory and a pointer to the argument is passed instead of
3851   the argument itself.  The pointer is passed in whatever way is
3852   appropriate for passing a pointer to that type.  */
3853
3854static bool
3855ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3856			enum machine_mode mode ATTRIBUTE_UNUSED,
3857			tree type, bool named ATTRIBUTE_UNUSED)
3858{
3859  if (!TARGET_64BIT)
3860    return 0;
3861
3862  if (type && int_size_in_bytes (type) == -1)
3863    {
3864      if (TARGET_DEBUG_ARG)
3865	fprintf (stderr, "function_arg_pass_by_reference\n");
3866      return 1;
3867    }
3868
3869  return 0;
3870}
3871
3872/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
3873   passing ABI.  Only called if TARGET_SSE.  */
3874static bool
3875contains_128bit_aligned_vector_p (tree type)
3876{
3877  enum machine_mode mode = TYPE_MODE (type);
3878  if (SSE_REG_MODE_P (mode)
3879      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3880    return true;
3881  if (TYPE_ALIGN (type) < 128)
3882    return false;
3883
3884  if (AGGREGATE_TYPE_P (type))
3885    {
3886      /* Walk the aggregates recursively.  */
3887      switch (TREE_CODE (type))
3888	{
3889	case RECORD_TYPE:
3890	case UNION_TYPE:
3891	case QUAL_UNION_TYPE:
3892	  {
3893	    tree field;
3894
3895	    if (TYPE_BINFO (type))
3896	      {
3897		tree binfo, base_binfo;
3898		int i;
3899
3900		for (binfo = TYPE_BINFO (type), i = 0;
3901		     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3902		  if (contains_128bit_aligned_vector_p
3903		      (BINFO_TYPE (base_binfo)))
3904		    return true;
3905	      }
3906	    /* And now merge the fields of structure.  */
3907	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3908	      {
3909		if (TREE_CODE (field) == FIELD_DECL
3910		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3911		  return true;
3912	      }
3913	    break;
3914	  }
3915
3916	case ARRAY_TYPE:
3917	  /* Just in case some language passes arrays by value.  */
3918	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3919	    return true;
3920	  break;
3921
3922	default:
3923	  gcc_unreachable ();
3924	}
3925    }
3926  return false;
3927}
3928
3929/* Gives the alignment boundary, in bits, of an argument with the
3930   specified mode and type.  */
3931
3932int
3933ix86_function_arg_boundary (enum machine_mode mode, tree type)
3934{
3935  int align;
3936  if (type)
3937    align = TYPE_ALIGN (type);
3938  else
3939    align = GET_MODE_ALIGNMENT (mode);
3940  if (align < PARM_BOUNDARY)
3941    align = PARM_BOUNDARY;
3942  if (!TARGET_64BIT)
3943    {
3944      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
3945	 make an exception for SSE modes since these require 128bit
3946	 alignment.
3947
3948	 The handling here differs from field_alignment.  ICC aligns MMX
3949	 arguments to 4 byte boundaries, while structure fields are aligned
3950	 to 8 byte boundaries.  */
3951      if (!TARGET_SSE)
3952	align = PARM_BOUNDARY;
3953      else if (!type)
3954	{
3955	  if (!SSE_REG_MODE_P (mode))
3956	    align = PARM_BOUNDARY;
3957	}
3958      else
3959	{
3960	  if (!contains_128bit_aligned_vector_p (type))
3961	    align = PARM_BOUNDARY;
3962	}
3963    }
3964  if (align > 128)
3965    align = 128;
3966  return align;
3967}
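
/* Illustrative sketch (32-bit, -msse): an argument whose type is or
   contains a 128-bit vector (e.g. __m128) is reported as 128-bit
   aligned by the function above, while a plain double stays at
   PARM_BOUNDARY.  Without -msse everything is forced back to
   PARM_BOUNDARY, matching the plain i386 ABI.  */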
3968
3969/* Return true if N is a possible register number of function value.  */
3970bool
3971ix86_function_value_regno_p (int regno)
3972{
3973  if (TARGET_MACHO)
3974    {
3975      if (!TARGET_64BIT)
3976        {
3977          return ((regno) == 0
3978                  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3979                  || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3980        }
3981      return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3982              || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3983              || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3984      }
3985  else
3986    {
3987      if (regno == 0
3988          || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3989          || (regno == FIRST_SSE_REG && TARGET_SSE))
3990        return true;
3991
3992      if (!TARGET_64BIT
3993          && (regno == FIRST_MMX_REG && TARGET_MMX))
3994	    return true;
3995
3996      return false;
3997    }
3998}
3999
4000/* Define how to find the value returned by a function.
4001   VALTYPE is the data type of the value (as a tree).
4002   If the precise function being called is known, FUNC is its FUNCTION_DECL;
4003   otherwise, FUNC is 0.  */
4004rtx
4005ix86_function_value (tree valtype, tree fntype_or_decl,
4006		     bool outgoing ATTRIBUTE_UNUSED)
4007{
4008  enum machine_mode natmode = type_natural_mode (valtype);
4009
4010  if (TARGET_64BIT)
4011    {
4012      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4013				     1, REGPARM_MAX, SSE_REGPARM_MAX,
4014				     x86_64_int_return_registers, 0);
4015      /* For zero sized structures, construct_container returns NULL, but we
4016	 need to keep the rest of the compiler happy by returning a meaningful value.  */
4017      if (!ret)
4018	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4019      return ret;
4020    }
4021  else
4022    {
4023      tree fn = NULL_TREE, fntype;
4024      if (fntype_or_decl
4025	  && DECL_P (fntype_or_decl))
4026        fn = fntype_or_decl;
4027      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4028      return gen_rtx_REG (TYPE_MODE (valtype),
4029			  ix86_value_regno (natmode, fn, fntype));
4030    }
4031}
4032
4033/* Return true iff type is returned in memory.  */
4034int
4035ix86_return_in_memory (tree type)
4036{
4037  int needed_intregs, needed_sseregs, size;
4038  enum machine_mode mode = type_natural_mode (type);
4039
4040  if (TARGET_64BIT)
4041    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4042
4043  if (mode == BLKmode)
4044    return 1;
4045
4046  size = int_size_in_bytes (type);
4047
4048  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4049    return 0;
4050
4051  if (VECTOR_MODE_P (mode) || mode == TImode)
4052    {
4053      /* User-created vectors small enough to fit in EAX.  */
4054      if (size < 8)
4055	return 0;
4056
4057      /* MMX/3dNow values are returned in MM0,
4058	 except when it doesn't exist.  */
4059      if (size == 8)
4060	return (TARGET_MMX ? 0 : 1);
4061
4062      /* SSE values are returned in XMM0, except when it doesn't exist.  */
4063      if (size == 16)
4064	return (TARGET_SSE ? 0 : 1);
4065    }
4066
4067  if (mode == XFmode)
4068    return 0;
4069
4070  if (mode == TDmode)
4071    return 1;
4072
4073  if (size > 12)
4074    return 1;
4075  return 0;
4076}
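
/* Illustrative sketch (32-bit): a struct { int a, b, c, d; } is
   returned in memory by the rules above (its mode is BLKmode), while a
   16-byte __m128 value is returned in %xmm0 when SSE is enabled and an
   8-byte MMX vector in %mm0 when MMX is enabled.  On x86-64 the
   decision is driven entirely by examine_argument instead.  */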
4077
4078/* When returning SSE vector types, we have a choice of either
4079     (1) being abi incompatible with a -march switch, or
4080     (2) generating an error.
4081   Given no good solution, I think the safest thing is one warning.
4082   The user won't be able to use -Werror, but....
4083
4084   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4085   called in response to actually generating a caller or callee that
4086   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
4087   via aggregate_value_p for general type probing from tree-ssa.  */
4088
4089static rtx
4090ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4091{
4092  static bool warnedsse, warnedmmx;
4093
4094  if (type)
4095    {
4096      /* Look at the return type of the function, not the function type.  */
4097      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4098
4099      if (!TARGET_SSE && !warnedsse)
4100	{
4101	  if (mode == TImode
4102	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4103	    {
4104	      warnedsse = true;
4105	      warning (0, "SSE vector return without SSE enabled "
4106		       "changes the ABI");
4107	    }
4108	}
4109
4110      if (!TARGET_MMX && !warnedmmx)
4111	{
4112	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4113	    {
4114	      warnedmmx = true;
4115	      warning (0, "MMX vector return without MMX enabled "
4116		       "changes the ABI");
4117	    }
4118	}
4119    }
4120
4121  return NULL;
4122}
4123
4124/* Define how to find the value returned by a library function
4125   assuming the value has mode MODE.  */
4126rtx
4127ix86_libcall_value (enum machine_mode mode)
4128{
4129  if (TARGET_64BIT)
4130    {
4131      switch (mode)
4132	{
4133	case SFmode:
4134	case SCmode:
4135	case DFmode:
4136	case DCmode:
4137	case TFmode:
4138	case SDmode:
4139	case DDmode:
4140	case TDmode:
4141	  return gen_rtx_REG (mode, FIRST_SSE_REG);
4142	case XFmode:
4143	case XCmode:
4144	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4145	case TCmode:
4146	  return NULL;
4147	default:
4148	  return gen_rtx_REG (mode, 0);
4149	}
4150    }
4151  else
4152    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4153}
4154
4155/* Given a mode, return the register to use for a return value.  */
4156
4157static int
4158ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4159{
4160  gcc_assert (!TARGET_64BIT);
4161
4162  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4163     we normally prevent this case when mmx is not available.  However
4164     some ABIs may require the result to be returned like DImode.  */
4165  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4166    return TARGET_MMX ? FIRST_MMX_REG : 0;
4167
4168  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
4169     we prevent this case when sse is not available.  However some ABIs
4170     may require the result to be returned like integer TImode.  */
4171  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4172    return TARGET_SSE ? FIRST_SSE_REG : 0;
4173
4174  /* Decimal floating point values can go in %eax, unlike other float modes.  */
4175  if (DECIMAL_FLOAT_MODE_P (mode))
4176    return 0;
4177
4178  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
4179  if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4180    return 0;
4181
4182  /* Floating point return values in %st(0), except for local functions when
4183     SSE math is enabled or for functions with sseregparm attribute.  */
4184  if ((func || fntype)
4185      && (mode == SFmode || mode == DFmode))
4186    {
4187      int sse_level = ix86_function_sseregparm (fntype, func);
4188      if ((sse_level >= 1 && mode == SFmode)
4189	  || (sse_level == 2 && mode == DFmode))
4190        return FIRST_SSE_REG;
4191    }
4192
4193  return FIRST_FLOAT_REG;
4194}
4195
4196/* Create the va_list data type.  */
4197
4198static tree
4199ix86_build_builtin_va_list (void)
4200{
4201  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4202
4203  /* For i386 we use plain pointer to argument area.  */
4204  if (!TARGET_64BIT)
4205    return build_pointer_type (char_type_node);
4206
4207  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4208  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4209
4210  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4211		      unsigned_type_node);
4212  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4213		      unsigned_type_node);
4214  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4215		      ptr_type_node);
4216  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4217		      ptr_type_node);
4218
4219  va_list_gpr_counter_field = f_gpr;
4220  va_list_fpr_counter_field = f_fpr;
4221
4222  DECL_FIELD_CONTEXT (f_gpr) = record;
4223  DECL_FIELD_CONTEXT (f_fpr) = record;
4224  DECL_FIELD_CONTEXT (f_ovf) = record;
4225  DECL_FIELD_CONTEXT (f_sav) = record;
4226
4227  TREE_CHAIN (record) = type_decl;
4228  TYPE_NAME (record) = type_decl;
4229  TYPE_FIELDS (record) = f_gpr;
4230  TREE_CHAIN (f_gpr) = f_fpr;
4231  TREE_CHAIN (f_fpr) = f_ovf;
4232  TREE_CHAIN (f_ovf) = f_sav;
4233
4234  layout_type (record);
4235
4236  /* The correct type is an array type of one element.  */
4237  return build_array_type (record, build_index_type (size_zero_node));
4238}
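
/* Illustrative sketch: on x86-64 the record built above matches the
   familiar psABI layout

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag[1];

   while 32-bit targets simply use a char * into the argument area.
   This is a description of the resulting layout, not code compiled
   here.  */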
4239
4240/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
4241
4242static void
4243ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4244			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
4245			     int no_rtl)
4246{
4247  CUMULATIVE_ARGS next_cum;
4248  rtx save_area = NULL_RTX, mem;
4249  rtx label;
4250  rtx label_ref;
4251  rtx tmp_reg;
4252  rtx nsse_reg;
4253  int set;
4254  tree fntype;
4255  int stdarg_p;
4256  int i;
4257
4258  if (!TARGET_64BIT)
4259    return;
4260
4261  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4262    return;
4263
4264  /* Indicate to allocate space on the stack for varargs save area.  */
4265  ix86_save_varrargs_registers = 1;
4266
4267  cfun->stack_alignment_needed = 128;
4268
4269  fntype = TREE_TYPE (current_function_decl);
4270  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4271	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4272		  != void_type_node));
4273
4274  /* For varargs, we do not want to skip the dummy va_dcl argument.
4275     For stdargs, we do want to skip the last named argument.  */
4276  next_cum = *cum;
4277  if (stdarg_p)
4278    function_arg_advance (&next_cum, mode, type, 1);
4279
4280  if (!no_rtl)
4281    save_area = frame_pointer_rtx;
4282
4283  set = get_varargs_alias_set ();
4284
4285  for (i = next_cum.regno;
4286       i < ix86_regparm
4287       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4288       i++)
4289    {
4290      mem = gen_rtx_MEM (Pmode,
4291			 plus_constant (save_area, i * UNITS_PER_WORD));
4292      MEM_NOTRAP_P (mem) = 1;
4293      set_mem_alias_set (mem, set);
4294      emit_move_insn (mem, gen_rtx_REG (Pmode,
4295					x86_64_int_parameter_registers[i]));
4296    }
4297
4298  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4299    {
4300      /* Now emit code to save SSE registers.  The AX parameter contains the
4301	 number of SSE parameter registers used to call this function.  We use
4302	 the sse_prologue_save insn template, which produces a computed jump across
4303	 the SSE saves.  We need some preparation work to get this working.  */
4304
4305      label = gen_label_rtx ();
4306      label_ref = gen_rtx_LABEL_REF (Pmode, label);
4307
4308      /* Compute address to jump to :
4309         label - 5*eax + nnamed_sse_arguments*5  */
4310      tmp_reg = gen_reg_rtx (Pmode);
4311      nsse_reg = gen_reg_rtx (Pmode);
4312      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4313      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4314			      gen_rtx_MULT (Pmode, nsse_reg,
4315					    GEN_INT (4))));
4316      if (next_cum.sse_regno)
4317	emit_move_insn
4318	  (nsse_reg,
4319	   gen_rtx_CONST (DImode,
4320			  gen_rtx_PLUS (DImode,
4321					label_ref,
4322					GEN_INT (next_cum.sse_regno * 4))));
4323      else
4324	emit_move_insn (nsse_reg, label_ref);
4325      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4326
4327      /* Compute the address of the memory block we save into.  We always use
4328	 a pointer pointing 127 bytes past the first byte to store; this is needed
4329	 to keep each instruction's size limited to 4 bytes.  */
4330      tmp_reg = gen_reg_rtx (Pmode);
4331      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4332			      plus_constant (save_area,
4333					     8 * REGPARM_MAX + 127)));
4334      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4335      MEM_NOTRAP_P (mem) = 1;
4336      set_mem_alias_set (mem, set);
4337      set_mem_align (mem, BITS_PER_WORD);
4338
4339      /* And finally do the dirty job!  */
4340      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4341					GEN_INT (next_cum.sse_regno), label));
4342    }
4343
4344}
4345
4346/* Implement va_start.  */
4347
4348void
4349ix86_va_start (tree valist, rtx nextarg)
4350{
4351  HOST_WIDE_INT words, n_gpr, n_fpr;
4352  tree f_gpr, f_fpr, f_ovf, f_sav;
4353  tree gpr, fpr, ovf, sav, t;
4354  tree type;
4355
4356  /* Only 64bit target needs something special.  */
4357  if (!TARGET_64BIT)
4358    {
4359      std_expand_builtin_va_start (valist, nextarg);
4360      return;
4361    }
4362
4363  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4364  f_fpr = TREE_CHAIN (f_gpr);
4365  f_ovf = TREE_CHAIN (f_fpr);
4366  f_sav = TREE_CHAIN (f_ovf);
4367
4368  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4369  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4370  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4371  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4372  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4373
4374  /* Count number of gp and fp argument registers used.  */
4375  words = current_function_args_info.words;
4376  n_gpr = current_function_args_info.regno;
4377  n_fpr = current_function_args_info.sse_regno;
4378
4379  if (TARGET_DEBUG_ARG)
4380    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4381	     (int) words, (int) n_gpr, (int) n_fpr);
4382
4383  if (cfun->va_list_gpr_size)
4384    {
4385      type = TREE_TYPE (gpr);
4386      t = build2 (MODIFY_EXPR, type, gpr,
4387		  build_int_cst (type, n_gpr * 8));
4388      TREE_SIDE_EFFECTS (t) = 1;
4389      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4390    }
4391
4392  if (cfun->va_list_fpr_size)
4393    {
4394      type = TREE_TYPE (fpr);
4395      t = build2 (MODIFY_EXPR, type, fpr,
4396		  build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4397      TREE_SIDE_EFFECTS (t) = 1;
4398      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4399    }
4400
4401  /* Find the overflow area.  */
4402  type = TREE_TYPE (ovf);
4403  t = make_tree (type, virtual_incoming_args_rtx);
4404  if (words != 0)
4405    t = build2 (PLUS_EXPR, type, t,
4406	        build_int_cst (type, words * UNITS_PER_WORD));
4407  t = build2 (MODIFY_EXPR, type, ovf, t);
4408  TREE_SIDE_EFFECTS (t) = 1;
4409  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4410
4411  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4412    {
4413      /* Find the register save area.
4414	 The function prologue saves it right above the stack frame.  */
4415      type = TREE_TYPE (sav);
4416      t = make_tree (type, frame_pointer_rtx);
4417      t = build2 (MODIFY_EXPR, type, sav, t);
4418      TREE_SIDE_EFFECTS (t) = 1;
4419      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4420    }
4421}
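
/* Illustrative sketch: for

     int f (int a, ...);

   compiled for x86-64 with the standard REGPARM_MAX of 6, the expansion
   above sets gp_offset to 8 (one GP register consumed by A), fp_offset
   to 8 * REGPARM_MAX == 48, overflow_arg_area to the first incoming
   stack slot, and reg_save_area to the block spilled by
   ix86_setup_incoming_varargs.  */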
4422
4423/* Implement va_arg.  */
4424
4425tree
4426ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4427{
4428  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4429  tree f_gpr, f_fpr, f_ovf, f_sav;
4430  tree gpr, fpr, ovf, sav, t;
4431  int size, rsize;
4432  tree lab_false, lab_over = NULL_TREE;
4433  tree addr, t2;
4434  rtx container;
4435  int indirect_p = 0;
4436  tree ptrtype;
4437  enum machine_mode nat_mode;
4438
4439  /* Only 64bit target needs something special.  */
4440  if (!TARGET_64BIT)
4441    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4442
4443  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4444  f_fpr = TREE_CHAIN (f_gpr);
4445  f_ovf = TREE_CHAIN (f_fpr);
4446  f_sav = TREE_CHAIN (f_ovf);
4447
4448  valist = build_va_arg_indirect_ref (valist);
4449  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4450  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4451  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4452  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4453
4454  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4455  if (indirect_p)
4456    type = build_pointer_type (type);
4457  size = int_size_in_bytes (type);
4458  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4459
4460  nat_mode = type_natural_mode (type);
4461  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4462				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4463
4464  /* Pull the value out of the saved registers.  */
4465
4466  addr = create_tmp_var (ptr_type_node, "addr");
4467  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4468
4469  if (container)
4470    {
4471      int needed_intregs, needed_sseregs;
4472      bool need_temp;
4473      tree int_addr, sse_addr;
4474
4475      lab_false = create_artificial_label ();
4476      lab_over = create_artificial_label ();
4477
4478      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4479
4480      need_temp = (!REG_P (container)
4481		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
4482		       || TYPE_ALIGN (type) > 128));
4483
4484      /* In case we are passing a structure, verify that it occupies a
4485         consecutive block in the register save area.  If not, we need to do moves.  */
4486      if (!need_temp && !REG_P (container))
4487	{
4488	  /* Verify that all registers are strictly consecutive  */
4489	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4490	    {
4491	      int i;
4492
4493	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4494		{
4495		  rtx slot = XVECEXP (container, 0, i);
4496		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4497		      || INTVAL (XEXP (slot, 1)) != i * 16)
4498		    need_temp = 1;
4499		}
4500	    }
4501	  else
4502	    {
4503	      int i;
4504
4505	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4506		{
4507		  rtx slot = XVECEXP (container, 0, i);
4508		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4509		      || INTVAL (XEXP (slot, 1)) != i * 8)
4510		    need_temp = 1;
4511		}
4512	    }
4513	}
4514      if (!need_temp)
4515	{
4516	  int_addr = addr;
4517	  sse_addr = addr;
4518	}
4519      else
4520	{
4521	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
4522	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4523	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4524	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4525	}
4526
4527      /* First ensure that we fit completely in registers.  */
4528      if (needed_intregs)
4529	{
4530	  t = build_int_cst (TREE_TYPE (gpr),
4531			     (REGPARM_MAX - needed_intregs + 1) * 8);
4532	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4533	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4534	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4535	  gimplify_and_add (t, pre_p);
4536	}
4537      if (needed_sseregs)
4538	{
4539	  t = build_int_cst (TREE_TYPE (fpr),
4540			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4541			     + REGPARM_MAX * 8);
4542	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4543	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4544	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4545	  gimplify_and_add (t, pre_p);
4546	}
4547
4548      /* Compute index to start of area used for integer regs.  */
4549      if (needed_intregs)
4550	{
4551	  /* int_addr = gpr + sav; */
4552	  t = fold_convert (ptr_type_node, gpr);
4553	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4554	  t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4555	  gimplify_and_add (t, pre_p);
4556	}
4557      if (needed_sseregs)
4558	{
4559	  /* sse_addr = fpr + sav; */
4560	  t = fold_convert (ptr_type_node, fpr);
4561	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4562	  t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4563	  gimplify_and_add (t, pre_p);
4564	}
4565      if (need_temp)
4566	{
4567	  int i;
4568	  tree temp = create_tmp_var (type, "va_arg_tmp");
4569
4570	  /* addr = &temp; */
4571	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4572	  t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4573	  gimplify_and_add (t, pre_p);
4574
4575	  for (i = 0; i < XVECLEN (container, 0); i++)
4576	    {
4577	      rtx slot = XVECEXP (container, 0, i);
4578	      rtx reg = XEXP (slot, 0);
4579	      enum machine_mode mode = GET_MODE (reg);
4580	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4581	      tree addr_type = build_pointer_type (piece_type);
4582	      tree src_addr, src;
4583	      int src_offset;
4584	      tree dest_addr, dest;
4585
4586	      if (SSE_REGNO_P (REGNO (reg)))
4587		{
4588		  src_addr = sse_addr;
4589		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4590		}
4591	      else
4592		{
4593		  src_addr = int_addr;
4594		  src_offset = REGNO (reg) * 8;
4595		}
4596	      src_addr = fold_convert (addr_type, src_addr);
4597	      src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4598				       size_int (src_offset)));
4599	      src = build_va_arg_indirect_ref (src_addr);
4600
4601	      dest_addr = fold_convert (addr_type, addr);
4602	      dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4603					size_int (INTVAL (XEXP (slot, 1)))));
4604	      dest = build_va_arg_indirect_ref (dest_addr);
4605
4606	      t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4607	      gimplify_and_add (t, pre_p);
4608	    }
4609	}
4610
4611      if (needed_intregs)
4612	{
4613	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4614		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4615	  t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4616	  gimplify_and_add (t, pre_p);
4617	}
4618      if (needed_sseregs)
4619	{
4620	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4621		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4622	  t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4623	  gimplify_and_add (t, pre_p);
4624	}
4625
4626      t = build1 (GOTO_EXPR, void_type_node, lab_over);
4627      gimplify_and_add (t, pre_p);
4628
4629      t = build1 (LABEL_EXPR, void_type_node, lab_false);
4630      append_to_statement_list (t, pre_p);
4631    }
4632
4633  /* ... otherwise out of the overflow area.  */
4634
4635  /* Care for on-stack alignment if needed.  */
4636  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4637      || integer_zerop (TYPE_SIZE (type)))
4638    t = ovf;
4639  else
4640    {
4641      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4642      t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4643		  build_int_cst (TREE_TYPE (ovf), align - 1));
4644      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4645		  build_int_cst (TREE_TYPE (t), -align));
4646    }
4647  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4648
4649  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4650  gimplify_and_add (t2, pre_p);
4651
4652  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4653	      build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4654  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4655  gimplify_and_add (t, pre_p);
4656
4657  if (container)
4658    {
4659      t = build1 (LABEL_EXPR, void_type_node, lab_over);
4660      append_to_statement_list (t, pre_p);
4661    }
4662
4663  ptrtype = build_pointer_type (type);
4664  addr = fold_convert (ptrtype, addr);
4665
4666  if (indirect_p)
4667    addr = build_va_arg_indirect_ref (addr);
4668  return build_va_arg_indirect_ref (addr);
4669}
4670
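/* Purely as an illustration (the real output is GIMPLE, not C), the code
   gimplified above for va_arg (ap, int) behaves roughly like:

     if (ap->gp_offset >= 6 * 8)              all six GP argument regs used
       goto lab_false;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto lab_over;
   lab_false:
     addr = ap->overflow_arg_area;            aligned first if required
     ap->overflow_arg_area = addr + rsize * UNITS_PER_WORD;
   lab_over:
     result = *(int *) addr;

   Aggregates needing both GP and SSE registers, or whose pieces are not
   consecutive in the save area, take the need_temp path and are first copied
   piecewise into a temporary.  */
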
4671/* Return nonzero if OPNUM's MEM should be matched
4672   in movabs* patterns.  */
4673
4674int
4675ix86_check_movabs (rtx insn, int opnum)
4676{
4677  rtx set, mem;
4678
4679  set = PATTERN (insn);
4680  if (GET_CODE (set) == PARALLEL)
4681    set = XVECEXP (set, 0, 0);
4682  gcc_assert (GET_CODE (set) == SET);
4683  mem = XEXP (set, opnum);
4684  while (GET_CODE (mem) == SUBREG)
4685    mem = SUBREG_REG (mem);
4686  gcc_assert (GET_CODE (mem) == MEM);
4687  return (volatile_ok || !MEM_VOLATILE_P (mem));
4688}
4689
4690/* Initialize the table of extra 80387 mathematical constants.  */
4691
4692static void
4693init_ext_80387_constants (void)
4694{
4695  static const char * cst[5] =
4696  {
4697    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
4698    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
4699    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
4700    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
4701    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
4702  };
4703  int i;
4704
4705  for (i = 0; i < 5; i++)
4706    {
4707      real_from_string (&ext_80387_constants_table[i], cst[i]);
4708      /* Ensure each constant is rounded to XFmode precision.  */
4709      real_convert (&ext_80387_constants_table[i],
4710		    XFmode, &ext_80387_constants_table[i]);
4711    }
4712
4713  ext_80387_constants_init = 1;
4714}
4715
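/* As a reminder, the five entries above correspond to the 80387 load-constant
   instructions:

     fldlg2   log10(2)
     fldln2   ln(2)
     fldl2e   log2(e)
     fldl2t   log2(10)
     fldpi    pi

   Each string is a decimal expansion of the constant; it is rounded to XFmode
   above so that the comparisons in standard_80387_constant_p are exact.  */
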
4716/* Return a nonzero code if the constant X can be loaded with a special 80387
4717   instruction, 0 if it cannot, or -1 if X is not a floating-point constant.  */
4718
4719int
4720standard_80387_constant_p (rtx x)
4721{
4722  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4723    return -1;
4724
4725  if (x == CONST0_RTX (GET_MODE (x)))
4726    return 1;
4727  if (x == CONST1_RTX (GET_MODE (x)))
4728    return 2;
4729
4730  /* For XFmode constants, try to find a special 80387 instruction when
4731     optimizing for size or on those CPUs that benefit from them.  */
4732  if (GET_MODE (x) == XFmode
4733      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4734    {
4735      REAL_VALUE_TYPE r;
4736      int i;
4737
4738      if (! ext_80387_constants_init)
4739	init_ext_80387_constants ();
4740
4741      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4742      for (i = 0; i < 5; i++)
4743        if (real_identical (&r, &ext_80387_constants_table[i]))
4744	  return i + 3;
4745    }
4746
4747  return 0;
4748}
4749
4750/* Return the opcode of the special instruction to be used to load
4751   the constant X.  */
4752
4753const char *
4754standard_80387_constant_opcode (rtx x)
4755{
4756  switch (standard_80387_constant_p (x))
4757    {
4758    case 1:
4759      return "fldz";
4760    case 2:
4761      return "fld1";
4762    case 3:
4763      return "fldlg2";
4764    case 4:
4765      return "fldln2";
4766    case 5:
4767      return "fldl2e";
4768    case 6:
4769      return "fldl2t";
4770    case 7:
4771      return "fldpi";
4772    default:
4773      gcc_unreachable ();
4774    }
4775}
4776
4777/* Return the CONST_DOUBLE representing the 80387 constant that is
4778   loaded by the specified special instruction.  The argument IDX
4779   matches the return value from standard_80387_constant_p.  */
4780
4781rtx
4782standard_80387_constant_rtx (int idx)
4783{
4784  int i;
4785
4786  if (! ext_80387_constants_init)
4787    init_ext_80387_constants ();
4788
4789  switch (idx)
4790    {
4791    case 3:
4792    case 4:
4793    case 5:
4794    case 6:
4795    case 7:
4796      i = idx - 3;
4797      break;
4798
4799    default:
4800      gcc_unreachable ();
4801    }
4802
4803  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4804				       XFmode);
4805}
4806
4807/* Return 1 if MODE is a valid vector mode for SSE.  */
4808static int
4809standard_sse_mode_p (enum machine_mode mode)
4810{
4811  switch (mode)
4812    {
4813    case V16QImode:
4814    case V8HImode:
4815    case V4SImode:
4816    case V2DImode:
4817    case V4SFmode:
4818    case V2DFmode:
4819      return 1;
4820
4821    default:
4822      return 0;
4823    }
4824}
4825
4826/* Return 1 if X is an FP constant that we can load into an SSE register
4827   without using memory.  */
4828int
4829standard_sse_constant_p (rtx x)
4830{
4831  enum machine_mode mode = GET_MODE (x);
4832
4833  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4834    return 1;
4835  if (vector_all_ones_operand (x, mode)
4836      && standard_sse_mode_p (mode))
4837    return TARGET_SSE2 ? 2 : -1;
4838
4839  return 0;
4840}
4841
4842/* Return the opcode of the special instruction to be used to load
4843   the constant X.  */
4844
4845const char *
4846standard_sse_constant_opcode (rtx insn, rtx x)
4847{
4848  switch (standard_sse_constant_p (x))
4849    {
4850    case 1:
4851      if (get_attr_mode (insn) == MODE_V4SF)
4852        return "xorps\t%0, %0";
4853      else if (get_attr_mode (insn) == MODE_V2DF)
4854        return "xorpd\t%0, %0";
4855      else
4856        return "pxor\t%0, %0";
4857    case 2:
4858      return "pcmpeqd\t%0, %0";
4859    }
4860  gcc_unreachable ();
4861}
4862
4863/* Return 1 if OP contains a symbol reference.  */
4864
4865int
4866symbolic_reference_mentioned_p (rtx op)
4867{
4868  const char *fmt;
4869  int i;
4870
4871  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4872    return 1;
4873
4874  fmt = GET_RTX_FORMAT (GET_CODE (op));
4875  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4876    {
4877      if (fmt[i] == 'E')
4878	{
4879	  int j;
4880
4881	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4882	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4883	      return 1;
4884	}
4885
4886      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4887	return 1;
4888    }
4889
4890  return 0;
4891}
4892
4893/* Return 1 if it is appropriate to emit `ret' instructions in the
4894   body of a function.  Do this only if the epilogue is simple, needing a
4895   couple of insns.  Prior to reloading, we can't tell how many registers
4896   must be saved, so return 0 then.  Return 0 if there is no frame
4897   marker to de-allocate.  */
4898
4899int
4900ix86_can_use_return_insn_p (void)
4901{
4902  struct ix86_frame frame;
4903
4904  if (! reload_completed || frame_pointer_needed)
4905    return 0;
4906
4907  /* Don't allow popping more than 32K bytes of arguments, since that's
4908     all we can do with one return instruction.  */
4909  if (current_function_pops_args
4910      && current_function_args_size >= 32768)
4911    return 0;
4912
4913  ix86_compute_frame_layout (&frame);
4914  return frame.to_allocate == 0 && frame.nregs == 0;
4915}
4916
4917/* Value should be nonzero if functions must have frame pointers.
4918   Zero means the frame pointer need not be set up (and parms may
4919   be accessed via the stack pointer) in functions that seem suitable.  */
4920
4921int
4922ix86_frame_pointer_required (void)
4923{
4924  /* If we accessed previous frames, then the generated code expects
4925     to be able to access the saved ebp value in our frame.  */
4926  if (cfun->machine->accesses_prev_frame)
4927    return 1;
4928
4929  /* Several x86 OSes need a frame pointer for other reasons,
4930     usually pertaining to setjmp.  */
4931  if (SUBTARGET_FRAME_POINTER_REQUIRED)
4932    return 1;
4933
4934  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4935     the frame pointer by default.  Turn it back on now if we've not
4936     got a leaf function.  */
4937  if (TARGET_OMIT_LEAF_FRAME_POINTER
4938      && (!current_function_is_leaf
4939	  || ix86_current_function_calls_tls_descriptor))
4940    return 1;
4941
4942  if (current_function_profile)
4943    return 1;
4944
4945  return 0;
4946}
4947
4948/* Record that the current function accesses previous call frames.  */
4949
4950void
4951ix86_setup_frame_addresses (void)
4952{
4953  cfun->machine->accesses_prev_frame = 1;
4954}
4955
4956#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4957# define USE_HIDDEN_LINKONCE 1
4958#else
4959# define USE_HIDDEN_LINKONCE 0
4960#endif
4961
4962static int pic_labels_used;
4963
4964/* Fills in the label name that should be used for a pc thunk for
4965   the given register.  */
4966
4967static void
4968get_pc_thunk_name (char name[32], unsigned int regno)
4969{
4970  gcc_assert (!TARGET_64BIT);
4971
4972  if (USE_HIDDEN_LINKONCE)
4973    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4974  else
4975    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4976}
4977
4978
4979/* Output, at the end of the file, the pc thunks used for -fpic.  Each thunk
4980   loads its register with the return address of the caller and then returns.  */
4981
4982void
4983ix86_file_end (void)
4984{
4985  rtx xops[2];
4986  int regno;
4987
4988  for (regno = 0; regno < 8; ++regno)
4989    {
4990      char name[32];
4991
4992      if (! ((pic_labels_used >> regno) & 1))
4993	continue;
4994
4995      get_pc_thunk_name (name, regno);
4996
4997#if TARGET_MACHO
4998      if (TARGET_MACHO)
4999	{
5000	  switch_to_section (darwin_sections[text_coal_section]);
5001	  fputs ("\t.weak_definition\t", asm_out_file);
5002	  assemble_name (asm_out_file, name);
5003	  fputs ("\n\t.private_extern\t", asm_out_file);
5004	  assemble_name (asm_out_file, name);
5005	  fputs ("\n", asm_out_file);
5006	  ASM_OUTPUT_LABEL (asm_out_file, name);
5007	}
5008      else
5009#endif
5010      if (USE_HIDDEN_LINKONCE)
5011	{
5012	  tree decl;
5013
5014	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
5015			     error_mark_node);
5016	  TREE_PUBLIC (decl) = 1;
5017	  TREE_STATIC (decl) = 1;
5018	  DECL_ONE_ONLY (decl) = 1;
5019
5020	  (*targetm.asm_out.unique_section) (decl, 0);
5021	  switch_to_section (get_named_section (decl, NULL, 0));
5022
5023	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
5024	  fputs ("\t.hidden\t", asm_out_file);
5025	  assemble_name (asm_out_file, name);
5026	  fputc ('\n', asm_out_file);
5027	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5028	}
5029      else
5030	{
5031	  switch_to_section (text_section);
5032	  ASM_OUTPUT_LABEL (asm_out_file, name);
5033	}
5034
5035      xops[0] = gen_rtx_REG (SImode, regno);
5036      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5037      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5038      output_asm_insn ("ret", xops);
5039    }
5040
5041  if (NEED_INDICATE_EXEC_STACK)
5042    file_end_indicate_exec_stack ();
5043}
5044
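/* For a typical -fpic compilation using %ebx, the thunk emitted above
   assembles to roughly the following (illustrative; section, visibility and
   label directives vary with USE_HIDDEN_LINKONCE and the target):

   __i686.get_pc_thunk.bx:
	   movl	(%esp), %ebx
	   ret
*/
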
5045/* Emit code for the SET_GOT patterns.  */
5046
5047const char *
5048output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5049{
5050  rtx xops[3];
5051
5052  xops[0] = dest;
5053  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5054
5055  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5056    {
5057      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5058
5059      if (!flag_pic)
5060	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5061      else
5062	output_asm_insn ("call\t%a2", xops);
5063
5064#if TARGET_MACHO
5065      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
5066         is what will be referenced by the Mach-O PIC subsystem.  */
5067      if (!label)
5068	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5069#endif
5070
5071      (*targetm.asm_out.internal_label) (asm_out_file, "L",
5072				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5073
5074      if (flag_pic)
5075	output_asm_insn ("pop{l}\t%0", xops);
5076    }
5077  else
5078    {
5079      char name[32];
5080      get_pc_thunk_name (name, REGNO (dest));
5081      pic_labels_used |= 1 << REGNO (dest);
5082
5083      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5084      xops[2] = gen_rtx_MEM (QImode, xops[2]);
5085      output_asm_insn ("call\t%X2", xops);
5086      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
5087         is what will be referenced by the Mach-O PIC subsystem.  */
5088#if TARGET_MACHO
5089      if (!label)
5090	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5091      else
5092        targetm.asm_out.internal_label (asm_out_file, "L",
5093					   CODE_LABEL_NUMBER (label));
5094#endif
5095    }
5096
5097  if (TARGET_MACHO)
5098    return "";
5099
5100  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5101    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5102  else
5103    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5104
5105  return "";
5106}
5107
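/* As an illustration, without deep branch prediction the sequence emitted
   above for %ebx looks roughly like (label names invented):

	   call	.L3
   .L3:
	   popl	%ebx
	   addl	$_GLOBAL_OFFSET_TABLE_+[.-.L3], %ebx

   With TARGET_DEEP_BRANCH_PREDICTION the call/pop pair is replaced by a call
   to the __i686.get_pc_thunk thunk for the register, which keeps the CPU's
   return-address predictor balanced, followed by a plain
   addl $_GLOBAL_OFFSET_TABLE_.  */
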
5108/* Generate a "push" pattern for input ARG.  */
5109
5110static rtx
5111gen_push (rtx arg)
5112{
5113  return gen_rtx_SET (VOIDmode,
5114		      gen_rtx_MEM (Pmode,
5115				   gen_rtx_PRE_DEC (Pmode,
5116						    stack_pointer_rtx)),
5117		      arg);
5118}
5119
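/* In other words, gen_push merely builds the RTL

     (set (mem:SI (pre_dec:SI (reg:SI sp))) arg)

   (DImode rather than SImode on 64-bit targets); it does not emit the insn
   itself, callers pass the result to emit_insn.  */
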
5120/* Return the number of an unused call-clobbered register that is available
5121   for the entire function, or INVALID_REGNUM if there is none.  */
5122
5123static unsigned int
5124ix86_select_alt_pic_regnum (void)
5125{
5126  if (current_function_is_leaf && !current_function_profile
5127      && !ix86_current_function_calls_tls_descriptor)
5128    {
5129      int i;
5130      for (i = 2; i >= 0; --i)
5131        if (!regs_ever_live[i])
5132	  return i;
5133    }
5134
5135  return INVALID_REGNUM;
5136}
5137
5138/* Return 1 if we need to save REGNO.  */
5139static int
5140ix86_save_reg (unsigned int regno, int maybe_eh_return)
5141{
5142  if (pic_offset_table_rtx
5143      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5144      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5145	  || current_function_profile
5146	  || current_function_calls_eh_return
5147	  || current_function_uses_const_pool))
5148    {
5149      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5150	return 0;
5151      return 1;
5152    }
5153
5154  if (current_function_calls_eh_return && maybe_eh_return)
5155    {
5156      unsigned i;
5157      for (i = 0; ; i++)
5158	{
5159	  unsigned test = EH_RETURN_DATA_REGNO (i);
5160	  if (test == INVALID_REGNUM)
5161	    break;
5162	  if (test == regno)
5163	    return 1;
5164	}
5165    }
5166
5167  if (cfun->machine->force_align_arg_pointer
5168      && regno == REGNO (cfun->machine->force_align_arg_pointer))
5169    return 1;
5170
5171  return (regs_ever_live[regno]
5172	  && !call_used_regs[regno]
5173	  && !fixed_regs[regno]
5174	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5175}
5176
5177/* Return number of registers to be saved on the stack.  */
5178
5179static int
5180ix86_nsaved_regs (void)
5181{
5182  int nregs = 0;
5183  int regno;
5184
5185  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5186    if (ix86_save_reg (regno, true))
5187      nregs++;
5188  return nregs;
5189}
5190
5191/* Return the offset between two registers, one to be eliminated, and the other
5192   its replacement, at the start of a routine.  */
5193
5194HOST_WIDE_INT
5195ix86_initial_elimination_offset (int from, int to)
5196{
5197  struct ix86_frame frame;
5198  ix86_compute_frame_layout (&frame);
5199
5200  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5201    return frame.hard_frame_pointer_offset;
5202  else if (from == FRAME_POINTER_REGNUM
5203	   && to == HARD_FRAME_POINTER_REGNUM)
5204    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5205  else
5206    {
5207      gcc_assert (to == STACK_POINTER_REGNUM);
5208
5209      if (from == ARG_POINTER_REGNUM)
5210	return frame.stack_pointer_offset;
5211
5212      gcc_assert (from == FRAME_POINTER_REGNUM);
5213      return frame.stack_pointer_offset - frame.frame_pointer_offset;
5214    }
5215}
5216
5217/* Fill the ix86_frame structure describing the current function's frame.  */
5218
5219static void
5220ix86_compute_frame_layout (struct ix86_frame *frame)
5221{
5222  HOST_WIDE_INT total_size;
5223  unsigned int stack_alignment_needed;
5224  HOST_WIDE_INT offset;
5225  unsigned int preferred_alignment;
5226  HOST_WIDE_INT size = get_frame_size ();
5227
5228  frame->nregs = ix86_nsaved_regs ();
5229  total_size = size;
5230
5231  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5232  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5233
5234  /* During reload iterations the number of registers saved can change.
5235     Recompute the value as needed.  Do not recompute when the number of
5236     registers did not change, as reload calls this function multiple times
5237     and does not expect the decision to change within a single iteration.  */
5238  if (!optimize_size
5239      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5240    {
5241      int count = frame->nregs;
5242
5243      cfun->machine->use_fast_prologue_epilogue_nregs = count;
5244      /* The fast prologue uses move instead of push to save registers.  This
5245         is significantly longer, but also executes faster as modern hardware
5246         can execute the moves in parallel, but can't do that for push/pop.
5247
5248	 Be careful about choosing which prologue to emit:  when the function
5249	 takes many instructions to execute, we may as well use the slow version,
5250	 as we may when the function is known to be outside any hot spot (this is
5251	 known only with profile feedback).  Weight the size of the function by the
5252	 number of registers to save, as it is cheap to use one or two push
5253	 instructions but very slow to use many of them.  */
5254      if (count)
5255	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5256      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5257	  || (flag_branch_probabilities
5258	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5259        cfun->machine->use_fast_prologue_epilogue = false;
5260      else
5261        cfun->machine->use_fast_prologue_epilogue
5262	   = !expensive_function_p (count);
5263    }
5264  if (TARGET_PROLOGUE_USING_MOVE
5265      && cfun->machine->use_fast_prologue_epilogue)
5266    frame->save_regs_using_mov = true;
5267  else
5268    frame->save_regs_using_mov = false;
5269
5270
5271  /* Skip return address and saved base pointer.  */
5272  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5273
5274  frame->hard_frame_pointer_offset = offset;
5275
5276  /* Do some sanity checking of stack_alignment_needed and
5277     preferred_alignment, since the i386 port is the only one using these
5278     features, and they may break easily.  */
5279
5280  gcc_assert (!size || stack_alignment_needed);
5281  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5282  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5283  gcc_assert (stack_alignment_needed
5284	      <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5285
5286  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5287    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5288
5289  /* Register save area */
5290  offset += frame->nregs * UNITS_PER_WORD;
5291
5292  /* Va-arg area */
5293  if (ix86_save_varrargs_registers)
5294    {
5295      offset += X86_64_VARARGS_SIZE;
5296      frame->va_arg_size = X86_64_VARARGS_SIZE;
5297    }
5298  else
5299    frame->va_arg_size = 0;
5300
5301  /* Align start of frame for local function.  */
5302  frame->padding1 = ((offset + stack_alignment_needed - 1)
5303		     & -stack_alignment_needed) - offset;
5304
5305  offset += frame->padding1;
5306
5307  /* Frame pointer points here.  */
5308  frame->frame_pointer_offset = offset;
5309
5310  offset += size;
5311
5312  /* Add the outgoing arguments area.  It can be skipped if we eliminated
5313     all the function calls as dead code.
5314     Skipping is however impossible when the function calls alloca.  The
5315     alloca expander assumes that the last current_function_outgoing_args_size
5316     bytes of the stack frame are unused.  */
5317  if (ACCUMULATE_OUTGOING_ARGS
5318      && (!current_function_is_leaf || current_function_calls_alloca
5319	  || ix86_current_function_calls_tls_descriptor))
5320    {
5321      offset += current_function_outgoing_args_size;
5322      frame->outgoing_arguments_size = current_function_outgoing_args_size;
5323    }
5324  else
5325    frame->outgoing_arguments_size = 0;
5326
5327  /* Align stack boundary.  Only needed if we're calling another function
5328     or using alloca.  */
5329  if (!current_function_is_leaf || current_function_calls_alloca
5330      || ix86_current_function_calls_tls_descriptor)
5331    frame->padding2 = ((offset + preferred_alignment - 1)
5332		       & -preferred_alignment) - offset;
5333  else
5334    frame->padding2 = 0;
5335
5336  offset += frame->padding2;
5337
5338  /* We've reached end of stack frame.  */
5339  frame->stack_pointer_offset = offset;
5340
5341  /* Size the prologue needs to allocate.  */
5342  frame->to_allocate =
5343    (size + frame->padding1 + frame->padding2
5344     + frame->outgoing_arguments_size + frame->va_arg_size);
5345
5346  if ((!frame->to_allocate && frame->nregs <= 1)
5347      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5348    frame->save_regs_using_mov = false;
5349
5350  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5351      && current_function_is_leaf
5352      && !ix86_current_function_calls_tls_descriptor)
5353    {
5354      frame->red_zone_size = frame->to_allocate;
5355      if (frame->save_regs_using_mov)
5356	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5357      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5358	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5359    }
5360  else
5361    frame->red_zone_size = 0;
5362  frame->to_allocate -= frame->red_zone_size;
5363  frame->stack_pointer_offset -= frame->red_zone_size;
5364#if 0
5365  fprintf (stderr, "nregs: %i\n", frame->nregs);
5366  fprintf (stderr, "size: %i\n", size);
5367  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5368  fprintf (stderr, "padding1: %i\n", frame->padding1);
5369  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5370  fprintf (stderr, "padding2: %i\n", frame->padding2);
5371  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5372  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5373  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5374  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5375	   frame->hard_frame_pointer_offset);
5376  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5377#endif
5378}
5379
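/* As a reading aid only (the code above is authoritative), the frame computed
   here is laid out, from higher to lower addresses, roughly as:

     return address
     saved %ebp (if a frame pointer is used)   <- hard_frame_pointer_offset
     saved registers (nregs words)
     va-arg register save area (if any)
     padding1 (to stack_alignment_needed)      <- frame_pointer_offset
     local variables (get_frame_size ())
     outgoing argument area (if accumulated)
     padding2 (to preferred_alignment)         <- stack_pointer_offset

   to_allocate is everything below the saved registers, minus whatever fits in
   the red zone on x86-64 leaf functions.  */
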
5380/* Emit code to save registers in the prologue.  */
5381
5382static void
5383ix86_emit_save_regs (void)
5384{
5385  unsigned int regno;
5386  rtx insn;
5387
5388  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5389    if (ix86_save_reg (regno, true))
5390      {
5391	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5392	RTX_FRAME_RELATED_P (insn) = 1;
5393      }
5394}
5395
5396/* Emit code to save registers using MOV insns.  The first register
5397   is saved at POINTER + OFFSET.  */
5398static void
5399ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5400{
5401  unsigned int regno;
5402  rtx insn;
5403
5404  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5405    if (ix86_save_reg (regno, true))
5406      {
5407	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5408					       Pmode, offset),
5409			       gen_rtx_REG (Pmode, regno));
5410	RTX_FRAME_RELATED_P (insn) = 1;
5411	offset += UNITS_PER_WORD;
5412      }
5413}
5414
5415/* Expand a prologue or epilogue stack adjustment.
5416   The pattern exists to put a dependency on all ebp-based memory accesses.
5417   STYLE should be negative if instructions should be marked as frame related,
5418   zero if the %r11 register is live and cannot be freely used, and positive
5419   otherwise.  */
5420
5421static void
5422pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5423{
5424  rtx insn;
5425
5426  if (! TARGET_64BIT)
5427    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5428  else if (x86_64_immediate_operand (offset, DImode))
5429    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5430  else
5431    {
5432      rtx r11;
5433      /* r11 is used by indirect sibcall return as well, set before the
5434	 epilogue and used after the epilogue.  ATM indirect sibcall
5435	 shouldn't be used together with huge frame sizes in one
5436	 function because of the frame_size check in sibcall.c.  */
5437      gcc_assert (style);
5438      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5439      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5440      if (style < 0)
5441	RTX_FRAME_RELATED_P (insn) = 1;
5442      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5443							       offset));
5444    }
5445  if (style < 0)
5446    RTX_FRAME_RELATED_P (insn) = 1;
5447}
5448
5449/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
5450
5451static rtx
5452ix86_internal_arg_pointer (void)
5453{
5454  bool has_force_align_arg_pointer =
5455    (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5456			    TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5457  if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5458       && DECL_NAME (current_function_decl)
5459       && MAIN_NAME_P (DECL_NAME (current_function_decl))
5460       && DECL_FILE_SCOPE_P (current_function_decl))
5461      || ix86_force_align_arg_pointer
5462      || has_force_align_arg_pointer)
5463    {
5464      /* Nested functions can't realign the stack due to a register
5465	 conflict.  */
5466      if (DECL_CONTEXT (current_function_decl)
5467	  && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5468	{
5469	  if (ix86_force_align_arg_pointer)
5470	    warning (0, "-mstackrealign ignored for nested functions");
5471	  if (has_force_align_arg_pointer)
5472	    error ("%s not supported for nested functions",
5473		   ix86_force_align_arg_pointer_string);
5474	  return virtual_incoming_args_rtx;
5475	}
5476      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5477      return copy_to_reg (cfun->machine->force_align_arg_pointer);
5478    }
5479  else
5480    return virtual_incoming_args_rtx;
5481}
5482
5483/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5484   This is called from dwarf2out.c to emit call frame instructions
5485   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5486static void
5487ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5488{
5489  rtx unspec = SET_SRC (pattern);
5490  gcc_assert (GET_CODE (unspec) == UNSPEC);
5491
5492  switch (index)
5493    {
5494    case UNSPEC_REG_SAVE:
5495      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5496			      SET_DEST (pattern));
5497      break;
5498    case UNSPEC_DEF_CFA:
5499      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5500			 INTVAL (XVECEXP (unspec, 0, 0)));
5501      break;
5502    default:
5503      gcc_unreachable ();
5504    }
5505}
5506
5507/* Expand the prologue into a bunch of separate insns.  */
5508
5509void
5510ix86_expand_prologue (void)
5511{
5512  rtx insn;
5513  bool pic_reg_used;
5514  struct ix86_frame frame;
5515  HOST_WIDE_INT allocate;
5516
5517  ix86_compute_frame_layout (&frame);
5518
5519  if (cfun->machine->force_align_arg_pointer)
5520    {
5521      rtx x, y;
5522
5523      /* Grab the argument pointer.  */
5524      x = plus_constant (stack_pointer_rtx, 4);
5525      y = cfun->machine->force_align_arg_pointer;
5526      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5527      RTX_FRAME_RELATED_P (insn) = 1;
5528
5529      /* The unwind info consists of two parts: install the fafp as the cfa,
5530	 and record the fafp as the "save register" of the stack pointer.
5531	 The latter is there so that the unwinder can see where it
5532	 should restore the stack pointer across the "and" insn.  */
5533      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5534      x = gen_rtx_SET (VOIDmode, y, x);
5535      RTX_FRAME_RELATED_P (x) = 1;
5536      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5537			  UNSPEC_REG_SAVE);
5538      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5539      RTX_FRAME_RELATED_P (y) = 1;
5540      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5541      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5542      REG_NOTES (insn) = x;
5543
5544      /* Align the stack.  */
5545      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5546			     GEN_INT (-16)));
5547
5548      /* And here we cheat like madmen with the unwind info.  We force the
5549	 cfa register back to sp+4, which is exactly what it was at the
5550	 start of the function.  Re-pushing the return address results in
5551	 the return at the same spot relative to the cfa, and thus is
5552	 correct wrt the unwind info.  */
5553      x = cfun->machine->force_align_arg_pointer;
5554      x = gen_frame_mem (Pmode, plus_constant (x, -4));
5555      insn = emit_insn (gen_push (x));
5556      RTX_FRAME_RELATED_P (insn) = 1;
5557
5558      x = GEN_INT (4);
5559      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5560      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5561      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5562      REG_NOTES (insn) = x;
5563    }
5564
5565  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
5566     slower on all targets.  Also sdb doesn't like it.  */
5567
5568  if (frame_pointer_needed)
5569    {
5570      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5571      RTX_FRAME_RELATED_P (insn) = 1;
5572
5573      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5574      RTX_FRAME_RELATED_P (insn) = 1;
5575    }
5576
5577  allocate = frame.to_allocate;
5578
5579  if (!frame.save_regs_using_mov)
5580    ix86_emit_save_regs ();
5581  else
5582    allocate += frame.nregs * UNITS_PER_WORD;
5583
5584  /* When using the red zone we may start saving registers before allocating
5585     the stack frame, saving one cycle of the prologue.  */
5586  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5587    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5588				   : stack_pointer_rtx,
5589				   -frame.nregs * UNITS_PER_WORD);
5590
5591  if (allocate == 0)
5592    ;
5593  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5594    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5595			       GEN_INT (-allocate), -1);
5596  else
5597    {
5598      /* Only valid for Win32.  */
5599      rtx eax = gen_rtx_REG (SImode, 0);
5600      bool eax_live = ix86_eax_live_at_start_p ();
5601      rtx t;
5602
5603      gcc_assert (!TARGET_64BIT);
5604
5605      if (eax_live)
5606	{
5607	  emit_insn (gen_push (eax));
5608	  allocate -= 4;
5609	}
5610
5611      emit_move_insn (eax, GEN_INT (allocate));
5612
5613      insn = emit_insn (gen_allocate_stack_worker (eax));
5614      RTX_FRAME_RELATED_P (insn) = 1;
5615      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5616      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5617      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5618					    t, REG_NOTES (insn));
5619
5620      if (eax_live)
5621	{
5622	  if (frame_pointer_needed)
5623	    t = plus_constant (hard_frame_pointer_rtx,
5624			       allocate
5625			       - frame.to_allocate
5626			       - frame.nregs * UNITS_PER_WORD);
5627	  else
5628	    t = plus_constant (stack_pointer_rtx, allocate);
5629	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5630	}
5631    }
5632
5633  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5634    {
5635      if (!frame_pointer_needed || !frame.to_allocate)
5636        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5637      else
5638        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5639				       -frame.nregs * UNITS_PER_WORD);
5640    }
5641
5642  pic_reg_used = false;
5643  if (pic_offset_table_rtx
5644      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5645	  || current_function_profile))
5646    {
5647      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5648
5649      if (alt_pic_reg_used != INVALID_REGNUM)
5650	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5651
5652      pic_reg_used = true;
5653    }
5654
5655  if (pic_reg_used)
5656    {
5657      if (TARGET_64BIT)
5658        insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5659      else
5660        insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5661
5662      /* Even with accurate pre-reload life analysis, we can wind up
5663	 deleting all references to the pic register after reload.
5664	 Consider the case where cross-jumping unifies two sides of a branch
5665	 controlled by a comparison against the only read from a global.
5666	 In that case, allow the set_got to be deleted, though we're
5667	 too late to do anything about the ebx save in the prologue.  */
5668      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5669    }
5670
5671  /* Prevent function calls from being scheduled before the call to mcount.
5672     In the pic_reg_used case, make sure that the got load isn't deleted.  */
5673  if (current_function_profile)
5674    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5675}
5676
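/* For a typical 32-bit function that needs a frame pointer and saves a couple
   of registers with pushes, the insns emitted above assemble to roughly
   (illustrative only; the move-based register save, stack probing and PIC
   setup variants change the shape):

	   pushl	%ebp
	   movl	%esp, %ebp
	   pushl	%ebx
	   pushl	%esi
	   subl	$N, %esp
*/
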
5677/* Emit code to restore saved registers using MOV insns.  First register
5678   is restored from POINTER + OFFSET.  */
5679static void
5680ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5681				  int maybe_eh_return)
5682{
5683  int regno;
5684  rtx base_address = gen_rtx_MEM (Pmode, pointer);
5685
5686  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5687    if (ix86_save_reg (regno, maybe_eh_return))
5688      {
5689	/* Ensure that adjust_address won't be forced to produce a pointer
5690	   outside the range allowed by the x86-64 instruction set.  */
5691	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5692	  {
5693	    rtx r11;
5694
5695	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5696	    emit_move_insn (r11, GEN_INT (offset));
5697	    emit_insn (gen_adddi3 (r11, r11, pointer));
5698	    base_address = gen_rtx_MEM (Pmode, r11);
5699	    offset = 0;
5700	  }
5701	emit_move_insn (gen_rtx_REG (Pmode, regno),
5702			adjust_address (base_address, Pmode, offset));
5703	offset += UNITS_PER_WORD;
5704      }
5705}
5706
5707/* Restore function stack, frame, and registers.  */
5708
5709void
5710ix86_expand_epilogue (int style)
5711{
5712  int regno;
5713  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5714  struct ix86_frame frame;
5715  HOST_WIDE_INT offset;
5716
5717  ix86_compute_frame_layout (&frame);
5718
5719  /* Calculate start of saved registers relative to ebp.  Special care
5720     must be taken for the normal return case of a function using
5721     eh_return: the eax and edx registers are marked as saved, but not
5722     restored along this path.  */
5723  offset = frame.nregs;
5724  if (current_function_calls_eh_return && style != 2)
5725    offset -= 2;
5726  offset *= -UNITS_PER_WORD;
5727
5728  /* If we're only restoring one register and sp is not valid then
5729     use a move instruction to restore the register, since it's
5730     less work than reloading sp and popping the register.
5731
5732     The default code results in a stack adjustment using an add/lea
5733     instruction, while this code results in a LEAVE instruction (or its
5734     discrete equivalent), so it is profitable in some other cases as well,
5735     especially when there are no registers to restore.  We also use this
5736     code when TARGET_USE_LEAVE and there is exactly one register to pop.
5737     This heuristic may need some tuning in the future.  */
5738  if ((!sp_valid && frame.nregs <= 1)
5739      || (TARGET_EPILOGUE_USING_MOVE
5740	  && cfun->machine->use_fast_prologue_epilogue
5741	  && (frame.nregs > 1 || frame.to_allocate))
5742      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5743      || (frame_pointer_needed && TARGET_USE_LEAVE
5744	  && cfun->machine->use_fast_prologue_epilogue
5745	  && frame.nregs == 1)
5746      || current_function_calls_eh_return)
5747    {
5748      /* Restore registers.  We can use ebp or esp to address the memory
5749	 locations.  If both are available, default to ebp, since offsets
5750	 are known to be small.  The only exception is esp pointing directly to
5751	 the end of the block of saved registers, where we may simplify the
5752	 addressing mode.  */
5753
5754      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5755	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5756					  frame.to_allocate, style == 2);
5757      else
5758	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5759					  offset, style == 2);
5760
5761      /* eh_return epilogues need %ecx added to the stack pointer.  */
5762      if (style == 2)
5763	{
5764	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5765
5766	  if (frame_pointer_needed)
5767	    {
5768	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5769	      tmp = plus_constant (tmp, UNITS_PER_WORD);
5770	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5771
5772	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5773	      emit_move_insn (hard_frame_pointer_rtx, tmp);
5774
5775	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5776					 const0_rtx, style);
5777	    }
5778	  else
5779	    {
5780	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5781	      tmp = plus_constant (tmp, (frame.to_allocate
5782                                         + frame.nregs * UNITS_PER_WORD));
5783	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5784	    }
5785	}
5786      else if (!frame_pointer_needed)
5787	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5788				   GEN_INT (frame.to_allocate
5789					    + frame.nregs * UNITS_PER_WORD),
5790				   style);
5791      /* If not an i386, mov & pop is faster than "leave".  */
5792      else if (TARGET_USE_LEAVE || optimize_size
5793	       || !cfun->machine->use_fast_prologue_epilogue)
5794	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5795      else
5796	{
5797	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5798				     hard_frame_pointer_rtx,
5799				     const0_rtx, style);
5800	  if (TARGET_64BIT)
5801	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5802	  else
5803	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5804	}
5805    }
5806  else
5807    {
5808      /* The first step is to deallocate the stack frame so that we can
5809	 pop the registers.  */
5810      if (!sp_valid)
5811	{
5812	  gcc_assert (frame_pointer_needed);
5813	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5814				     hard_frame_pointer_rtx,
5815				     GEN_INT (offset), style);
5816	}
5817      else if (frame.to_allocate)
5818	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5819				   GEN_INT (frame.to_allocate), style);
5820
5821      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5822	if (ix86_save_reg (regno, false))
5823	  {
5824	    if (TARGET_64BIT)
5825	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5826	    else
5827	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5828	  }
5829      if (frame_pointer_needed)
5830	{
5831	  /* Leave results in shorter dependency chains on CPUs that are
5832	     able to grok it fast.  */
5833	  if (TARGET_USE_LEAVE)
5834	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5835	  else if (TARGET_64BIT)
5836	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5837	  else
5838	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5839	}
5840    }
5841
5842  if (cfun->machine->force_align_arg_pointer)
5843    {
5844      emit_insn (gen_addsi3 (stack_pointer_rtx,
5845			     cfun->machine->force_align_arg_pointer,
5846			     GEN_INT (-4)));
5847    }
5848
5849  /* Sibcall epilogues don't want a return instruction.  */
5850  if (style == 0)
5851    return;
5852
5853  if (current_function_pops_args && current_function_args_size)
5854    {
5855      rtx popc = GEN_INT (current_function_pops_args);
5856
5857      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
5858	 return address, do an explicit add, and jump indirectly to the
5859	 caller.  */
5860
5861      if (current_function_pops_args >= 65536)
5862	{
5863	  rtx ecx = gen_rtx_REG (SImode, 2);
5864
5865	  /* There is no "pascal" calling convention in the 64-bit ABI.  */
5866	  gcc_assert (!TARGET_64BIT);
5867
5868	  emit_insn (gen_popsi1 (ecx));
5869	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5870	  emit_jump_insn (gen_return_indirect_internal (ecx));
5871	}
5872      else
5873	emit_jump_insn (gen_return_pop_internal (popc));
5874    }
5875  else
5876    emit_jump_insn (gen_return_internal ());
5877}
5878
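/* Correspondingly, the common shapes of the epilogue emitted above are,
   roughly:

     with a frame pointer, when "leave" is profitable:
	   leave
	   ret

     without a frame pointer:
	   addl	$N, %esp		(or pops of the saved registers)
	   ret

   The eh_return (style == 2) and argument-popping (ret $imm) variants adjust
   the stack pointer further, as handled above.  */
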
5879/* Reset state from the function's potential modifications (such as a
5880   renumbered PIC register).  */
5880
5881static void
5882ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5883			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5884{
5885  if (pic_offset_table_rtx)
5886    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5887#if TARGET_MACHO
5888  /* Mach-O doesn't support labels at the end of objects, so if
5889     it looks like we might want one, insert a NOP.  */
5890  {
5891    rtx insn = get_last_insn ();
5892    while (insn
5893	   && NOTE_P (insn)
5894	   && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5895      insn = PREV_INSN (insn);
5896    if (insn
5897	&& (LABEL_P (insn)
5898	    || (NOTE_P (insn)
5899		&& NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5900      fputs ("\tnop\n", file);
5901  }
5902#endif
5903
5904}
5905
5906/* Extract the parts of an RTL expression that is a valid memory address
5907   for an instruction.  Return 0 if the structure of the address is
5908   grossly off.  Return -1 if the address contains ASHIFT, so it is not
5909   strictly valid, but is still used for computing the length of the lea instruction.  */
5910
5911int
5912ix86_decompose_address (rtx addr, struct ix86_address *out)
5913{
5914  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5915  rtx base_reg, index_reg;
5916  HOST_WIDE_INT scale = 1;
5917  rtx scale_rtx = NULL_RTX;
5918  int retval = 1;
5919  enum ix86_address_seg seg = SEG_DEFAULT;
5920
5921  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5922    base = addr;
5923  else if (GET_CODE (addr) == PLUS)
5924    {
5925      rtx addends[4], op;
5926      int n = 0, i;
5927
5928      op = addr;
5929      do
5930	{
5931	  if (n >= 4)
5932	    return 0;
5933	  addends[n++] = XEXP (op, 1);
5934	  op = XEXP (op, 0);
5935	}
5936      while (GET_CODE (op) == PLUS);
5937      if (n >= 4)
5938	return 0;
5939      addends[n] = op;
5940
5941      for (i = n; i >= 0; --i)
5942	{
5943	  op = addends[i];
5944	  switch (GET_CODE (op))
5945	    {
5946	    case MULT:
5947	      if (index)
5948		return 0;
5949	      index = XEXP (op, 0);
5950	      scale_rtx = XEXP (op, 1);
5951	      break;
5952
5953	    case UNSPEC:
5954	      if (XINT (op, 1) == UNSPEC_TP
5955	          && TARGET_TLS_DIRECT_SEG_REFS
5956	          && seg == SEG_DEFAULT)
5957		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5958	      else
5959		return 0;
5960	      break;
5961
5962	    case REG:
5963	    case SUBREG:
5964	      if (!base)
5965		base = op;
5966	      else if (!index)
5967		index = op;
5968	      else
5969		return 0;
5970	      break;
5971
5972	    case CONST:
5973	    case CONST_INT:
5974	    case SYMBOL_REF:
5975	    case LABEL_REF:
5976	      if (disp)
5977		return 0;
5978	      disp = op;
5979	      break;
5980
5981	    default:
5982	      return 0;
5983	    }
5984	}
5985    }
5986  else if (GET_CODE (addr) == MULT)
5987    {
5988      index = XEXP (addr, 0);		/* index*scale */
5989      scale_rtx = XEXP (addr, 1);
5990    }
5991  else if (GET_CODE (addr) == ASHIFT)
5992    {
5993      rtx tmp;
5994
5995      /* We're called for lea too, which implements ashift on occasion.  */
5996      index = XEXP (addr, 0);
5997      tmp = XEXP (addr, 1);
5998      if (GET_CODE (tmp) != CONST_INT)
5999	return 0;
6000      scale = INTVAL (tmp);
6001      if ((unsigned HOST_WIDE_INT) scale > 3)
6002	return 0;
6003      scale = 1 << scale;
6004      retval = -1;
6005    }
6006  else
6007    disp = addr;			/* displacement */
6008
6009  /* Extract the integral value of scale.  */
6010  if (scale_rtx)
6011    {
6012      if (GET_CODE (scale_rtx) != CONST_INT)
6013	return 0;
6014      scale = INTVAL (scale_rtx);
6015    }
6016
6017  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6018  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6019
6020  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
6021  if (base_reg && index_reg && scale == 1
6022      && (index_reg == arg_pointer_rtx
6023	  || index_reg == frame_pointer_rtx
6024	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6025    {
6026      rtx tmp;
6027      tmp = base, base = index, index = tmp;
6028      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6029    }
6030
6031  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
6032  if ((base_reg == hard_frame_pointer_rtx
6033       || base_reg == frame_pointer_rtx
6034       || base_reg == arg_pointer_rtx) && !disp)
6035    disp = const0_rtx;
6036
6037  /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
6038     Avoid this by transforming it to [%esi+0].  */
6039  if (ix86_tune == PROCESSOR_K6 && !optimize_size
6040      && base_reg && !index_reg && !disp
6041      && REG_P (base_reg)
6042      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6043    disp = const0_rtx;
6044
6045  /* Special case: encode reg+reg instead of reg*2.  */
6046  if (!base && index && scale && scale == 2)
6047    base = index, base_reg = index_reg, scale = 1;
6048
6049  /* Special case: scaling cannot be encoded without base or displacement.  */
6050  if (!base && !disp && index && scale != 1)
6051    disp = const0_rtx;
6052
6053  out->base = base;
6054  out->index = index;
6055  out->disp = disp;
6056  out->scale = scale;
6057  out->seg = seg;
6058
6059  return retval;
6060}
6061
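/* An example of the decomposition performed above: the address

     (plus:SI (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
		       (reg:SI %ebx))
	      (const_int 12))

   yields base = %ebx, index = %eax, scale = 4, disp = 12, seg = SEG_DEFAULT,
   i.e. the operand that assembles as 12(%ebx,%eax,4).  */
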
6062/* Return the cost of the memory address X.
6063   For i386, it is better to use a complex address than let gcc copy
6064   the address into a reg and make a new pseudo.  But not if the address
6065   requires two regs - that would mean more pseudos with longer
6066   lifetimes.  */
6067static int
6068ix86_address_cost (rtx x)
6069{
6070  struct ix86_address parts;
6071  int cost = 1;
6072  int ok = ix86_decompose_address (x, &parts);
6073
6074  gcc_assert (ok);
6075
6076  if (parts.base && GET_CODE (parts.base) == SUBREG)
6077    parts.base = SUBREG_REG (parts.base);
6078  if (parts.index && GET_CODE (parts.index) == SUBREG)
6079    parts.index = SUBREG_REG (parts.index);
6080
6081  /* More complex memory references are better.  */
6082  if (parts.disp && parts.disp != const0_rtx)
6083    cost--;
6084  if (parts.seg != SEG_DEFAULT)
6085    cost--;
6086
6087  /* Attempt to minimize number of registers in the address.  */
6088  if ((parts.base
6089       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6090      || (parts.index
6091	  && (!REG_P (parts.index)
6092	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6093    cost++;
6094
6095  if (parts.base
6096      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6097      && parts.index
6098      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6099      && parts.base != parts.index)
6100    cost++;
6101
6102  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6103     since its predecode logic can't detect the length of such instructions
6104     and decoding degenerates to the vector decoder.  Increase the cost of
6105     such addresses here.  The penalty is minimally 2 cycles.  It may be
6106     worthwhile to split such addresses or even to refuse them entirely.
6107
6108     The following addressing modes are affected:
6109      [base+scale*index]
6110      [scale*index+disp]
6111      [base+index]
6112
6113     The first and last cases may be avoidable by explicitly coding a zero
6114     displacement in the memory address, but I don't have an AMD-K6 machine
6115     handy to check this theory.  */
6116
6117  if (TARGET_K6
6118      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6119	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6120	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6121    cost += 10;
6122
6123  return cost;
6124}
6125
6126/* If X is a machine specific address (i.e. a symbol or label being
6127   referenced as a displacement from the GOT implemented using an
6128   UNSPEC), then return the base term.  Otherwise return X.  */
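/* Illustrative example, not part of the original sources: in 64-bit mode a
   RIP-relative GOT reference of the form
     (const (unspec [(symbol_ref ("foo"))] UNSPEC_GOTPCREL))
   yields the inner (symbol_ref ("foo")) as the base term.  */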
6129
6130rtx
6131ix86_find_base_term (rtx x)
6132{
6133  rtx term;
6134
6135  if (TARGET_64BIT)
6136    {
6137      if (GET_CODE (x) != CONST)
6138	return x;
6139      term = XEXP (x, 0);
6140      if (GET_CODE (term) == PLUS
6141	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
6142	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6143	term = XEXP (term, 0);
6144      if (GET_CODE (term) != UNSPEC
6145	  || XINT (term, 1) != UNSPEC_GOTPCREL)
6146	return x;
6147
6148      term = XVECEXP (term, 0, 0);
6149
6150      if (GET_CODE (term) != SYMBOL_REF
6151	  && GET_CODE (term) != LABEL_REF)
6152	return x;
6153
6154      return term;
6155    }
6156
6157  term = ix86_delegitimize_address (x);
6158
6159  if (GET_CODE (term) != SYMBOL_REF
6160      && GET_CODE (term) != LABEL_REF)
6161    return x;
6162
6163  return term;
6164}
6165
6166/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6167   this is used to form addresses to local data when -fPIC is in
6168   use.  */
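/* Illustrative example, not part of the original sources: a displacement of
   the form
     (minus (symbol_ref ("_foo")) (symbol_ref ("<pic base>")))
   is accepted, since the second operand names the Mach-O pic base.  */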
6169
6170static bool
6171darwin_local_data_pic (rtx disp)
6172{
6173  if (GET_CODE (disp) == MINUS)
6174    {
6175      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6176          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6177        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6178          {
6179            const char *sym_name = XSTR (XEXP (disp, 1), 0);
6180            if (! strcmp (sym_name, "<pic base>"))
6181              return true;
6182          }
6183    }
6184
6185  return false;
6186}
6187
6188/* Determine if a given RTX is a valid constant.  We already know this
6189   satisfies CONSTANT_P.  */
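/* Illustrative examples, not part of the original sources: a plain
   (symbol_ref ("foo")) with no TLS model is accepted, as is
   (const (plus (symbol_ref ("foo")) (const_int 4))); a SYMBOL_REF that
   carries a TLS model is rejected unless it is wrapped in one of the
   unspecs handled below.  */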
6190
6191bool
6192legitimate_constant_p (rtx x)
6193{
6194  switch (GET_CODE (x))
6195    {
6196    case CONST:
6197      x = XEXP (x, 0);
6198
6199      if (GET_CODE (x) == PLUS)
6200	{
6201	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6202	    return false;
6203	  x = XEXP (x, 0);
6204	}
6205
6206      if (TARGET_MACHO && darwin_local_data_pic (x))
6207	return true;
6208
6209      /* Only some unspecs are valid as "constants".  */
6210      if (GET_CODE (x) == UNSPEC)
6211	switch (XINT (x, 1))
6212	  {
6213	  case UNSPEC_GOTOFF:
6214	    return TARGET_64BIT;
6215	  case UNSPEC_TPOFF:
6216	  case UNSPEC_NTPOFF:
6217	    x = XVECEXP (x, 0, 0);
6218	    return (GET_CODE (x) == SYMBOL_REF
6219		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6220	  case UNSPEC_DTPOFF:
6221	    x = XVECEXP (x, 0, 0);
6222	    return (GET_CODE (x) == SYMBOL_REF
6223		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6224	  default:
6225	    return false;
6226	  }
6227
6228      /* We must have drilled down to a symbol.  */
6229      if (GET_CODE (x) == LABEL_REF)
6230	return true;
6231      if (GET_CODE (x) != SYMBOL_REF)
6232	return false;
6233      /* FALLTHRU */
6234
6235    case SYMBOL_REF:
6236      /* TLS symbols are never valid.  */
6237      if (SYMBOL_REF_TLS_MODEL (x))
6238	return false;
6239      break;
6240
6241    case CONST_DOUBLE:
6242      if (GET_MODE (x) == TImode
6243	  && x != CONST0_RTX (TImode)
6244          && !TARGET_64BIT)
6245	return false;
6246      break;
6247
6248    case CONST_VECTOR:
6249      if (x == CONST0_RTX (GET_MODE (x)))
6250	return true;
6251      return false;
6252
6253    default:
6254      break;
6255    }
6256
6257  /* Otherwise we handle everything else in the move patterns.  */
6258  return true;
6259}
6260
6261/* Determine if it's legal to put X into the constant pool.  This
6262   is not possible for the address of thread-local symbols, which
6263   is checked above.  */
6264
6265static bool
6266ix86_cannot_force_const_mem (rtx x)
6267{
6268  /* We can always put integral constants and vectors in memory.  */
6269  switch (GET_CODE (x))
6270    {
6271    case CONST_INT:
6272    case CONST_DOUBLE:
6273    case CONST_VECTOR:
6274      return false;
6275
6276    default:
6277      break;
6278    }
6279  return !legitimate_constant_p (x);
6280}
6281
6282/* Determine if a given RTX is a valid constant address.  */
6283
6284bool
6285constant_address_p (rtx x)
6286{
6287  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6288}
6289
6290/* Nonzero if the constant value X is a legitimate general operand
6291   when generating PIC code.  It is given that flag_pic is on and
6292   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
6293
6294bool
6295legitimate_pic_operand_p (rtx x)
6296{
6297  rtx inner;
6298
6299  switch (GET_CODE (x))
6300    {
6301    case CONST:
6302      inner = XEXP (x, 0);
6303      if (GET_CODE (inner) == PLUS
6304	  && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6305	inner = XEXP (inner, 0);
6306
6307      /* Only some unspecs are valid as "constants".  */
6308      if (GET_CODE (inner) == UNSPEC)
6309	switch (XINT (inner, 1))
6310	  {
6311	  case UNSPEC_GOTOFF:
6312	    return TARGET_64BIT;
6313	  case UNSPEC_TPOFF:
6314	    x = XVECEXP (inner, 0, 0);
6315	    return (GET_CODE (x) == SYMBOL_REF
6316		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6317	  default:
6318	    return false;
6319	  }
6320      /* FALLTHRU */
6321
6322    case SYMBOL_REF:
6323    case LABEL_REF:
6324      return legitimate_pic_address_disp_p (x);
6325
6326    default:
6327      return true;
6328    }
6329}
6330
6331/* Determine if a given CONST RTX is a valid memory displacement
6332   in PIC mode.  */
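/* Illustrative example, not part of the original sources: in 32-bit PIC
   code a displacement such as
     (const (unspec [(symbol_ref ("foo"))] UNSPEC_GOTOFF))
   is accepted below when "foo" is a local symbol, matching the foo@GOTOFF
   form used against the PIC register.  */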
6333
6334int
6335legitimate_pic_address_disp_p (rtx disp)
6336{
6337  bool saw_plus;
6338
6339  /* In 64bit mode we can allow direct addresses of symbols and labels
6340     when they are not dynamic symbols.  */
6341  if (TARGET_64BIT)
6342    {
6343      rtx op0 = disp, op1;
6344
6345      switch (GET_CODE (disp))
6346	{
6347	case LABEL_REF:
6348	  return true;
6349
6350	case CONST:
6351	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
6352	    break;
6353	  op0 = XEXP (XEXP (disp, 0), 0);
6354	  op1 = XEXP (XEXP (disp, 0), 1);
6355	  if (GET_CODE (op1) != CONST_INT
6356	      || INTVAL (op1) >= 16*1024*1024
6357	      || INTVAL (op1) < -16*1024*1024)
6358            break;
6359	  if (GET_CODE (op0) == LABEL_REF)
6360	    return true;
6361	  if (GET_CODE (op0) != SYMBOL_REF)
6362	    break;
6363	  /* FALLTHRU */
6364
6365	case SYMBOL_REF:
6366	  /* TLS references should always be enclosed in UNSPEC.  */
6367	  if (SYMBOL_REF_TLS_MODEL (op0))
6368	    return false;
6369	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6370	    return true;
6371	  break;
6372
6373	default:
6374	  break;
6375	}
6376    }
6377  if (GET_CODE (disp) != CONST)
6378    return 0;
6379  disp = XEXP (disp, 0);
6380
6381  if (TARGET_64BIT)
6382    {
6383      /* It is unsafe to allow PLUS expressions.  This limits the allowed
6384         distance of GOT references.  We should not need these anyway.  */
6385      if (GET_CODE (disp) != UNSPEC
6386	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
6387	      && XINT (disp, 1) != UNSPEC_GOTOFF))
6388	return 0;
6389
6390      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6391	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6392	return 0;
6393      return 1;
6394    }
6395
6396  saw_plus = false;
6397  if (GET_CODE (disp) == PLUS)
6398    {
6399      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6400	return 0;
6401      disp = XEXP (disp, 0);
6402      saw_plus = true;
6403    }
6404
6405  if (TARGET_MACHO && darwin_local_data_pic (disp))
6406    return 1;
6407
6408  if (GET_CODE (disp) != UNSPEC)
6409    return 0;
6410
6411  switch (XINT (disp, 1))
6412    {
6413    case UNSPEC_GOT:
6414      if (saw_plus)
6415	return false;
6416      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6417    case UNSPEC_GOTOFF:
6418      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6419	 While the ABI also specifies a 32bit relocation, we don't produce it in
6420	 the small PIC model at all.  */
6421      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6422	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6423	  && !TARGET_64BIT)
6424        return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6425      return false;
6426    case UNSPEC_GOTTPOFF:
6427    case UNSPEC_GOTNTPOFF:
6428    case UNSPEC_INDNTPOFF:
6429      if (saw_plus)
6430	return false;
6431      disp = XVECEXP (disp, 0, 0);
6432      return (GET_CODE (disp) == SYMBOL_REF
6433	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6434    case UNSPEC_NTPOFF:
6435      disp = XVECEXP (disp, 0, 0);
6436      return (GET_CODE (disp) == SYMBOL_REF
6437	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6438    case UNSPEC_DTPOFF:
6439      disp = XVECEXP (disp, 0, 0);
6440      return (GET_CODE (disp) == SYMBOL_REF
6441	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6442    }
6443
6444  return 0;
6445}
6446
6447/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6448   memory address for an instruction.  The MODE argument is the machine mode
6449   for the MEM expression that wants to use this address.
6450
6451   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
6452   convert common non-canonical forms to canonical form so that they will
6453   be recognized.  */
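/* Illustrative examples, not part of the original sources: an address like
   (plus (reg bx) (plus (mult (reg cx) (const_int 4)) (const_int 8))),
   i.e. 8(%ebx,%ecx,4), passes the checks below, while a scale factor of 3
   or an index register that is not in Pmode is rejected through the
   report_error path.  */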
6454
6455int
6456legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6457{
6458  struct ix86_address parts;
6459  rtx base, index, disp;
6460  HOST_WIDE_INT scale;
6461  const char *reason = NULL;
6462  rtx reason_rtx = NULL_RTX;
6463
6464  if (TARGET_DEBUG_ADDR)
6465    {
6466      fprintf (stderr,
6467	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6468	       GET_MODE_NAME (mode), strict);
6469      debug_rtx (addr);
6470    }
6471
6472  if (ix86_decompose_address (addr, &parts) <= 0)
6473    {
6474      reason = "decomposition failed";
6475      goto report_error;
6476    }
6477
6478  base = parts.base;
6479  index = parts.index;
6480  disp = parts.disp;
6481  scale = parts.scale;
6482
6483  /* Validate base register.
6484
6485     Don't allow SUBREG's that span more than a word here.  It can lead to spill
6486     failures when the base is one word out of a two word structure, which is
6487     represented internally as a DImode int.  */
6488
6489  if (base)
6490    {
6491      rtx reg;
6492      reason_rtx = base;
6493
6494      if (REG_P (base))
6495  	reg = base;
6496      else if (GET_CODE (base) == SUBREG
6497	       && REG_P (SUBREG_REG (base))
6498	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6499		  <= UNITS_PER_WORD)
6500  	reg = SUBREG_REG (base);
6501      else
6502	{
6503	  reason = "base is not a register";
6504	  goto report_error;
6505	}
6506
6507      if (GET_MODE (base) != Pmode)
6508	{
6509	  reason = "base is not in Pmode";
6510	  goto report_error;
6511	}
6512
6513      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6514	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6515	{
6516	  reason = "base is not valid";
6517	  goto report_error;
6518	}
6519    }
6520
6521  /* Validate index register.
6522
6523     Don't allow SUBREG's that span more than a word here -- same as above.  */
6524
6525  if (index)
6526    {
6527      rtx reg;
6528      reason_rtx = index;
6529
6530      if (REG_P (index))
6531  	reg = index;
6532      else if (GET_CODE (index) == SUBREG
6533	       && REG_P (SUBREG_REG (index))
6534	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6535		  <= UNITS_PER_WORD)
6536  	reg = SUBREG_REG (index);
6537      else
6538	{
6539	  reason = "index is not a register";
6540	  goto report_error;
6541	}
6542
6543      if (GET_MODE (index) != Pmode)
6544	{
6545	  reason = "index is not in Pmode";
6546	  goto report_error;
6547	}
6548
6549      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6550	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6551	{
6552	  reason = "index is not valid";
6553	  goto report_error;
6554	}
6555    }
6556
6557  /* Validate scale factor.  */
6558  if (scale != 1)
6559    {
6560      reason_rtx = GEN_INT (scale);
6561      if (!index)
6562	{
6563	  reason = "scale without index";
6564	  goto report_error;
6565	}
6566
6567      if (scale != 2 && scale != 4 && scale != 8)
6568	{
6569	  reason = "scale is not a valid multiplier";
6570	  goto report_error;
6571	}
6572    }
6573
6574  /* Validate displacement.  */
6575  if (disp)
6576    {
6577      reason_rtx = disp;
6578
6579      if (GET_CODE (disp) == CONST
6580	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6581	switch (XINT (XEXP (disp, 0), 1))
6582	  {
6583	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6584	     used.  While the ABI also specifies 32bit relocations, we don't produce
6585	     them at all and use IP-relative addressing instead.  */
6586	  case UNSPEC_GOT:
6587	  case UNSPEC_GOTOFF:
6588	    gcc_assert (flag_pic);
6589	    if (!TARGET_64BIT)
6590	      goto is_legitimate_pic;
6591	    reason = "64bit address unspec";
6592	    goto report_error;
6593
6594	  case UNSPEC_GOTPCREL:
6595	    gcc_assert (flag_pic);
6596	    goto is_legitimate_pic;
6597
6598	  case UNSPEC_GOTTPOFF:
6599	  case UNSPEC_GOTNTPOFF:
6600	  case UNSPEC_INDNTPOFF:
6601	  case UNSPEC_NTPOFF:
6602	  case UNSPEC_DTPOFF:
6603	    break;
6604
6605	  default:
6606	    reason = "invalid address unspec";
6607	    goto report_error;
6608	  }
6609
6610      else if (SYMBOLIC_CONST (disp)
6611	       && (flag_pic
6612		   || (TARGET_MACHO
6613#if TARGET_MACHO
6614		       && MACHOPIC_INDIRECT
6615		       && !machopic_operand_p (disp)
6616#endif
6617	       )))
6618	{
6619
6620	is_legitimate_pic:
6621	  if (TARGET_64BIT && (index || base))
6622	    {
6623	      /* foo@dtpoff(%rX) is ok.  */
6624	      if (GET_CODE (disp) != CONST
6625		  || GET_CODE (XEXP (disp, 0)) != PLUS
6626		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6627		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6628		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6629		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6630		{
6631		  reason = "non-constant pic memory reference";
6632		  goto report_error;
6633		}
6634	    }
6635	  else if (! legitimate_pic_address_disp_p (disp))
6636	    {
6637	      reason = "displacement is an invalid pic construct";
6638	      goto report_error;
6639	    }
6640
6641          /* This code used to verify that a symbolic pic displacement
6642	     includes the pic_offset_table_rtx register.
6643
6644	     While this is a good idea, unfortunately these constructs may
6645	     be created by "adds using lea" optimization for incorrect
6646	     code like:
6647
6648	     int a;
6649	     int foo(int i)
6650	       {
6651	         return *(&a+i);
6652	       }
6653
6654	     This code is nonsensical, but results in addressing the
6655	     GOT table with a pic_offset_table_rtx base.  We can't
6656	     just refuse it easily, since it gets matched by the
6657	     "addsi3" pattern, which later gets split to lea when the
6658	     output register differs from the input.  While this
6659	     could be handled by a separate addsi pattern for this case
6660	     that never results in lea, it seems easier (and a correct
6661	     fix for the crash) to disable this test.  */
6662	}
6663      else if (GET_CODE (disp) != LABEL_REF
6664	       && GET_CODE (disp) != CONST_INT
6665	       && (GET_CODE (disp) != CONST
6666		   || !legitimate_constant_p (disp))
6667	       && (GET_CODE (disp) != SYMBOL_REF
6668		   || !legitimate_constant_p (disp)))
6669	{
6670	  reason = "displacement is not constant";
6671	  goto report_error;
6672	}
6673      else if (TARGET_64BIT
6674	       && !x86_64_immediate_operand (disp, VOIDmode))
6675	{
6676	  reason = "displacement is out of range";
6677	  goto report_error;
6678	}
6679    }
6680
6681  /* Everything looks valid.  */
6682  if (TARGET_DEBUG_ADDR)
6683    fprintf (stderr, "Success.\n");
6684  return TRUE;
6685
6686 report_error:
6687  if (TARGET_DEBUG_ADDR)
6688    {
6689      fprintf (stderr, "Error: %s\n", reason);
6690      debug_rtx (reason_rtx);
6691    }
6692  return FALSE;
6693}
6694
6695/* Return a unique alias set for the GOT.  */
6696
6697static HOST_WIDE_INT
6698ix86_GOT_alias_set (void)
6699{
6700  static HOST_WIDE_INT set = -1;
6701  if (set == -1)
6702    set = new_alias_set ();
6703  return set;
6704}
6705
6706/* Return a legitimate reference for ORIG (an address) using the
6707   register REG.  If REG is 0, a new pseudo is generated.
6708
6709   There are two types of references that must be handled:
6710
6711   1. Global data references must load the address from the GOT, via
6712      the PIC reg.  An insn is emitted to do this load, and the reg is
6713      returned.
6714
6715   2. Static data references, constant pool addresses, and code labels
6716      compute the address as an offset from the GOT, whose base is in
6717      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
6718      differentiate them from global data objects.  The returned
6719      address is the PIC reg + an unspec constant.
6720
6721   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6722   reg also appears in the address.  */
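/* Illustrative example, not part of the original sources: in 32-bit PIC
   code a global symbol "foo" becomes a load from the GOT,
     (mem (plus pic_offset_table_rtx
                (const (unspec [(symbol_ref ("foo"))] UNSPEC_GOT)))),
   whereas a local symbol becomes pic_offset_table_rtx plus a @GOTOFF
   constant, with no memory load.  */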
6723
6724static rtx
6725legitimize_pic_address (rtx orig, rtx reg)
6726{
6727  rtx addr = orig;
6728  rtx new = orig;
6729  rtx base;
6730
6731#if TARGET_MACHO
6732  if (TARGET_MACHO && !TARGET_64BIT)
6733    {
6734      if (reg == 0)
6735	reg = gen_reg_rtx (Pmode);
6736      /* Use the generic Mach-O PIC machinery.  */
6737      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6738    }
6739#endif
6740
6741  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6742    new = addr;
6743  else if (TARGET_64BIT
6744	   && ix86_cmodel != CM_SMALL_PIC
6745	   && local_symbolic_operand (addr, Pmode))
6746    {
6747      rtx tmpreg;
6748      /* This symbol may be referenced via a displacement from the PIC
6749	 base address (@GOTOFF).  */
6750
6751      if (reload_in_progress)
6752	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6753      if (GET_CODE (addr) == CONST)
6754	addr = XEXP (addr, 0);
6755      if (GET_CODE (addr) == PLUS)
6756	  {
6757            new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6758	    new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6759	  }
6760	else
6761          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6762      new = gen_rtx_CONST (Pmode, new);
6763      if (!reg)
6764        tmpreg = gen_reg_rtx (Pmode);
6765      else
6766	tmpreg = reg;
6767      emit_move_insn (tmpreg, new);
6768
6769      if (reg != 0)
6770	{
6771	  new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6772				     tmpreg, 1, OPTAB_DIRECT);
6773	  new = reg;
6774	}
6775      else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6776    }
6777  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6778    {
6779      /* This symbol may be referenced via a displacement from the PIC
6780	 base address (@GOTOFF).  */
6781
6782      if (reload_in_progress)
6783	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6784      if (GET_CODE (addr) == CONST)
6785	addr = XEXP (addr, 0);
6786      if (GET_CODE (addr) == PLUS)
6787	  {
6788            new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6789	    new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6790	  }
6791	else
6792          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6793      new = gen_rtx_CONST (Pmode, new);
6794      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6795
6796      if (reg != 0)
6797	{
6798	  emit_move_insn (reg, new);
6799	  new = reg;
6800	}
6801    }
6802  else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6803    {
6804      if (TARGET_64BIT)
6805	{
6806	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6807	  new = gen_rtx_CONST (Pmode, new);
6808	  new = gen_const_mem (Pmode, new);
6809	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6810
6811	  if (reg == 0)
6812	    reg = gen_reg_rtx (Pmode);
6813	  /* Use gen_movsi directly, otherwise the address is loaded
6814	     into a register for CSE.  We don't want to CSE these addresses;
6815	     instead we CSE addresses from the GOT table, so skip this.  */
6816	  emit_insn (gen_movsi (reg, new));
6817	  new = reg;
6818	}
6819      else
6820	{
6821	  /* This symbol must be referenced via a load from the
6822	     Global Offset Table (@GOT).  */
6823
6824	  if (reload_in_progress)
6825	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6826	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6827	  new = gen_rtx_CONST (Pmode, new);
6828	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6829	  new = gen_const_mem (Pmode, new);
6830	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6831
6832	  if (reg == 0)
6833	    reg = gen_reg_rtx (Pmode);
6834	  emit_move_insn (reg, new);
6835	  new = reg;
6836	}
6837    }
6838  else
6839    {
6840      if (GET_CODE (addr) == CONST_INT
6841	  && !x86_64_immediate_operand (addr, VOIDmode))
6842	{
6843	  if (reg)
6844	    {
6845	      emit_move_insn (reg, addr);
6846	      new = reg;
6847	    }
6848	  else
6849	    new = force_reg (Pmode, addr);
6850	}
6851      else if (GET_CODE (addr) == CONST)
6852	{
6853	  addr = XEXP (addr, 0);
6854
6855	  /* We must match stuff we generate before.  Assume the only
6856	     unspecs that can get here are ours.  Not that we could do
6857	     anything with them anyway....  */
6858	  if (GET_CODE (addr) == UNSPEC
6859	      || (GET_CODE (addr) == PLUS
6860		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6861	    return orig;
6862	  gcc_assert (GET_CODE (addr) == PLUS);
6863	}
6864      if (GET_CODE (addr) == PLUS)
6865	{
6866	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6867
6868	  /* Check first to see if this is a constant offset from a @GOTOFF
6869	     symbol reference.  */
6870	  if (local_symbolic_operand (op0, Pmode)
6871	      && GET_CODE (op1) == CONST_INT)
6872	    {
6873	      if (!TARGET_64BIT)
6874		{
6875		  if (reload_in_progress)
6876		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6877		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6878					UNSPEC_GOTOFF);
6879		  new = gen_rtx_PLUS (Pmode, new, op1);
6880		  new = gen_rtx_CONST (Pmode, new);
6881		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6882
6883		  if (reg != 0)
6884		    {
6885		      emit_move_insn (reg, new);
6886		      new = reg;
6887		    }
6888		}
6889	      else
6890		{
6891		  if (INTVAL (op1) < -16*1024*1024
6892		      || INTVAL (op1) >= 16*1024*1024)
6893		    {
6894		      if (!x86_64_immediate_operand (op1, Pmode))
6895			op1 = force_reg (Pmode, op1);
6896		      new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6897		    }
6898		}
6899	    }
6900	  else
6901	    {
6902	      base = legitimize_pic_address (XEXP (addr, 0), reg);
6903	      new  = legitimize_pic_address (XEXP (addr, 1),
6904					     base == reg ? NULL_RTX : reg);
6905
6906	      if (GET_CODE (new) == CONST_INT)
6907		new = plus_constant (base, INTVAL (new));
6908	      else
6909		{
6910		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6911		    {
6912		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6913		      new = XEXP (new, 1);
6914		    }
6915		  new = gen_rtx_PLUS (Pmode, base, new);
6916		}
6917	    }
6918	}
6919    }
6920  return new;
6921}
6922
6923/* Load the thread pointer.  If TO_REG is true, force it into a register.  */
6924
6925static rtx
6926get_thread_pointer (int to_reg)
6927{
6928  rtx tp, reg, insn;
6929
6930  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6931  if (!to_reg)
6932    return tp;
6933
6934  reg = gen_reg_rtx (Pmode);
6935  insn = gen_rtx_SET (VOIDmode, reg, tp);
6936  insn = emit_insn (insn);
6937
6938  return reg;
6939}
6940
6941/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
6942   false if we expect this to be used for a memory address and true if
6943   we expect to load the address into a register.  */
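/* Illustrative sketch, not part of the original sources: for the local-exec
   model with GNU TLS the address of a variable "x" becomes the thread
   pointer plus
     (const (unspec [(symbol_ref ("x"))] UNSPEC_NTPOFF)),
   which the assembler output renders as x@NTPOFF (or x@TPOFF on 64-bit
   targets).  */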
6944
6945static rtx
6946legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6947{
6948  rtx dest, base, off, pic, tp;
6949  int type;
6950
6951  switch (model)
6952    {
6953    case TLS_MODEL_GLOBAL_DYNAMIC:
6954      dest = gen_reg_rtx (Pmode);
6955      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6956
6957      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6958	{
6959	  rtx rax = gen_rtx_REG (Pmode, 0), insns;
6960
6961	  start_sequence ();
6962	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6963	  insns = get_insns ();
6964	  end_sequence ();
6965
6966	  emit_libcall_block (insns, dest, rax, x);
6967	}
6968      else if (TARGET_64BIT && TARGET_GNU2_TLS)
6969	emit_insn (gen_tls_global_dynamic_64 (dest, x));
6970      else
6971	emit_insn (gen_tls_global_dynamic_32 (dest, x));
6972
6973      if (TARGET_GNU2_TLS)
6974	{
6975	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6976
6977	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6978	}
6979      break;
6980
6981    case TLS_MODEL_LOCAL_DYNAMIC:
6982      base = gen_reg_rtx (Pmode);
6983      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6984
6985      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6986	{
6987	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6988
6989	  start_sequence ();
6990	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6991	  insns = get_insns ();
6992	  end_sequence ();
6993
6994	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6995	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6996	  emit_libcall_block (insns, base, rax, note);
6997	}
6998      else if (TARGET_64BIT && TARGET_GNU2_TLS)
6999	emit_insn (gen_tls_local_dynamic_base_64 (base));
7000      else
7001	emit_insn (gen_tls_local_dynamic_base_32 (base));
7002
7003      if (TARGET_GNU2_TLS)
7004	{
7005	  rtx x = ix86_tls_module_base ();
7006
7007	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
7008			       gen_rtx_MINUS (Pmode, x, tp));
7009	}
7010
7011      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7012      off = gen_rtx_CONST (Pmode, off);
7013
7014      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7015
7016      if (TARGET_GNU2_TLS)
7017	{
7018	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7019
7020	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7021	}
7022
7023      break;
7024
7025    case TLS_MODEL_INITIAL_EXEC:
7026      if (TARGET_64BIT)
7027	{
7028	  pic = NULL;
7029	  type = UNSPEC_GOTNTPOFF;
7030	}
7031      else if (flag_pic)
7032	{
7033	  if (reload_in_progress)
7034	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7035	  pic = pic_offset_table_rtx;
7036	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7037	}
7038      else if (!TARGET_ANY_GNU_TLS)
7039	{
7040	  pic = gen_reg_rtx (Pmode);
7041	  emit_insn (gen_set_got (pic));
7042	  type = UNSPEC_GOTTPOFF;
7043	}
7044      else
7045	{
7046	  pic = NULL;
7047	  type = UNSPEC_INDNTPOFF;
7048	}
7049
7050      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7051      off = gen_rtx_CONST (Pmode, off);
7052      if (pic)
7053	off = gen_rtx_PLUS (Pmode, pic, off);
7054      off = gen_const_mem (Pmode, off);
7055      set_mem_alias_set (off, ix86_GOT_alias_set ());
7056
7057      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7058	{
7059          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7060	  off = force_reg (Pmode, off);
7061	  return gen_rtx_PLUS (Pmode, base, off);
7062	}
7063      else
7064	{
7065	  base = get_thread_pointer (true);
7066	  dest = gen_reg_rtx (Pmode);
7067	  emit_insn (gen_subsi3 (dest, base, off));
7068	}
7069      break;
7070
7071    case TLS_MODEL_LOCAL_EXEC:
7072      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7073			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7074			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7075      off = gen_rtx_CONST (Pmode, off);
7076
7077      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7078	{
7079	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7080	  return gen_rtx_PLUS (Pmode, base, off);
7081	}
7082      else
7083	{
7084	  base = get_thread_pointer (true);
7085	  dest = gen_reg_rtx (Pmode);
7086	  emit_insn (gen_subsi3 (dest, base, off));
7087	}
7088      break;
7089
7090    default:
7091      gcc_unreachable ();
7092    }
7093
7094  return dest;
7095}
7096
7097/* Try machine-dependent ways of modifying an illegitimate address
7098   to be legitimate.  If we find one, return the new, valid address.
7099   This macro is used in only one place: `memory_address' in explow.c.
7100
7101   OLDX is the address as it was before break_out_memory_refs was called.
7102   In some cases it is useful to look at this to decide what needs to be done.
7103
7104   MODE and WIN are passed so that this macro can use
7105   GO_IF_LEGITIMATE_ADDRESS.
7106
7107   It is always safe for this macro to do nothing.  It exists to recognize
7108   opportunities to optimize the output.
7109
7110   For the 80386, we handle X+REG by loading X into a register R and
7111   using R+REG.  R will go in a general reg and indexing will be used.
7112   However, if REG is a broken-out memory address or multiplication,
7113   nothing needs to be done because REG can certainly go in a general reg.
7114
7115   When -fpic is used, special handling is needed for symbolic references.
7116   See comments by legitimize_pic_address in i386.c for details.  */
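/* Illustrative example, not part of the original sources: an address such
   as (plus (ashift (reg) (const_int 2)) (reg)) is canonicalized below into
   (plus (mult (reg) (const_int 4)) (reg)), the scaled-index form that
   GO_IF_LEGITIMATE_ADDRESS recognizes.  */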
7117
7118rtx
7119legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7120{
7121  int changed = 0;
7122  unsigned log;
7123
7124  if (TARGET_DEBUG_ADDR)
7125    {
7126      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7127	       GET_MODE_NAME (mode));
7128      debug_rtx (x);
7129    }
7130
7131  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7132  if (log)
7133    return legitimize_tls_address (x, log, false);
7134  if (GET_CODE (x) == CONST
7135      && GET_CODE (XEXP (x, 0)) == PLUS
7136      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7137      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7138    {
7139      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7140      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7141    }
7142
7143  if (flag_pic && SYMBOLIC_CONST (x))
7144    return legitimize_pic_address (x, 0);
7145
7146  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7147  if (GET_CODE (x) == ASHIFT
7148      && GET_CODE (XEXP (x, 1)) == CONST_INT
7149      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7150    {
7151      changed = 1;
7152      log = INTVAL (XEXP (x, 1));
7153      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7154			GEN_INT (1 << log));
7155    }
7156
7157  if (GET_CODE (x) == PLUS)
7158    {
7159      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7160
7161      if (GET_CODE (XEXP (x, 0)) == ASHIFT
7162	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7163	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7164	{
7165	  changed = 1;
7166	  log = INTVAL (XEXP (XEXP (x, 0), 1));
7167	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
7168				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7169				      GEN_INT (1 << log));
7170	}
7171
7172      if (GET_CODE (XEXP (x, 1)) == ASHIFT
7173	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7174	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7175	{
7176	  changed = 1;
7177	  log = INTVAL (XEXP (XEXP (x, 1), 1));
7178	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
7179				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7180				      GEN_INT (1 << log));
7181	}
7182
7183      /* Put multiply first if it isn't already.  */
7184      if (GET_CODE (XEXP (x, 1)) == MULT)
7185	{
7186	  rtx tmp = XEXP (x, 0);
7187	  XEXP (x, 0) = XEXP (x, 1);
7188	  XEXP (x, 1) = tmp;
7189	  changed = 1;
7190	}
7191
7192      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7193	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
7194	 created by virtual register instantiation, register elimination, and
7195	 similar optimizations.  */
7196      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7197	{
7198	  changed = 1;
7199	  x = gen_rtx_PLUS (Pmode,
7200			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
7201					  XEXP (XEXP (x, 1), 0)),
7202			    XEXP (XEXP (x, 1), 1));
7203	}
7204
7205      /* Canonicalize
7206	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7207	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
7208      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7209	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7210	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7211	       && CONSTANT_P (XEXP (x, 1)))
7212	{
7213	  rtx constant;
7214	  rtx other = NULL_RTX;
7215
7216	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7217	    {
7218	      constant = XEXP (x, 1);
7219	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7220	    }
7221	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7222	    {
7223	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7224	      other = XEXP (x, 1);
7225	    }
7226	  else
7227	    constant = 0;
7228
7229	  if (constant)
7230	    {
7231	      changed = 1;
7232	      x = gen_rtx_PLUS (Pmode,
7233				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7234					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
7235				plus_constant (other, INTVAL (constant)));
7236	    }
7237	}
7238
7239      if (changed && legitimate_address_p (mode, x, FALSE))
7240	return x;
7241
7242      if (GET_CODE (XEXP (x, 0)) == MULT)
7243	{
7244	  changed = 1;
7245	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7246	}
7247
7248      if (GET_CODE (XEXP (x, 1)) == MULT)
7249	{
7250	  changed = 1;
7251	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7252	}
7253
7254      if (changed
7255	  && GET_CODE (XEXP (x, 1)) == REG
7256	  && GET_CODE (XEXP (x, 0)) == REG)
7257	return x;
7258
7259      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7260	{
7261	  changed = 1;
7262	  x = legitimize_pic_address (x, 0);
7263	}
7264
7265      if (changed && legitimate_address_p (mode, x, FALSE))
7266	return x;
7267
7268      if (GET_CODE (XEXP (x, 0)) == REG)
7269	{
7270	  rtx temp = gen_reg_rtx (Pmode);
7271	  rtx val  = force_operand (XEXP (x, 1), temp);
7272	  if (val != temp)
7273	    emit_move_insn (temp, val);
7274
7275	  XEXP (x, 1) = temp;
7276	  return x;
7277	}
7278
7279      else if (GET_CODE (XEXP (x, 1)) == REG)
7280	{
7281	  rtx temp = gen_reg_rtx (Pmode);
7282	  rtx val  = force_operand (XEXP (x, 0), temp);
7283	  if (val != temp)
7284	    emit_move_insn (temp, val);
7285
7286	  XEXP (x, 0) = temp;
7287	  return x;
7288	}
7289    }
7290
7291  return x;
7292}
7293
7294/* Print an integer constant expression in assembler syntax.  Addition
7295   and subtraction are the only arithmetic that may appear in these
7296   expressions.  FILE is the stdio stream to write to, X is the rtx, and
7297   CODE is the operand print code from the output string.  */
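/* Illustrative example, not part of the original sources: given
   (const (unspec [(symbol_ref ("foo"))] UNSPEC_GOTOFF)) this routine emits
   "foo@GOTOFF", and a non-local SYMBOL_REF printed with operand code 'P'
   gets an "@PLT" suffix appended.  */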
7298
7299static void
7300output_pic_addr_const (FILE *file, rtx x, int code)
7301{
7302  char buf[256];
7303
7304  switch (GET_CODE (x))
7305    {
7306    case PC:
7307      gcc_assert (flag_pic);
7308      putc ('.', file);
7309      break;
7310
7311    case SYMBOL_REF:
7312      if (! TARGET_MACHO || TARGET_64BIT)
7313	output_addr_const (file, x);
7314      else
7315	{
7316	  const char *name = XSTR (x, 0);
7317
7318	  /* Mark the decl as referenced so that cgraph will output the function.  */
7319	  if (SYMBOL_REF_DECL (x))
7320	    mark_decl_referenced (SYMBOL_REF_DECL (x));
7321
7322#if TARGET_MACHO
7323	  if (MACHOPIC_INDIRECT
7324	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7325	    name = machopic_indirection_name (x, /*stub_p=*/true);
7326#endif
7327	  assemble_name (file, name);
7328	}
7329      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7330	fputs ("@PLT", file);
7331      break;
7332
7333    case LABEL_REF:
7334      x = XEXP (x, 0);
7335      /* FALLTHRU */
7336    case CODE_LABEL:
7337      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7338      assemble_name (asm_out_file, buf);
7339      break;
7340
7341    case CONST_INT:
7342      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7343      break;
7344
7345    case CONST:
7346      /* This used to output parentheses around the expression,
7347	 but that does not work on the 386 (either ATT or BSD assembler).  */
7348      output_pic_addr_const (file, XEXP (x, 0), code);
7349      break;
7350
7351    case CONST_DOUBLE:
7352      if (GET_MODE (x) == VOIDmode)
7353	{
7354	  /* We can use %d if the number is <32 bits and positive.  */
7355	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7356	    fprintf (file, "0x%lx%08lx",
7357		     (unsigned long) CONST_DOUBLE_HIGH (x),
7358		     (unsigned long) CONST_DOUBLE_LOW (x));
7359	  else
7360	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7361	}
7362      else
7363	/* We can't handle floating point constants;
7364	   PRINT_OPERAND must handle them.  */
7365	output_operand_lossage ("floating constant misused");
7366      break;
7367
7368    case PLUS:
7369      /* Some assemblers need integer constants to appear first.  */
7370      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7371	{
7372	  output_pic_addr_const (file, XEXP (x, 0), code);
7373	  putc ('+', file);
7374	  output_pic_addr_const (file, XEXP (x, 1), code);
7375	}
7376      else
7377	{
7378	  gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7379	  output_pic_addr_const (file, XEXP (x, 1), code);
7380	  putc ('+', file);
7381	  output_pic_addr_const (file, XEXP (x, 0), code);
7382	}
7383      break;
7384
7385    case MINUS:
7386      if (!TARGET_MACHO)
7387	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7388      output_pic_addr_const (file, XEXP (x, 0), code);
7389      putc ('-', file);
7390      output_pic_addr_const (file, XEXP (x, 1), code);
7391      if (!TARGET_MACHO)
7392	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7393      break;
7394
7395     case UNSPEC:
7396       gcc_assert (XVECLEN (x, 0) == 1);
7397       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7398       switch (XINT (x, 1))
7399	{
7400	case UNSPEC_GOT:
7401	  fputs ("@GOT", file);
7402	  break;
7403	case UNSPEC_GOTOFF:
7404	  fputs ("@GOTOFF", file);
7405	  break;
7406	case UNSPEC_GOTPCREL:
7407	  fputs ("@GOTPCREL(%rip)", file);
7408	  break;
7409	case UNSPEC_GOTTPOFF:
7410	  /* FIXME: This might be @TPOFF in Sun ld too.  */
7411	  fputs ("@GOTTPOFF", file);
7412	  break;
7413	case UNSPEC_TPOFF:
7414	  fputs ("@TPOFF", file);
7415	  break;
7416	case UNSPEC_NTPOFF:
7417	  if (TARGET_64BIT)
7418	    fputs ("@TPOFF", file);
7419	  else
7420	    fputs ("@NTPOFF", file);
7421	  break;
7422	case UNSPEC_DTPOFF:
7423	  fputs ("@DTPOFF", file);
7424	  break;
7425	case UNSPEC_GOTNTPOFF:
7426	  if (TARGET_64BIT)
7427	    fputs ("@GOTTPOFF(%rip)", file);
7428	  else
7429	    fputs ("@GOTNTPOFF", file);
7430	  break;
7431	case UNSPEC_INDNTPOFF:
7432	  fputs ("@INDNTPOFF", file);
7433	  break;
7434	default:
7435	  output_operand_lossage ("invalid UNSPEC as operand");
7436	  break;
7437	}
7438       break;
7439
7440    default:
7441      output_operand_lossage ("invalid expression as operand");
7442    }
7443}
7444
7445/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7446   We need to emit DTP-relative relocations.  */
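/* Illustrative output, not part of the original sources: for a 4-byte
   request against symbol "x" this emits roughly ".long x@DTPOFF"; the
   8-byte case appends ", 0" for the upper half.  */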
7447
7448static void
7449i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7450{
7451  fputs (ASM_LONG, file);
7452  output_addr_const (file, x);
7453  fputs ("@DTPOFF", file);
7454  switch (size)
7455    {
7456    case 4:
7457      break;
7458    case 8:
7459      fputs (", 0", file);
7460      break;
7461    default:
7462      gcc_unreachable ();
7463   }
7464}
7465
7466/* In the name of slightly smaller debug output, and to cater to
7467   general assembler lossage, recognize PIC+GOTOFF and turn it back
7468   into a direct symbol reference.
7469
7470   On Darwin, this is necessary to avoid a crash, because Darwin
7471   has a different PIC label for each routine but the DWARF debugging
7472   information is not associated with any particular routine, so it's
7473   necessary to remove references to the PIC label from RTL stored by
7474   the DWARF output code.  */
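/* Illustrative example, not part of the original sources: a 32-bit address
   of the form
     (plus (reg) (const (unspec [(symbol_ref ("foo"))] UNSPEC_GOTOFF))),
   where the register is the PIC register and the whole expression is not
   itself a MEM operand, is turned back into (symbol_ref ("foo")).  */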
7475
7476static rtx
7477ix86_delegitimize_address (rtx orig_x)
7478{
7479  rtx x = orig_x;
7480  /* reg_addend is NULL or a multiple of some register.  */
7481  rtx reg_addend = NULL_RTX;
7482  /* const_addend is NULL or a const_int.  */
7483  rtx const_addend = NULL_RTX;
7484  /* This is the result, or NULL.  */
7485  rtx result = NULL_RTX;
7486
7487  if (GET_CODE (x) == MEM)
7488    x = XEXP (x, 0);
7489
7490  if (TARGET_64BIT)
7491    {
7492      if (GET_CODE (x) != CONST
7493	  || GET_CODE (XEXP (x, 0)) != UNSPEC
7494	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7495	  || GET_CODE (orig_x) != MEM)
7496	return orig_x;
7497      return XVECEXP (XEXP (x, 0), 0, 0);
7498    }
7499
7500  if (GET_CODE (x) != PLUS
7501      || GET_CODE (XEXP (x, 1)) != CONST)
7502    return orig_x;
7503
7504  if (GET_CODE (XEXP (x, 0)) == REG
7505      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7506    /* %ebx + GOT/GOTOFF */
7507    ;
7508  else if (GET_CODE (XEXP (x, 0)) == PLUS)
7509    {
7510      /* %ebx + %reg * scale + GOT/GOTOFF */
7511      reg_addend = XEXP (x, 0);
7512      if (GET_CODE (XEXP (reg_addend, 0)) == REG
7513	  && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7514	reg_addend = XEXP (reg_addend, 1);
7515      else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7516	       && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7517	reg_addend = XEXP (reg_addend, 0);
7518      else
7519	return orig_x;
7520      if (GET_CODE (reg_addend) != REG
7521	  && GET_CODE (reg_addend) != MULT
7522	  && GET_CODE (reg_addend) != ASHIFT)
7523	return orig_x;
7524    }
7525  else
7526    return orig_x;
7527
7528  x = XEXP (XEXP (x, 1), 0);
7529  if (GET_CODE (x) == PLUS
7530      && GET_CODE (XEXP (x, 1)) == CONST_INT)
7531    {
7532      const_addend = XEXP (x, 1);
7533      x = XEXP (x, 0);
7534    }
7535
7536  if (GET_CODE (x) == UNSPEC
7537      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7538	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7539    result = XVECEXP (x, 0, 0);
7540
7541  if (TARGET_MACHO && darwin_local_data_pic (x)
7542      && GET_CODE (orig_x) != MEM)
7543    result = XEXP (x, 0);
7544
7545  if (! result)
7546    return orig_x;
7547
7548  if (const_addend)
7549    result = gen_rtx_PLUS (Pmode, result, const_addend);
7550  if (reg_addend)
7551    result = gen_rtx_PLUS (Pmode, reg_addend, result);
7552  return result;
7553}
7554
7555static void
7556put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7557		    int fp, FILE *file)
7558{
7559  const char *suffix;
7560
7561  if (mode == CCFPmode || mode == CCFPUmode)
7562    {
7563      enum rtx_code second_code, bypass_code;
7564      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7565      gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7566      code = ix86_fp_compare_code_to_integer (code);
7567      mode = CCmode;
7568    }
7569  if (reverse)
7570    code = reverse_condition (code);
7571
7572  switch (code)
7573    {
7574    case EQ:
7575      suffix = "e";
7576      break;
7577    case NE:
7578      suffix = "ne";
7579      break;
7580    case GT:
7581      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7582      suffix = "g";
7583      break;
7584    case GTU:
7585      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7586	 Those same assemblers have the same but opposite lossage on cmov.  */
7587      gcc_assert (mode == CCmode);
7588      suffix = fp ? "nbe" : "a";
7589      break;
7590    case LT:
7591      switch (mode)
7592	{
7593	case CCNOmode:
7594	case CCGOCmode:
7595	  suffix = "s";
7596	  break;
7597
7598	case CCmode:
7599	case CCGCmode:
7600	  suffix = "l";
7601	  break;
7602
7603	default:
7604	  gcc_unreachable ();
7605	}
7606      break;
7607    case LTU:
7608      gcc_assert (mode == CCmode);
7609      suffix = "b";
7610      break;
7611    case GE:
7612      switch (mode)
7613	{
7614	case CCNOmode:
7615	case CCGOCmode:
7616	  suffix = "ns";
7617	  break;
7618
7619	case CCmode:
7620	case CCGCmode:
7621	  suffix = "ge";
7622	  break;
7623
7624	default:
7625	  gcc_unreachable ();
7626	}
7627      break;
7628    case GEU:
7629      /* ??? As above.  */
7630      gcc_assert (mode == CCmode);
7631      suffix = fp ? "nb" : "ae";
7632      break;
7633    case LE:
7634      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7635      suffix = "le";
7636      break;
7637    case LEU:
7638      gcc_assert (mode == CCmode);
7639      suffix = "be";
7640      break;
7641    case UNORDERED:
7642      suffix = fp ? "u" : "p";
7643      break;
7644    case ORDERED:
7645      suffix = fp ? "nu" : "np";
7646      break;
7647    default:
7648      gcc_unreachable ();
7649    }
7650  fputs (suffix, file);
7651}
7652
7653/* Print the name of register X to FILE based on its machine mode and number.
7654   If CODE is 'w', pretend the mode is HImode.
7655   If CODE is 'b', pretend the mode is QImode.
7656   If CODE is 'k', pretend the mode is SImode.
7657   If CODE is 'q', pretend the mode is DImode.
7658   If CODE is 'h', pretend the reg is the 'high' byte register.
7659   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
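/* Illustrative example, not part of the original sources: for the register
   operand %eax, code 'w' prints "ax", 'b' prints "al", 'h' prints "ah",
   'k' prints "eax" and, on 64-bit targets, 'q' prints "rax", each with a
   '%' prefix in AT&T syntax.  */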
7660
7661void
7662print_reg (rtx x, int code, FILE *file)
7663{
7664  gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7665	      && REGNO (x) != FRAME_POINTER_REGNUM
7666	      && REGNO (x) != FLAGS_REG
7667	      && REGNO (x) != FPSR_REG);
7668
7669  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7670    putc ('%', file);
7671
7672  if (code == 'w' || MMX_REG_P (x))
7673    code = 2;
7674  else if (code == 'b')
7675    code = 1;
7676  else if (code == 'k')
7677    code = 4;
7678  else if (code == 'q')
7679    code = 8;
7680  else if (code == 'y')
7681    code = 3;
7682  else if (code == 'h')
7683    code = 0;
7684  else
7685    code = GET_MODE_SIZE (GET_MODE (x));
7686
7687  /* Irritatingly, AMD extended registers use a different naming convention
7688     from the normal registers.  */
7689  if (REX_INT_REG_P (x))
7690    {
7691      gcc_assert (TARGET_64BIT);
7692      switch (code)
7693	{
7694	  case 0:
7695	    error ("extended registers have no high halves");
7696	    break;
7697	  case 1:
7698	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7699	    break;
7700	  case 2:
7701	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7702	    break;
7703	  case 4:
7704	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7705	    break;
7706	  case 8:
7707	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7708	    break;
7709	  default:
7710	    error ("unsupported operand size for extended register");
7711	    break;
7712	}
7713      return;
7714    }
7715  switch (code)
7716    {
7717    case 3:
7718      if (STACK_TOP_P (x))
7719	{
7720	  fputs ("st(0)", file);
7721	  break;
7722	}
7723      /* FALLTHRU */
7724    case 8:
7725    case 4:
7726    case 12:
7727      if (! ANY_FP_REG_P (x))
7728	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7729      /* FALLTHRU */
7730    case 16:
7731    case 2:
7732    normal:
7733      fputs (hi_reg_name[REGNO (x)], file);
7734      break;
7735    case 1:
7736      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7737	goto normal;
7738      fputs (qi_reg_name[REGNO (x)], file);
7739      break;
7740    case 0:
7741      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7742	goto normal;
7743      fputs (qi_high_reg_name[REGNO (x)], file);
7744      break;
7745    default:
7746      gcc_unreachable ();
7747    }
7748}
7749
7750/* Locate some local-dynamic symbol still in use by this function
7751   so that we can print its name in some tls_local_dynamic_base
7752   pattern.  */
7753
7754static const char *
7755get_some_local_dynamic_name (void)
7756{
7757  rtx insn;
7758
7759  if (cfun->machine->some_ld_name)
7760    return cfun->machine->some_ld_name;
7761
7762  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7763    if (INSN_P (insn)
7764	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7765      return cfun->machine->some_ld_name;
7766
7767  gcc_unreachable ();
7768}
7769
7770static int
7771get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7772{
7773  rtx x = *px;
7774
7775  if (GET_CODE (x) == SYMBOL_REF
7776      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7777    {
7778      cfun->machine->some_ld_name = XSTR (x, 0);
7779      return 1;
7780    }
7781
7782  return 0;
7783}
7784
7785/* Meaning of CODE:
7786   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7787   C -- print opcode suffix for set/cmov insn.
7788   c -- like C, but print reversed condition
7789   F,f -- likewise, but for floating-point.
7790   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7791        otherwise nothing
7792   R -- print the prefix for register names.
7793   z -- print the opcode suffix for the size of the current operand.
7794   * -- print a star (in certain assembler syntax)
7795   A -- print an absolute memory reference.
7796   w -- print the operand as if it's a "word" (HImode) even if it isn't.
7797   s -- print a shift double count, followed by the assembler's argument
7798	delimiter.
7799   b -- print the QImode name of the register for the indicated operand.
7800	%b0 would print %al if operands[0] is reg 0.
7801   w --  likewise, print the HImode name of the register.
7802   k --  likewise, print the SImode name of the register.
7803   q --  likewise, print the DImode name of the register.
7804   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7805   y -- print "st(0)" instead of "st" as a register.
7806   D -- print condition for SSE cmp instruction.
7807   P -- if PIC, print an @PLT suffix.
7808   X -- don't print any sort of PIC '@' suffix for a symbol.
7809   & -- print some in-use local-dynamic symbol name.
7810   H -- print a memory address offset by 8; used for sse high-parts
7811 */
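/* Illustrative example, not part of the original sources: with operand 0
   being the register %eax, the template fragments "%b0", "%w0" and "%k0"
   print "%al", "%ax" and "%eax" respectively, while "%z0" on an SImode
   memory operand yields the "l" suffix used by the 387 mnemonics.  */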
7812
7813void
7814print_operand (FILE *file, rtx x, int code)
7815{
7816  if (code)
7817    {
7818      switch (code)
7819	{
7820	case '*':
7821	  if (ASSEMBLER_DIALECT == ASM_ATT)
7822	    putc ('*', file);
7823	  return;
7824
7825	case '&':
7826	  assemble_name (file, get_some_local_dynamic_name ());
7827	  return;
7828
7829	case 'A':
7830	  switch (ASSEMBLER_DIALECT)
7831	    {
7832	    case ASM_ATT:
7833	      putc ('*', file);
7834	      break;
7835
7836	    case ASM_INTEL:
7837	      /* Intel syntax. For absolute addresses, registers should not
7838		 be surrounded by brackets.  */
7839	      if (GET_CODE (x) != REG)
7840		{
7841		  putc ('[', file);
7842		  PRINT_OPERAND (file, x, 0);
7843		  putc (']', file);
7844		  return;
7845		}
7846	      break;
7847
7848	    default:
7849	      gcc_unreachable ();
7850	    }
7851
7852	  PRINT_OPERAND (file, x, 0);
7853	  return;
7854
7855
7856	case 'L':
7857	  if (ASSEMBLER_DIALECT == ASM_ATT)
7858	    putc ('l', file);
7859	  return;
7860
7861	case 'W':
7862	  if (ASSEMBLER_DIALECT == ASM_ATT)
7863	    putc ('w', file);
7864	  return;
7865
7866	case 'B':
7867	  if (ASSEMBLER_DIALECT == ASM_ATT)
7868	    putc ('b', file);
7869	  return;
7870
7871	case 'Q':
7872	  if (ASSEMBLER_DIALECT == ASM_ATT)
7873	    putc ('l', file);
7874	  return;
7875
7876	case 'S':
7877	  if (ASSEMBLER_DIALECT == ASM_ATT)
7878	    putc ('s', file);
7879	  return;
7880
7881	case 'T':
7882	  if (ASSEMBLER_DIALECT == ASM_ATT)
7883	    putc ('t', file);
7884	  return;
7885
7886	case 'z':
7887	  /* 387 opcodes don't get size suffixes if the operands are
7888	     registers.  */
7889	  if (STACK_REG_P (x))
7890	    return;
7891
7892	  /* Likewise if using Intel opcodes.  */
7893	  if (ASSEMBLER_DIALECT == ASM_INTEL)
7894	    return;
7895
7896	  /* Derive the opcode suffix from the size of the operand.  */
7897	  switch (GET_MODE_SIZE (GET_MODE (x)))
7898	    {
7899	    case 2:
7900#ifdef HAVE_GAS_FILDS_FISTS
7901	      putc ('s', file);
7902#endif
7903	      return;
7904
7905	    case 4:
7906	      if (GET_MODE (x) == SFmode)
7907		{
7908		  putc ('s', file);
7909		  return;
7910		}
7911	      else
7912		putc ('l', file);
7913	      return;
7914
7915	    case 12:
7916	    case 16:
7917	      putc ('t', file);
7918	      return;
7919
7920	    case 8:
7921	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7922		{
7923#ifdef GAS_MNEMONICS
7924		  putc ('q', file);
7925#else
7926		  putc ('l', file);
7927		  putc ('l', file);
7928#endif
7929		}
7930	      else
7931	        putc ('l', file);
7932	      return;
7933
7934	    default:
7935	      gcc_unreachable ();
7936	    }
7937
7938	case 'b':
7939	case 'w':
7940	case 'k':
7941	case 'q':
7942	case 'h':
7943	case 'y':
7944	case 'X':
7945	case 'P':
7946	  break;
7947
7948	case 's':
7949	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7950	    {
7951	      PRINT_OPERAND (file, x, 0);
7952	      putc (',', file);
7953	    }
7954	  return;
7955
7956	case 'D':
7957	  /* Little bit of braindamage here.  The SSE compare instructions
7958	     use completely different names for the comparisons than the
7959	     fp conditional moves do.  */
7960	  switch (GET_CODE (x))
7961	    {
7962	    case EQ:
7963	    case UNEQ:
7964	      fputs ("eq", file);
7965	      break;
7966	    case LT:
7967	    case UNLT:
7968	      fputs ("lt", file);
7969	      break;
7970	    case LE:
7971	    case UNLE:
7972	      fputs ("le", file);
7973	      break;
7974	    case UNORDERED:
7975	      fputs ("unord", file);
7976	      break;
7977	    case NE:
7978	    case LTGT:
7979	      fputs ("neq", file);
7980	      break;
7981	    case UNGE:
7982	    case GE:
7983	      fputs ("nlt", file);
7984	      break;
7985	    case UNGT:
7986	    case GT:
7987	      fputs ("nle", file);
7988	      break;
7989	    case ORDERED:
7990	      fputs ("ord", file);
7991	      break;
7992	    default:
7993	      gcc_unreachable ();
7994	    }
7995	  return;
7996	case 'O':
7997#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7998	  if (ASSEMBLER_DIALECT == ASM_ATT)
7999	    {
8000	      switch (GET_MODE (x))
8001		{
8002		case HImode: putc ('w', file); break;
8003		case SImode:
8004		case SFmode: putc ('l', file); break;
8005		case DImode:
8006		case DFmode: putc ('q', file); break;
8007		default: gcc_unreachable ();
8008		}
8009	      putc ('.', file);
8010	    }
8011#endif
8012	  return;
8013	case 'C':
8014	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8015	  return;
8016	case 'F':
8017#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8018	  if (ASSEMBLER_DIALECT == ASM_ATT)
8019	    putc ('.', file);
8020#endif
8021	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8022	  return;
8023
8024	  /* Like above, but reverse condition */
8025	case 'c':
8026	  /* Check to see if argument to %c is really a constant
8027	     and not a condition code which needs to be reversed.  */
8028	  if (!COMPARISON_P (x))
8029	  {
8030	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8031	     return;
8032	  }
8033	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8034	  return;
8035	case 'f':
8036#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8037	  if (ASSEMBLER_DIALECT == ASM_ATT)
8038	    putc ('.', file);
8039#endif
8040	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8041	  return;
8042
8043	case 'H':
8044	  /* It doesn't actually matter what mode we use here, as we're
8045	     only going to use this for printing.  */
8046	  x = adjust_address_nv (x, DImode, 8);
8047	  break;
8048
8049	case '+':
8050	  {
8051	    rtx x;
8052
8053	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8054	      return;
8055
8056	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8057	    if (x)
8058	      {
8059		int pred_val = INTVAL (XEXP (x, 0));
8060
8061		if (pred_val < REG_BR_PROB_BASE * 45 / 100
8062		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
8063		  {
8064		    int taken = pred_val > REG_BR_PROB_BASE / 2;
8065		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
8066
8067		    /* Emit hints only when the default branch prediction
8068		       heuristics would fail.  */
8069		    if (taken != cputaken)
8070		      {
8071			/* We use 3e (DS) prefix for taken branches and
8072			   2e (CS) prefix for not taken branches.  */
8073			if (taken)
8074			  fputs ("ds ; ", file);
8075			else
8076			  fputs ("cs ; ", file);
8077		      }
8078		  }
8079	      }
8080	    return;
8081	  }
8082	default:
8083	    output_operand_lossage ("invalid operand code '%c'", code);
8084	}
8085    }
8086
8087  if (GET_CODE (x) == REG)
8088    print_reg (x, code, file);
8089
8090  else if (GET_CODE (x) == MEM)
8091    {
8092      /* No `byte ptr' prefix for call instructions.  */
8093      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8094	{
8095	  const char * size;
8096	  switch (GET_MODE_SIZE (GET_MODE (x)))
8097	    {
8098	    case 1: size = "BYTE"; break;
8099	    case 2: size = "WORD"; break;
8100	    case 4: size = "DWORD"; break;
8101	    case 8: size = "QWORD"; break;
8102	    case 12: size = "XWORD"; break;
8103	    case 16: size = "XMMWORD"; break;
8104	    default:
8105	      gcc_unreachable ();
8106	    }
8107
8108	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
8109	  if (code == 'b')
8110	    size = "BYTE";
8111	  else if (code == 'w')
8112	    size = "WORD";
8113	  else if (code == 'k')
8114	    size = "DWORD";
8115
8116	  fputs (size, file);
8117	  fputs (" PTR ", file);
8118	}
8119
8120      x = XEXP (x, 0);
8121      /* Avoid (%rip) for call operands.  */
8122      if (CONSTANT_ADDRESS_P (x) && code == 'P'
8123	       && GET_CODE (x) != CONST_INT)
8124	output_addr_const (file, x);
8125      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8126	output_operand_lossage ("invalid constraints for operand");
8127      else
8128	output_address (x);
8129    }
8130
8131  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8132    {
8133      REAL_VALUE_TYPE r;
8134      long l;
8135
8136      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8137      REAL_VALUE_TO_TARGET_SINGLE (r, l);
8138
8139      if (ASSEMBLER_DIALECT == ASM_ATT)
8140	putc ('$', file);
8141      fprintf (file, "0x%08lx", l);
8142    }
8143
8144  /* These float cases don't actually occur as immediate operands.  */
8145  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8146    {
8147      char dstr[30];
8148
8149      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8150      fprintf (file, "%s", dstr);
8151    }
8152
8153  else if (GET_CODE (x) == CONST_DOUBLE
8154	   && GET_MODE (x) == XFmode)
8155    {
8156      char dstr[30];
8157
8158      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8159      fprintf (file, "%s", dstr);
8160    }
8161
8162  else
8163    {
8164      /* We have patterns that allow zero sets of memory, for instance.
8165	 In 64-bit mode, we should probably support all 8-byte vectors,
8166	 since we can in fact encode that into an immediate.  */
8167      if (GET_CODE (x) == CONST_VECTOR)
8168	{
8169	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8170	  x = const0_rtx;
8171	}
8172
8173      if (code != 'P')
8174	{
8175	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8176	    {
8177	      if (ASSEMBLER_DIALECT == ASM_ATT)
8178		putc ('$', file);
8179	    }
8180	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8181		   || GET_CODE (x) == LABEL_REF)
8182	    {
8183	      if (ASSEMBLER_DIALECT == ASM_ATT)
8184		putc ('$', file);
8185	      else
8186		fputs ("OFFSET FLAT:", file);
8187	    }
8188	}
8189      if (GET_CODE (x) == CONST_INT)
8190	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8191      else if (flag_pic)
8192	output_pic_addr_const (file, x, code);
8193      else
8194	output_addr_const (file, x);
8195    }
8196}
8197
8198/* Print a memory operand whose address is ADDR.  */
8199
8200void
8201print_operand_address (FILE *file, rtx addr)
8202{
8203  struct ix86_address parts;
8204  rtx base, index, disp;
8205  int scale;
8206  int ok = ix86_decompose_address (addr, &parts);
8207
8208  gcc_assert (ok);
8209
8210  base = parts.base;
8211  index = parts.index;
8212  disp = parts.disp;
8213  scale = parts.scale;
8214
8215  switch (parts.seg)
8216    {
8217    case SEG_DEFAULT:
8218      break;
8219    case SEG_FS:
8220    case SEG_GS:
8221      if (USER_LABEL_PREFIX[0] == 0)
8222	putc ('%', file);
8223      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8224      break;
8225    default:
8226      gcc_unreachable ();
8227    }
8228
8229  if (!base && !index)
8230    {
8231	      /* A displacement-only address requires special attention.  */
8232
8233      if (GET_CODE (disp) == CONST_INT)
8234	{
8235	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8236	    {
8237	      if (USER_LABEL_PREFIX[0] == 0)
8238		putc ('%', file);
8239	      fputs ("ds:", file);
8240	    }
8241	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8242	}
8243      else if (flag_pic)
8244	output_pic_addr_const (file, disp, 0);
8245      else
8246	output_addr_const (file, disp);
8247
8248	      /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
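      /* (Encoding note, for readability: with neither base nor index, an
	 absolute disp32 needs a SIB byte in 64-bit mode, while the
	 RIP-relative form does not, hence "one byte shorter".)  */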
8249      if (TARGET_64BIT)
8250	{
8251	  if (GET_CODE (disp) == CONST
8252	      && GET_CODE (XEXP (disp, 0)) == PLUS
8253	      && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8254	    disp = XEXP (XEXP (disp, 0), 0);
8255	  if (GET_CODE (disp) == LABEL_REF
8256	      || (GET_CODE (disp) == SYMBOL_REF
8257		  && SYMBOL_REF_TLS_MODEL (disp) == 0))
8258	    fputs ("(%rip)", file);
8259	}
8260    }
8261  else
8262    {
8263      if (ASSEMBLER_DIALECT == ASM_ATT)
8264	{
8265	  if (disp)
8266	    {
8267	      if (flag_pic)
8268		output_pic_addr_const (file, disp, 0);
8269	      else if (GET_CODE (disp) == LABEL_REF)
8270		output_asm_label (disp);
8271	      else
8272		output_addr_const (file, disp);
8273	    }
8274
8275	  putc ('(', file);
8276	  if (base)
8277	    print_reg (base, 0, file);
8278	  if (index)
8279	    {
8280	      putc (',', file);
8281	      print_reg (index, 0, file);
8282	      if (scale != 1)
8283		fprintf (file, ",%d", scale);
8284	    }
8285	  putc (')', file);
8286	}
8287      else
8288	{
8289	  rtx offset = NULL_RTX;
8290
8291	  if (disp)
8292	    {
8293	      /* Pull out the offset of a symbol; print any symbol itself.  */
8294	      if (GET_CODE (disp) == CONST
8295		  && GET_CODE (XEXP (disp, 0)) == PLUS
8296		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8297		{
8298		  offset = XEXP (XEXP (disp, 0), 1);
8299		  disp = gen_rtx_CONST (VOIDmode,
8300					XEXP (XEXP (disp, 0), 0));
8301		}
8302
8303	      if (flag_pic)
8304		output_pic_addr_const (file, disp, 0);
8305	      else if (GET_CODE (disp) == LABEL_REF)
8306		output_asm_label (disp);
8307	      else if (GET_CODE (disp) == CONST_INT)
8308		offset = disp;
8309	      else
8310		output_addr_const (file, disp);
8311	    }
8312
8313	  putc ('[', file);
8314	  if (base)
8315	    {
8316	      print_reg (base, 0, file);
8317	      if (offset)
8318		{
8319		  if (INTVAL (offset) >= 0)
8320		    putc ('+', file);
8321		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8322		}
8323	    }
8324	  else if (offset)
8325	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8326	  else
8327	    putc ('0', file);
8328
8329	  if (index)
8330	    {
8331	      putc ('+', file);
8332	      print_reg (index, 0, file);
8333	      if (scale != 1)
8334		fprintf (file, "*%d", scale);
8335	    }
8336	  putc (']', file);
8337	}
8338    }
8339}
8340
8341bool
8342output_addr_const_extra (FILE *file, rtx x)
8343{
8344  rtx op;
8345
8346  if (GET_CODE (x) != UNSPEC)
8347    return false;
8348
8349  op = XVECEXP (x, 0, 0);
8350  switch (XINT (x, 1))
8351    {
8352    case UNSPEC_GOTTPOFF:
8353      output_addr_const (file, op);
8354      /* FIXME: This might be @TPOFF in Sun ld.  */
8355      fputs ("@GOTTPOFF", file);
8356      break;
8357    case UNSPEC_TPOFF:
8358      output_addr_const (file, op);
8359      fputs ("@TPOFF", file);
8360      break;
8361    case UNSPEC_NTPOFF:
8362      output_addr_const (file, op);
8363      if (TARGET_64BIT)
8364	fputs ("@TPOFF", file);
8365      else
8366	fputs ("@NTPOFF", file);
8367      break;
8368    case UNSPEC_DTPOFF:
8369      output_addr_const (file, op);
8370      fputs ("@DTPOFF", file);
8371      break;
8372    case UNSPEC_GOTNTPOFF:
8373      output_addr_const (file, op);
8374      if (TARGET_64BIT)
8375	fputs ("@GOTTPOFF(%rip)", file);
8376      else
8377	fputs ("@GOTNTPOFF", file);
8378      break;
8379    case UNSPEC_INDNTPOFF:
8380      output_addr_const (file, op);
8381      fputs ("@INDNTPOFF", file);
8382      break;
8383
8384    default:
8385      return false;
8386    }
8387
8388  return true;
8389}
8390
8391/* Split one or more DImode RTL references into pairs of SImode
8392   references.  The RTL can be REG, offsettable MEM, integer constant, or
8393   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
8394   split and "num" is its length.  lo_half and hi_half are output arrays
8395   that parallel "operands".  */
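/* For instance (illustrative only): a (mem:DI addr) operand yields
   lo_half = (mem:SI addr) and hi_half = (mem:SI addr+4), while a REG or
   constant is split with simplify_gen_subreg at byte offsets 0 and 4.  */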
8396
8397void
8398split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8399{
8400  while (num--)
8401    {
8402      rtx op = operands[num];
8403
8404	      /* simplify_subreg refuses to split volatile memory references,
8405	         but we still have to handle them.  */
8406      if (GET_CODE (op) == MEM)
8407	{
8408	  lo_half[num] = adjust_address (op, SImode, 0);
8409	  hi_half[num] = adjust_address (op, SImode, 4);
8410	}
8411      else
8412	{
8413	  lo_half[num] = simplify_gen_subreg (SImode, op,
8414					      GET_MODE (op) == VOIDmode
8415					      ? DImode : GET_MODE (op), 0);
8416	  hi_half[num] = simplify_gen_subreg (SImode, op,
8417					      GET_MODE (op) == VOIDmode
8418					      ? DImode : GET_MODE (op), 4);
8419	}
8420    }
8421}
8422/* Split one or more TImode RTL references into pairs of DImode
8423   references.  The RTL can be REG, offsettable MEM, integer constant, or
8424	   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
8425   split and "num" is its length.  lo_half and hi_half are output arrays
8426   that parallel "operands".  */
8427
8428void
8429split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8430{
8431  while (num--)
8432    {
8433      rtx op = operands[num];
8434
8435	      /* simplify_subreg refuses to split volatile memory references, but we
8436	         still have to handle them.  */
8437      if (GET_CODE (op) == MEM)
8438	{
8439	  lo_half[num] = adjust_address (op, DImode, 0);
8440	  hi_half[num] = adjust_address (op, DImode, 8);
8441	}
8442      else
8443	{
8444	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8445	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8446	}
8447    }
8448}
8449
8450/* Output code to perform a 387 binary operation in INSN, one of PLUS,
8451   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
8452   is the expression of the binary operation.  The output may either be
8453   emitted here, or returned to the caller, like all output_* functions.
8454
8455   There is no guarantee that the operands are the same mode, as they
8456   might be within FLOAT or FLOAT_EXTEND expressions.  */
8457
8458#ifndef SYSV386_COMPAT
8459/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
8460   wants to fix the assemblers because that causes incompatibility
8461   with gcc.  No-one wants to fix gcc because that causes
8462   incompatibility with assemblers...  You can use the option of
8463   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
8464#define SYSV386_COMPAT 1
8465#endif
8466
8467const char *
8468output_387_binary_op (rtx insn, rtx *operands)
8469{
8470  static char buf[30];
8471  const char *p;
8472  const char *ssep;
8473  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8474
8475#ifdef ENABLE_CHECKING
8476	  /* Even if we do not want to check the inputs, this documents the input
8477	     constraints, which helps in understanding the following code.  */
8478  if (STACK_REG_P (operands[0])
8479      && ((REG_P (operands[1])
8480	   && REGNO (operands[0]) == REGNO (operands[1])
8481	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8482	  || (REG_P (operands[2])
8483	      && REGNO (operands[0]) == REGNO (operands[2])
8484	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8485      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8486    ; /* ok */
8487  else
8488    gcc_assert (is_sse);
8489#endif
8490
8491  switch (GET_CODE (operands[3]))
8492    {
8493    case PLUS:
8494      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8495	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8496	p = "fiadd";
8497      else
8498	p = "fadd";
8499      ssep = "add";
8500      break;
8501
8502    case MINUS:
8503      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8504	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8505	p = "fisub";
8506      else
8507	p = "fsub";
8508      ssep = "sub";
8509      break;
8510
8511    case MULT:
8512      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8513	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8514	p = "fimul";
8515      else
8516	p = "fmul";
8517      ssep = "mul";
8518      break;
8519
8520    case DIV:
8521      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8522	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8523	p = "fidiv";
8524      else
8525	p = "fdiv";
8526      ssep = "div";
8527      break;
8528
8529    default:
8530      gcc_unreachable ();
8531    }
8532
8533  if (is_sse)
8534   {
8535      strcpy (buf, ssep);
8536      if (GET_MODE (operands[0]) == SFmode)
8537	strcat (buf, "ss\t{%2, %0|%0, %2}");
8538      else
8539	strcat (buf, "sd\t{%2, %0|%0, %2}");
8540      return buf;
8541   }
8542  strcpy (buf, p);
8543
8544  switch (GET_CODE (operands[3]))
8545    {
8546    case MULT:
8547    case PLUS:
8548      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8549	{
8550	  rtx temp = operands[2];
8551	  operands[2] = operands[1];
8552	  operands[1] = temp;
8553	}
8554
8555	      /* We now know that operands[0] == operands[1].  */
8556
8557      if (GET_CODE (operands[2]) == MEM)
8558	{
8559	  p = "%z2\t%2";
8560	  break;
8561	}
8562
8563      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8564	{
8565	  if (STACK_TOP_P (operands[0]))
8566	    /* How is it that we are storing to a dead operand[2]?
8567	       Well, presumably operands[1] is dead too.  We can't
8568	       store the result to st(0) as st(0) gets popped on this
8569	       instruction.  Instead store to operands[2] (which I
8570	       think has to be st(1)).  st(1) will be popped later.
8571	       gcc <= 2.8.1 didn't have this check and generated
8572	       assembly code that the Unixware assembler rejected.  */
8573	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8574	  else
8575	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8576	  break;
8577	}
8578
8579      if (STACK_TOP_P (operands[0]))
8580	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8581      else
8582	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8583      break;
8584
8585    case MINUS:
8586    case DIV:
8587      if (GET_CODE (operands[1]) == MEM)
8588	{
8589	  p = "r%z1\t%1";
8590	  break;
8591	}
8592
8593      if (GET_CODE (operands[2]) == MEM)
8594	{
8595	  p = "%z2\t%2";
8596	  break;
8597	}
8598
8599      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8600	{
8601#if SYSV386_COMPAT
8602	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8603	     derived assemblers, confusingly reverse the direction of
8604	     the operation for fsub{r} and fdiv{r} when the
8605	     destination register is not st(0).  The Intel assembler
8606	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
8607	     figure out what the hardware really does.  */
8608	  if (STACK_TOP_P (operands[0]))
8609	    p = "{p\t%0, %2|rp\t%2, %0}";
8610	  else
8611	    p = "{rp\t%2, %0|p\t%0, %2}";
8612#else
8613	  if (STACK_TOP_P (operands[0]))
8614	    /* As above for fmul/fadd, we can't store to st(0).  */
8615	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8616	  else
8617	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8618#endif
8619	  break;
8620	}
8621
8622      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8623	{
8624#if SYSV386_COMPAT
8625	  if (STACK_TOP_P (operands[0]))
8626	    p = "{rp\t%0, %1|p\t%1, %0}";
8627	  else
8628	    p = "{p\t%1, %0|rp\t%0, %1}";
8629#else
8630	  if (STACK_TOP_P (operands[0]))
8631	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
8632	  else
8633	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
8634#endif
8635	  break;
8636	}
8637
8638      if (STACK_TOP_P (operands[0]))
8639	{
8640	  if (STACK_TOP_P (operands[1]))
8641	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8642	  else
8643	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
8644	  break;
8645	}
8646      else if (STACK_TOP_P (operands[1]))
8647	{
8648#if SYSV386_COMPAT
8649	  p = "{\t%1, %0|r\t%0, %1}";
8650#else
8651	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
8652#endif
8653	}
8654      else
8655	{
8656#if SYSV386_COMPAT
8657	  p = "{r\t%2, %0|\t%0, %2}";
8658#else
8659	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8660#endif
8661	}
8662      break;
8663
8664    default:
8665      gcc_unreachable ();
8666    }
8667
8668  strcat (buf, p);
8669  return buf;
8670}
8671
8672/* Return needed mode for entity in optimize_mode_switching pass.  */
8673
8674int
8675ix86_mode_needed (int entity, rtx insn)
8676{
8677  enum attr_i387_cw mode;
8678
8679	  /* The mode UNINITIALIZED is used to store the control word after a
8680	     function call or ASM pattern.  The mode ANY specifies that the function
8681	     has no requirements on the control word and makes no changes to the
8682	     bits we are interested in.  */
8683
8684  if (CALL_P (insn)
8685      || (NONJUMP_INSN_P (insn)
8686	  && (asm_noperands (PATTERN (insn)) >= 0
8687	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8688    return I387_CW_UNINITIALIZED;
8689
8690  if (recog_memoized (insn) < 0)
8691    return I387_CW_ANY;
8692
8693  mode = get_attr_i387_cw (insn);
8694
8695  switch (entity)
8696    {
8697    case I387_TRUNC:
8698      if (mode == I387_CW_TRUNC)
8699	return mode;
8700      break;
8701
8702    case I387_FLOOR:
8703      if (mode == I387_CW_FLOOR)
8704	return mode;
8705      break;
8706
8707    case I387_CEIL:
8708      if (mode == I387_CW_CEIL)
8709	return mode;
8710      break;
8711
8712    case I387_MASK_PM:
8713      if (mode == I387_CW_MASK_PM)
8714	return mode;
8715      break;
8716
8717    default:
8718      gcc_unreachable ();
8719    }
8720
8721  return I387_CW_ANY;
8722}
8723
8724	/* Output code to initialize the control word copies used by the trunc?f?i
8725	   and rounding patterns.  MODE selects which modified copy of the current
8726	   control word to compute and store in its stack slot.  */
8727
8728void
8729emit_i387_cw_initialization (int mode)
8730{
8731  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8732  rtx new_mode;
8733
8734  int slot;
8735
8736  rtx reg = gen_reg_rtx (HImode);
8737
8738  emit_insn (gen_x86_fnstcw_1 (stored_mode));
8739  emit_move_insn (reg, stored_mode);
8740
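  /* Reminder of the x87 control word fields touched below (architectural
     facts, stated here for readability): bits 10-11 are the rounding
     control (00 nearest, 01 down, 10 up, 11 truncate), hence the 0x0400,
     0x0800 and 0x0c00 constants; bit 5 (0x0020) is the precision
     exception mask used for nearbyint.  */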
8741  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8742    {
8743      switch (mode)
8744	{
8745	case I387_CW_TRUNC:
8746	  /* round toward zero (truncate) */
8747	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8748	  slot = SLOT_CW_TRUNC;
8749	  break;
8750
8751	case I387_CW_FLOOR:
8752	  /* round down toward -oo */
8753	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8754	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8755	  slot = SLOT_CW_FLOOR;
8756	  break;
8757
8758	case I387_CW_CEIL:
8759	  /* round up toward +oo */
8760	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8761	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8762	  slot = SLOT_CW_CEIL;
8763	  break;
8764
8765	case I387_CW_MASK_PM:
8766	  /* mask precision exception for nearbyint() */
8767	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8768	  slot = SLOT_CW_MASK_PM;
8769	  break;
8770
8771	default:
8772	  gcc_unreachable ();
8773	}
8774    }
8775  else
8776    {
8777      switch (mode)
8778	{
8779	case I387_CW_TRUNC:
8780	  /* round toward zero (truncate) */
8781	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8782	  slot = SLOT_CW_TRUNC;
8783	  break;
8784
8785	case I387_CW_FLOOR:
8786	  /* round down toward -oo */
8787	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8788	  slot = SLOT_CW_FLOOR;
8789	  break;
8790
8791	case I387_CW_CEIL:
8792	  /* round up toward +oo */
8793	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8794	  slot = SLOT_CW_CEIL;
8795	  break;
8796
8797	case I387_CW_MASK_PM:
8798	  /* mask precision exception for nearbyint() */
8799	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8800	  slot = SLOT_CW_MASK_PM;
8801	  break;
8802
8803	default:
8804	  gcc_unreachable ();
8805	}
8806    }
8807
8808  gcc_assert (slot < MAX_386_STACK_LOCALS);
8809
8810  new_mode = assign_386_stack_local (HImode, slot);
8811  emit_move_insn (new_mode, reg);
8812}
8813
8814/* Output code for INSN to convert a float to a signed int.  OPERANDS
8815   are the insn operands.  The output may be [HSD]Imode and the input
8816   operand may be [SDX]Fmode.  */
8817
8818const char *
8819output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8820{
8821  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8822  int dimode_p = GET_MODE (operands[0]) == DImode;
8823  int round_mode = get_attr_i387_cw (insn);
8824
8825  /* Jump through a hoop or two for DImode, since the hardware has no
8826     non-popping instruction.  We used to do this a different way, but
8827     that was somewhat fragile and broke with post-reload splitters.  */
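  /* In other words: fistp and fisttp always pop st(0), so when the value
     is still live afterwards we first duplicate it with "fld".  */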
8828  if ((dimode_p || fisttp) && !stack_top_dies)
8829    output_asm_insn ("fld\t%y1", operands);
8830
8831  gcc_assert (STACK_TOP_P (operands[1]));
8832  gcc_assert (GET_CODE (operands[0]) == MEM);
8833
8834  if (fisttp)
8835      output_asm_insn ("fisttp%z0\t%0", operands);
8836  else
8837    {
8838      if (round_mode != I387_CW_ANY)
8839	output_asm_insn ("fldcw\t%3", operands);
8840      if (stack_top_dies || dimode_p)
8841	output_asm_insn ("fistp%z0\t%0", operands);
8842      else
8843	output_asm_insn ("fist%z0\t%0", operands);
8844      if (round_mode != I387_CW_ANY)
8845	output_asm_insn ("fldcw\t%2", operands);
8846    }
8847
8848  return "";
8849}
8850
8851/* Output code for x87 ffreep insn.  The OPNO argument, which may only
8852   have the values zero or one, indicates the ffreep insn's operand
8853   from the OPERANDS array.  */
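/* (Encoding note: the ".word 0xc?df" strings below are the little-endian
   byte pairs DF C0+i, i.e. "ffreep %st(i)", emitted raw for assemblers
   that do not know the mnemonic.)  */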
8854
8855static const char *
8856output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8857{
8858  if (TARGET_USE_FFREEP)
8859#if HAVE_AS_IX86_FFREEP
8860    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8861#else
8862    switch (REGNO (operands[opno]))
8863      {
8864      case FIRST_STACK_REG + 0: return ".word\t0xc0df";
8865      case FIRST_STACK_REG + 1: return ".word\t0xc1df";
8866      case FIRST_STACK_REG + 2: return ".word\t0xc2df";
8867      case FIRST_STACK_REG + 3: return ".word\t0xc3df";
8868      case FIRST_STACK_REG + 4: return ".word\t0xc4df";
8869      case FIRST_STACK_REG + 5: return ".word\t0xc5df";
8870      case FIRST_STACK_REG + 6: return ".word\t0xc6df";
8871      case FIRST_STACK_REG + 7: return ".word\t0xc7df";
8872      }
8873#endif
8874
8875  return opno ? "fstp\t%y1" : "fstp\t%y0";
8876}
8877
8878
8879/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
8880   should be used.  UNORDERED_P is true when fucom should be used.  */
8881
8882const char *
8883output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8884{
8885  int stack_top_dies;
8886  rtx cmp_op0, cmp_op1;
8887  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8888
8889  if (eflags_p)
8890    {
8891      cmp_op0 = operands[0];
8892      cmp_op1 = operands[1];
8893    }
8894  else
8895    {
8896      cmp_op0 = operands[1];
8897      cmp_op1 = operands[2];
8898    }
8899
8900  if (is_sse)
8901    {
8902      if (GET_MODE (operands[0]) == SFmode)
8903	if (unordered_p)
8904	  return "ucomiss\t{%1, %0|%0, %1}";
8905	else
8906	  return "comiss\t{%1, %0|%0, %1}";
8907      else
8908	if (unordered_p)
8909	  return "ucomisd\t{%1, %0|%0, %1}";
8910	else
8911	  return "comisd\t{%1, %0|%0, %1}";
8912    }
8913
8914  gcc_assert (STACK_TOP_P (cmp_op0));
8915
8916  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8917
8918  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8919    {
8920      if (stack_top_dies)
8921	{
8922	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8923	  return output_387_ffreep (operands, 1);
8924	}
8925      else
8926	return "ftst\n\tfnstsw\t%0";
8927    }
8928
8929  if (STACK_REG_P (cmp_op1)
8930      && stack_top_dies
8931      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8932      && REGNO (cmp_op1) != FIRST_STACK_REG)
8933    {
8934	      /* If the top of the 387 stack dies and the other operand is
8935		 also a stack register that dies, then this must be a
8936		 `fcompp' float compare.  */
8937
8938      if (eflags_p)
8939	{
8940	  /* There is no double popping fcomi variant.  Fortunately,
8941	     eflags is immune from the fstp's cc clobbering.  */
8942	  if (unordered_p)
8943	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8944	  else
8945	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8946	  return output_387_ffreep (operands, 0);
8947	}
8948      else
8949	{
8950	  if (unordered_p)
8951	    return "fucompp\n\tfnstsw\t%0";
8952	  else
8953	    return "fcompp\n\tfnstsw\t%0";
8954	}
8955    }
8956  else
8957    {
8958      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
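      /* Worked example: eflags_p = 1, integer operand = 0, unordered_p = 1,
	 stack_top_dies = 1 gives mask = 8 + 2 + 1 = 11, selecting
	 "fucomip\t{%y1, %0|%0, %y1}" from the table below.  */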
8959
8960      static const char * const alt[16] =
8961      {
8962	"fcom%z2\t%y2\n\tfnstsw\t%0",
8963	"fcomp%z2\t%y2\n\tfnstsw\t%0",
8964	"fucom%z2\t%y2\n\tfnstsw\t%0",
8965	"fucomp%z2\t%y2\n\tfnstsw\t%0",
8966
8967	"ficom%z2\t%y2\n\tfnstsw\t%0",
8968	"ficomp%z2\t%y2\n\tfnstsw\t%0",
8969	NULL,
8970	NULL,
8971
8972	"fcomi\t{%y1, %0|%0, %y1}",
8973	"fcomip\t{%y1, %0|%0, %y1}",
8974	"fucomi\t{%y1, %0|%0, %y1}",
8975	"fucomip\t{%y1, %0|%0, %y1}",
8976
8977	NULL,
8978	NULL,
8979	NULL,
8980	NULL
8981      };
8982
8983      int mask;
8984      const char *ret;
8985
8986      mask  = eflags_p << 3;
8987      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8988      mask |= unordered_p << 1;
8989      mask |= stack_top_dies;
8990
8991      gcc_assert (mask < 16);
8992      ret = alt[mask];
8993      gcc_assert (ret);
8994
8995      return ret;
8996    }
8997}
8998
8999void
9000ix86_output_addr_vec_elt (FILE *file, int value)
9001{
9002  const char *directive = ASM_LONG;
9003
9004#ifdef ASM_QUAD
9005  if (TARGET_64BIT)
9006    directive = ASM_QUAD;
9007#else
9008  gcc_assert (!TARGET_64BIT);
9009#endif
9010
9011  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9012}
9013
9014void
9015ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9016{
9017  if (TARGET_64BIT)
9018    fprintf (file, "%s%s%d-%s%d\n",
9019	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
9020  else if (HAVE_AS_GOTOFF_IN_DATA)
9021    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9022#if TARGET_MACHO
9023  else if (TARGET_MACHO)
9024    {
9025      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9026      machopic_output_function_base_name (file);
9027      fprintf(file, "\n");
9028    }
9029#endif
9030  else
9031    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9032		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9033}
9034
9035/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9036   for the target.  */
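/* (For illustration: clearing %eax with "xorl %eax, %eax" takes 2 bytes
   versus 5 for "movl $0, %eax", but the xor form clobbers the flags,
   hence the flags CLOBBER attached below when the xor form is chosen.)  */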
9037
9038void
9039ix86_expand_clear (rtx dest)
9040{
9041  rtx tmp;
9042
9043  /* We play register width games, which are only valid after reload.  */
9044  gcc_assert (reload_completed);
9045
9046  /* Avoid HImode and its attendant prefix byte.  */
9047  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9048    dest = gen_rtx_REG (SImode, REGNO (dest));
9049
9050  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9051
9052  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
9053  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9054    {
9055      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9056      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9057    }
9058
9059  emit_insn (tmp);
9060}
9061
9062/* X is an unchanging MEM.  If it is a constant pool reference, return
9063   the constant pool rtx, else NULL.  */
9064
9065rtx
9066maybe_get_pool_constant (rtx x)
9067{
9068  x = ix86_delegitimize_address (XEXP (x, 0));
9069
9070  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9071    return get_pool_constant (x);
9072
9073  return NULL_RTX;
9074}
9075
9076void
9077ix86_expand_move (enum machine_mode mode, rtx operands[])
9078{
9079  int strict = (reload_in_progress || reload_completed);
9080  rtx op0, op1;
9081  enum tls_model model;
9082
9083  op0 = operands[0];
9084  op1 = operands[1];
9085
9086  if (GET_CODE (op1) == SYMBOL_REF)
9087    {
9088      model = SYMBOL_REF_TLS_MODEL (op1);
9089      if (model)
9090	{
9091	  op1 = legitimize_tls_address (op1, model, true);
9092	  op1 = force_operand (op1, op0);
9093	  if (op1 == op0)
9094	    return;
9095	}
9096    }
9097  else if (GET_CODE (op1) == CONST
9098	   && GET_CODE (XEXP (op1, 0)) == PLUS
9099	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9100    {
9101      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9102      if (model)
9103	{
9104	  rtx addend = XEXP (XEXP (op1, 0), 1);
9105	  op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9106	  op1 = force_operand (op1, NULL);
9107	  op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9108				     op0, 1, OPTAB_DIRECT);
9109	  if (op1 == op0)
9110	    return;
9111	}
9112    }
9113
9114  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9115    {
9116      if (TARGET_MACHO && !TARGET_64BIT)
9117	{
9118#if TARGET_MACHO
9119	  if (MACHOPIC_PURE)
9120	    {
9121	      rtx temp = ((reload_in_progress
9122			   || ((op0 && GET_CODE (op0) == REG)
9123			       && mode == Pmode))
9124			  ? op0 : gen_reg_rtx (Pmode));
9125	      op1 = machopic_indirect_data_reference (op1, temp);
9126	      op1 = machopic_legitimize_pic_address (op1, mode,
9127						     temp == op1 ? 0 : temp);
9128	    }
9129	  else if (MACHOPIC_INDIRECT)
9130	    op1 = machopic_indirect_data_reference (op1, 0);
9131	  if (op0 == op1)
9132	    return;
9133#endif
9134	}
9135      else
9136	{
9137	  if (GET_CODE (op0) == MEM)
9138	    op1 = force_reg (Pmode, op1);
9139	  else
9140	    op1 = legitimize_address (op1, op1, Pmode);
9141	}
9142    }
9143  else
9144    {
9145      if (GET_CODE (op0) == MEM
9146	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9147	      || !push_operand (op0, mode))
9148	  && GET_CODE (op1) == MEM)
9149	op1 = force_reg (mode, op1);
9150
9151      if (push_operand (op0, mode)
9152	  && ! general_no_elim_operand (op1, mode))
9153	op1 = copy_to_mode_reg (mode, op1);
9154
9155	      /* Force large constants in 64-bit compilation into a register
9156		 so that they get CSEd.  */
9157      if (TARGET_64BIT && mode == DImode
9158	  && immediate_operand (op1, mode)
9159	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
9160	  && !register_operand (op0, mode)
9161	  && optimize && !reload_completed && !reload_in_progress)
9162	op1 = copy_to_mode_reg (mode, op1);
9163
9164      if (FLOAT_MODE_P (mode))
9165	{
9166	  /* If we are loading a floating point constant to a register,
9167	     force the value to memory now, since we'll get better code
9168	     out of the back end.  */
9169
9170	  if (strict)
9171	    ;
9172	  else if (GET_CODE (op1) == CONST_DOUBLE)
9173	    {
9174	      op1 = validize_mem (force_const_mem (mode, op1));
9175	      if (!register_operand (op0, mode))
9176		{
9177		  rtx temp = gen_reg_rtx (mode);
9178		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9179		  emit_move_insn (op0, temp);
9180		  return;
9181		}
9182	    }
9183	}
9184    }
9185
9186  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9187}
9188
9189void
9190ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9191{
9192  rtx op0 = operands[0], op1 = operands[1];
9193
9194  /* Force constants other than zero into memory.  We do not know how
9195     the instructions used to build constants modify the upper 64 bits
9196	     of the register; once we have that information, we may be able
9197     to handle some of them more efficiently.  */
9198  if ((reload_in_progress | reload_completed) == 0
9199      && register_operand (op0, mode)
9200      && CONSTANT_P (op1)
9201      && standard_sse_constant_p (op1) <= 0)
9202    op1 = validize_mem (force_const_mem (mode, op1));
9203
9204  /* Make operand1 a register if it isn't already.  */
9205  if (!no_new_pseudos
9206      && !register_operand (op0, mode)
9207      && !register_operand (op1, mode))
9208    {
9209      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9210      return;
9211    }
9212
9213  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9214}
9215
9216/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
9217   straight to ix86_expand_vector_move.  */
9218
9219void
9220ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9221{
9222  rtx op0, op1, m;
9223
9224  op0 = operands[0];
9225  op1 = operands[1];
9226
9227  if (MEM_P (op1))
9228    {
9229      /* If we're optimizing for size, movups is the smallest.  */
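      /* (One movups covers the whole 16 bytes regardless of element type,
	 so both operands are simply punned to V4SFmode here.)  */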
9230      if (optimize_size)
9231	{
9232	  op0 = gen_lowpart (V4SFmode, op0);
9233	  op1 = gen_lowpart (V4SFmode, op1);
9234	  emit_insn (gen_sse_movups (op0, op1));
9235	  return;
9236	}
9237
9238      /* ??? If we have typed data, then it would appear that using
9239	 movdqu is the only way to get unaligned data loaded with
9240	 integer type.  */
9241      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9242	{
9243	  op0 = gen_lowpart (V16QImode, op0);
9244	  op1 = gen_lowpart (V16QImode, op1);
9245	  emit_insn (gen_sse2_movdqu (op0, op1));
9246	  return;
9247	}
9248
9249      if (TARGET_SSE2 && mode == V2DFmode)
9250	{
9251	  rtx zero;
9252
9253	  /* When SSE registers are split into halves, we can avoid
9254	     writing to the top half twice.  */
9255	  if (TARGET_SSE_SPLIT_REGS)
9256	    {
9257	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9258	      zero = op0;
9259	    }
9260	  else
9261	    {
9262	      /* ??? Not sure about the best option for the Intel chips.
9263		 The following would seem to satisfy; the register is
9264		 entirely cleared, breaking the dependency chain.  We
9265		 then store to the upper half, with a dependency depth
9266		 of one.  A rumor has it that Intel recommends two movsd
9267		 followed by an unpacklpd, but this is unconfirmed.  And
9268		 given that the dependency depth of the unpacklpd would
9269		 still be one, I'm not sure why this would be better.  */
9270	      zero = CONST0_RTX (V2DFmode);
9271	    }
9272
9273	  m = adjust_address (op1, DFmode, 0);
9274	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
9275	  m = adjust_address (op1, DFmode, 8);
9276	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
9277	}
9278      else
9279	{
9280	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9281	    emit_move_insn (op0, CONST0_RTX (mode));
9282	  else
9283	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9284
9285	  if (mode != V4SFmode)
9286	    op0 = gen_lowpart (V4SFmode, op0);
9287	  m = adjust_address (op1, V2SFmode, 0);
9288	  emit_insn (gen_sse_loadlps (op0, op0, m));
9289	  m = adjust_address (op1, V2SFmode, 8);
9290	  emit_insn (gen_sse_loadhps (op0, op0, m));
9291	}
9292    }
9293  else if (MEM_P (op0))
9294    {
9295      /* If we're optimizing for size, movups is the smallest.  */
9296      if (optimize_size)
9297	{
9298	  op0 = gen_lowpart (V4SFmode, op0);
9299	  op1 = gen_lowpart (V4SFmode, op1);
9300	  emit_insn (gen_sse_movups (op0, op1));
9301	  return;
9302	}
9303
9304	      /* ??? Similar to above, only less clear because of
9305		 "typeless stores".  */
9306      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9307	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9308        {
9309	  op0 = gen_lowpart (V16QImode, op0);
9310	  op1 = gen_lowpart (V16QImode, op1);
9311	  emit_insn (gen_sse2_movdqu (op0, op1));
9312	  return;
9313	}
9314
9315      if (TARGET_SSE2 && mode == V2DFmode)
9316	{
9317	  m = adjust_address (op0, DFmode, 0);
9318	  emit_insn (gen_sse2_storelpd (m, op1));
9319	  m = adjust_address (op0, DFmode, 8);
9320	  emit_insn (gen_sse2_storehpd (m, op1));
9321	}
9322      else
9323	{
9324	  if (mode != V4SFmode)
9325	    op1 = gen_lowpart (V4SFmode, op1);
9326	  m = adjust_address (op0, V2SFmode, 0);
9327	  emit_insn (gen_sse_storelps (m, op1));
9328	  m = adjust_address (op0, V2SFmode, 8);
9329	  emit_insn (gen_sse_storehps (m, op1));
9330	}
9331    }
9332  else
9333    gcc_unreachable ();
9334}
9335
9336/* Expand a push in MODE.  This is some mode for which we do not support
9337   proper push instructions, at least from the registers that we expect
9338   the value to live in.  */
9339
9340void
9341ix86_expand_push (enum machine_mode mode, rtx x)
9342{
9343  rtx tmp;
9344
9345  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9346			     GEN_INT (-GET_MODE_SIZE (mode)),
9347			     stack_pointer_rtx, 1, OPTAB_DIRECT);
9348  if (tmp != stack_pointer_rtx)
9349    emit_move_insn (stack_pointer_rtx, tmp);
9350
9351  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9352  emit_move_insn (tmp, x);
9353}
9354
9355/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
9356   destination to use for the operation.  If different from the true
9357   destination in operands[0], a copy operation will be required.  */
9358
9359rtx
9360ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9361			    rtx operands[])
9362{
9363  int matching_memory;
9364  rtx src1, src2, dst;
9365
9366  dst = operands[0];
9367  src1 = operands[1];
9368  src2 = operands[2];
9369
9370  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
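  /* e.g. "x = 5 + x" is rewritten as "x = x + 5" so that the two-address
     constraint (destination matches the first source) can be satisfied.  */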
9371  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9372      && (rtx_equal_p (dst, src2)
9373	  || immediate_operand (src1, mode)))
9374    {
9375      rtx temp = src1;
9376      src1 = src2;
9377      src2 = temp;
9378    }
9379
9380  /* If the destination is memory, and we do not have matching source
9381     operands, do things in registers.  */
9382  matching_memory = 0;
9383  if (GET_CODE (dst) == MEM)
9384    {
9385      if (rtx_equal_p (dst, src1))
9386	matching_memory = 1;
9387      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9388	       && rtx_equal_p (dst, src2))
9389	matching_memory = 2;
9390      else
9391	dst = gen_reg_rtx (mode);
9392    }
9393
9394	  /* The source operands cannot both be in memory.  */
9395  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9396    {
9397      if (matching_memory != 2)
9398	src2 = force_reg (mode, src2);
9399      else
9400	src1 = force_reg (mode, src1);
9401    }
9402
9403  /* If the operation is not commutable, source 1 cannot be a constant
9404     or non-matching memory.  */
9405  if ((CONSTANT_P (src1)
9406       || (!matching_memory && GET_CODE (src1) == MEM))
9407      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9408    src1 = force_reg (mode, src1);
9409
9410  src1 = operands[1] = src1;
9411  src2 = operands[2] = src2;
9412  return dst;
9413}
9414
9415/* Similarly, but assume that the destination has already been
9416   set up properly.  */
9417
9418void
9419ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9420				    enum machine_mode mode, rtx operands[])
9421{
9422  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9423  gcc_assert (dst == operands[0]);
9424}
9425
9426	/* Attempt to expand a binary operator.  Make the expansion closer to the
9427	   actual machine than just general_operand, which would allow 3 separate
9428	   memory references (one output, two inputs) in a single insn.  */
9429
9430void
9431ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9432			     rtx operands[])
9433{
9434  rtx src1, src2, dst, op, clob;
9435
9436  dst = ix86_fixup_binary_operands (code, mode, operands);
9437  src1 = operands[1];
9438  src2 = operands[2];
9439
9440	  /* Emit the instruction.  */
9441
9442  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9443  if (reload_in_progress)
9444    {
9445      /* Reload doesn't know about the flags register, and doesn't know that
9446         it doesn't want to clobber it.  We can only do this with PLUS.  */
9447      gcc_assert (code == PLUS);
9448      emit_insn (op);
9449    }
9450  else
9451    {
9452      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9453      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9454    }
9455
9456  /* Fix up the destination if needed.  */
9457  if (dst != operands[0])
9458    emit_move_insn (operands[0], dst);
9459}
9460
9461/* Return TRUE or FALSE depending on whether the binary operator meets the
9462   appropriate constraints.  */
9463
9464int
9465ix86_binary_operator_ok (enum rtx_code code,
9466			 enum machine_mode mode ATTRIBUTE_UNUSED,
9467			 rtx operands[3])
9468{
9469	  /* The source operands cannot both be in memory.  */
9470  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9471    return 0;
9472  /* If the operation is not commutable, source 1 cannot be a constant.  */
9473  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9474    return 0;
9475  /* If the destination is memory, we must have a matching source operand.  */
9476  if (GET_CODE (operands[0]) == MEM
9477      && ! (rtx_equal_p (operands[0], operands[1])
9478	    || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9479		&& rtx_equal_p (operands[0], operands[2]))))
9480    return 0;
9481  /* If the operation is not commutable and the source 1 is memory, we must
9482     have a matching destination.  */
9483  if (GET_CODE (operands[1]) == MEM
9484      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9485      && ! rtx_equal_p (operands[0], operands[1]))
9486    return 0;
9487  return 1;
9488}
9489
9490	/* Attempt to expand a unary operator.  Make the expansion closer to the
9491	   actual machine than just general_operand, which would allow 2 separate
9492	   memory references (one output, one input) in a single insn.  */
9493
9494void
9495ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9496			    rtx operands[])
9497{
9498  int matching_memory;
9499  rtx src, dst, op, clob;
9500
9501  dst = operands[0];
9502  src = operands[1];
9503
9504  /* If the destination is memory, and we do not have matching source
9505     operands, do things in registers.  */
9506  matching_memory = 0;
9507  if (MEM_P (dst))
9508    {
9509      if (rtx_equal_p (dst, src))
9510	matching_memory = 1;
9511      else
9512	dst = gen_reg_rtx (mode);
9513    }
9514
9515	  /* When the source operand is memory, the destination must match.  */
9516  if (MEM_P (src) && !matching_memory)
9517    src = force_reg (mode, src);
9518
9519  /* Emit the instruction.  */
9520
9521  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9522  if (reload_in_progress || code == NOT)
9523    {
9524      /* Reload doesn't know about the flags register, and doesn't know that
9525         it doesn't want to clobber it.  */
9526      gcc_assert (code == NOT);
9527      emit_insn (op);
9528    }
9529  else
9530    {
9531      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9532      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9533    }
9534
9535  /* Fix up the destination if needed.  */
9536  if (dst != operands[0])
9537    emit_move_insn (operands[0], dst);
9538}
9539
9540/* Return TRUE or FALSE depending on whether the unary operator meets the
9541   appropriate constraints.  */
9542
9543int
9544ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9545			enum machine_mode mode ATTRIBUTE_UNUSED,
9546			rtx operands[2] ATTRIBUTE_UNUSED)
9547{
9548  /* If one of operands is memory, source and destination must match.  */
9549	  /* If one of the operands is memory, source and destination must match.  */
9550       || GET_CODE (operands[1]) == MEM)
9551      && ! rtx_equal_p (operands[0], operands[1]))
9552    return FALSE;
9553  return TRUE;
9554}
9555
9556/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9557   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
9558   true, then replicate the mask for all elements of the vector register.
9559   If INVERT is true, then create a mask excluding the sign bit.  */
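/* (Concretely, for SFmode the element value is 0x80000000, or 0x7fffffff
   when INVERT; for DFmode the sign bit is bit 63 of each element.)  */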
9560
9561rtx
9562ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9563{
9564  enum machine_mode vec_mode;
9565  HOST_WIDE_INT hi, lo;
9566  int shift = 63;
9567  rtvec v;
9568  rtx mask;
9569
9570  /* Find the sign bit, sign extended to 2*HWI.  */
9571  if (mode == SFmode)
9572    lo = 0x80000000, hi = lo < 0;
9573  else if (HOST_BITS_PER_WIDE_INT >= 64)
9574    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9575  else
9576    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9577
9578  if (invert)
9579    lo = ~lo, hi = ~hi;
9580
9581  /* Force this value into the low part of a fp vector constant.  */
9582  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9583  mask = gen_lowpart (mode, mask);
9584
9585  if (mode == SFmode)
9586    {
9587      if (vect)
9588	v = gen_rtvec (4, mask, mask, mask, mask);
9589      else
9590	v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9591		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9592      vec_mode = V4SFmode;
9593    }
9594  else
9595    {
9596      if (vect)
9597	v = gen_rtvec (2, mask, mask);
9598      else
9599	v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9600      vec_mode = V2DFmode;
9601    }
9602
9603  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9604}
9605
9606/* Generate code for floating point ABS or NEG.  */
9607
9608void
9609ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9610				rtx operands[])
9611{
9612  rtx mask, set, use, clob, dst, src;
9613  bool matching_memory;
9614  bool use_sse = false;
9615  bool vector_mode = VECTOR_MODE_P (mode);
9616  enum machine_mode elt_mode = mode;
9617
9618  if (vector_mode)
9619    {
9620      elt_mode = GET_MODE_INNER (mode);
9621      use_sse = true;
9622    }
9623  else if (TARGET_SSE_MATH)
9624    use_sse = SSE_FLOAT_MODE_P (mode);
9625
9626  /* NEG and ABS performed with SSE use bitwise mask operations.
9627     Create the appropriate mask now.  */
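  /* That is, NEG xors the sign-bit mask into the value, while ABS ands
     with the inverted mask, which is why code == ABS requests the
     inverted mask below.  */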
9628  if (use_sse)
9629    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9630  else
9631    mask = NULL_RTX;
9632
9633  dst = operands[0];
9634  src = operands[1];
9635
9636  /* If the destination is memory, and we don't have matching source
9637     operands or we're using the x87, do things in registers.  */
9638  matching_memory = false;
9639  if (MEM_P (dst))
9640    {
9641      if (use_sse && rtx_equal_p (dst, src))
9642	matching_memory = true;
9643      else
9644	dst = gen_reg_rtx (mode);
9645    }
9646  if (MEM_P (src) && !matching_memory)
9647    src = force_reg (mode, src);
9648
9649  if (vector_mode)
9650    {
9651      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9652      set = gen_rtx_SET (VOIDmode, dst, set);
9653      emit_insn (set);
9654    }
9655  else
9656    {
9657      set = gen_rtx_fmt_e (code, mode, src);
9658      set = gen_rtx_SET (VOIDmode, dst, set);
9659      if (mask)
9660        {
9661          use = gen_rtx_USE (VOIDmode, mask);
9662          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9663          emit_insn (gen_rtx_PARALLEL (VOIDmode,
9664				       gen_rtvec (3, set, use, clob)));
9665        }
9666      else
9667	emit_insn (set);
9668    }
9669
9670  if (dst != operands[0])
9671    emit_move_insn (operands[0], dst);
9672}
9673
9674/* Expand a copysign operation.  Special case operand 0 being a constant.  */
9675
9676void
9677ix86_expand_copysign (rtx operands[])
9678{
9679  enum machine_mode mode, vmode;
9680  rtx dest, op0, op1, mask, nmask;
9681
9682  dest = operands[0];
9683  op0 = operands[1];
9684  op1 = operands[2];
9685
9686  mode = GET_MODE (dest);
9687  vmode = mode == SFmode ? V4SFmode : V2DFmode;
9688
9689  if (GET_CODE (op0) == CONST_DOUBLE)
9690    {
9691      rtvec v;
9692
9693      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9694	op0 = simplify_unary_operation (ABS, mode, op0, mode);
9695
9696      if (op0 == CONST0_RTX (mode))
9697	op0 = CONST0_RTX (vmode);
9698      else
9699        {
9700	  if (mode == SFmode)
9701	    v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9702                           CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9703	  else
9704	    v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9705          op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9706	}
9707
9708      mask = ix86_build_signbit_mask (mode, 0, 0);
9709
9710      if (mode == SFmode)
9711	emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9712      else
9713	emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9714    }
9715  else
9716    {
9717      nmask = ix86_build_signbit_mask (mode, 0, 1);
9718      mask = ix86_build_signbit_mask (mode, 0, 0);
9719
9720      if (mode == SFmode)
9721	emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9722      else
9723	emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9724    }
9725}
9726
9727/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
9728   be a constant, and so has already been expanded into a vector constant.  */
9729
9730void
9731ix86_split_copysign_const (rtx operands[])
9732{
9733  enum machine_mode mode, vmode;
9734  rtx dest, op0, op1, mask, x;
9735
9736  dest = operands[0];
9737  op0 = operands[1];
9738  op1 = operands[2];
9739  mask = operands[3];
9740
9741  mode = GET_MODE (dest);
9742  vmode = GET_MODE (mask);
9743
9744  dest = simplify_gen_subreg (vmode, dest, mode, 0);
9745  x = gen_rtx_AND (vmode, dest, mask);
9746  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9747
9748  if (op0 != CONST0_RTX (vmode))
9749    {
9750      x = gen_rtx_IOR (vmode, dest, op0);
9751      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9752    }
9753}
9754
9755/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
9756   so we have to do two masks.  */
9757
9758void
9759ix86_split_copysign_var (rtx operands[])
9760{
9761  enum machine_mode mode, vmode;
9762  rtx dest, scratch, op0, op1, mask, nmask, x;
9763
9764  dest = operands[0];
9765  scratch = operands[1];
9766  op0 = operands[2];
9767  op1 = operands[3];
9768  nmask = operands[4];
9769  mask = operands[5];
9770
9771  mode = GET_MODE (dest);
9772  vmode = GET_MODE (mask);
9773
9774  if (rtx_equal_p (op0, op1))
9775    {
9776      /* Shouldn't happen often (it's useless, obviously), but when it does
9777	 we'd generate incorrect code if we continue below.  */
9778      emit_move_insn (dest, op0);
9779      return;
9780    }
9781
9782  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
9783    {
9784      gcc_assert (REGNO (op1) == REGNO (scratch));
9785
9786      x = gen_rtx_AND (vmode, scratch, mask);
9787      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9788
9789      dest = mask;
9790      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9791      x = gen_rtx_NOT (vmode, dest);
9792      x = gen_rtx_AND (vmode, x, op0);
9793      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9794    }
9795  else
9796    {
9797      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
9798	{
9799	  x = gen_rtx_AND (vmode, scratch, mask);
9800	}
9801      else						/* alternative 2,4 */
9802	{
9803          gcc_assert (REGNO (mask) == REGNO (scratch));
9804          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9805	  x = gen_rtx_AND (vmode, scratch, op1);
9806	}
9807      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9808
9809      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
9810	{
9811	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
9812	  x = gen_rtx_AND (vmode, dest, nmask);
9813	}
9814      else						/* alternative 3,4 */
9815	{
9816          gcc_assert (REGNO (nmask) == REGNO (dest));
9817	  dest = nmask;
9818	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9819	  x = gen_rtx_AND (vmode, dest, op0);
9820	}
9821      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9822    }
9823
9824  x = gen_rtx_IOR (vmode, dest, scratch);
9825  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9826}
9827
9828/* Return TRUE or FALSE depending on whether the first SET in INSN
9829	   has source and destination with matching CC modes, and whether the
9830	   CC mode is at least as constrained as REQ_MODE.  */
9831
9832int
9833ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9834{
9835  rtx set;
9836  enum machine_mode set_mode;
9837
9838  set = PATTERN (insn);
9839  if (GET_CODE (set) == PARALLEL)
9840    set = XVECEXP (set, 0, 0);
9841  gcc_assert (GET_CODE (set) == SET);
9842  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9843
9844  set_mode = GET_MODE (SET_DEST (set));
9845  switch (set_mode)
9846    {
9847    case CCNOmode:
9848      if (req_mode != CCNOmode
9849	  && (req_mode != CCmode
9850	      || XEXP (SET_SRC (set), 1) != const0_rtx))
9851	return 0;
9852      break;
9853    case CCmode:
9854      if (req_mode == CCGCmode)
9855	return 0;
9856      /* FALLTHRU */
9857    case CCGCmode:
9858      if (req_mode == CCGOCmode || req_mode == CCNOmode)
9859	return 0;
9860      /* FALLTHRU */
9861    case CCGOCmode:
9862      if (req_mode == CCZmode)
9863	return 0;
9864      /* FALLTHRU */
9865    case CCZmode:
9866      break;
9867
9868    default:
9869      gcc_unreachable ();
9870    }
9871
9872  return (GET_MODE (SET_SRC (set)) == set_mode);
9873}
9874
9875/* Generate insn patterns to do an integer compare of OPERANDS.  */
9876
9877static rtx
9878ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9879{
9880  enum machine_mode cmpmode;
9881  rtx tmp, flags;
9882
9883  cmpmode = SELECT_CC_MODE (code, op0, op1);
9884  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9885
9886  /* This is very simple, but making the interface the same as in the
9887     FP case makes the rest of the code easier.  */
9888  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9889  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9890
9891  /* Return the test that should be put into the flags user, i.e.
9892     the bcc, scc, or cmov instruction.  */
9893  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9894}
9895
9896/* Figure out whether to use ordered or unordered fp comparisons.
9897   Return the appropriate mode to use.  */
9898
9899enum machine_mode
9900ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9901{
9902  /* ??? In order to make all comparisons reversible, we do all comparisons
9903     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
9904	     all forms of trapping and nontrapping comparisons, we can make inequality
9905	     comparisons trapping again, since that results in better code when using
9906     FCOM based compares.  */
9907  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9908}
9909
9910enum machine_mode
9911ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9912{
9913  if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9914    return ix86_fp_compare_mode (code);
9915  switch (code)
9916    {
9917      /* Only zero flag is needed.  */
9918    case EQ:			/* ZF=0 */
9919    case NE:			/* ZF!=0 */
9920      return CCZmode;
9921      /* Codes needing carry flag.  */
9922    case GEU:			/* CF=0 */
9923    case GTU:			/* CF=0 & ZF=0 */
9924    case LTU:			/* CF=1 */
9925    case LEU:			/* CF=1 | ZF=1 */
9926      return CCmode;
9927      /* Codes possibly doable only with sign flag when
9928         comparing against zero.  */
9929    case GE:			/* SF=OF   or   SF=0 */
9930    case LT:			/* SF<>OF  or   SF=1 */
9931      if (op1 == const0_rtx)
9932	return CCGOCmode;
9933      else
9934	/* For other cases Carry flag is not required.  */
9935	return CCGCmode;
9936	      /* Codes doable only with the sign flag when comparing
9937	         against zero, but we lack a jump instruction for that,
9938	         so we need to use relational tests against the overflow
9939	         flag, which thus needs to be zero.  */
9940    case GT:			/* ZF=0 & SF=OF */
9941    case LE:			/* ZF=1 | SF<>OF */
9942      if (op1 == const0_rtx)
9943	return CCNOmode;
9944      else
9945	return CCGCmode;
9946      /* The strcmp pattern does a (use flags), and combine may ask us for the
9947	 proper mode.  */
9948    case USE:
9949      return CCmode;
9950    default:
9951      gcc_unreachable ();
9952    }
9953}
9954
9955/* Return the fixed registers used for condition codes.  */
9956
9957static bool
9958ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9959{
9960  *p1 = FLAGS_REG;
9961  *p2 = FPSR_REG;
9962  return true;
9963}
9964
9965/* If two condition code modes are compatible, return a condition code
9966   mode which is compatible with both.  Otherwise, return
9967   VOIDmode.  */
9968
9969static enum machine_mode
9970ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9971{
9972  if (m1 == m2)
9973    return m1;
9974
9975  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9976    return VOIDmode;
9977
9978  if ((m1 == CCGCmode && m2 == CCGOCmode)
9979      || (m1 == CCGOCmode && m2 == CCGCmode))
9980    return CCGCmode;
9981
9982  switch (m1)
9983    {
9984    default:
9985      gcc_unreachable ();
9986
9987    case CCmode:
9988    case CCGCmode:
9989    case CCGOCmode:
9990    case CCNOmode:
9991    case CCZmode:
9992      switch (m2)
9993	{
9994	default:
9995	  return VOIDmode;
9996
9997	case CCmode:
9998	case CCGCmode:
9999	case CCGOCmode:
10000	case CCNOmode:
10001	case CCZmode:
10002	  return CCmode;
10003	}
10004
10005    case CCFPmode:
10006    case CCFPUmode:
10007      /* These are only compatible with themselves, which we already
10008	 checked above.  */
10009      return VOIDmode;
10010    }
10011}
10012
10013/* Return true if we should use an FCOMI instruction for this fp comparison.  */
10014
10015int
10016ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10017{
10018  enum rtx_code swapped_code = swap_condition (code);
10019  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10020	  || (ix86_fp_comparison_cost (swapped_code)
10021	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
10022}
10023
10024/* Swap, force into registers, or otherwise massage the two operands
10025   to a fp comparison.  The operands are updated in place; the new
10026   comparison code is returned.  */
10027
10028static enum rtx_code
10029ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10030{
10031  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10032  rtx op0 = *pop0, op1 = *pop1;
10033  enum machine_mode op_mode = GET_MODE (op0);
10034  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10035
10036  /* All of the unordered compare instructions only work on registers.
10037     The same is true of the fcomi compare instructions.  The XFmode
10038     compare instructions require registers except when comparing
10039     against zero or when converting operand 1 from fixed point to
10040     floating point.  */
10041
10042  if (!is_sse
10043      && (fpcmp_mode == CCFPUmode
10044	  || (op_mode == XFmode
10045	      && ! (standard_80387_constant_p (op0) == 1
10046		    || standard_80387_constant_p (op1) == 1)
10047	      && GET_CODE (op1) != FLOAT)
10048	  || ix86_use_fcomi_compare (code)))
10049    {
10050      op0 = force_reg (op_mode, op0);
10051      op1 = force_reg (op_mode, op1);
10052    }
10053  else
10054    {
10055      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
10056	 things around if they appear profitable, otherwise force op0
10057	 into a register.  */
10058
10059      if (standard_80387_constant_p (op0) == 0
10060	  || (GET_CODE (op0) == MEM
10061	      && ! (standard_80387_constant_p (op1) == 0
10062		    || GET_CODE (op1) == MEM)))
10063	{
10064	  rtx tmp;
10065	  tmp = op0, op0 = op1, op1 = tmp;
10066	  code = swap_condition (code);
10067	}
10068
10069      if (GET_CODE (op0) != REG)
10070	op0 = force_reg (op_mode, op0);
10071
10072      if (CONSTANT_P (op1))
10073	{
10074	  int tmp = standard_80387_constant_p (op1);
10075	  if (tmp == 0)
10076	    op1 = validize_mem (force_const_mem (op_mode, op1));
10077	  else if (tmp == 1)
10078	    {
10079	      if (TARGET_CMOVE)
10080		op1 = force_reg (op_mode, op1);
10081	    }
10082	  else
10083	    op1 = force_reg (op_mode, op1);
10084	}
10085    }
10086
10087  /* Try to rearrange the comparison to make it cheaper.  */
10088  if (ix86_fp_comparison_cost (code)
10089      > ix86_fp_comparison_cost (swap_condition (code))
10090      && (GET_CODE (op1) == REG || !no_new_pseudos))
10091    {
10092      rtx tmp;
10093      tmp = op0, op0 = op1, op1 = tmp;
10094      code = swap_condition (code);
10095      if (GET_CODE (op0) != REG)
10096	op0 = force_reg (op_mode, op0);
10097    }
10098
10099  *pop0 = op0;
10100  *pop1 = op1;
10101  return code;
10102}
10103
10104/* Convert the comparison codes we use to represent FP comparisons into the
10105   integer code that will result in a proper branch.  Return UNKNOWN if no such code
10106   is available.  */
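/* After fcomi (or fnstsw + sahf) the FP relation lives in CF/ZF/PF exactly the
   way an unsigned integer compare would leave them, which is why GT maps to
   GTU, UNLT to LTU, and so on below.  */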
10107
10108enum rtx_code
10109ix86_fp_compare_code_to_integer (enum rtx_code code)
10110{
10111  switch (code)
10112    {
10113    case GT:
10114      return GTU;
10115    case GE:
10116      return GEU;
10117    case ORDERED:
10118    case UNORDERED:
10119      return code;
10120      break;
10121    case UNEQ:
10122      return EQ;
10123      break;
10124    case UNLT:
10125      return LTU;
10126      break;
10127    case UNLE:
10128      return LEU;
10129      break;
10130    case LTGT:
10131      return NE;
10132      break;
10133    default:
10134      return UNKNOWN;
10135    }
10136}
10137
10138/* Split comparison code CODE into comparisons we can do using branch
10139   instructions.  BYPASS_CODE is comparison code for branch that will
10140   branch around FIRST_CODE and SECOND_CODE.  If some of branches
10141   is not required, set value to UNKNOWN.
10142   We never require more than two branches.  */
10143
10144void
10145ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10146			  enum rtx_code *first_code,
10147			  enum rtx_code *second_code)
10148{
10149  *first_code = code;
10150  *bypass_code = UNKNOWN;
10151  *second_code = UNKNOWN;
10152
10153  /* The fcomi comparison sets flags as follows:
10154
10155     cmp    ZF PF CF
10156     >      0  0  0
10157     <      0  0  1
10158     =      1  0  0
10159     un     1  1  1 */
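  /* Illustration: under TARGET_IEEE_FP a plain LT cannot branch on CF alone,
     since unordered operands also set CF; so below LT becomes *first_code =
     UNLT with *bypass_code = UNORDERED, i.e. a "jp" that jumps around the
     "jb".  */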
10160
10161  switch (code)
10162    {
10163    case GT:			/* GTU - CF=0 & ZF=0 */
10164    case GE:			/* GEU - CF=0 */
10165    case ORDERED:		/* PF=0 */
10166    case UNORDERED:		/* PF=1 */
10167    case UNEQ:			/* EQ - ZF=1 */
10168    case UNLT:			/* LTU - CF=1 */
10169    case UNLE:			/* LEU - CF=1 | ZF=1 */
10170    case LTGT:			/* EQ - ZF=0 */
10171      break;
10172    case LT:			/* LTU - CF=1 - fails on unordered */
10173      *first_code = UNLT;
10174      *bypass_code = UNORDERED;
10175      break;
10176    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
10177      *first_code = UNLE;
10178      *bypass_code = UNORDERED;
10179      break;
10180    case EQ:			/* EQ - ZF=1 - fails on unordered */
10181      *first_code = UNEQ;
10182      *bypass_code = UNORDERED;
10183      break;
10184    case NE:			/* NE - ZF=0 - fails on unordered */
10185      *first_code = LTGT;
10186      *second_code = UNORDERED;
10187      break;
10188    case UNGE:			/* GEU - CF=0 - fails on unordered */
10189      *first_code = GE;
10190      *second_code = UNORDERED;
10191      break;
10192    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
10193      *first_code = GT;
10194      *second_code = UNORDERED;
10195      break;
10196    default:
10197      gcc_unreachable ();
10198    }
10199  if (!TARGET_IEEE_FP)
10200    {
10201      *second_code = UNKNOWN;
10202      *bypass_code = UNKNOWN;
10203    }
10204}
10205
10206/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10207   All of the following functions use the number of instructions as the cost metric.
10208   In the future this should be tweaked to compute bytes for optimize_size and
10209   to take into account the performance of various instructions on various CPUs.  */
10210static int
10211ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10212{
10213  if (!TARGET_IEEE_FP)
10214    return 4;
10215  /* The cost of code output by ix86_expand_fp_compare.  */
10216  switch (code)
10217    {
10218    case UNLE:
10219    case UNLT:
10220    case LTGT:
10221    case GT:
10222    case GE:
10223    case UNORDERED:
10224    case ORDERED:
10225    case UNEQ:
10226      return 4;
10227      break;
10228    case LT:
10229    case NE:
10230    case EQ:
10231    case UNGE:
10232      return 5;
10233      break;
10234    case LE:
10235    case UNGT:
10236      return 6;
10237      break;
10238    default:
10239      gcc_unreachable ();
10240    }
10241}
10242
10243/* Return cost of comparison done using fcomi operation.
10244   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10245static int
10246ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10247{
10248  enum rtx_code bypass_code, first_code, second_code;
10249  /* Return arbitrarily high cost when instruction is not supported - this
10250     prevents gcc from using it.  */
10251  if (!TARGET_CMOVE)
10252    return 1024;
10253  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10254  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10255}
10256
10257/* Return cost of comparison done using sahf operation.
10258   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10259static int
10260ix86_fp_comparison_sahf_cost (enum rtx_code code)
10261{
10262  enum rtx_code bypass_code, first_code, second_code;
10263  /* Return arbitrarily high cost when instruction is not preferred - this
10264     keeps gcc from using it.  */
10265  if (!TARGET_USE_SAHF && !optimize_size)
10266    return 1024;
10267  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10268  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10269}
10270
10271/* Compute cost of the comparison done using any method.
10272   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10273static int
10274ix86_fp_comparison_cost (enum rtx_code code)
10275{
10276  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10277  int min;
10278
10279  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10280  sahf_cost = ix86_fp_comparison_sahf_cost (code);
10281
10282  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10283  if (min > sahf_cost)
10284    min = sahf_cost;
10285  if (min > fcomi_cost)
10286    min = fcomi_cost;
10287  return min;
10288}
10289
10290/* Generate insn patterns to do a floating point compare of OPERANDS.  */
10291
10292static rtx
10293ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10294			rtx *second_test, rtx *bypass_test)
10295{
10296  enum machine_mode fpcmp_mode, intcmp_mode;
10297  rtx tmp, tmp2;
10298  int cost = ix86_fp_comparison_cost (code);
10299  enum rtx_code bypass_code, first_code, second_code;
10300
10301  fpcmp_mode = ix86_fp_compare_mode (code);
10302  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10303
10304  if (second_test)
10305    *second_test = NULL_RTX;
10306  if (bypass_test)
10307    *bypass_test = NULL_RTX;
10308
10309  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10310
10311  /* Do fcomi/sahf based test when profitable.  */
10312  if ((bypass_code == UNKNOWN || bypass_test)
10313      && (second_code == UNKNOWN || second_test)
10314      && ix86_fp_comparison_arithmetics_cost (code) > cost)
10315    {
10316      if (TARGET_CMOVE)
10317	{
10318	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10319	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10320			     tmp);
10321	  emit_insn (tmp);
10322	}
10323      else
10324	{
10325	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10326	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10327	  if (!scratch)
10328	    scratch = gen_reg_rtx (HImode);
10329	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10330	  emit_insn (gen_x86_sahf_1 (scratch));
10331	}
10332
10333      /* The FP codes work out to act like unsigned.  */
10334      intcmp_mode = fpcmp_mode;
10335      code = first_code;
10336      if (bypass_code != UNKNOWN)
10337	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10338				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
10339				       const0_rtx);
10340      if (second_code != UNKNOWN)
10341	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10342				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
10343				       const0_rtx);
10344    }
10345  else
10346    {
10347      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
10348      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10349      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10350      if (!scratch)
10351	scratch = gen_reg_rtx (HImode);
10352      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10353
10354      /* In the unordered case, we have to check C2 for NaN's, which
10355	 doesn't happen to work out to anything nice combination-wise.
10356	 So do some bit twiddling on the value we've got in AH to come
10357	 up with an appropriate set of condition codes.  */
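      /* The fnstsw status word is tested through its high byte (AH): there
	 bit 0x01 is C0 (acts like CF), 0x04 is C2 (set for unordered results)
	 and 0x40 is C3 (acts like ZF), so the 0x45 masks below test all three
	 at once.  */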
10358
10359      intcmp_mode = CCNOmode;
10360      switch (code)
10361	{
10362	case GT:
10363	case UNGT:
10364	  if (code == GT || !TARGET_IEEE_FP)
10365	    {
10366	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10367	      code = EQ;
10368	    }
10369	  else
10370	    {
10371	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10372	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10373	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10374	      intcmp_mode = CCmode;
10375	      code = GEU;
10376	    }
10377	  break;
10378	case LT:
10379	case UNLT:
10380	  if (code == LT && TARGET_IEEE_FP)
10381	    {
10382	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10383	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10384	      intcmp_mode = CCmode;
10385	      code = EQ;
10386	    }
10387	  else
10388	    {
10389	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10390	      code = NE;
10391	    }
10392	  break;
10393	case GE:
10394	case UNGE:
10395	  if (code == GE || !TARGET_IEEE_FP)
10396	    {
10397	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10398	      code = EQ;
10399	    }
10400	  else
10401	    {
10402	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10403	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10404					     GEN_INT (0x01)));
10405	      code = NE;
10406	    }
10407	  break;
10408	case LE:
10409	case UNLE:
10410	  if (code == LE && TARGET_IEEE_FP)
10411	    {
10412	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10413	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10414	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10415	      intcmp_mode = CCmode;
10416	      code = LTU;
10417	    }
10418	  else
10419	    {
10420	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10421	      code = NE;
10422	    }
10423	  break;
10424	case EQ:
10425	case UNEQ:
10426	  if (code == EQ && TARGET_IEEE_FP)
10427	    {
10428	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10429	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10430	      intcmp_mode = CCmode;
10431	      code = EQ;
10432	    }
10433	  else
10434	    {
10435	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10436	      code = NE;
10437	      break;
10438	    }
10439	  break;
10440	case NE:
10441	case LTGT:
10442	  if (code == NE && TARGET_IEEE_FP)
10443	    {
10444	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10445	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10446					     GEN_INT (0x40)));
10447	      code = NE;
10448	    }
10449	  else
10450	    {
10451	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10452	      code = EQ;
10453	    }
10454	  break;
10455
10456	case UNORDERED:
10457	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10458	  code = NE;
10459	  break;
10460	case ORDERED:
10461	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10462	  code = EQ;
10463	  break;
10464
10465	default:
10466	  gcc_unreachable ();
10467	}
10468    }
10469
10470  /* Return the test that should be put into the flags user, i.e.
10471     the bcc, scc, or cmov instruction.  */
10472  return gen_rtx_fmt_ee (code, VOIDmode,
10473			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10474			 const0_rtx);
10475}
10476
10477rtx
10478ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10479{
10480  rtx op0, op1, ret;
10481  op0 = ix86_compare_op0;
10482  op1 = ix86_compare_op1;
10483
10484  if (second_test)
10485    *second_test = NULL_RTX;
10486  if (bypass_test)
10487    *bypass_test = NULL_RTX;
10488
10489  if (ix86_compare_emitted)
10490    {
10491      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10492      ix86_compare_emitted = NULL_RTX;
10493    }
10494  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10495    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10496				  second_test, bypass_test);
10497  else
10498    ret = ix86_expand_int_compare (code, op0, op1);
10499
10500  return ret;
10501}
10502
10503/* Return true if the CODE will result in a nontrivial jump sequence.  */
10504bool
10505ix86_fp_jump_nontrivial_p (enum rtx_code code)
10506{
10507  enum rtx_code bypass_code, first_code, second_code;
10508  if (!TARGET_CMOVE)
10509    return true;
10510  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10511  return bypass_code != UNKNOWN || second_code != UNKNOWN;
10512}
10513
10514void
10515ix86_expand_branch (enum rtx_code code, rtx label)
10516{
10517  rtx tmp;
10518
10519  /* If we have emitted a compare insn, go straight to simple.
10520     ix86_expand_compare won't emit anything if ix86_compare_emitted
10521     is non-NULL.  */
10522  if (ix86_compare_emitted)
10523    goto simple;
10524
10525  switch (GET_MODE (ix86_compare_op0))
10526    {
10527    case QImode:
10528    case HImode:
10529    case SImode:
10530      simple:
10531      tmp = ix86_expand_compare (code, NULL, NULL);
10532      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10533				  gen_rtx_LABEL_REF (VOIDmode, label),
10534				  pc_rtx);
10535      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10536      return;
10537
10538    case SFmode:
10539    case DFmode:
10540    case XFmode:
10541      {
10542	rtvec vec;
10543	int use_fcomi;
10544	enum rtx_code bypass_code, first_code, second_code;
10545
10546	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10547					     &ix86_compare_op1);
10548
10549	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10550
10551	/* Check whether we will use the natural sequence with one jump.  If
10552	   so, we can expand the jump early.  Otherwise delay expansion by
10553	   creating a compound insn so as not to confuse the optimizers.  */
10554	if (bypass_code == UNKNOWN && second_code == UNKNOWN
10555	    && TARGET_CMOVE)
10556	  {
10557	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10558				  gen_rtx_LABEL_REF (VOIDmode, label),
10559				  pc_rtx, NULL_RTX, NULL_RTX);
10560	  }
10561	else
10562	  {
10563	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
10564				  ix86_compare_op0, ix86_compare_op1);
10565	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10566					gen_rtx_LABEL_REF (VOIDmode, label),
10567					pc_rtx);
10568	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10569
10570	    use_fcomi = ix86_use_fcomi_compare (code);
10571	    vec = rtvec_alloc (3 + !use_fcomi);
10572	    RTVEC_ELT (vec, 0) = tmp;
10573	    RTVEC_ELT (vec, 1)
10574	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10575	    RTVEC_ELT (vec, 2)
10576	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10577	    if (! use_fcomi)
10578	      RTVEC_ELT (vec, 3)
10579		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10580
10581	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10582	  }
10583	return;
10584      }
10585
10586    case DImode:
10587      if (TARGET_64BIT)
10588	goto simple;
10589    case TImode:
10590      /* Expand DImode/TImode branch into multiple compare+branch.  */
10591      {
10592	rtx lo[2], hi[2], label2;
10593	enum rtx_code code1, code2, code3;
10594	enum machine_mode submode;
10595
10596	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10597	  {
10598	    tmp = ix86_compare_op0;
10599	    ix86_compare_op0 = ix86_compare_op1;
10600	    ix86_compare_op1 = tmp;
10601	    code = swap_condition (code);
10602	  }
10603	if (GET_MODE (ix86_compare_op0) == DImode)
10604	  {
10605	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10606	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10607	    submode = SImode;
10608	  }
10609	else
10610	  {
10611	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10612	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10613	    submode = DImode;
10614	  }
10615
10616	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10617	   avoid two branches.  This costs one extra insn, so disable when
10618	   optimizing for size.  */
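	/* E.g. a DImode "a == b" on ia32 becomes roughly
	     xorl hi(b),hi(a); xorl lo(b),lo(a); orl ...; je label
	   so only one conditional jump is needed.  */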
10619
10620	if ((code == EQ || code == NE)
10621	    && (!optimize_size
10622	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
10623	  {
10624	    rtx xor0, xor1;
10625
10626	    xor1 = hi[0];
10627	    if (hi[1] != const0_rtx)
10628	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10629				   NULL_RTX, 0, OPTAB_WIDEN);
10630
10631	    xor0 = lo[0];
10632	    if (lo[1] != const0_rtx)
10633	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10634				   NULL_RTX, 0, OPTAB_WIDEN);
10635
10636	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
10637				NULL_RTX, 0, OPTAB_WIDEN);
10638
10639	    ix86_compare_op0 = tmp;
10640	    ix86_compare_op1 = const0_rtx;
10641	    ix86_expand_branch (code, label);
10642	    return;
10643	  }
10644
10645	/* Otherwise, if we are doing a less-than or greater-or-equal comparison,
10646	   op1 is a constant, and the low word is zero, then we can just
10647	   examine the high word.  */
10648
10649	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10650	  switch (code)
10651	    {
10652	    case LT: case LTU: case GE: case GEU:
10653	      ix86_compare_op0 = hi[0];
10654	      ix86_compare_op1 = hi[1];
10655	      ix86_expand_branch (code, label);
10656	      return;
10657	    default:
10658	      break;
10659	    }
10660
10661	/* Otherwise, we need two or three jumps.  */
10662
10663	label2 = gen_label_rtx ();
10664
10665	code1 = code;
10666	code2 = swap_condition (code);
10667	code3 = unsigned_condition (code);
10668
10669	switch (code)
10670	  {
10671	  case LT: case GT: case LTU: case GTU:
10672	    break;
10673
10674	  case LE:   code1 = LT;  code2 = GT;  break;
10675	  case GE:   code1 = GT;  code2 = LT;  break;
10676	  case LEU:  code1 = LTU; code2 = GTU; break;
10677	  case GEU:  code1 = GTU; code2 = LTU; break;
10678
10679	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
10680	  case NE:   code2 = UNKNOWN; break;
10681
10682	  default:
10683	    gcc_unreachable ();
10684	  }
10685
10686	/*
10687	 * a < b =>
10688	 *    if (hi(a) < hi(b)) goto true;
10689	 *    if (hi(a) > hi(b)) goto false;
10690	 *    if (lo(a) < lo(b)) goto true;
10691	 *  false:
10692	 */
10693
10694	ix86_compare_op0 = hi[0];
10695	ix86_compare_op1 = hi[1];
10696
10697	if (code1 != UNKNOWN)
10698	  ix86_expand_branch (code1, label);
10699	if (code2 != UNKNOWN)
10700	  ix86_expand_branch (code2, label2);
10701
10702	ix86_compare_op0 = lo[0];
10703	ix86_compare_op1 = lo[1];
10704	ix86_expand_branch (code3, label);
10705
10706	if (code2 != UNKNOWN)
10707	  emit_label (label2);
10708	return;
10709      }
10710
10711    default:
10712      gcc_unreachable ();
10713    }
10714}
10715
10716/* Split branch based on floating point condition.  */
10717void
10718ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10719		      rtx target1, rtx target2, rtx tmp, rtx pushed)
10720{
10721  rtx second, bypass;
10722  rtx label = NULL_RTX;
10723  rtx condition;
10724  int bypass_probability = -1, second_probability = -1, probability = -1;
10725  rtx i;
10726
10727  if (target2 != pc_rtx)
10728    {
10729      rtx tmp = target2;
10730      code = reverse_condition_maybe_unordered (code);
10731      target2 = target1;
10732      target1 = tmp;
10733    }
10734
10735  condition = ix86_expand_fp_compare (code, op1, op2,
10736				      tmp, &second, &bypass);
10737
10738  /* Remove pushed operand from stack.  */
10739  if (pushed)
10740    ix86_free_from_memory (GET_MODE (pushed));
10741
10742  if (split_branch_probability >= 0)
10743    {
10744      /* Distribute the probabilities across the jumps.
10745	 Assume that BYPASS and SECOND always test
10746	 for UNORDERED.  */
10747      probability = split_branch_probability;
10748
10749      /* A value of 1 is low enough that there is no need for the probability
10750	 to be updated.  Later we may run some experiments and see
10751	 if unordered values are more frequent in practice.  */
10752      if (bypass)
10753	bypass_probability = 1;
10754      if (second)
10755	second_probability = 1;
10756    }
10757  if (bypass != NULL_RTX)
10758    {
10759      label = gen_label_rtx ();
10760      i = emit_jump_insn (gen_rtx_SET
10761			  (VOIDmode, pc_rtx,
10762			   gen_rtx_IF_THEN_ELSE (VOIDmode,
10763						 bypass,
10764						 gen_rtx_LABEL_REF (VOIDmode,
10765								    label),
10766						 pc_rtx)));
10767      if (bypass_probability >= 0)
10768	REG_NOTES (i)
10769	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
10770			       GEN_INT (bypass_probability),
10771			       REG_NOTES (i));
10772    }
10773  i = emit_jump_insn (gen_rtx_SET
10774		      (VOIDmode, pc_rtx,
10775		       gen_rtx_IF_THEN_ELSE (VOIDmode,
10776					     condition, target1, target2)));
10777  if (probability >= 0)
10778    REG_NOTES (i)
10779      = gen_rtx_EXPR_LIST (REG_BR_PROB,
10780			   GEN_INT (probability),
10781			   REG_NOTES (i));
10782  if (second != NULL_RTX)
10783    {
10784      i = emit_jump_insn (gen_rtx_SET
10785			  (VOIDmode, pc_rtx,
10786			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10787						 target2)));
10788      if (second_probability >= 0)
10789	REG_NOTES (i)
10790	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
10791			       GEN_INT (second_probability),
10792			       REG_NOTES (i));
10793    }
10794  if (label != NULL_RTX)
10795    emit_label (label);
10796}
10797
10798int
10799ix86_expand_setcc (enum rtx_code code, rtx dest)
10800{
10801  rtx ret, tmp, tmpreg, equiv;
10802  rtx second_test, bypass_test;
10803
10804  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10805    return 0; /* FAIL */
10806
10807  gcc_assert (GET_MODE (dest) == QImode);
10808
10809  ret = ix86_expand_compare (code, &second_test, &bypass_test);
10810  PUT_MODE (ret, QImode);
10811
10812  tmp = dest;
10813  tmpreg = dest;
10814
10815  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10816  if (bypass_test || second_test)
10817    {
10818      rtx test = second_test;
10819      int bypass = 0;
10820      rtx tmp2 = gen_reg_rtx (QImode);
10821      if (bypass_test)
10822	{
10823	  gcc_assert (!second_test);
10824	  test = bypass_test;
10825	  bypass = 1;
10826	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10827	}
10828      PUT_MODE (test, QImode);
10829      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10830
10831      if (bypass)
10832	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10833      else
10834	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10835    }
10836
10837  /* Attach a REG_EQUAL note describing the comparison result.  */
10838  if (ix86_compare_op0 && ix86_compare_op1)
10839    {
10840      equiv = simplify_gen_relational (code, QImode,
10841				       GET_MODE (ix86_compare_op0),
10842				       ix86_compare_op0, ix86_compare_op1);
10843      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10844    }
10845
10846  return 1; /* DONE */
10847}
10848
10849/* Expand comparison setting or clearing carry flag.  Return true when
10850   successful and set pop for the operation.  */
10851static bool
10852ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10853{
10854  enum machine_mode mode =
10855    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10856
10857  /* Do not handle DImode compares that go through the special path.  Also we can't
10858     deal with FP compares yet.  This is possible to add.  */
10859  if (mode == (TARGET_64BIT ? TImode : DImode))
10860    return false;
10861  if (FLOAT_MODE_P (mode))
10862    {
10863      rtx second_test = NULL, bypass_test = NULL;
10864      rtx compare_op, compare_seq;
10865
10866      /* Shortcut:  the following common codes never translate into carry flag compares.  */
10867      if (code == EQ || code == NE || code == UNEQ || code == LTGT
10868	  || code == ORDERED || code == UNORDERED)
10869	return false;
10870
10871      /* These comparisons require the zero flag; swap the operands so they won't.  */
10872      if ((code == GT || code == UNLE || code == LE || code == UNGT)
10873	  && !TARGET_IEEE_FP)
10874	{
10875	  rtx tmp = op0;
10876	  op0 = op1;
10877	  op1 = tmp;
10878	  code = swap_condition (code);
10879	}
10880
10881      /* Try to expand the comparison and verify that we end up with carry flag
10882	 based comparison.  This fails to be true only when we decide to expand the
10883	 comparison using arithmetic, which is not a common scenario.  */
10884      start_sequence ();
10885      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10886					   &second_test, &bypass_test);
10887      compare_seq = get_insns ();
10888      end_sequence ();
10889
10890      if (second_test || bypass_test)
10891	return false;
10892      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10893	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10894        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10895      else
10896	code = GET_CODE (compare_op);
10897      if (code != LTU && code != GEU)
10898	return false;
10899      emit_insn (compare_seq);
10900      *pop = compare_op;
10901      return true;
10902    }
10903  if (!INTEGRAL_MODE_P (mode))
10904    return false;
10905  switch (code)
10906    {
10907    case LTU:
10908    case GEU:
10909      break;
10910
10911    /* Convert a==0 into (unsigned)a<1.  */
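    /* E.g. "a == 0" tests as "a <u 1": the cmp sets CF exactly when a is zero,
       and "a != 0" correspondingly becomes "a >=u 1" (CF clear).  */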
10912    case EQ:
10913    case NE:
10914      if (op1 != const0_rtx)
10915	return false;
10916      op1 = const1_rtx;
10917      code = (code == EQ ? LTU : GEU);
10918      break;
10919
10920    /* Convert a>b into b<a or a>=b-1.  */
10921    case GTU:
10922    case LEU:
10923      if (GET_CODE (op1) == CONST_INT)
10924	{
10925	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10926	  /* Bail out on overflow.  We could still swap the operands, but that
10927	     would force loading of the constant into a register.  */
10928	  if (op1 == const0_rtx
10929	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10930	    return false;
10931	  code = (code == GTU ? GEU : LTU);
10932	}
10933      else
10934	{
10935	  rtx tmp = op1;
10936	  op1 = op0;
10937	  op0 = tmp;
10938	  code = (code == GTU ? LTU : GEU);
10939	}
10940      break;
10941
10942    /* Convert a>=0 into (unsigned)a<0x80000000.  */
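    /* E.g. for SImode, "a >= 0" holds exactly when the sign bit is clear,
       i.e. (unsigned) a < 0x80000000, which again is a pure carry-flag test.  */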
10943    case LT:
10944    case GE:
10945      if (mode == DImode || op1 != const0_rtx)
10946	return false;
10947      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10948      code = (code == LT ? GEU : LTU);
10949      break;
10950    case LE:
10951    case GT:
10952      if (mode == DImode || op1 != constm1_rtx)
10953	return false;
10954      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10955      code = (code == LE ? GEU : LTU);
10956      break;
10957
10958    default:
10959      return false;
10960    }
10961  /* Swapping the operands may cause a constant to appear as the first operand.  */
10962  if (!nonimmediate_operand (op0, VOIDmode))
10963    {
10964      if (no_new_pseudos)
10965	return false;
10966      op0 = force_reg (mode, op0);
10967    }
10968  ix86_compare_op0 = op0;
10969  ix86_compare_op1 = op1;
10970  *pop = ix86_expand_compare (code, NULL, NULL);
10971  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10972  return true;
10973}
10974
10975int
10976ix86_expand_int_movcc (rtx operands[])
10977{
10978  enum rtx_code code = GET_CODE (operands[1]), compare_code;
10979  rtx compare_seq, compare_op;
10980  rtx second_test, bypass_test;
10981  enum machine_mode mode = GET_MODE (operands[0]);
10982  bool sign_bit_compare_p = false;
10983
10984  start_sequence ();
10985  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10986  compare_seq = get_insns ();
10987  end_sequence ();
10988
10989  compare_code = GET_CODE (compare_op);
10990
10991  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10992      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10993    sign_bit_compare_p = true;
10994
10995  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10996     HImode insns, we'd be swallowed in word prefix ops.  */
10997
10998  if ((mode != HImode || TARGET_FAST_PREFIX)
10999      && (mode != (TARGET_64BIT ? TImode : DImode))
11000      && GET_CODE (operands[2]) == CONST_INT
11001      && GET_CODE (operands[3]) == CONST_INT)
11002    {
11003      rtx out = operands[0];
11004      HOST_WIDE_INT ct = INTVAL (operands[2]);
11005      HOST_WIDE_INT cf = INTVAL (operands[3]);
11006      HOST_WIDE_INT diff;
11007
11008      diff = ct - cf;
11009      /* Sign bit compares are better done using shifts than by using
11010	  sbb.  */
11011      if (sign_bit_compare_p
11012	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11013					     ix86_compare_op1, &compare_op))
11014	{
11015	  /* Detect overlap between destination and compare sources.  */
11016	  rtx tmp = out;
11017
11018          if (!sign_bit_compare_p)
11019	    {
11020	      bool fpcmp = false;
11021
11022	      compare_code = GET_CODE (compare_op);
11023
11024	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11025		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11026		{
11027		  fpcmp = true;
11028		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
11029		}
11030
11031	      /* To simplify the rest of the code, restrict to the GEU case.  */
11032	      if (compare_code == LTU)
11033		{
11034		  HOST_WIDE_INT tmp = ct;
11035		  ct = cf;
11036		  cf = tmp;
11037		  compare_code = reverse_condition (compare_code);
11038		  code = reverse_condition (code);
11039		}
11040	      else
11041		{
11042		  if (fpcmp)
11043		    PUT_CODE (compare_op,
11044			      reverse_condition_maybe_unordered
11045			        (GET_CODE (compare_op)));
11046		  else
11047		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11048		}
11049	      diff = ct - cf;
11050
11051	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11052		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
11053		tmp = gen_reg_rtx (mode);
11054
11055	      if (mode == DImode)
11056		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11057	      else
11058		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11059	    }
11060	  else
11061	    {
11062	      if (code == GT || code == GE)
11063		code = reverse_condition (code);
11064	      else
11065		{
11066		  HOST_WIDE_INT tmp = ct;
11067		  ct = cf;
11068		  cf = tmp;
11069		  diff = ct - cf;
11070		}
11071	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11072				     ix86_compare_op1, VOIDmode, 0, -1);
11073	    }
11074
11075	  if (diff == 1)
11076	    {
11077	      /*
11078	       * cmpl op0,op1
11079	       * sbbl dest,dest
11080	       * [addl dest, ct]
11081	       *
11082	       * Size 5 - 8.
11083	       */
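	      /* An sbb of a register with itself yields 0 or -1 according to
		 CF, so adding ct produces either ct or ct - 1 (which equals cf
		 since diff == 1), giving the conditional move with no branch.  */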
11084	      if (ct)
11085		tmp = expand_simple_binop (mode, PLUS,
11086					   tmp, GEN_INT (ct),
11087					   copy_rtx (tmp), 1, OPTAB_DIRECT);
11088	    }
11089	  else if (cf == -1)
11090	    {
11091	      /*
11092	       * cmpl op0,op1
11093	       * sbbl dest,dest
11094	       * orl $ct, dest
11095	       *
11096	       * Size 8.
11097	       */
11098	      tmp = expand_simple_binop (mode, IOR,
11099					 tmp, GEN_INT (ct),
11100					 copy_rtx (tmp), 1, OPTAB_DIRECT);
11101	    }
11102	  else if (diff == -1 && ct)
11103	    {
11104	      /*
11105	       * cmpl op0,op1
11106	       * sbbl dest,dest
11107	       * notl dest
11108	       * [addl dest, cf]
11109	       *
11110	       * Size 8 - 11.
11111	       */
11112	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11113	      if (cf)
11114		tmp = expand_simple_binop (mode, PLUS,
11115					   copy_rtx (tmp), GEN_INT (cf),
11116					   copy_rtx (tmp), 1, OPTAB_DIRECT);
11117	    }
11118	  else
11119	    {
11120	      /*
11121	       * cmpl op0,op1
11122	       * sbbl dest,dest
11123	       * [notl dest]
11124	       * andl cf - ct, dest
11125	       * [addl dest, ct]
11126	       *
11127	       * Size 8 - 11.
11128	       */
11129
11130	      if (cf == 0)
11131		{
11132		  cf = ct;
11133		  ct = 0;
11134		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11135		}
11136
11137	      tmp = expand_simple_binop (mode, AND,
11138					 copy_rtx (tmp),
11139					 gen_int_mode (cf - ct, mode),
11140					 copy_rtx (tmp), 1, OPTAB_DIRECT);
11141	      if (ct)
11142		tmp = expand_simple_binop (mode, PLUS,
11143					   copy_rtx (tmp), GEN_INT (ct),
11144					   copy_rtx (tmp), 1, OPTAB_DIRECT);
11145	    }
11146
11147	  if (!rtx_equal_p (tmp, out))
11148	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11149
11150	  return 1; /* DONE */
11151	}
11152
11153      if (diff < 0)
11154	{
11155	  HOST_WIDE_INT tmp;
11156	  tmp = ct, ct = cf, cf = tmp;
11157	  diff = -diff;
11158	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11159	    {
11160	      /* We may be reversing an unordered compare to a normal compare, which
11161		 is not valid in general (we may convert a non-trapping condition
11162		 to a trapping one); however, on i386 we currently emit all
11163		 comparisons unordered.  */
11164	      compare_code = reverse_condition_maybe_unordered (compare_code);
11165	      code = reverse_condition_maybe_unordered (code);
11166	    }
11167	  else
11168	    {
11169	      compare_code = reverse_condition (compare_code);
11170	      code = reverse_condition (code);
11171	    }
11172	}
11173
11174      compare_code = UNKNOWN;
11175      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11176	  && GET_CODE (ix86_compare_op1) == CONST_INT)
11177	{
11178	  if (ix86_compare_op1 == const0_rtx
11179	      && (code == LT || code == GE))
11180	    compare_code = code;
11181	  else if (ix86_compare_op1 == constm1_rtx)
11182	    {
11183	      if (code == LE)
11184		compare_code = LT;
11185	      else if (code == GT)
11186		compare_code = GE;
11187	    }
11188	}
11189
11190      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
11191      if (compare_code != UNKNOWN
11192	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11193	  && (cf == -1 || ct == -1))
11194	{
11195	  /* If lea code below could be used, only optimize
11196	     if it results in a 2 insn sequence.  */
11197
11198	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11199		 || diff == 3 || diff == 5 || diff == 9)
11200	      || (compare_code == LT && ct == -1)
11201	      || (compare_code == GE && cf == -1))
11202	    {
11203	      /*
11204	       * notl op1	(if necessary)
11205	       * sarl $31, op1
11206	       * orl cf, op1
11207	       */
11208	      if (ct != -1)
11209		{
11210		  cf = ct;
11211		  ct = -1;
11212		  code = reverse_condition (code);
11213		}
11214
11215	      out = emit_store_flag (out, code, ix86_compare_op0,
11216				     ix86_compare_op1, VOIDmode, 0, -1);
11217
11218	      out = expand_simple_binop (mode, IOR,
11219					 out, GEN_INT (cf),
11220					 out, 1, OPTAB_DIRECT);
11221	      if (out != operands[0])
11222		emit_move_insn (operands[0], out);
11223
11224	      return 1; /* DONE */
11225	    }
11226	}
11227
11228
11229      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11230	   || diff == 3 || diff == 5 || diff == 9)
11231	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11232	  && (mode != DImode
11233	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11234	{
11235	  /*
11236	   * xorl dest,dest
11237	   * cmpl op1,op2
11238	   * setcc dest
11239	   * lea cf(dest*(ct-cf)),dest
11240	   *
11241	   * Size 14.
11242	   *
11243	   * This also catches the degenerate setcc-only case.
11244	   */
11245
11246	  rtx tmp;
11247	  int nops;
11248
11249	  out = emit_store_flag (out, code, ix86_compare_op0,
11250				 ix86_compare_op1, VOIDmode, 0, 1);
11251
11252	  nops = 0;
11253	  /* On x86_64 the lea instruction operates on Pmode, so we need
11254	     to get the arithmetic done in the proper mode to match.  */
11255	  if (diff == 1)
11256	    tmp = copy_rtx (out);
11257	  else
11258	    {
11259	      rtx out1;
11260	      out1 = copy_rtx (out);
11261	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11262	      nops++;
11263	      if (diff & 1)
11264		{
11265		  tmp = gen_rtx_PLUS (mode, tmp, out1);
11266		  nops++;
11267		}
11268	    }
11269	  if (cf != 0)
11270	    {
11271	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11272	      nops++;
11273	    }
11274	  if (!rtx_equal_p (tmp, out))
11275	    {
11276	      if (nops == 1)
11277		out = force_operand (tmp, copy_rtx (out));
11278	      else
11279		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11280	    }
11281	  if (!rtx_equal_p (out, operands[0]))
11282	    emit_move_insn (operands[0], copy_rtx (out));
11283
11284	  return 1; /* DONE */
11285	}
11286
11287      /*
11288       * General case:			Jumpful:
11289       *   xorl dest,dest		cmpl op1, op2
11290       *   cmpl op1, op2		movl ct, dest
11291       *   setcc dest			jcc 1f
11292       *   decl dest			movl cf, dest
11293       *   andl (cf-ct),dest		1:
11294       *   addl ct,dest
11295       *
11296       * Size 20.			Size 14.
11297       *
11298       * This is reasonably steep, but branch mispredict costs are
11299       * high on modern cpus, so consider failing only if optimizing
11300       * for space.
11301       */
11302
11303      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11304	  && BRANCH_COST >= 2)
11305	{
11306	  if (cf == 0)
11307	    {
11308	      cf = ct;
11309	      ct = 0;
11310	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11311		/* We may be reversing an unordered compare to a normal compare,
11312		   which is not valid in general (we may convert a non-trapping
11313		   condition to a trapping one); however, on i386 we currently
11314		   emit all comparisons unordered.  */
11315		code = reverse_condition_maybe_unordered (code);
11316	      else
11317		{
11318		  code = reverse_condition (code);
11319		  if (compare_code != UNKNOWN)
11320		    compare_code = reverse_condition (compare_code);
11321		}
11322	    }
11323
11324	  if (compare_code != UNKNOWN)
11325	    {
11326	      /* notl op1	(if needed)
11327		 sarl $31, op1
11328		 andl (cf-ct), op1
11329		 addl ct, op1
11330
11331		 For x < 0 (resp. x <= -1) there will be no notl,
11332		 so if possible swap the constants to get rid of the
11333		 complement.
11334		 True/false will be -1/0 while code below (store flag
11335		 followed by decrement) is 0/-1, so the constants need
11336		 to be exchanged once more.  */
11337
11338	      if (compare_code == GE || !cf)
11339		{
11340		  code = reverse_condition (code);
11341		  compare_code = LT;
11342		}
11343	      else
11344		{
11345		  HOST_WIDE_INT tmp = cf;
11346		  cf = ct;
11347		  ct = tmp;
11348		}
11349
11350	      out = emit_store_flag (out, code, ix86_compare_op0,
11351				     ix86_compare_op1, VOIDmode, 0, -1);
11352	    }
11353	  else
11354	    {
11355	      out = emit_store_flag (out, code, ix86_compare_op0,
11356				     ix86_compare_op1, VOIDmode, 0, 1);
11357
11358	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11359					 copy_rtx (out), 1, OPTAB_DIRECT);
11360	    }
11361
11362	  out = expand_simple_binop (mode, AND, copy_rtx (out),
11363				     gen_int_mode (cf - ct, mode),
11364				     copy_rtx (out), 1, OPTAB_DIRECT);
11365	  if (ct)
11366	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11367				       copy_rtx (out), 1, OPTAB_DIRECT);
11368	  if (!rtx_equal_p (out, operands[0]))
11369	    emit_move_insn (operands[0], copy_rtx (out));
11370
11371	  return 1; /* DONE */
11372	}
11373    }
11374
11375  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11376    {
11377      /* Try a few things more with specific constants and a variable.  */
11378
11379      optab op;
11380      rtx var, orig_out, out, tmp;
11381
11382      if (BRANCH_COST <= 2)
11383	return 0; /* FAIL */
11384
11385      /* If one of the two operands is an interesting constant, load a
11386	 constant with the above and mask it in with a logical operation.  */
11387
11388      if (GET_CODE (operands[2]) == CONST_INT)
11389	{
11390	  var = operands[3];
11391	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11392	    operands[3] = constm1_rtx, op = and_optab;
11393	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11394	    operands[3] = const0_rtx, op = ior_optab;
11395	  else
11396	    return 0; /* FAIL */
11397	}
11398      else if (GET_CODE (operands[3]) == CONST_INT)
11399	{
11400	  var = operands[2];
11401	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11402	    operands[2] = constm1_rtx, op = and_optab;
11403	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11404	    operands[2] = const0_rtx, op = ior_optab;
11405	  else
11406	    return 0; /* FAIL */
11407	}
11408      else
11409        return 0; /* FAIL */
11410
11411      orig_out = operands[0];
11412      tmp = gen_reg_rtx (mode);
11413      operands[0] = tmp;
11414
11415      /* Recurse to get the constant loaded.  */
11416      if (ix86_expand_int_movcc (operands) == 0)
11417        return 0; /* FAIL */
11418
11419      /* Mask in the interesting variable.  */
11420      out = expand_binop (mode, op, var, tmp, orig_out, 0,
11421			  OPTAB_WIDEN);
11422      if (!rtx_equal_p (out, orig_out))
11423	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11424
11425      return 1; /* DONE */
11426    }
11427
11428  /*
11429   * For comparison with above,
11430   *
11431   * movl cf,dest
11432   * movl ct,tmp
11433   * cmpl op1,op2
11434   * cmovcc tmp,dest
11435   *
11436   * Size 15.
11437   */
11438
11439  if (! nonimmediate_operand (operands[2], mode))
11440    operands[2] = force_reg (mode, operands[2]);
11441  if (! nonimmediate_operand (operands[3], mode))
11442    operands[3] = force_reg (mode, operands[3]);
11443
11444  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11445    {
11446      rtx tmp = gen_reg_rtx (mode);
11447      emit_move_insn (tmp, operands[3]);
11448      operands[3] = tmp;
11449    }
11450  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11451    {
11452      rtx tmp = gen_reg_rtx (mode);
11453      emit_move_insn (tmp, operands[2]);
11454      operands[2] = tmp;
11455    }
11456
11457  if (! register_operand (operands[2], VOIDmode)
11458      && (mode == QImode
11459          || ! register_operand (operands[3], VOIDmode)))
11460    operands[2] = force_reg (mode, operands[2]);
11461
11462  if (mode == QImode
11463      && ! register_operand (operands[3], VOIDmode))
11464    operands[3] = force_reg (mode, operands[3]);
11465
11466  emit_insn (compare_seq);
11467  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11468			  gen_rtx_IF_THEN_ELSE (mode,
11469						compare_op, operands[2],
11470						operands[3])));
11471  if (bypass_test)
11472    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11473			    gen_rtx_IF_THEN_ELSE (mode,
11474				  bypass_test,
11475				  copy_rtx (operands[3]),
11476				  copy_rtx (operands[0]))));
11477  if (second_test)
11478    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11479			    gen_rtx_IF_THEN_ELSE (mode,
11480				  second_test,
11481				  copy_rtx (operands[2]),
11482				  copy_rtx (operands[0]))));
11483
11484  return 1; /* DONE */
11485}
11486
11487/* Swap, force into registers, or otherwise massage the two operands
11488   to an sse comparison with a mask result.  Thus we differ a bit from
11489   ix86_prepare_fp_compare_args which expects to produce a flags result.
11490
11491   The DEST operand exists to help determine whether to commute commutative
11492   operators.  The POP0/POP1 operands are updated in place.  The new
11493   comparison code is returned, or UNKNOWN if not implementable.  */
11494
11495static enum rtx_code
11496ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11497				  rtx *pop0, rtx *pop1)
11498{
11499  rtx tmp;
11500
11501  switch (code)
11502    {
11503    case LTGT:
11504    case UNEQ:
11505      /* We have no LTGT as an operator.  We could implement it with
11506	 NE & ORDERED, but this requires an extra temporary.  It's
11507	 not clear that it's worth it.  */
11508      return UNKNOWN;
11509
11510    case LT:
11511    case LE:
11512    case UNGT:
11513    case UNGE:
11514      /* These are supported directly.  */
11515      break;
11516
11517    case EQ:
11518    case NE:
11519    case UNORDERED:
11520    case ORDERED:
11521      /* For commutative operators, try to canonicalize the destination
11522	 operand to be first in the comparison - this helps reload to
11523	 avoid extra moves.  */
11524      if (!dest || !rtx_equal_p (dest, *pop1))
11525	break;
11526      /* FALLTHRU */
11527
11528    case GE:
11529    case GT:
11530    case UNLE:
11531    case UNLT:
11532      /* These are not supported directly.  Swap the comparison operands
11533	 to transform into something that is supported.  */
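      /* E.g. "a > b" is rewritten as "b < a", which the SSE compare
	 instructions can encode directly (e.g. cmpltss).  */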
11534      tmp = *pop0;
11535      *pop0 = *pop1;
11536      *pop1 = tmp;
11537      code = swap_condition (code);
11538      break;
11539
11540    default:
11541      gcc_unreachable ();
11542    }
11543
11544  return code;
11545}
11546
11547/* Detect conditional moves that exactly match min/max operational
11548   semantics.  Note that this is IEEE safe, as long as we don't
11549   interchange the operands.
11550
11551   Returns FALSE if this conditional move doesn't match a MIN/MAX,
11552   and TRUE if the operation is successful and instructions are emitted.  */
11553
11554static bool
11555ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11556			   rtx cmp_op1, rtx if_true, rtx if_false)
11557{
11558  enum machine_mode mode;
11559  bool is_min;
11560  rtx tmp;
11561
11562  if (code == LT)
11563    ;
11564  else if (code == UNGE)
11565    {
11566      tmp = if_true;
11567      if_true = if_false;
11568      if_false = tmp;
11569    }
11570  else
11571    return false;
11572
11573  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11574    is_min = true;
11575  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11576    is_min = false;
11577  else
11578    return false;
11579
11580  mode = GET_MODE (dest);
11581
11582  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11583     but MODE may be a vector mode and thus not appropriate.  */
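  /* When NaNs or signed zeros must be honored the operands may not be
     commuted, so an UNSPEC pins the exact operand order instead of using the
     commutative SMIN/SMAX rtx codes.  */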
11584  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11585    {
11586      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11587      rtvec v;
11588
11589      if_true = force_reg (mode, if_true);
11590      v = gen_rtvec (2, if_true, if_false);
11591      tmp = gen_rtx_UNSPEC (mode, v, u);
11592    }
11593  else
11594    {
11595      code = is_min ? SMIN : SMAX;
11596      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11597    }
11598
11599  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11600  return true;
11601}
11602
11603/* Expand an sse vector comparison.  Return the register with the result.  */
11604
11605static rtx
11606ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11607		     rtx op_true, rtx op_false)
11608{
11609  enum machine_mode mode = GET_MODE (dest);
11610  rtx x;
11611
11612  cmp_op0 = force_reg (mode, cmp_op0);
11613  if (!nonimmediate_operand (cmp_op1, mode))
11614    cmp_op1 = force_reg (mode, cmp_op1);
11615
11616  if (optimize
11617      || reg_overlap_mentioned_p (dest, op_true)
11618      || reg_overlap_mentioned_p (dest, op_false))
11619    dest = gen_reg_rtx (mode);
11620
11621  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11622  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11623
11624  return dest;
11625}
11626
11627/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11628   operations.  This is used for both scalar and vector conditional moves.  */
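/* Each element of CMP is all-ones where the comparison held and all-zeros
   where it did not, so the select is computed branchlessly as
   (CMP & OP_TRUE) | (~CMP & OP_FALSE).  */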
11629
11630static void
11631ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11632{
11633  enum machine_mode mode = GET_MODE (dest);
11634  rtx t2, t3, x;
11635
11636  if (op_false == CONST0_RTX (mode))
11637    {
11638      op_true = force_reg (mode, op_true);
11639      x = gen_rtx_AND (mode, cmp, op_true);
11640      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11641    }
11642  else if (op_true == CONST0_RTX (mode))
11643    {
11644      op_false = force_reg (mode, op_false);
11645      x = gen_rtx_NOT (mode, cmp);
11646      x = gen_rtx_AND (mode, x, op_false);
11647      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11648    }
11649  else
11650    {
11651      op_true = force_reg (mode, op_true);
11652      op_false = force_reg (mode, op_false);
11653
11654      t2 = gen_reg_rtx (mode);
11655      if (optimize)
11656	t3 = gen_reg_rtx (mode);
11657      else
11658	t3 = dest;
11659
11660      x = gen_rtx_AND (mode, op_true, cmp);
11661      emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11662
11663      x = gen_rtx_NOT (mode, cmp);
11664      x = gen_rtx_AND (mode, x, op_false);
11665      emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11666
11667      x = gen_rtx_IOR (mode, t3, t2);
11668      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11669    }
11670}
11671
11672/* Expand a floating-point conditional move.  Return true if successful.  */
11673
11674int
11675ix86_expand_fp_movcc (rtx operands[])
11676{
11677  enum machine_mode mode = GET_MODE (operands[0]);
11678  enum rtx_code code = GET_CODE (operands[1]);
11679  rtx tmp, compare_op, second_test, bypass_test;
11680
11681  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11682    {
11683      enum machine_mode cmode;
11684
11685      /* Since we have no cmove for sse registers, don't force bad register
11686	 allocation just to gain access to it.  Deny movcc when the
11687	 comparison mode doesn't match the move mode.  */
11688      cmode = GET_MODE (ix86_compare_op0);
11689      if (cmode == VOIDmode)
11690	cmode = GET_MODE (ix86_compare_op1);
11691      if (cmode != mode)
11692	return 0;
11693
11694      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11695					       &ix86_compare_op0,
11696					       &ix86_compare_op1);
11697      if (code == UNKNOWN)
11698	return 0;
11699
11700      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11701				     ix86_compare_op1, operands[2],
11702				     operands[3]))
11703	return 1;
11704
11705      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11706				 ix86_compare_op1, operands[2], operands[3]);
11707      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11708      return 1;
11709    }
11710
11711  /* The floating point conditional move instructions don't directly
11712     support conditions resulting from a signed integer comparison.  */
11713
11714  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11715
11716  /* The fcmov instructions cannot consume every condition directly (in
11717     particular signed integer ones); fall back to setcc + test when needed.  */
11718
11719  if (!fcmov_comparison_operator (compare_op, VOIDmode))
11720    {
11721      gcc_assert (!second_test && !bypass_test);
11722      tmp = gen_reg_rtx (QImode);
11723      ix86_expand_setcc (code, tmp);
11724      code = NE;
11725      ix86_compare_op0 = tmp;
11726      ix86_compare_op1 = const0_rtx;
11727      compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
11728    }
11729  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11730    {
11731      tmp = gen_reg_rtx (mode);
11732      emit_move_insn (tmp, operands[3]);
11733      operands[3] = tmp;
11734    }
11735  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11736    {
11737      tmp = gen_reg_rtx (mode);
11738      emit_move_insn (tmp, operands[2]);
11739      operands[2] = tmp;
11740    }
11741
11742  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11743			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
11744						operands[2], operands[3])));
11745  if (bypass_test)
11746    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11747			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11748						  operands[3], operands[0])));
11749  if (second_test)
11750    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11751			    gen_rtx_IF_THEN_ELSE (mode, second_test,
11752						  operands[2], operands[0])));
11753
11754  return 1;
11755}
11756
11757/* Expand a floating-point vector conditional move; a vcond operation
11758   rather than a movcc operation.  */
11759
11760bool
11761ix86_expand_fp_vcond (rtx operands[])
11762{
11763  enum rtx_code code = GET_CODE (operands[3]);
11764  rtx cmp;
11765
11766  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11767					   &operands[4], &operands[5]);
11768  if (code == UNKNOWN)
11769    return false;
11770
11771  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11772				 operands[5], operands[1], operands[2]))
11773    return true;
11774
11775  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11776			     operands[1], operands[2]);
11777  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11778  return true;
11779}
11780
11781/* Expand a signed integral vector conditional move.  */
11782
11783bool
11784ix86_expand_int_vcond (rtx operands[])
11785{
11786  enum machine_mode mode = GET_MODE (operands[0]);
11787  enum rtx_code code = GET_CODE (operands[3]);
11788  bool negate = false;
11789  rtx x, cop0, cop1;
11790
11791  cop0 = operands[4];
11792  cop1 = operands[5];
11793
11794  /* Canonicalize the comparison to EQ, GT, GTU.  */
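  /* For illustration, these are the standard identities being applied:
     NE(x,y) = NOT EQ(x,y); LE(x,y) = NOT GT(x,y); LT(x,y) = GT(y,x);
     GE(x,y) = NOT GT(y,x); and similarly for the unsigned forms.  */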
11795  switch (code)
11796    {
11797    case EQ:
11798    case GT:
11799    case GTU:
11800      break;
11801
11802    case NE:
11803    case LE:
11804    case LEU:
11805      code = reverse_condition (code);
11806      negate = true;
11807      break;
11808
11809    case GE:
11810    case GEU:
11811      code = reverse_condition (code);
11812      negate = true;
11813      /* FALLTHRU */
11814
11815    case LT:
11816    case LTU:
11817      code = swap_condition (code);
11818      x = cop0, cop0 = cop1, cop1 = x;
11819      break;
11820
11821    default:
11822      gcc_unreachable ();
11823    }
11824
11825  /* Unsigned parallel compare is not supported by the hardware.  Play some
11826     tricks to turn this into a signed comparison.  */
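  /* A worked instance of the sign-flip trick used for V4SImode below
     (illustrative only, shown with 8-bit values): 0xF0 GTU 0x10 holds,
     since 240 > 16.  XORing both with 0x80 gives 0x70 GT 0x90, i.e.
     112 > -112 as signed values, which holds as well.  Flipping the sign
     bit of both operands maps unsigned order onto signed order, so the
     hardware's signed compare can be used.  */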
11827  if (code == GTU)
11828    {
11829      cop0 = force_reg (mode, cop0);
11830
11831      switch (mode)
11832	{
11833	case V4SImode:
11834	  {
11835	    rtx t1, t2, mask;
11836
11837	    /* There is no unsigned SImode compare instruction.  Flip the
11838	       sign bit of both operands so that the unsigned comparison
11839	       becomes an ordinary signed one:
11840	       x GTU y  <==>  (x ^ 0x80000000) GT (y ^ 0x80000000).  */
11841	    mask = GEN_INT (-0x80000000);
11842	    mask = gen_rtx_CONST_VECTOR (mode,
11843			gen_rtvec (4, mask, mask, mask, mask));
11844	    mask = force_reg (mode, mask);
11845
11846	    t1 = gen_reg_rtx (mode);
11847	    emit_insn (gen_xorv4si3 (t1, cop0, mask));
11848	    t2 = gen_reg_rtx (mode);
11849	    emit_insn (gen_xorv4si3 (t2, force_reg (mode, cop1), mask));
11850
11851	    cop0 = t1;
11852	    cop1 = t2;
11853	    code = GT;
11854	  }
11855	  break;
11856
11857	case V16QImode:
11858	case V8HImode:
11859	  /* Perform a parallel unsigned saturating subtraction; the result
11860	     is zero iff cop0 LEU cop1, so compare it against zero and
11861	     invert the sense of the comparison.  */
11862	  x = gen_reg_rtx (mode);
11863	  emit_insn (gen_rtx_SET (VOIDmode, x,
11864				  gen_rtx_US_MINUS (mode, cop0, cop1)));
11865
11866	  cop0 = x;
11867	  cop1 = CONST0_RTX (mode);
11868	  code = EQ;
11869	  negate = !negate;
11870	  break;
11871
11872	default:
11873	  gcc_unreachable ();
11874	}
11875    }
11876
11877  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11878			   operands[1+negate], operands[2-negate]);
11879
11880  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11881			 operands[2-negate]);
11882  return true;
11883}
11884
11885/* Expand conditional increment or decrement using adc/sbb instructions.
11886   The default case, using setcc followed by a conditional move, can be
11887   done by generic code.  */
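/* For illustration (a sketch of the idea rather than of this exact
   expander): with a in %eax, b in %ebx and x in %ecx, a conditional
   increment such as

     if (a < b)
       x++;

   where the comparison is unsigned can be emitted without a branch as

	cmpl	%ebx, %eax	# sets the carry flag iff a < b
	adcl	$0, %ecx	# x += carry

   and the decrement case uses sbb in the same way.  */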
11888int
11889ix86_expand_int_addcc (rtx operands[])
11890{
11891  enum rtx_code code = GET_CODE (operands[1]);
11892  rtx compare_op;
11893  rtx val = const0_rtx;
11894  bool fpcmp = false;
11895  enum machine_mode mode = GET_MODE (operands[0]);
11896
11897  if (operands[3] != const1_rtx
11898      && operands[3] != constm1_rtx)
11899    return 0;
11900  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11901				       ix86_compare_op1, &compare_op))
11902     return 0;
11903  code = GET_CODE (compare_op);
11904
11905  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11906      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11907    {
11908      fpcmp = true;
11909      code = ix86_fp_compare_code_to_integer (code);
11910    }
11911
11912  if (code != LTU)
11913    {
11914      val = constm1_rtx;
11915      if (fpcmp)
11916	PUT_CODE (compare_op,
11917		  reverse_condition_maybe_unordered
11918		    (GET_CODE (compare_op)));
11919      else
11920	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11921    }
11922  PUT_MODE (compare_op, mode);
11923
11924  /* Construct either adc or sbb insn.  */
11925  if ((code == LTU) == (operands[3] == constm1_rtx))
11926    {
11927      switch (GET_MODE (operands[0]))
11928	{
11929	  case QImode:
11930            emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11931	    break;
11932	  case HImode:
11933            emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11934	    break;
11935	  case SImode:
11936            emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11937	    break;
11938	  case DImode:
11939            emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11940	    break;
11941	  default:
11942	    gcc_unreachable ();
11943	}
11944    }
11945  else
11946    {
11947      switch (GET_MODE (operands[0]))
11948	{
11949	  case QImode:
11950            emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11951	    break;
11952	  case HImode:
11953            emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11954	    break;
11955	  case SImode:
11956            emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11957	    break;
11958	  case DImode:
11959            emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11960	    break;
11961	  default:
11962	    gcc_unreachable ();
11963	}
11964    }
11965  return 1; /* DONE */
11966}
11967
11968
11969/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
11970   works for floating-point parameters and non-offsettable memories.
11971   For pushes, it returns just stack offsets; the values will be saved
11972   in the right order.  At most three parts are generated.  */
11973
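/* A worked example (illustrative; the exact word order follows the target's
   endianness): on a 32-bit target the DFmode constant 1.0, whose IEEE
   image is 0x3FF0000000000000, comes back as the two SImode parts
   { 0x00000000, 0x3FF00000 }, low-order word first.  */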
11974static int
11975ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11976{
11977  int size;
11978
11979  if (!TARGET_64BIT)
11980    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11981  else
11982    size = (GET_MODE_SIZE (mode) + 4) / 8;
11983
11984  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11985  gcc_assert (size >= 2 && size <= 3);
11986
11987  /* Optimize constant pool reference to immediates.  This is used by fp
11988     moves, that force all constants to memory to allow combining.  */
11989  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11990    {
11991      rtx tmp = maybe_get_pool_constant (operand);
11992      if (tmp)
11993	operand = tmp;
11994    }
11995
11996  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11997    {
11998      /* The only non-offsettable memories we handle are pushes.  */
11999      int ok = push_operand (operand, VOIDmode);
12000
12001      gcc_assert (ok);
12002
12003      operand = copy_rtx (operand);
12004      PUT_MODE (operand, Pmode);
12005      parts[0] = parts[1] = parts[2] = operand;
12006      return size;
12007    }
12008
12009  if (GET_CODE (operand) == CONST_VECTOR)
12010    {
12011      enum machine_mode imode = int_mode_for_mode (mode);
12012      /* Caution: if we looked through a constant pool memory above,
12013	 the operand may actually have a different mode now.  That's
12014	 ok, since we want to pun this all the way back to an integer.  */
12015      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12016      gcc_assert (operand != NULL);
12017      mode = imode;
12018    }
12019
12020  if (!TARGET_64BIT)
12021    {
12022      if (mode == DImode)
12023	split_di (&operand, 1, &parts[0], &parts[1]);
12024      else
12025	{
12026	  if (REG_P (operand))
12027	    {
12028	      gcc_assert (reload_completed);
12029	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12030	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12031	      if (size == 3)
12032		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12033	    }
12034	  else if (offsettable_memref_p (operand))
12035	    {
12036	      operand = adjust_address (operand, SImode, 0);
12037	      parts[0] = operand;
12038	      parts[1] = adjust_address (operand, SImode, 4);
12039	      if (size == 3)
12040		parts[2] = adjust_address (operand, SImode, 8);
12041	    }
12042	  else if (GET_CODE (operand) == CONST_DOUBLE)
12043	    {
12044	      REAL_VALUE_TYPE r;
12045	      long l[4];
12046
12047	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12048	      switch (mode)
12049		{
12050		case XFmode:
12051		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12052		  parts[2] = gen_int_mode (l[2], SImode);
12053		  break;
12054		case DFmode:
12055		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12056		  break;
12057		default:
12058		  gcc_unreachable ();
12059		}
12060	      parts[1] = gen_int_mode (l[1], SImode);
12061	      parts[0] = gen_int_mode (l[0], SImode);
12062	    }
12063	  else
12064	    gcc_unreachable ();
12065	}
12066    }
12067  else
12068    {
12069      if (mode == TImode)
12070	split_ti (&operand, 1, &parts[0], &parts[1]);
12071      if (mode == XFmode || mode == TFmode)
12072	{
12073	  enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
12074	  if (REG_P (operand))
12075	    {
12076	      gcc_assert (reload_completed);
12077	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12078	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12079	    }
12080	  else if (offsettable_memref_p (operand))
12081	    {
12082	      operand = adjust_address (operand, DImode, 0);
12083	      parts[0] = operand;
12084	      parts[1] = adjust_address (operand, upper_mode, 8);
12085	    }
12086	  else if (GET_CODE (operand) == CONST_DOUBLE)
12087	    {
12088	      REAL_VALUE_TYPE r;
12089	      long l[4];
12090
12091	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12092	      real_to_target (l, &r, mode);
12093
12094	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
12095	      if (HOST_BITS_PER_WIDE_INT >= 64)
12096	        parts[0]
12097		  = gen_int_mode
12098		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12099		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12100		       DImode);
12101	      else
12102	        parts[0] = immed_double_const (l[0], l[1], DImode);
12103
12104	      if (upper_mode == SImode)
12105	        parts[1] = gen_int_mode (l[2], SImode);
12106	      else if (HOST_BITS_PER_WIDE_INT >= 64)
12107	        parts[1]
12108		  = gen_int_mode
12109		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12110		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12111		       DImode);
12112	      else
12113	        parts[1] = immed_double_const (l[2], l[3], DImode);
12114	    }
12115	  else
12116	    gcc_unreachable ();
12117	}
12118    }
12119
12120  return size;
12121}
12122
12123/* Emit insns to perform a move or push of DI, DF, and XF values.
12124   All the required insns are emitted here; the caller does not need to
12125   emit any normal moves afterwards.  Operands 2-4 receive the destination
12126   parts in the correct order; operands 5-7 receive the source parts.  */
12127
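/* An illustrative sketch (not compiler code) of what the split must
   preserve: a DImode copy on a 32-bit target is simply two SImode moves,

     void
     copy_di (unsigned int dst[2], const unsigned int src[2])
     {
       dst[0] = src[0];
       dst[1] = src[1];
     }

   but if a destination register also serves as the address register of a
   source memory operand, the two moves have to be reordered (or an lea of
   the address emitted first) so the address survives until both parts are
   read; that is what the collision handling below arranges.  */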
12128void
12129ix86_split_long_move (rtx operands[])
12130{
12131  rtx part[2][3];
12132  int nparts;
12133  int push = 0;
12134  int collisions = 0;
12135  enum machine_mode mode = GET_MODE (operands[0]);
12136
12137  /* The DFmode expanders may ask us to move a double.
12138     For 64-bit targets this is a single move.  By hiding that fact
12139     here we simplify the i386.md splitters.  */
12140  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12141    {
12142      /* Optimize constant pool reference to immediates.  This is used by
12143	 fp moves, that force all constants to memory to allow combining.  */
12144
12145      if (GET_CODE (operands[1]) == MEM
12146	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12147	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12148	operands[1] = get_pool_constant (XEXP (operands[1], 0));
12149      if (push_operand (operands[0], VOIDmode))
12150	{
12151	  operands[0] = copy_rtx (operands[0]);
12152	  PUT_MODE (operands[0], Pmode);
12153	}
12154      else
12155        operands[0] = gen_lowpart (DImode, operands[0]);
12156      operands[1] = gen_lowpart (DImode, operands[1]);
12157      emit_move_insn (operands[0], operands[1]);
12158      return;
12159    }
12160
12161  /* The only non-offsettable memory we handle is push.  */
12162  if (push_operand (operands[0], VOIDmode))
12163    push = 1;
12164  else
12165    gcc_assert (GET_CODE (operands[0]) != MEM
12166		|| offsettable_memref_p (operands[0]));
12167
12168  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12169  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12170
12171  /* When emitting push, take care for source operands on the stack.  */
12172  if (push && GET_CODE (operands[1]) == MEM
12173      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12174    {
12175      if (nparts == 3)
12176	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12177				     XEXP (part[1][2], 0));
12178      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12179				   XEXP (part[1][1], 0));
12180    }
12181
12182  /* We need to do copy in the right order in case an address register
12183     of the source overlaps the destination.  */
12184  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12185    {
12186      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12187	collisions++;
12188      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12189	collisions++;
12190      if (nparts == 3
12191	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12192	collisions++;
12193
12194      /* Collision in the middle part can be handled by reordering.  */
12195      if (collisions == 1 && nparts == 3
12196	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12197	{
12198	  rtx tmp;
12199	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12200	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12201	}
12202
12203      /* If there are more collisions, we can't handle it by reordering.
12204	 Do an lea to the last part and use only one colliding move.  */
12205      else if (collisions > 1)
12206	{
12207	  rtx base;
12208
12209	  collisions = 1;
12210
12211	  base = part[0][nparts - 1];
12212
12213	  /* Handle the case when the last part isn't valid for lea.
12214	     Happens in 64-bit mode storing the 12-byte XFmode.  */
12215	  if (GET_MODE (base) != Pmode)
12216	    base = gen_rtx_REG (Pmode, REGNO (base));
12217
12218	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12219	  part[1][0] = replace_equiv_address (part[1][0], base);
12220	  part[1][1] = replace_equiv_address (part[1][1],
12221				      plus_constant (base, UNITS_PER_WORD));
12222	  if (nparts == 3)
12223	    part[1][2] = replace_equiv_address (part[1][2],
12224				      plus_constant (base, 8));
12225	}
12226    }
12227
12228  if (push)
12229    {
12230      if (!TARGET_64BIT)
12231	{
12232	  if (nparts == 3)
12233	    {
12234	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12235                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12236	      emit_move_insn (part[0][2], part[1][2]);
12237	    }
12238	}
12239      else
12240	{
12241	  /* In 64-bit mode there is no 32-bit push available.  If the part is
12242	     a register, that is fine - we simply use the larger counterpart.
12243	     We also retype memory references - these come from an attempt to
12244	     avoid a REX prefix when moving the second half of a TFmode value.  */
12245	  if (GET_MODE (part[1][1]) == SImode)
12246	    {
12247	      switch (GET_CODE (part[1][1]))
12248		{
12249		case MEM:
12250		  part[1][1] = adjust_address (part[1][1], DImode, 0);
12251		  break;
12252
12253		case REG:
12254		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12255		  break;
12256
12257		default:
12258		  gcc_unreachable ();
12259		}
12260
12261	      if (GET_MODE (part[1][0]) == SImode)
12262		part[1][0] = part[1][1];
12263	    }
12264	}
12265      emit_move_insn (part[0][1], part[1][1]);
12266      emit_move_insn (part[0][0], part[1][0]);
12267      return;
12268    }
12269
12270  /* Choose correct order to not overwrite the source before it is copied.  */
12271  if ((REG_P (part[0][0])
12272       && REG_P (part[1][1])
12273       && (REGNO (part[0][0]) == REGNO (part[1][1])
12274	   || (nparts == 3
12275	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
12276      || (collisions > 0
12277	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12278    {
12279      if (nparts == 3)
12280	{
12281	  operands[2] = part[0][2];
12282	  operands[3] = part[0][1];
12283	  operands[4] = part[0][0];
12284	  operands[5] = part[1][2];
12285	  operands[6] = part[1][1];
12286	  operands[7] = part[1][0];
12287	}
12288      else
12289	{
12290	  operands[2] = part[0][1];
12291	  operands[3] = part[0][0];
12292	  operands[5] = part[1][1];
12293	  operands[6] = part[1][0];
12294	}
12295    }
12296  else
12297    {
12298      if (nparts == 3)
12299	{
12300	  operands[2] = part[0][0];
12301	  operands[3] = part[0][1];
12302	  operands[4] = part[0][2];
12303	  operands[5] = part[1][0];
12304	  operands[6] = part[1][1];
12305	  operands[7] = part[1][2];
12306	}
12307      else
12308	{
12309	  operands[2] = part[0][0];
12310	  operands[3] = part[0][1];
12311	  operands[5] = part[1][0];
12312	  operands[6] = part[1][1];
12313	}
12314    }
12315
12316  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
12317  if (optimize_size)
12318    {
12319      if (GET_CODE (operands[5]) == CONST_INT
12320	  && operands[5] != const0_rtx
12321	  && REG_P (operands[2]))
12322	{
12323	  if (GET_CODE (operands[6]) == CONST_INT
12324	      && INTVAL (operands[6]) == INTVAL (operands[5]))
12325	    operands[6] = operands[2];
12326
12327	  if (nparts == 3
12328	      && GET_CODE (operands[7]) == CONST_INT
12329	      && INTVAL (operands[7]) == INTVAL (operands[5]))
12330	    operands[7] = operands[2];
12331	}
12332
12333      if (nparts == 3
12334	  && GET_CODE (operands[6]) == CONST_INT
12335	  && operands[6] != const0_rtx
12336	  && REG_P (operands[3])
12337	  && GET_CODE (operands[7]) == CONST_INT
12338	  && INTVAL (operands[7]) == INTVAL (operands[6]))
12339	operands[7] = operands[3];
12340    }
12341
12342  emit_move_insn (operands[2], operands[5]);
12343  emit_move_insn (operands[3], operands[6]);
12344  if (nparts == 3)
12345    emit_move_insn (operands[4], operands[7]);
12346
12347  return;
12348}
12349
12350/* Helper function of ix86_split_ashl used to generate an SImode/DImode
12351   left shift by a constant, either using a single shift or
12352   a sequence of add instructions.  */
12353
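/* For illustration: since x + x == x << 1, a small constant shift can be
   emitted as repeated additions, e.g. x << 2 as

	addl	%eax, %eax
	addl	%eax, %eax

   This form is chosen below (when not optimizing for size) whenever
   count * (cost of an add) does not exceed the cost of a constant shift
   for the current tuning; otherwise a plain shift instruction is used.  */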
12354static void
12355ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12356{
12357  if (count == 1)
12358    {
12359      emit_insn ((mode == DImode
12360		  ? gen_addsi3
12361		  : gen_adddi3) (operand, operand, operand));
12362    }
12363  else if (!optimize_size
12364	   && count * ix86_cost->add <= ix86_cost->shift_const)
12365    {
12366      int i;
12367      for (i = 0; i < count; i++)
12368	{
12369	  emit_insn ((mode == DImode
12370		      ? gen_addsi3
12371		      : gen_adddi3) (operand, operand, operand));
12372	}
12373    }
12374  else
12375    emit_insn ((mode == DImode
12376		? gen_ashlsi3
12377		: gen_ashldi3) (operand, operand, GEN_INT (count)));
12378}
12379
12380void
12381ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12382{
12383  rtx low[2], high[2];
12384  int count;
12385  const int single_width = mode == DImode ? 32 : 64;
12386
12387  if (GET_CODE (operands[2]) == CONST_INT)
12388    {
12389      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12390      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12391
12392      if (count >= single_width)
12393	{
12394	  emit_move_insn (high[0], low[1]);
12395	  emit_move_insn (low[0], const0_rtx);
12396
12397	  if (count > single_width)
12398	    ix86_expand_ashl_const (high[0], count - single_width, mode);
12399	}
12400      else
12401	{
12402	  if (!rtx_equal_p (operands[0], operands[1]))
12403	    emit_move_insn (operands[0], operands[1]);
12404	  emit_insn ((mode == DImode
12405		     ? gen_x86_shld_1
12406		     : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12407	  ix86_expand_ashl_const (low[0], count, mode);
12408	}
12409      return;
12410    }
12411
12412  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12413
12414  if (operands[1] == const1_rtx)
12415    {
12416      /* Assuming we've chosen QImode-capable registers, then 1 << N
12417	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
12418      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12419	{
12420	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12421
12422	  ix86_expand_clear (low[0]);
12423	  ix86_expand_clear (high[0]);
12424	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12425
12426	  d = gen_lowpart (QImode, low[0]);
12427	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12428	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
12429	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
12430
12431	  d = gen_lowpart (QImode, high[0]);
12432	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12433	  s = gen_rtx_NE (QImode, flags, const0_rtx);
12434	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
12435	}
12436
12437      /* Otherwise, we can get the same results by manually performing
12438	 a bit extract operation on bit 5/6, and then performing the two
12439	 shifts.  The two methods of getting 0/1 into low/high are exactly
12440	 the same size.  Avoiding the shift in the bit extract case helps
12441	 pentium4 a bit; no one else seems to care much either way.  */
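      /* An equivalent C sketch of this path for the 32-bit pair case
	 (illustrative only; the hardware shift ignores the high bits of
	 the count, which is what makes the final shifts safe):

	   hi = (count >> 5) & 1;
	   lo = hi ^ 1;
	   lo <<= count;
	   hi <<= count;

	 so exactly one of the two words ends up holding the single bit.  */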
12442      else
12443	{
12444	  rtx x;
12445
12446	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12447	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12448	  else
12449	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12450	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12451
12452	  emit_insn ((mode == DImode
12453		      ? gen_lshrsi3
12454		      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12455	  emit_insn ((mode == DImode
12456		      ? gen_andsi3
12457		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12458	  emit_move_insn (low[0], high[0]);
12459	  emit_insn ((mode == DImode
12460		      ? gen_xorsi3
12461		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12462	}
12463
12464      emit_insn ((mode == DImode
12465		    ? gen_ashlsi3
12466		    : gen_ashldi3) (low[0], low[0], operands[2]));
12467      emit_insn ((mode == DImode
12468		    ? gen_ashlsi3
12469		    : gen_ashldi3) (high[0], high[0], operands[2]));
12470      return;
12471    }
12472
12473  if (operands[1] == constm1_rtx)
12474    {
12475      /* For -1 << N, we can avoid the shld instruction, because we
12476	 know that we're shifting 0...31/63 ones into a -1.  */
12477      emit_move_insn (low[0], constm1_rtx);
12478      if (optimize_size)
12479	emit_move_insn (high[0], low[0]);
12480      else
12481	emit_move_insn (high[0], constm1_rtx);
12482    }
12483  else
12484    {
12485      if (!rtx_equal_p (operands[0], operands[1]))
12486	emit_move_insn (operands[0], operands[1]);
12487
12488      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12489      emit_insn ((mode == DImode
12490		  ? gen_x86_shld_1
12491		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
12492    }
12493
12494  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12495
12496  if (TARGET_CMOVE && scratch)
12497    {
12498      ix86_expand_clear (scratch);
12499      emit_insn ((mode == DImode
12500		  ? gen_x86_shift_adj_1
12501		  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12502    }
12503  else
12504    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12505}
12506
12507void
12508ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12509{
12510  rtx low[2], high[2];
12511  int count;
12512  const int single_width = mode == DImode ? 32 : 64;
12513
12514  if (GET_CODE (operands[2]) == CONST_INT)
12515    {
12516      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12517      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12518
12519      if (count == single_width * 2 - 1)
12520	{
12521	  emit_move_insn (high[0], high[1]);
12522	  emit_insn ((mode == DImode
12523		      ? gen_ashrsi3
12524		      : gen_ashrdi3) (high[0], high[0],
12525				      GEN_INT (single_width - 1)));
12526	  emit_move_insn (low[0], high[0]);
12527
12528	}
12529      else if (count >= single_width)
12530	{
12531	  emit_move_insn (low[0], high[1]);
12532	  emit_move_insn (high[0], low[0]);
12533	  emit_insn ((mode == DImode
12534		      ? gen_ashrsi3
12535		      : gen_ashrdi3) (high[0], high[0],
12536				      GEN_INT (single_width - 1)));
12537	  if (count > single_width)
12538	    emit_insn ((mode == DImode
12539			? gen_ashrsi3
12540			: gen_ashrdi3) (low[0], low[0],
12541					GEN_INT (count - single_width)));
12542	}
12543      else
12544	{
12545	  if (!rtx_equal_p (operands[0], operands[1]))
12546	    emit_move_insn (operands[0], operands[1]);
12547	  emit_insn ((mode == DImode
12548		      ? gen_x86_shrd_1
12549		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12550	  emit_insn ((mode == DImode
12551		      ? gen_ashrsi3
12552		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12553	}
12554    }
12555  else
12556    {
12557      if (!rtx_equal_p (operands[0], operands[1]))
12558	emit_move_insn (operands[0], operands[1]);
12559
12560      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12561
12562      emit_insn ((mode == DImode
12563		  ? gen_x86_shrd_1
12564		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12565      emit_insn ((mode == DImode
12566		  ? gen_ashrsi3
12567		  : gen_ashrdi3)  (high[0], high[0], operands[2]));
12568
12569      if (TARGET_CMOVE && scratch)
12570	{
12571	  emit_move_insn (scratch, high[0]);
12572	  emit_insn ((mode == DImode
12573		      ? gen_ashrsi3
12574		      : gen_ashrdi3) (scratch, scratch,
12575				      GEN_INT (single_width - 1)));
12576	  emit_insn ((mode == DImode
12577		      ? gen_x86_shift_adj_1
12578		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12579					 scratch));
12580	}
12581      else
12582	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12583    }
12584}
12585
12586void
12587ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12588{
12589  rtx low[2], high[2];
12590  int count;
12591  const int single_width = mode == DImode ? 32 : 64;
12592
12593  if (GET_CODE (operands[2]) == CONST_INT)
12594    {
12595      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12596      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12597
12598      if (count >= single_width)
12599	{
12600	  emit_move_insn (low[0], high[1]);
12601	  ix86_expand_clear (high[0]);
12602
12603	  if (count > single_width)
12604	    emit_insn ((mode == DImode
12605			? gen_lshrsi3
12606			: gen_lshrdi3) (low[0], low[0],
12607					GEN_INT (count - single_width)));
12608	}
12609      else
12610	{
12611	  if (!rtx_equal_p (operands[0], operands[1]))
12612	    emit_move_insn (operands[0], operands[1]);
12613	  emit_insn ((mode == DImode
12614		      ? gen_x86_shrd_1
12615		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12616	  emit_insn ((mode == DImode
12617		      ? gen_lshrsi3
12618		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12619	}
12620    }
12621  else
12622    {
12623      if (!rtx_equal_p (operands[0], operands[1]))
12624	emit_move_insn (operands[0], operands[1]);
12625
12626      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12627
12628      emit_insn ((mode == DImode
12629		  ? gen_x86_shrd_1
12630		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12631      emit_insn ((mode == DImode
12632		  ? gen_lshrsi3
12633		  : gen_lshrdi3) (high[0], high[0], operands[2]));
12634
12635      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
12636      if (TARGET_CMOVE && scratch)
12637	{
12638	  ix86_expand_clear (scratch);
12639	  emit_insn ((mode == DImode
12640		      ? gen_x86_shift_adj_1
12641		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12642					       scratch));
12643	}
12644      else
12645	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12646    }
12647}
12648
12649/* Helper function for the string operations below.  Test VARIABLE against
12650   VALUE; if the tested bits are zero (aligned), jump to the returned label.  */
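/* Roughly equivalent C for the code emitted here (illustrative):

     if ((variable & value) == 0)
       goto label;

   so the caller's fix-up code runs only when the tested bits are set,
   and the returned label marks where aligned execution resumes.  */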
12651static rtx
12652ix86_expand_aligntest (rtx variable, int value)
12653{
12654  rtx label = gen_label_rtx ();
12655  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12656  if (GET_MODE (variable) == DImode)
12657    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12658  else
12659    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12660  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12661			   1, label);
12662  return label;
12663}
12664
12665/* Decrement COUNTREG by VALUE.  */
12666static void
12667ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12668{
12669  if (GET_MODE (countreg) == DImode)
12670    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12671  else
12672    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12673}
12674
12675/* Zero extend possibly SImode EXP to Pmode register.  */
12676rtx
12677ix86_zero_extend_to_Pmode (rtx exp)
12678{
12679  rtx r;
12680  if (GET_MODE (exp) == VOIDmode)
12681    return force_reg (Pmode, exp);
12682  if (GET_MODE (exp) == Pmode)
12683    return copy_to_mode_reg (Pmode, exp);
12684  r = gen_reg_rtx (Pmode);
12685  emit_insn (gen_zero_extendsidi2 (r, exp));
12686  return r;
12687}
12688
12689/* Expand string move (memcpy) operation.  Use i386 string operations when
12690   profitable.  ix86_expand_clrmem contains similar code.  */
12691int
12692ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12693{
12694  rtx srcreg, destreg, countreg, srcexp, destexp;
12695  enum machine_mode counter_mode;
12696  HOST_WIDE_INT align = 0;
12697  unsigned HOST_WIDE_INT count = 0;
12698
12699  if (GET_CODE (align_exp) == CONST_INT)
12700    align = INTVAL (align_exp);
12701
12702  /* Can't use any of this if the user has appropriated esi or edi.  */
12703  if (global_regs[4] || global_regs[5])
12704    return 0;
12705
12706  /* This simple hack avoids all inlining code and simplifies code below.  */
12707  if (!TARGET_ALIGN_STRINGOPS)
12708    align = 64;
12709
12710  if (GET_CODE (count_exp) == CONST_INT)
12711    {
12712      count = INTVAL (count_exp);
12713      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12714	return 0;
12715    }
12716
12717  /* Figure out proper mode for counter.  For 32bits it is always SImode,
12718     for 64bits use SImode when possible, otherwise DImode.
12719     Set count to number of bytes copied when known at compile time.  */
12720  if (!TARGET_64BIT
12721      || GET_MODE (count_exp) == SImode
12722      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12723    counter_mode = SImode;
12724  else
12725    counter_mode = DImode;
12726
12727  gcc_assert (counter_mode == SImode || counter_mode == DImode);
12728
12729  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12730  if (destreg != XEXP (dst, 0))
12731    dst = replace_equiv_address_nv (dst, destreg);
12732  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12733  if (srcreg != XEXP (src, 0))
12734    src = replace_equiv_address_nv (src, srcreg);
12735
12736  /* When optimizing for size emit a simple rep ; movsb instruction for
12737     counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12738     sequence is shorter than mov{b,l} $count, %{cl,ecx}; rep; movsb.
12739     The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12740     count / 4 + (count & 3) bytes; the other sequence is either 4 or 7
12741     bytes, but at this point we do not know whether the upper 24 (resp. 56)
12742     bits of %ecx will be known to be zero.  The rep; movsb sequence causes
12743     higher register pressure though, so take that into account.  */
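  /* A worked instance of that size comparison (illustrative): for
     count == 6 the inline tail is one movsl plus one movsw, which the
     formula above counts as 6 / 4 + (6 & 3) = 3 bytes, whereas the
     mov $6, %ecx; rep; movsb form is 7 bytes, so the inline sequence
     wins when optimizing for size.  */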
12744
12745  if ((!optimize || optimize_size)
12746      && (count == 0
12747	  || ((count & 0x03)
12748	      && (!optimize_size
12749		  || count > 5 * 4
12750		  || (count & 3) + count / 4 > 6))))
12751    {
12752      emit_insn (gen_cld ());
12753      countreg = ix86_zero_extend_to_Pmode (count_exp);
12754      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12755      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12756      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12757			      destexp, srcexp));
12758    }
12759
12760  /* For constant aligned (or small unaligned) copies use rep movsl
12761     followed by code copying the rest.  For PentiumPro ensure 8 byte
12762     alignment to allow rep movsl acceleration.  */
12763
12764  else if (count != 0
12765	   && (align >= 8
12766	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12767	       || optimize_size || count < (unsigned int) 64))
12768    {
12769      unsigned HOST_WIDE_INT offset = 0;
12770      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12771      rtx srcmem, dstmem;
12772
12773      emit_insn (gen_cld ());
12774      if (count & ~(size - 1))
12775	{
12776	  if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12777	    {
12778	      enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12779
12780	      while (offset < (count & ~(size - 1)))
12781		{
12782		  srcmem = adjust_automodify_address_nv (src, movs_mode,
12783							 srcreg, offset);
12784		  dstmem = adjust_automodify_address_nv (dst, movs_mode,
12785							 destreg, offset);
12786		  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12787		  offset += size;
12788		}
12789	    }
12790	  else
12791	    {
12792	      countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12793				  & (TARGET_64BIT ? -1 : 0x3fffffff));
12794	      countreg = copy_to_mode_reg (counter_mode, countreg);
12795	      countreg = ix86_zero_extend_to_Pmode (countreg);
12796
12797	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
12798					GEN_INT (size == 4 ? 2 : 3));
12799	      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12800	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12801
12802	      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12803				      countreg, destexp, srcexp));
12804	      offset = count & ~(size - 1);
12805	    }
12806	}
12807      if (size == 8 && (count & 0x04))
12808	{
12809	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12810						 offset);
12811	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12812						 offset);
12813	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12814	  offset += 4;
12815	}
12816      if (count & 0x02)
12817	{
12818	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12819						 offset);
12820	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12821						 offset);
12822	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12823	  offset += 2;
12824	}
12825      if (count & 0x01)
12826	{
12827	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12828						 offset);
12829	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12830						 offset);
12831	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12832	}
12833    }
12834  /* The generic code based on the glibc implementation:
12835     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12836     allowing accelerated copying there)
12837     - copy the data using rep movsl
12838     - copy the rest.  */
12839  else
12840    {
12841      rtx countreg2;
12842      rtx label = NULL;
12843      rtx srcmem, dstmem;
12844      int desired_alignment = (TARGET_PENTIUMPRO
12845			       && (count == 0 || count >= (unsigned int) 260)
12846			       ? 8 : UNITS_PER_WORD);
12847      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
12848      dst = change_address (dst, BLKmode, destreg);
12849      src = change_address (src, BLKmode, srcreg);
12850
12851      /* In case we don't know anything about the alignment, default to
12852         the library version, since it is usually equally fast and results
12853         in shorter code.
12854
12855	 Also emit a call when we know that the count is large and call
12856	 overhead will not be important.  */
12857      if (!TARGET_INLINE_ALL_STRINGOPS
12858	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12859	return 0;
12860
12861      if (TARGET_SINGLE_STRINGOP)
12862	emit_insn (gen_cld ());
12863
12864      countreg2 = gen_reg_rtx (Pmode);
12865      countreg = copy_to_mode_reg (counter_mode, count_exp);
12866
12867      /* We don't use loops to align the destination or to copy parts smaller
12868         than 4 bytes, because gcc is able to optimize such code better (in
12869         the case the destination or the count really is aligned, gcc is often
12870         able to predict the branches) and also it is friendlier to the
12871         hardware branch prediction.
12872
12873         Using loops is beneficial for the generic case, because we can
12874         handle small counts using the loops.  Many CPUs (such as Athlon)
12875         have large REP prefix setup costs.
12876
12877         This is quite costly.  Maybe we can revisit this decision later or
12878         add some customizability to this code.  */
12879
12880      if (count == 0 && align < desired_alignment)
12881	{
12882	  label = gen_label_rtx ();
12883	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12884				   LEU, 0, counter_mode, 1, label);
12885	}
12886      if (align <= 1)
12887	{
12888	  rtx label = ix86_expand_aligntest (destreg, 1);
12889	  srcmem = change_address (src, QImode, srcreg);
12890	  dstmem = change_address (dst, QImode, destreg);
12891	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12892	  ix86_adjust_counter (countreg, 1);
12893	  emit_label (label);
12894	  LABEL_NUSES (label) = 1;
12895	}
12896      if (align <= 2)
12897	{
12898	  rtx label = ix86_expand_aligntest (destreg, 2);
12899	  srcmem = change_address (src, HImode, srcreg);
12900	  dstmem = change_address (dst, HImode, destreg);
12901	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12902	  ix86_adjust_counter (countreg, 2);
12903	  emit_label (label);
12904	  LABEL_NUSES (label) = 1;
12905	}
12906      if (align <= 4 && desired_alignment > 4)
12907	{
12908	  rtx label = ix86_expand_aligntest (destreg, 4);
12909	  srcmem = change_address (src, SImode, srcreg);
12910	  dstmem = change_address (dst, SImode, destreg);
12911	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12912	  ix86_adjust_counter (countreg, 4);
12913	  emit_label (label);
12914	  LABEL_NUSES (label) = 1;
12915	}
12916
12917      if (label && desired_alignment > 4 && !TARGET_64BIT)
12918	{
12919	  emit_label (label);
12920	  LABEL_NUSES (label) = 1;
12921	  label = NULL_RTX;
12922	}
12923      if (!TARGET_SINGLE_STRINGOP)
12924	emit_insn (gen_cld ());
12925      if (TARGET_64BIT)
12926	{
12927	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12928				  GEN_INT (3)));
12929	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12930	}
12931      else
12932	{
12933	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12934	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12935	}
12936      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12937      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12938      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12939			      countreg2, destexp, srcexp));
12940
12941      if (label)
12942	{
12943	  emit_label (label);
12944	  LABEL_NUSES (label) = 1;
12945	}
12946      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12947	{
12948	  srcmem = change_address (src, SImode, srcreg);
12949	  dstmem = change_address (dst, SImode, destreg);
12950	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12951	}
12952      if ((align <= 4 || count == 0) && TARGET_64BIT)
12953	{
12954	  rtx label = ix86_expand_aligntest (countreg, 4);
12955	  srcmem = change_address (src, SImode, srcreg);
12956	  dstmem = change_address (dst, SImode, destreg);
12957	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12958	  emit_label (label);
12959	  LABEL_NUSES (label) = 1;
12960	}
12961      if (align > 2 && count != 0 && (count & 2))
12962	{
12963	  srcmem = change_address (src, HImode, srcreg);
12964	  dstmem = change_address (dst, HImode, destreg);
12965	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12966	}
12967      if (align <= 2 || count == 0)
12968	{
12969	  rtx label = ix86_expand_aligntest (countreg, 2);
12970	  srcmem = change_address (src, HImode, srcreg);
12971	  dstmem = change_address (dst, HImode, destreg);
12972	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12973	  emit_label (label);
12974	  LABEL_NUSES (label) = 1;
12975	}
12976      if (align > 1 && count != 0 && (count & 1))
12977	{
12978	  srcmem = change_address (src, QImode, srcreg);
12979	  dstmem = change_address (dst, QImode, destreg);
12980	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12981	}
12982      if (align <= 1 || count == 0)
12983	{
12984	  rtx label = ix86_expand_aligntest (countreg, 1);
12985	  srcmem = change_address (src, QImode, srcreg);
12986	  dstmem = change_address (dst, QImode, destreg);
12987	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12988	  emit_label (label);
12989	  LABEL_NUSES (label) = 1;
12990	}
12991    }
12992
12993  return 1;
12994}
12995
12996/* Expand string clear operation (bzero).  Use i386 string operations when
12997   profitable.  ix86_expand_movmem contains similar code.  */
12998int
12999ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
13000{
13001  rtx destreg, zeroreg, countreg, destexp;
13002  enum machine_mode counter_mode;
13003  HOST_WIDE_INT align = 0;
13004  unsigned HOST_WIDE_INT count = 0;
13005
13006  if (GET_CODE (align_exp) == CONST_INT)
13007    align = INTVAL (align_exp);
13008
13009  /* Can't use any of this if the user has appropriated esi.  */
13010  if (global_regs[4])
13011    return 0;
13012
13013  /* This simple hack avoids all inlining code and simplifies code below.  */
13014  if (!TARGET_ALIGN_STRINGOPS)
13015    align = 32;
13016
13017  if (GET_CODE (count_exp) == CONST_INT)
13018    {
13019      count = INTVAL (count_exp);
13020      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13021	return 0;
13022    }
13023  /* Figure out proper mode for counter.  For 32bits it is always SImode,
13024     for 64bits use SImode when possible, otherwise DImode.
13025     Set count to number of bytes copied when known at compile time.  */
13026  if (!TARGET_64BIT
13027      || GET_MODE (count_exp) == SImode
13028      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13029    counter_mode = SImode;
13030  else
13031    counter_mode = DImode;
13032
13033  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13034  if (destreg != XEXP (dst, 0))
13035    dst = replace_equiv_address_nv (dst, destreg);
13036
13037
13038  /* When optimizing for size emit a simple rep ; stosb instruction for
13039     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
13040     sequence is 7 bytes long, so if optimizing for size and the count is
13041     small enough that some stosl, stosw and stosb instructions without
13042     rep are shorter, fall back into the next if.  */
13043
13044  if ((!optimize || optimize_size)
13045      && (count == 0
13046	  || ((count & 0x03)
13047	      && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13048    {
13049      emit_insn (gen_cld ());
13050
13051      countreg = ix86_zero_extend_to_Pmode (count_exp);
13052      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13053      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13054      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13055    }
13056  else if (count != 0
13057	   && (align >= 8
13058	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13059	       || optimize_size || count < (unsigned int) 64))
13060    {
13061      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13062      unsigned HOST_WIDE_INT offset = 0;
13063
13064      emit_insn (gen_cld ());
13065
13066      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13067      if (count & ~(size - 1))
13068	{
13069	  unsigned HOST_WIDE_INT repcount;
13070	  unsigned int max_nonrep;
13071
13072	  repcount = count >> (size == 4 ? 2 : 3);
13073	  if (!TARGET_64BIT)
13074	    repcount &= 0x3fffffff;
13075
13076	  /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13077	     movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13078	     bytes.  In both cases the latter seems to be faster for small
13079	     values of N.  */
13080	  max_nonrep = size == 4 ? 7 : 4;
13081	  if (!optimize_size)
13082	    switch (ix86_tune)
13083	      {
13084	      case PROCESSOR_PENTIUM4:
13085	      case PROCESSOR_NOCONA:
13086	        max_nonrep = 3;
13087	        break;
13088	      default:
13089	        break;
13090	      }
13091
13092	  if (repcount <= max_nonrep)
13093	    while (repcount-- > 0)
13094	      {
13095		rtx mem = adjust_automodify_address_nv (dst,
13096							GET_MODE (zeroreg),
13097							destreg, offset);
13098		emit_insn (gen_strset (destreg, mem, zeroreg));
13099		offset += size;
13100	      }
13101	  else
13102	    {
13103	      countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13104	      countreg = ix86_zero_extend_to_Pmode (countreg);
13105	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
13106					GEN_INT (size == 4 ? 2 : 3));
13107	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13108	      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13109				       destexp));
13110	      offset = count & ~(size - 1);
13111	    }
13112	}
13113      if (size == 8 && (count & 0x04))
13114	{
13115	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13116						  offset);
13117	  emit_insn (gen_strset (destreg, mem,
13118				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13119	  offset += 4;
13120	}
13121      if (count & 0x02)
13122	{
13123	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13124						  offset);
13125	  emit_insn (gen_strset (destreg, mem,
13126				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13127	  offset += 2;
13128	}
13129      if (count & 0x01)
13130	{
13131	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13132						  offset);
13133	  emit_insn (gen_strset (destreg, mem,
13134				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13135	}
13136    }
13137  else
13138    {
13139      rtx countreg2;
13140      rtx label = NULL;
13141      /* Compute desired alignment of the string operation.  */
13142      int desired_alignment = (TARGET_PENTIUMPRO
13143			       && (count == 0 || count >= (unsigned int) 260)
13144			       ? 8 : UNITS_PER_WORD);
13145
13146      /* In case we don't know anything about the alignment, default to
13147         the library version, since it is usually equally fast and results
13148         in shorter code.
13149
13150	 Also emit a call when we know that the count is large and call
13151	 overhead will not be important.  */
13152      if (!TARGET_INLINE_ALL_STRINGOPS
13153	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13154	return 0;
13155
13156      if (TARGET_SINGLE_STRINGOP)
13157	emit_insn (gen_cld ());
13158
13159      countreg2 = gen_reg_rtx (Pmode);
13160      countreg = copy_to_mode_reg (counter_mode, count_exp);
13161      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13162      /* Get rid of MEM_OFFSET, it won't be accurate.  */
13163      dst = change_address (dst, BLKmode, destreg);
13164
13165      if (count == 0 && align < desired_alignment)
13166	{
13167	  label = gen_label_rtx ();
13168	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13169				   LEU, 0, counter_mode, 1, label);
13170	}
13171      if (align <= 1)
13172	{
13173	  rtx label = ix86_expand_aligntest (destreg, 1);
13174	  emit_insn (gen_strset (destreg, dst,
13175				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13176	  ix86_adjust_counter (countreg, 1);
13177	  emit_label (label);
13178	  LABEL_NUSES (label) = 1;
13179	}
13180      if (align <= 2)
13181	{
13182	  rtx label = ix86_expand_aligntest (destreg, 2);
13183	  emit_insn (gen_strset (destreg, dst,
13184				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13185	  ix86_adjust_counter (countreg, 2);
13186	  emit_label (label);
13187	  LABEL_NUSES (label) = 1;
13188	}
13189      if (align <= 4 && desired_alignment > 4)
13190	{
13191	  rtx label = ix86_expand_aligntest (destreg, 4);
13192	  emit_insn (gen_strset (destreg, dst,
13193				 (TARGET_64BIT
13194				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13195				  : zeroreg)));
13196	  ix86_adjust_counter (countreg, 4);
13197	  emit_label (label);
13198	  LABEL_NUSES (label) = 1;
13199	}
13200
13201      if (label && desired_alignment > 4 && !TARGET_64BIT)
13202	{
13203	  emit_label (label);
13204	  LABEL_NUSES (label) = 1;
13205	  label = NULL_RTX;
13206	}
13207
13208      if (!TARGET_SINGLE_STRINGOP)
13209	emit_insn (gen_cld ());
13210      if (TARGET_64BIT)
13211	{
13212	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13213				  GEN_INT (3)));
13214	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13215	}
13216      else
13217	{
13218	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13219	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13220	}
13221      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13222      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13223
13224      if (label)
13225	{
13226	  emit_label (label);
13227	  LABEL_NUSES (label) = 1;
13228	}
13229
13230      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13231	emit_insn (gen_strset (destreg, dst,
13232			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
13233      if (TARGET_64BIT && (align <= 4 || count == 0))
13234	{
13235	  rtx label = ix86_expand_aligntest (countreg, 4);
13236	  emit_insn (gen_strset (destreg, dst,
13237				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13238	  emit_label (label);
13239	  LABEL_NUSES (label) = 1;
13240	}
13241      if (align > 2 && count != 0 && (count & 2))
13242	emit_insn (gen_strset (destreg, dst,
13243			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
13244      if (align <= 2 || count == 0)
13245	{
13246	  rtx label = ix86_expand_aligntest (countreg, 2);
13247	  emit_insn (gen_strset (destreg, dst,
13248				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13249	  emit_label (label);
13250	  LABEL_NUSES (label) = 1;
13251	}
13252      if (align > 1 && count != 0 && (count & 1))
13253	emit_insn (gen_strset (destreg, dst,
13254			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
13255      if (align <= 1 || count == 0)
13256	{
13257	  rtx label = ix86_expand_aligntest (countreg, 1);
13258	  emit_insn (gen_strset (destreg, dst,
13259				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13260	  emit_label (label);
13261	  LABEL_NUSES (label) = 1;
13262	}
13263    }
13264  return 1;
13265}
13266
13267/* Expand strlen.  */
13268int
13269ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13270{
13271  rtx addr, scratch1, scratch2, scratch3, scratch4;
13272
13273  /* The generic case of the strlen expander is long.  Avoid expanding it
13274     unless TARGET_INLINE_ALL_STRINGOPS.  */
13275
13276  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13277      && !TARGET_INLINE_ALL_STRINGOPS
13278      && !optimize_size
13279      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13280    return 0;
13281
13282  addr = force_reg (Pmode, XEXP (src, 0));
13283  scratch1 = gen_reg_rtx (Pmode);
13284
13285  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13286      && !optimize_size)
13287    {
13288      /* Well, it seems that some optimizer does not combine a call like
13289         foo(strlen(bar), strlen(bar));
13290         when the move and the subtraction are done here.  It does calculate
13291         the length just once when these instructions are done inside
13292         output_strlen_unroll().  But since &bar[strlen(bar)] is often used,
13293         and this uses one fewer register for the lifetime of
13294         output_strlen_unroll(), this is better.  */
13295
13296      emit_move_insn (out, addr);
13297
13298      ix86_expand_strlensi_unroll_1 (out, src, align);
13299
13300      /* strlensi_unroll_1 returns the address of the zero at the end of
13301         the string, like memchr(), so compute the length by subtracting
13302         the start address.  */
13303      if (TARGET_64BIT)
13304	emit_insn (gen_subdi3 (out, out, addr));
13305      else
13306	emit_insn (gen_subsi3 (out, out, addr));
13307    }
13308  else
13309    {
13310      rtx unspec;
13311      scratch2 = gen_reg_rtx (Pmode);
13312      scratch3 = gen_reg_rtx (Pmode);
13313      scratch4 = force_reg (Pmode, constm1_rtx);
13314
13315      emit_move_insn (scratch3, addr);
13316      eoschar = force_reg (QImode, eoschar);
13317
13318      emit_insn (gen_cld ());
13319      src = replace_equiv_address_nv (src, scratch3);
13320
13321      /* If .md starts supporting :P, this can be done in .md.  */
13322      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13323						 scratch4), UNSPEC_SCAS);
13324      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13325      if (TARGET_64BIT)
13326	{
13327	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13328	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13329	}
13330      else
13331	{
13332	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13333	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13334	}
13335    }
13336  return 1;
13337}
13338
13339/* Expand the appropriate insns for doing strlen if not just doing
13340   repnz; scasb
13341
13342   out = result, initialized with the start address
13343   align_rtx = alignment of the address.
13344   scratch = scratch register, initialized with the start address when
13345	not aligned, otherwise undefined
13346
13347   This is just the body.  It needs the initializations mentioned above and
13348   some address computation at the end.  These things are done in i386.md.  */
13349
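/* An illustrative C sketch of the strategy used below (not the emitted
   code itself, which works on RTL; OUT plays the role of p, starting at
   the beginning of the string s):

     const char *p = s;
     while (((long) p & 3) != 0 && *p != 0)
       p++;
     if (*p != 0)
       for (;;)
	 {
	   unsigned int x = *(const unsigned int *) p;
	   if (((x - 0x01010101) & ~x & 0x80808080) != 0)
	     break;
	   p += 4;
	 }

   The word-at-a-time test is nonzero exactly when some byte of x is zero;
   the code following this loop then locates that byte precisely.  */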
13350static void
13351ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13352{
13353  int align;
13354  rtx tmp;
13355  rtx align_2_label = NULL_RTX;
13356  rtx align_3_label = NULL_RTX;
13357  rtx align_4_label = gen_label_rtx ();
13358  rtx end_0_label = gen_label_rtx ();
13359  rtx mem;
13360  rtx tmpreg = gen_reg_rtx (SImode);
13361  rtx scratch = gen_reg_rtx (SImode);
13362  rtx cmp;
13363
13364  align = 0;
13365  if (GET_CODE (align_rtx) == CONST_INT)
13366    align = INTVAL (align_rtx);
13367
13368  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
13369
13370  /* Is there a known alignment and is it less than 4?  */
13371  if (align < 4)
13372    {
13373      rtx scratch1 = gen_reg_rtx (Pmode);
13374      emit_move_insn (scratch1, out);
13375      /* Is there a known alignment and is it not 2? */
13376      if (align != 2)
13377	{
13378	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13379	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13380
13381	  /* Leave just the 3 lower bits.  */
13382	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13383				    NULL_RTX, 0, OPTAB_WIDEN);
13384
13385	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13386				   Pmode, 1, align_4_label);
13387	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13388				   Pmode, 1, align_2_label);
13389	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13390				   Pmode, 1, align_3_label);
13391	}
13392      else
13393        {
13394	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
13395	     check whether it is aligned to a 4-byte boundary.  */
13396
13397	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13398				    NULL_RTX, 0, OPTAB_WIDEN);
13399
13400	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13401				   Pmode, 1, align_4_label);
13402        }
13403
13404      mem = change_address (src, QImode, out);
13405
13406      /* Now compare the bytes.  */
13407
13408      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
13409      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13410			       QImode, 1, end_0_label);
13411
13412      /* Increment the address.  */
13413      if (TARGET_64BIT)
13414	emit_insn (gen_adddi3 (out, out, const1_rtx));
13415      else
13416	emit_insn (gen_addsi3 (out, out, const1_rtx));
13417
13418      /* Not needed with an alignment of 2.  */
13419      if (align != 2)
13420	{
13421	  emit_label (align_2_label);
13422
13423	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13424				   end_0_label);
13425
13426	  if (TARGET_64BIT)
13427	    emit_insn (gen_adddi3 (out, out, const1_rtx));
13428	  else
13429	    emit_insn (gen_addsi3 (out, out, const1_rtx));
13430
13431	  emit_label (align_3_label);
13432	}
13433
13434      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13435			       end_0_label);
13436
13437      if (TARGET_64BIT)
13438	emit_insn (gen_adddi3 (out, out, const1_rtx));
13439      else
13440	emit_insn (gen_addsi3 (out, out, const1_rtx));
13441    }
13442
13443  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
13444     align this loop: it only makes programs larger and does not help to
13445     speed them up.  */
13446  emit_label (align_4_label);
13447
13448  mem = change_address (src, SImode, out);
13449  emit_move_insn (scratch, mem);
13450  if (TARGET_64BIT)
13451    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13452  else
13453    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13454
13455  /* This formula yields a nonzero result iff one of the bytes is zero.
13456     This saves three branches inside the loop and many cycles.  */
13457
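  /* Worked example (added annotation, not in the original): for the word
     x = 0x61006263 the steps below give
	x - 0x01010101  = 0x5fff6162
	~x              = 0x9eff9d9c
	AND of the two  = 0x1eff0100
	& 0x80808080    = 0x00800000, which is nonzero because of the 0x00
     byte, whereas for x = 0x61626364 (no zero byte) the final AND is 0.  */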
13458  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13459  emit_insn (gen_one_cmplsi2 (scratch, scratch));
13460  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13461  emit_insn (gen_andsi3 (tmpreg, tmpreg,
13462			 gen_int_mode (0x80808080, SImode)));
13463  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13464			   align_4_label);
13465
13466  if (TARGET_CMOVE)
13467    {
13468       rtx reg = gen_reg_rtx (SImode);
13469       rtx reg2 = gen_reg_rtx (Pmode);
13470       emit_move_insn (reg, tmpreg);
13471       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13472
13473       /* If zero is not in the first two bytes, move two bytes forward.  */
13474       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13475       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13476       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13477       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13478			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
13479						     reg,
13480						     tmpreg)));
13481       /* Emit lea manually to avoid clobbering of flags.  */
13482       emit_insn (gen_rtx_SET (SImode, reg2,
13483			       gen_rtx_PLUS (Pmode, out, const2_rtx)));
13484
13485       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13486       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13487       emit_insn (gen_rtx_SET (VOIDmode, out,
13488			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13489						     reg2,
13490						     out)));
13491
13492    }
13493  else
13494    {
13495       rtx end_2_label = gen_label_rtx ();
13496       /* Is zero in the first two bytes? */
13497
13498       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13499       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13500       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13501       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13502                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13503                            pc_rtx);
13504       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13505       JUMP_LABEL (tmp) = end_2_label;
13506
13507       /* Not in the first two.  Move two bytes forward.  */
13508       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13509       if (TARGET_64BIT)
13510	 emit_insn (gen_adddi3 (out, out, const2_rtx));
13511       else
13512	 emit_insn (gen_addsi3 (out, out, const2_rtx));
13513
13514       emit_label (end_2_label);
13515
13516    }
13517
13518  /* Avoid branch in fixing the byte.  */
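  /* Added explanatory note (not in the original): the low byte of TMPREG has
     bit 7 set exactly when the zero byte is the first of the two remaining
     candidates, and OUT points 4 bytes past the start of that pair.  Doubling
     TMPREG below copies that bit into the carry flag, so the following
     subtract-with-borrow computes OUT - 3 - CF, leaving OUT pointing at the
     terminating zero byte in either case.  */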
13519  tmpreg = gen_lowpart (QImode, tmpreg);
13520  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13521  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13522  if (TARGET_64BIT)
13523    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13524  else
13525    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13526
13527  emit_label (end_0_label);
13528}
13529
13530void
13531ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13532		  rtx callarg2 ATTRIBUTE_UNUSED,
13533		  rtx pop, int sibcall)
13534{
13535  rtx use = NULL, call;
13536
13537  if (pop == const0_rtx)
13538    pop = NULL;
13539  gcc_assert (!TARGET_64BIT || !pop);
13540
13541  if (TARGET_MACHO && !TARGET_64BIT)
13542    {
13543#if TARGET_MACHO
13544      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13545	fnaddr = machopic_indirect_call_target (fnaddr);
13546#endif
13547    }
13548  else
13549    {
13550      /* Static functions and indirect calls don't need the pic register.  */
13551      if (! TARGET_64BIT && flag_pic
13552	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13553	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13554	use_reg (&use, pic_offset_table_rtx);
13555    }
13556
13557  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13558    {
13559      rtx al = gen_rtx_REG (QImode, 0);
13560      emit_move_insn (al, callarg2);
13561      use_reg (&use, al);
13562    }
13563
13564  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13565    {
13566      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13567      fnaddr = gen_rtx_MEM (QImode, fnaddr);
13568    }
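  /* Added note (not in the original): R11 is used below because, in the
     x86-64 ABI, it is call-clobbered and is not used for passing arguments,
     so it can safely carry the target address of a sibling call.  */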
13569  if (sibcall && TARGET_64BIT
13570      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13571    {
13572      rtx addr;
13573      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13574      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13575      emit_move_insn (fnaddr, addr);
13576      fnaddr = gen_rtx_MEM (QImode, fnaddr);
13577    }
13578
13579  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13580  if (retval)
13581    call = gen_rtx_SET (VOIDmode, retval, call);
13582  if (pop)
13583    {
13584      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13585      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13586      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13587    }
13588
13589  call = emit_call_insn (call);
13590  if (use)
13591    CALL_INSN_FUNCTION_USAGE (call) = use;
13592}
13593
13594
13595/* Clear stack slot assignments remembered from previous functions.
13596   This is called from INIT_EXPANDERS once before RTL is emitted for each
13597   function.  */
13598
13599static struct machine_function *
13600ix86_init_machine_status (void)
13601{
13602  struct machine_function *f;
13603
13604  f = ggc_alloc_cleared (sizeof (struct machine_function));
13605  f->use_fast_prologue_epilogue_nregs = -1;
13606  f->tls_descriptor_call_expanded_p = 0;
13607
13608  return f;
13609}
13610
13611/* Return a MEM corresponding to a stack slot with mode MODE.
13612   Allocate a new slot if necessary.
13613
13614   The RTL for a function can have several slots available: N is
13615   which slot to use.  */
13616
13617rtx
13618assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13619{
13620  struct stack_local_entry *s;
13621
13622  gcc_assert (n < MAX_386_STACK_LOCALS);
13623
13624  /* Virtual slot is valid only before vregs are instantiated.  */
13625  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13626
13627  for (s = ix86_stack_locals; s; s = s->next)
13628    if (s->mode == mode && s->n == n)
13629      return s->rtl;
13630
13631  s = (struct stack_local_entry *)
13632    ggc_alloc (sizeof (struct stack_local_entry));
13633  s->n = n;
13634  s->mode = mode;
13635  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13636
13637  s->next = ix86_stack_locals;
13638  ix86_stack_locals = s;
13639  return s->rtl;
13640}
13641
13642/* Construct the SYMBOL_REF for the tls_get_addr function.  */
13643
13644static GTY(()) rtx ix86_tls_symbol;
13645rtx
13646ix86_tls_get_addr (void)
13647{
13648
13649  if (!ix86_tls_symbol)
13650    {
13651      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13652					    (TARGET_ANY_GNU_TLS
13653					     && !TARGET_64BIT)
13654					    ? "___tls_get_addr"
13655					    : "__tls_get_addr");
13656    }
13657
13658  return ix86_tls_symbol;
13659}
13660
13661/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
13662
13663static GTY(()) rtx ix86_tls_module_base_symbol;
13664rtx
13665ix86_tls_module_base (void)
13666{
13667
13668  if (!ix86_tls_module_base_symbol)
13669    {
13670      ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13671							"_TLS_MODULE_BASE_");
13672      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13673	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13674    }
13675
13676  return ix86_tls_module_base_symbol;
13677}
13678
13679/* Calculate the length of the memory address in the instruction
13680   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
13681
13682int
13683memory_address_length (rtx addr)
13684{
13685  struct ix86_address parts;
13686  rtx base, index, disp;
13687  int len;
13688  int ok;
13689
13690  if (GET_CODE (addr) == PRE_DEC
13691      || GET_CODE (addr) == POST_INC
13692      || GET_CODE (addr) == PRE_MODIFY
13693      || GET_CODE (addr) == POST_MODIFY)
13694    return 0;
13695
13696  ok = ix86_decompose_address (addr, &parts);
13697  gcc_assert (ok);
13698
13699  if (parts.base && GET_CODE (parts.base) == SUBREG)
13700    parts.base = SUBREG_REG (parts.base);
13701  if (parts.index && GET_CODE (parts.index) == SUBREG)
13702    parts.index = SUBREG_REG (parts.index);
13703
13704  base = parts.base;
13705  index = parts.index;
13706  disp = parts.disp;
13707  len = 0;
13708
13709  /* Rule of thumb:
13710       - esp as the base always wants an index,
13711       - ebp as the base always wants a displacement.  */
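  /* Illustrative examples (added annotation, not in the original) of the
     extra bytes computed by the cases below:
	(%eax)			-> 0
	(%esp)			-> 1  (needs a SIB byte)
	12(%ebp)		-> 1  (disp8)
	foo			-> 4  (disp32, no base or index)
	8(%ebx,%esi,4)		-> 2  (disp8 plus a SIB byte)  */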
13712
13713  /* Register Indirect.  */
13714  if (base && !index && !disp)
13715    {
13716      /* esp (for its index) and ebp (for its displacement) need
13717	 the two-byte modrm form.  */
13718      if (addr == stack_pointer_rtx
13719	  || addr == arg_pointer_rtx
13720	  || addr == frame_pointer_rtx
13721	  || addr == hard_frame_pointer_rtx)
13722	len = 1;
13723    }
13724
13725  /* Direct Addressing.  */
13726  else if (disp && !base && !index)
13727    len = 4;
13728
13729  else
13730    {
13731      /* Find the length of the displacement constant.  */
13732      if (disp)
13733	{
13734	  if (base && satisfies_constraint_K (disp))
13735	    len = 1;
13736	  else
13737	    len = 4;
13738	}
13739      /* ebp always wants a displacement.  */
13740      else if (base == hard_frame_pointer_rtx)
13741        len = 1;
13742
13743      /* An index requires the two-byte modrm form....  */
13744      if (index
13745	  /* ...like esp, which always wants an index.  */
13746	  || base == stack_pointer_rtx
13747	  || base == arg_pointer_rtx
13748	  || base == frame_pointer_rtx)
13749	len += 1;
13750    }
13751
13752  return len;
13753}
13754
13755/* Compute default value for "length_immediate" attribute.  When SHORTFORM
13756   is set, expect that the insn has an 8-bit immediate alternative.  */
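/* For instance (added annotation, not in the original): with SHORTFORM set,
   an SImode add of the constant 1 can use the sign-extended 8-bit immediate
   form and so contributes 1 byte, whereas a constant such as 1000 needs the
   full 4-byte immediate.  */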
13757int
13758ix86_attr_length_immediate_default (rtx insn, int shortform)
13759{
13760  int len = 0;
13761  int i;
13762  extract_insn_cached (insn);
13763  for (i = recog_data.n_operands - 1; i >= 0; --i)
13764    if (CONSTANT_P (recog_data.operand[i]))
13765      {
13766	gcc_assert (!len);
13767	if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13768	  len = 1;
13769	else
13770	  {
13771	    switch (get_attr_mode (insn))
13772	      {
13773		case MODE_QI:
13774		  len+=1;
13775		  break;
13776		case MODE_HI:
13777		  len+=2;
13778		  break;
13779		case MODE_SI:
13780		  len+=4;
13781		  break;
13782		/* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
13783		case MODE_DI:
13784		  len+=4;
13785		  break;
13786		default:
13787		  fatal_insn ("unknown insn mode", insn);
13788	      }
13789	  }
13790      }
13791  return len;
13792}
13793/* Compute default value for "length_address" attribute.  */
13794int
13795ix86_attr_length_address_default (rtx insn)
13796{
13797  int i;
13798
13799  if (get_attr_type (insn) == TYPE_LEA)
13800    {
13801      rtx set = PATTERN (insn);
13802
13803      if (GET_CODE (set) == PARALLEL)
13804	set = XVECEXP (set, 0, 0);
13805
13806      gcc_assert (GET_CODE (set) == SET);
13807
13808      return memory_address_length (SET_SRC (set));
13809    }
13810
13811  extract_insn_cached (insn);
13812  for (i = recog_data.n_operands - 1; i >= 0; --i)
13813    if (GET_CODE (recog_data.operand[i]) == MEM)
13814      {
13815	return memory_address_length (XEXP (recog_data.operand[i], 0));
13816	break;
13817      }
13818  return 0;
13819}
13820
13821/* Return the maximum number of instructions a cpu can issue.  */
13822
13823static int
13824ix86_issue_rate (void)
13825{
13826  switch (ix86_tune)
13827    {
13828    case PROCESSOR_PENTIUM:
13829    case PROCESSOR_K6:
13830      return 2;
13831
13832    case PROCESSOR_PENTIUMPRO:
13833    case PROCESSOR_PENTIUM4:
13834    case PROCESSOR_ATHLON:
13835    case PROCESSOR_K8:
13836    case PROCESSOR_NOCONA:
13837    case PROCESSOR_GENERIC32:
13838    case PROCESSOR_GENERIC64:
13839      return 3;
13840
13841    case PROCESSOR_CORE2:
13842      return 4;
13843
13844    default:
13845      return 1;
13846    }
13847}
13848
13849/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
13850   set by DEP_INSN and reads nothing else set by DEP_INSN.  */
13851
13852static int
13853ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13854{
13855  rtx set, set2;
13856
13857  /* Simplify the test for uninteresting insns.  */
13858  if (insn_type != TYPE_SETCC
13859      && insn_type != TYPE_ICMOV
13860      && insn_type != TYPE_FCMOV
13861      && insn_type != TYPE_IBR)
13862    return 0;
13863
13864  if ((set = single_set (dep_insn)) != 0)
13865    {
13866      set = SET_DEST (set);
13867      set2 = NULL_RTX;
13868    }
13869  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13870	   && XVECLEN (PATTERN (dep_insn), 0) == 2
13871	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13872	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13873    {
13874      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13875      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13876    }
13877  else
13878    return 0;
13879
13880  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13881    return 0;
13882
13883  /* This test is true if the dependent insn reads the flags but
13884     not any other potentially set register.  */
13885  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13886    return 0;
13887
13888  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13889    return 0;
13890
13891  return 1;
13892}
13893
13894/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13895   address with operands set by DEP_INSN.  */
13896
13897static int
13898ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13899{
13900  rtx addr;
13901
13902  if (insn_type == TYPE_LEA
13903      && TARGET_PENTIUM)
13904    {
13905      addr = PATTERN (insn);
13906
13907      if (GET_CODE (addr) == PARALLEL)
13908	addr = XVECEXP (addr, 0, 0);
13909
13910      gcc_assert (GET_CODE (addr) == SET);
13911
13912      addr = SET_SRC (addr);
13913    }
13914  else
13915    {
13916      int i;
13917      extract_insn_cached (insn);
13918      for (i = recog_data.n_operands - 1; i >= 0; --i)
13919	if (GET_CODE (recog_data.operand[i]) == MEM)
13920	  {
13921	    addr = XEXP (recog_data.operand[i], 0);
13922	    goto found;
13923	  }
13924      return 0;
13925    found:;
13926    }
13927
13928  return modified_in_p (addr, dep_insn);
13929}
13930
13931static int
13932ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13933{
13934  enum attr_type insn_type, dep_insn_type;
13935  enum attr_memory memory;
13936  rtx set, set2;
13937  int dep_insn_code_number;
13938
13939  /* Anti and output dependencies have zero cost on all CPUs.  */
13940  if (REG_NOTE_KIND (link) != 0)
13941    return 0;
13942
13943  dep_insn_code_number = recog_memoized (dep_insn);
13944
13945  /* If we can't recognize the insns, we can't really do anything.  */
13946  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13947    return cost;
13948
13949  insn_type = get_attr_type (insn);
13950  dep_insn_type = get_attr_type (dep_insn);
13951
13952  switch (ix86_tune)
13953    {
13954    case PROCESSOR_PENTIUM:
13955      /* Address Generation Interlock adds a cycle of latency.  */
13956      if (ix86_agi_dependent (insn, dep_insn, insn_type))
13957	cost += 1;
13958
13959      /* ??? Compares pair with jump/setcc.  */
13960      if (ix86_flags_dependent (insn, dep_insn, insn_type))
13961	cost = 0;
13962
13963      /* Floating point stores require the value to be ready one cycle earlier.  */
13964      if (insn_type == TYPE_FMOV
13965	  && get_attr_memory (insn) == MEMORY_STORE
13966	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
13967	cost += 1;
13968      break;
13969
13970    case PROCESSOR_PENTIUMPRO:
13971      memory = get_attr_memory (insn);
13972
13973      /* INT->FP conversion is expensive.  */
13974      if (get_attr_fp_int_src (dep_insn))
13975	cost += 5;
13976
13977      /* There is one cycle extra latency between an FP op and a store.  */
13978      if (insn_type == TYPE_FMOV
13979	  && (set = single_set (dep_insn)) != NULL_RTX
13980	  && (set2 = single_set (insn)) != NULL_RTX
13981	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13982	  && GET_CODE (SET_DEST (set2)) == MEM)
13983	cost += 1;
13984
13985      /* Show the ability of the reorder buffer to hide the latency of a load
13986	 by executing it in parallel with the previous instruction when the
13987	 previous instruction is not needed to compute the address.  */
13988      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13989	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
13990	{
13991	  /* Claim that moves take one cycle, as the core can issue one load
13992	     at a time and the next load can start a cycle later.  */
13993	  if (dep_insn_type == TYPE_IMOV
13994	      || dep_insn_type == TYPE_FMOV)
13995	    cost = 1;
13996	  else if (cost > 1)
13997	    cost--;
13998	}
13999      break;
14000
14001    case PROCESSOR_K6:
14002      memory = get_attr_memory (insn);
14003
14004      /* The esp dependency is resolved before the instruction is really
14005         finished.  */
14006      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14007	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14008	return 1;
14009
14010      /* INT->FP conversion is expensive.  */
14011      if (get_attr_fp_int_src (dep_insn))
14012	cost += 5;
14013
14014      /* Show the ability of the reorder buffer to hide the latency of a load
14015	 by executing it in parallel with the previous instruction when the
14016	 previous instruction is not needed to compute the address.  */
14017      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14018	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
14019	{
14020	  /* Claim that moves take one cycle, as the core can issue one load
14021	     at a time and the next load can start a cycle later.  */
14022	  if (dep_insn_type == TYPE_IMOV
14023	      || dep_insn_type == TYPE_FMOV)
14024	    cost = 1;
14025	  else if (cost > 2)
14026	    cost -= 2;
14027	  else
14028	    cost = 1;
14029	}
14030      break;
14031
14032    case PROCESSOR_ATHLON:
14033    case PROCESSOR_K8:
14034    case PROCESSOR_GENERIC32:
14035    case PROCESSOR_GENERIC64:
14036      memory = get_attr_memory (insn);
14037
14038      /* Show the ability of the reorder buffer to hide the latency of a load
14039	 by executing it in parallel with the previous instruction when the
14040	 previous instruction is not needed to compute the address.  */
14041      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14042	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
14043	{
14044	  enum attr_unit unit = get_attr_unit (insn);
14045	  int loadcost = 3;
14046
14047	  /* Because of the difference between the lengths of the integer and
14048	     floating-point unit pipeline preparation stages, the memory operands
14049	     for floating point are cheaper.
14050
14051	     ??? For Athlon the difference is most probably 2.  */
14052	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14053	    loadcost = 3;
14054	  else
14055	    loadcost = TARGET_ATHLON ? 2 : 0;
14056
14057	  if (cost >= loadcost)
14058	    cost -= loadcost;
14059	  else
14060	    cost = 0;
14061	}
14062
14063    default:
14064      break;
14065    }
14066
14067  return cost;
14068}
14069
14070/* How many alternative schedules to try.  This should be as wide as the
14071   scheduling freedom in the DFA, but no wider.  Making this value too
14072   large results in extra work for the scheduler.  */
14073
14074static int
14075ia32_multipass_dfa_lookahead (void)
14076{
14077  if (ix86_tune == PROCESSOR_PENTIUM)
14078    return 2;
14079
14080  if (ix86_tune == PROCESSOR_PENTIUMPRO
14081      || ix86_tune == PROCESSOR_K6)
14082    return 1;
14083
14084  else
14085    return 0;
14086}
14087
14088
14089/* Compute the alignment given to a constant that is being placed in memory.
14090   EXP is the constant and ALIGN is the alignment that the object would
14091   ordinarily have.
14092   The value of this function is used instead of that alignment to align
14093   the object.  */
14094
14095int
14096ix86_constant_alignment (tree exp, int align)
14097{
14098  if (TREE_CODE (exp) == REAL_CST)
14099    {
14100      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14101	return 64;
14102      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14103	return 128;
14104    }
14105  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14106      	   && !TARGET_NO_ALIGN_LONG_STRINGS
14107	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14108    return BITS_PER_WORD;
14109
14110  return align;
14111}
14112
14113/* Compute the alignment for a static variable.
14114   TYPE is the data type, and ALIGN is the alignment that
14115   the object would ordinarily have.  The value of this function is used
14116   instead of that alignment to align the object.  */
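/* For example (added annotation, not in the original): a 64-byte char array
   is given 256-bit alignment when optimizing for speed, but only word
   alignment under -Os, per the max_align computation below.  */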
14117
14118int
14119ix86_data_alignment (tree type, int align)
14120{
14121  int max_align = optimize_size ? BITS_PER_WORD : 256;
14122
14123  if (AGGREGATE_TYPE_P (type)
14124      && TYPE_SIZE (type)
14125      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14126      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14127	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14128      && align < max_align)
14129    align = max_align;
14130
14131  /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
14132     to a 16-byte boundary.  */
14133  if (TARGET_64BIT)
14134    {
14135      if (AGGREGATE_TYPE_P (type)
14136	   && TYPE_SIZE (type)
14137	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14138	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14139	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14140	return 128;
14141    }
14142
14143  if (TREE_CODE (type) == ARRAY_TYPE)
14144    {
14145      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14146	return 64;
14147      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14148	return 128;
14149    }
14150  else if (TREE_CODE (type) == COMPLEX_TYPE)
14151    {
14152
14153      if (TYPE_MODE (type) == DCmode && align < 64)
14154	return 64;
14155      if (TYPE_MODE (type) == XCmode && align < 128)
14156	return 128;
14157    }
14158  else if ((TREE_CODE (type) == RECORD_TYPE
14159	    || TREE_CODE (type) == UNION_TYPE
14160	    || TREE_CODE (type) == QUAL_UNION_TYPE)
14161	   && TYPE_FIELDS (type))
14162    {
14163      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14164	return 64;
14165      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14166	return 128;
14167    }
14168  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14169	   || TREE_CODE (type) == INTEGER_TYPE)
14170    {
14171      if (TYPE_MODE (type) == DFmode && align < 64)
14172	return 64;
14173      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14174	return 128;
14175    }
14176
14177  return align;
14178}
14179
14180/* Compute the alignment for a local variable.
14181   TYPE is the data type, and ALIGN is the alignment that
14182   the object would ordinarily have.  The value of this macro is used
14183   instead of that alignment to align the object.  */
14184
14185int
14186ix86_local_alignment (tree type, int align)
14187{
14188  /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
14189     to a 16-byte boundary.  */
14190  if (TARGET_64BIT)
14191    {
14192      if (AGGREGATE_TYPE_P (type)
14193	   && TYPE_SIZE (type)
14194	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14195	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14196	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14197	return 128;
14198    }
14199  if (TREE_CODE (type) == ARRAY_TYPE)
14200    {
14201      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14202	return 64;
14203      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14204	return 128;
14205    }
14206  else if (TREE_CODE (type) == COMPLEX_TYPE)
14207    {
14208      if (TYPE_MODE (type) == DCmode && align < 64)
14209	return 64;
14210      if (TYPE_MODE (type) == XCmode && align < 128)
14211	return 128;
14212    }
14213  else if ((TREE_CODE (type) == RECORD_TYPE
14214	    || TREE_CODE (type) == UNION_TYPE
14215	    || TREE_CODE (type) == QUAL_UNION_TYPE)
14216	   && TYPE_FIELDS (type))
14217    {
14218      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14219	return 64;
14220      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14221	return 128;
14222    }
14223  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14224	   || TREE_CODE (type) == INTEGER_TYPE)
14225    {
14226
14227      if (TYPE_MODE (type) == DFmode && align < 64)
14228	return 64;
14229      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14230	return 128;
14231    }
14232  return align;
14233}
14234
14235/* Emit RTL insns to initialize the variable parts of a trampoline.
14236   FNADDR is an RTX for the address of the function's pure code.
14237   CXT is an RTX for the static chain value for the function.  */
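/* Added annotation (not in the original): in the !TARGET_64BIT case the
   bytes emitted below form the sequence

	b9 <cxt:4>		movl  $CXT, %ecx
	e9 <disp:4>		jmp   FNADDR

   where <disp> is FNADDR minus the address just past the jmp (TRAMP + 10),
   which is exactly what the expand_binop call computes.  */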
14238void
14239x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14240{
14241  if (!TARGET_64BIT)
14242    {
14243      /* Compute offset from the end of the jmp to the target function.  */
14244      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14245			       plus_constant (tramp, 10),
14246			       NULL_RTX, 1, OPTAB_DIRECT);
14247      emit_move_insn (gen_rtx_MEM (QImode, tramp),
14248		      gen_int_mode (0xb9, QImode));
14249      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14250      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14251		      gen_int_mode (0xe9, QImode));
14252      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14253    }
14254  else
14255    {
14256      int offset = 0;
14257      /* Try to load the address using the shorter movl instead of movabs.
14258         We may want to support movq for kernel mode, but the kernel does not
14259         use trampolines at the moment.  */
14260      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14261	{
14262	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
14263	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14264			  gen_int_mode (0xbb41, HImode));
14265	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14266			  gen_lowpart (SImode, fnaddr));
14267	  offset += 6;
14268	}
14269      else
14270	{
14271	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14272			  gen_int_mode (0xbb49, HImode));
14273	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14274			  fnaddr);
14275	  offset += 10;
14276	}
14277      /* Load static chain using movabs to r10.  */
14278      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14279		      gen_int_mode (0xba49, HImode));
14280      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14281		      cxt);
14282      offset += 10;
14283      /* Jump to r11.  */
14284      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14285		      gen_int_mode (0xff49, HImode));
14286      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14287		      gen_int_mode (0xe3, QImode));
14288      offset += 3;
14289      gcc_assert (offset <= TRAMPOLINE_SIZE);
14290    }
14291
14292#ifdef ENABLE_EXECUTE_STACK
14293  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14294		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14295#endif
14296}
14297
14298/* Codes for all the SSE/MMX builtins.  */
14299enum ix86_builtins
14300{
14301  IX86_BUILTIN_ADDPS,
14302  IX86_BUILTIN_ADDSS,
14303  IX86_BUILTIN_DIVPS,
14304  IX86_BUILTIN_DIVSS,
14305  IX86_BUILTIN_MULPS,
14306  IX86_BUILTIN_MULSS,
14307  IX86_BUILTIN_SUBPS,
14308  IX86_BUILTIN_SUBSS,
14309
14310  IX86_BUILTIN_CMPEQPS,
14311  IX86_BUILTIN_CMPLTPS,
14312  IX86_BUILTIN_CMPLEPS,
14313  IX86_BUILTIN_CMPGTPS,
14314  IX86_BUILTIN_CMPGEPS,
14315  IX86_BUILTIN_CMPNEQPS,
14316  IX86_BUILTIN_CMPNLTPS,
14317  IX86_BUILTIN_CMPNLEPS,
14318  IX86_BUILTIN_CMPNGTPS,
14319  IX86_BUILTIN_CMPNGEPS,
14320  IX86_BUILTIN_CMPORDPS,
14321  IX86_BUILTIN_CMPUNORDPS,
14322  IX86_BUILTIN_CMPEQSS,
14323  IX86_BUILTIN_CMPLTSS,
14324  IX86_BUILTIN_CMPLESS,
14325  IX86_BUILTIN_CMPNEQSS,
14326  IX86_BUILTIN_CMPNLTSS,
14327  IX86_BUILTIN_CMPNLESS,
14328  IX86_BUILTIN_CMPNGTSS,
14329  IX86_BUILTIN_CMPNGESS,
14330  IX86_BUILTIN_CMPORDSS,
14331  IX86_BUILTIN_CMPUNORDSS,
14332
14333  IX86_BUILTIN_COMIEQSS,
14334  IX86_BUILTIN_COMILTSS,
14335  IX86_BUILTIN_COMILESS,
14336  IX86_BUILTIN_COMIGTSS,
14337  IX86_BUILTIN_COMIGESS,
14338  IX86_BUILTIN_COMINEQSS,
14339  IX86_BUILTIN_UCOMIEQSS,
14340  IX86_BUILTIN_UCOMILTSS,
14341  IX86_BUILTIN_UCOMILESS,
14342  IX86_BUILTIN_UCOMIGTSS,
14343  IX86_BUILTIN_UCOMIGESS,
14344  IX86_BUILTIN_UCOMINEQSS,
14345
14346  IX86_BUILTIN_CVTPI2PS,
14347  IX86_BUILTIN_CVTPS2PI,
14348  IX86_BUILTIN_CVTSI2SS,
14349  IX86_BUILTIN_CVTSI642SS,
14350  IX86_BUILTIN_CVTSS2SI,
14351  IX86_BUILTIN_CVTSS2SI64,
14352  IX86_BUILTIN_CVTTPS2PI,
14353  IX86_BUILTIN_CVTTSS2SI,
14354  IX86_BUILTIN_CVTTSS2SI64,
14355
14356  IX86_BUILTIN_MAXPS,
14357  IX86_BUILTIN_MAXSS,
14358  IX86_BUILTIN_MINPS,
14359  IX86_BUILTIN_MINSS,
14360
14361  IX86_BUILTIN_LOADUPS,
14362  IX86_BUILTIN_STOREUPS,
14363  IX86_BUILTIN_MOVSS,
14364
14365  IX86_BUILTIN_MOVHLPS,
14366  IX86_BUILTIN_MOVLHPS,
14367  IX86_BUILTIN_LOADHPS,
14368  IX86_BUILTIN_LOADLPS,
14369  IX86_BUILTIN_STOREHPS,
14370  IX86_BUILTIN_STORELPS,
14371
14372  IX86_BUILTIN_MASKMOVQ,
14373  IX86_BUILTIN_MOVMSKPS,
14374  IX86_BUILTIN_PMOVMSKB,
14375
14376  IX86_BUILTIN_MOVNTPS,
14377  IX86_BUILTIN_MOVNTQ,
14378
14379  IX86_BUILTIN_LOADDQU,
14380  IX86_BUILTIN_STOREDQU,
14381
14382  IX86_BUILTIN_PACKSSWB,
14383  IX86_BUILTIN_PACKSSDW,
14384  IX86_BUILTIN_PACKUSWB,
14385
14386  IX86_BUILTIN_PADDB,
14387  IX86_BUILTIN_PADDW,
14388  IX86_BUILTIN_PADDD,
14389  IX86_BUILTIN_PADDQ,
14390  IX86_BUILTIN_PADDSB,
14391  IX86_BUILTIN_PADDSW,
14392  IX86_BUILTIN_PADDUSB,
14393  IX86_BUILTIN_PADDUSW,
14394  IX86_BUILTIN_PSUBB,
14395  IX86_BUILTIN_PSUBW,
14396  IX86_BUILTIN_PSUBD,
14397  IX86_BUILTIN_PSUBQ,
14398  IX86_BUILTIN_PSUBSB,
14399  IX86_BUILTIN_PSUBSW,
14400  IX86_BUILTIN_PSUBUSB,
14401  IX86_BUILTIN_PSUBUSW,
14402
14403  IX86_BUILTIN_PAND,
14404  IX86_BUILTIN_PANDN,
14405  IX86_BUILTIN_POR,
14406  IX86_BUILTIN_PXOR,
14407
14408  IX86_BUILTIN_PAVGB,
14409  IX86_BUILTIN_PAVGW,
14410
14411  IX86_BUILTIN_PCMPEQB,
14412  IX86_BUILTIN_PCMPEQW,
14413  IX86_BUILTIN_PCMPEQD,
14414  IX86_BUILTIN_PCMPGTB,
14415  IX86_BUILTIN_PCMPGTW,
14416  IX86_BUILTIN_PCMPGTD,
14417
14418  IX86_BUILTIN_PMADDWD,
14419
14420  IX86_BUILTIN_PMAXSW,
14421  IX86_BUILTIN_PMAXUB,
14422  IX86_BUILTIN_PMINSW,
14423  IX86_BUILTIN_PMINUB,
14424
14425  IX86_BUILTIN_PMULHUW,
14426  IX86_BUILTIN_PMULHW,
14427  IX86_BUILTIN_PMULLW,
14428
14429  IX86_BUILTIN_PSADBW,
14430  IX86_BUILTIN_PSHUFW,
14431
14432  IX86_BUILTIN_PSLLW,
14433  IX86_BUILTIN_PSLLD,
14434  IX86_BUILTIN_PSLLQ,
14435  IX86_BUILTIN_PSRAW,
14436  IX86_BUILTIN_PSRAD,
14437  IX86_BUILTIN_PSRLW,
14438  IX86_BUILTIN_PSRLD,
14439  IX86_BUILTIN_PSRLQ,
14440  IX86_BUILTIN_PSLLWI,
14441  IX86_BUILTIN_PSLLDI,
14442  IX86_BUILTIN_PSLLQI,
14443  IX86_BUILTIN_PSRAWI,
14444  IX86_BUILTIN_PSRADI,
14445  IX86_BUILTIN_PSRLWI,
14446  IX86_BUILTIN_PSRLDI,
14447  IX86_BUILTIN_PSRLQI,
14448
14449  IX86_BUILTIN_PUNPCKHBW,
14450  IX86_BUILTIN_PUNPCKHWD,
14451  IX86_BUILTIN_PUNPCKHDQ,
14452  IX86_BUILTIN_PUNPCKLBW,
14453  IX86_BUILTIN_PUNPCKLWD,
14454  IX86_BUILTIN_PUNPCKLDQ,
14455
14456  IX86_BUILTIN_SHUFPS,
14457
14458  IX86_BUILTIN_RCPPS,
14459  IX86_BUILTIN_RCPSS,
14460  IX86_BUILTIN_RSQRTPS,
14461  IX86_BUILTIN_RSQRTSS,
14462  IX86_BUILTIN_SQRTPS,
14463  IX86_BUILTIN_SQRTSS,
14464
14465  IX86_BUILTIN_UNPCKHPS,
14466  IX86_BUILTIN_UNPCKLPS,
14467
14468  IX86_BUILTIN_ANDPS,
14469  IX86_BUILTIN_ANDNPS,
14470  IX86_BUILTIN_ORPS,
14471  IX86_BUILTIN_XORPS,
14472
14473  IX86_BUILTIN_EMMS,
14474  IX86_BUILTIN_LDMXCSR,
14475  IX86_BUILTIN_STMXCSR,
14476  IX86_BUILTIN_SFENCE,
14477
14478  /* 3DNow! Original */
14479  IX86_BUILTIN_FEMMS,
14480  IX86_BUILTIN_PAVGUSB,
14481  IX86_BUILTIN_PF2ID,
14482  IX86_BUILTIN_PFACC,
14483  IX86_BUILTIN_PFADD,
14484  IX86_BUILTIN_PFCMPEQ,
14485  IX86_BUILTIN_PFCMPGE,
14486  IX86_BUILTIN_PFCMPGT,
14487  IX86_BUILTIN_PFMAX,
14488  IX86_BUILTIN_PFMIN,
14489  IX86_BUILTIN_PFMUL,
14490  IX86_BUILTIN_PFRCP,
14491  IX86_BUILTIN_PFRCPIT1,
14492  IX86_BUILTIN_PFRCPIT2,
14493  IX86_BUILTIN_PFRSQIT1,
14494  IX86_BUILTIN_PFRSQRT,
14495  IX86_BUILTIN_PFSUB,
14496  IX86_BUILTIN_PFSUBR,
14497  IX86_BUILTIN_PI2FD,
14498  IX86_BUILTIN_PMULHRW,
14499
14500  /* 3DNow! Athlon Extensions */
14501  IX86_BUILTIN_PF2IW,
14502  IX86_BUILTIN_PFNACC,
14503  IX86_BUILTIN_PFPNACC,
14504  IX86_BUILTIN_PI2FW,
14505  IX86_BUILTIN_PSWAPDSI,
14506  IX86_BUILTIN_PSWAPDSF,
14507
14508  /* SSE2 */
14509  IX86_BUILTIN_ADDPD,
14510  IX86_BUILTIN_ADDSD,
14511  IX86_BUILTIN_DIVPD,
14512  IX86_BUILTIN_DIVSD,
14513  IX86_BUILTIN_MULPD,
14514  IX86_BUILTIN_MULSD,
14515  IX86_BUILTIN_SUBPD,
14516  IX86_BUILTIN_SUBSD,
14517
14518  IX86_BUILTIN_CMPEQPD,
14519  IX86_BUILTIN_CMPLTPD,
14520  IX86_BUILTIN_CMPLEPD,
14521  IX86_BUILTIN_CMPGTPD,
14522  IX86_BUILTIN_CMPGEPD,
14523  IX86_BUILTIN_CMPNEQPD,
14524  IX86_BUILTIN_CMPNLTPD,
14525  IX86_BUILTIN_CMPNLEPD,
14526  IX86_BUILTIN_CMPNGTPD,
14527  IX86_BUILTIN_CMPNGEPD,
14528  IX86_BUILTIN_CMPORDPD,
14529  IX86_BUILTIN_CMPUNORDPD,
14530  IX86_BUILTIN_CMPNEPD,
14531  IX86_BUILTIN_CMPEQSD,
14532  IX86_BUILTIN_CMPLTSD,
14533  IX86_BUILTIN_CMPLESD,
14534  IX86_BUILTIN_CMPNEQSD,
14535  IX86_BUILTIN_CMPNLTSD,
14536  IX86_BUILTIN_CMPNLESD,
14537  IX86_BUILTIN_CMPORDSD,
14538  IX86_BUILTIN_CMPUNORDSD,
14539  IX86_BUILTIN_CMPNESD,
14540
14541  IX86_BUILTIN_COMIEQSD,
14542  IX86_BUILTIN_COMILTSD,
14543  IX86_BUILTIN_COMILESD,
14544  IX86_BUILTIN_COMIGTSD,
14545  IX86_BUILTIN_COMIGESD,
14546  IX86_BUILTIN_COMINEQSD,
14547  IX86_BUILTIN_UCOMIEQSD,
14548  IX86_BUILTIN_UCOMILTSD,
14549  IX86_BUILTIN_UCOMILESD,
14550  IX86_BUILTIN_UCOMIGTSD,
14551  IX86_BUILTIN_UCOMIGESD,
14552  IX86_BUILTIN_UCOMINEQSD,
14553
14554  IX86_BUILTIN_MAXPD,
14555  IX86_BUILTIN_MAXSD,
14556  IX86_BUILTIN_MINPD,
14557  IX86_BUILTIN_MINSD,
14558
14559  IX86_BUILTIN_ANDPD,
14560  IX86_BUILTIN_ANDNPD,
14561  IX86_BUILTIN_ORPD,
14562  IX86_BUILTIN_XORPD,
14563
14564  IX86_BUILTIN_SQRTPD,
14565  IX86_BUILTIN_SQRTSD,
14566
14567  IX86_BUILTIN_UNPCKHPD,
14568  IX86_BUILTIN_UNPCKLPD,
14569
14570  IX86_BUILTIN_SHUFPD,
14571
14572  IX86_BUILTIN_LOADUPD,
14573  IX86_BUILTIN_STOREUPD,
14574  IX86_BUILTIN_MOVSD,
14575
14576  IX86_BUILTIN_LOADHPD,
14577  IX86_BUILTIN_LOADLPD,
14578
14579  IX86_BUILTIN_CVTDQ2PD,
14580  IX86_BUILTIN_CVTDQ2PS,
14581
14582  IX86_BUILTIN_CVTPD2DQ,
14583  IX86_BUILTIN_CVTPD2PI,
14584  IX86_BUILTIN_CVTPD2PS,
14585  IX86_BUILTIN_CVTTPD2DQ,
14586  IX86_BUILTIN_CVTTPD2PI,
14587
14588  IX86_BUILTIN_CVTPI2PD,
14589  IX86_BUILTIN_CVTSI2SD,
14590  IX86_BUILTIN_CVTSI642SD,
14591
14592  IX86_BUILTIN_CVTSD2SI,
14593  IX86_BUILTIN_CVTSD2SI64,
14594  IX86_BUILTIN_CVTSD2SS,
14595  IX86_BUILTIN_CVTSS2SD,
14596  IX86_BUILTIN_CVTTSD2SI,
14597  IX86_BUILTIN_CVTTSD2SI64,
14598
14599  IX86_BUILTIN_CVTPS2DQ,
14600  IX86_BUILTIN_CVTPS2PD,
14601  IX86_BUILTIN_CVTTPS2DQ,
14602
14603  IX86_BUILTIN_MOVNTI,
14604  IX86_BUILTIN_MOVNTPD,
14605  IX86_BUILTIN_MOVNTDQ,
14606
14607  /* SSE2 MMX */
14608  IX86_BUILTIN_MASKMOVDQU,
14609  IX86_BUILTIN_MOVMSKPD,
14610  IX86_BUILTIN_PMOVMSKB128,
14611
14612  IX86_BUILTIN_PACKSSWB128,
14613  IX86_BUILTIN_PACKSSDW128,
14614  IX86_BUILTIN_PACKUSWB128,
14615
14616  IX86_BUILTIN_PADDB128,
14617  IX86_BUILTIN_PADDW128,
14618  IX86_BUILTIN_PADDD128,
14619  IX86_BUILTIN_PADDQ128,
14620  IX86_BUILTIN_PADDSB128,
14621  IX86_BUILTIN_PADDSW128,
14622  IX86_BUILTIN_PADDUSB128,
14623  IX86_BUILTIN_PADDUSW128,
14624  IX86_BUILTIN_PSUBB128,
14625  IX86_BUILTIN_PSUBW128,
14626  IX86_BUILTIN_PSUBD128,
14627  IX86_BUILTIN_PSUBQ128,
14628  IX86_BUILTIN_PSUBSB128,
14629  IX86_BUILTIN_PSUBSW128,
14630  IX86_BUILTIN_PSUBUSB128,
14631  IX86_BUILTIN_PSUBUSW128,
14632
14633  IX86_BUILTIN_PAND128,
14634  IX86_BUILTIN_PANDN128,
14635  IX86_BUILTIN_POR128,
14636  IX86_BUILTIN_PXOR128,
14637
14638  IX86_BUILTIN_PAVGB128,
14639  IX86_BUILTIN_PAVGW128,
14640
14641  IX86_BUILTIN_PCMPEQB128,
14642  IX86_BUILTIN_PCMPEQW128,
14643  IX86_BUILTIN_PCMPEQD128,
14644  IX86_BUILTIN_PCMPGTB128,
14645  IX86_BUILTIN_PCMPGTW128,
14646  IX86_BUILTIN_PCMPGTD128,
14647
14648  IX86_BUILTIN_PMADDWD128,
14649
14650  IX86_BUILTIN_PMAXSW128,
14651  IX86_BUILTIN_PMAXUB128,
14652  IX86_BUILTIN_PMINSW128,
14653  IX86_BUILTIN_PMINUB128,
14654
14655  IX86_BUILTIN_PMULUDQ,
14656  IX86_BUILTIN_PMULUDQ128,
14657  IX86_BUILTIN_PMULHUW128,
14658  IX86_BUILTIN_PMULHW128,
14659  IX86_BUILTIN_PMULLW128,
14660
14661  IX86_BUILTIN_PSADBW128,
14662  IX86_BUILTIN_PSHUFHW,
14663  IX86_BUILTIN_PSHUFLW,
14664  IX86_BUILTIN_PSHUFD,
14665
14666  IX86_BUILTIN_PSLLW128,
14667  IX86_BUILTIN_PSLLD128,
14668  IX86_BUILTIN_PSLLQ128,
14669  IX86_BUILTIN_PSRAW128,
14670  IX86_BUILTIN_PSRAD128,
14671  IX86_BUILTIN_PSRLW128,
14672  IX86_BUILTIN_PSRLD128,
14673  IX86_BUILTIN_PSRLQ128,
14674  IX86_BUILTIN_PSLLDQI128,
14675  IX86_BUILTIN_PSLLWI128,
14676  IX86_BUILTIN_PSLLDI128,
14677  IX86_BUILTIN_PSLLQI128,
14678  IX86_BUILTIN_PSRAWI128,
14679  IX86_BUILTIN_PSRADI128,
14680  IX86_BUILTIN_PSRLDQI128,
14681  IX86_BUILTIN_PSRLWI128,
14682  IX86_BUILTIN_PSRLDI128,
14683  IX86_BUILTIN_PSRLQI128,
14684
14685  IX86_BUILTIN_PUNPCKHBW128,
14686  IX86_BUILTIN_PUNPCKHWD128,
14687  IX86_BUILTIN_PUNPCKHDQ128,
14688  IX86_BUILTIN_PUNPCKHQDQ128,
14689  IX86_BUILTIN_PUNPCKLBW128,
14690  IX86_BUILTIN_PUNPCKLWD128,
14691  IX86_BUILTIN_PUNPCKLDQ128,
14692  IX86_BUILTIN_PUNPCKLQDQ128,
14693
14694  IX86_BUILTIN_CLFLUSH,
14695  IX86_BUILTIN_MFENCE,
14696  IX86_BUILTIN_LFENCE,
14697
14698  /* Prescott New Instructions.  */
14699  IX86_BUILTIN_ADDSUBPS,
14700  IX86_BUILTIN_HADDPS,
14701  IX86_BUILTIN_HSUBPS,
14702  IX86_BUILTIN_MOVSHDUP,
14703  IX86_BUILTIN_MOVSLDUP,
14704  IX86_BUILTIN_ADDSUBPD,
14705  IX86_BUILTIN_HADDPD,
14706  IX86_BUILTIN_HSUBPD,
14707  IX86_BUILTIN_LDDQU,
14708
14709  IX86_BUILTIN_MONITOR,
14710  IX86_BUILTIN_MWAIT,
14711
14712  /* SSSE3.  */
14713  IX86_BUILTIN_PHADDW,
14714  IX86_BUILTIN_PHADDD,
14715  IX86_BUILTIN_PHADDSW,
14716  IX86_BUILTIN_PHSUBW,
14717  IX86_BUILTIN_PHSUBD,
14718  IX86_BUILTIN_PHSUBSW,
14719  IX86_BUILTIN_PMADDUBSW,
14720  IX86_BUILTIN_PMULHRSW,
14721  IX86_BUILTIN_PSHUFB,
14722  IX86_BUILTIN_PSIGNB,
14723  IX86_BUILTIN_PSIGNW,
14724  IX86_BUILTIN_PSIGND,
14725  IX86_BUILTIN_PALIGNR,
14726  IX86_BUILTIN_PABSB,
14727  IX86_BUILTIN_PABSW,
14728  IX86_BUILTIN_PABSD,
14729
14730  IX86_BUILTIN_PHADDW128,
14731  IX86_BUILTIN_PHADDD128,
14732  IX86_BUILTIN_PHADDSW128,
14733  IX86_BUILTIN_PHSUBW128,
14734  IX86_BUILTIN_PHSUBD128,
14735  IX86_BUILTIN_PHSUBSW128,
14736  IX86_BUILTIN_PMADDUBSW128,
14737  IX86_BUILTIN_PMULHRSW128,
14738  IX86_BUILTIN_PSHUFB128,
14739  IX86_BUILTIN_PSIGNB128,
14740  IX86_BUILTIN_PSIGNW128,
14741  IX86_BUILTIN_PSIGND128,
14742  IX86_BUILTIN_PALIGNR128,
14743  IX86_BUILTIN_PABSB128,
14744  IX86_BUILTIN_PABSW128,
14745  IX86_BUILTIN_PABSD128,
14746
14747  IX86_BUILTIN_VEC_INIT_V2SI,
14748  IX86_BUILTIN_VEC_INIT_V4HI,
14749  IX86_BUILTIN_VEC_INIT_V8QI,
14750  IX86_BUILTIN_VEC_EXT_V2DF,
14751  IX86_BUILTIN_VEC_EXT_V2DI,
14752  IX86_BUILTIN_VEC_EXT_V4SF,
14753  IX86_BUILTIN_VEC_EXT_V4SI,
14754  IX86_BUILTIN_VEC_EXT_V8HI,
14755  IX86_BUILTIN_VEC_EXT_V16QI,
14756  IX86_BUILTIN_VEC_EXT_V2SI,
14757  IX86_BUILTIN_VEC_EXT_V4HI,
14758  IX86_BUILTIN_VEC_SET_V8HI,
14759  IX86_BUILTIN_VEC_SET_V4HI,
14760
14761  IX86_BUILTIN_MAX
14762};
14763
14764#define def_builtin(MASK, NAME, TYPE, CODE)				\
14765do {									\
14766  if ((MASK) & target_flags						\
14767      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
14768    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
14769				 NULL, NULL_TREE);			\
14770} while (0)
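/* Illustrative use only (added annotation, not one of the file's real
   registrations): assuming a function-type tree node FTYPE has already been
   built, an SSE builtin would be registered as

     def_builtin (MASK_SSE, "__builtin_ia32_addps", FTYPE, IX86_BUILTIN_ADDPS);

   and the MASK / MASK_64BIT test in the macro keeps it from being created
   when the corresponding ISA is not enabled.  */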
14771
14772/* Bits for builtin_description.flag.  */
14773
14774/* Set when we don't support the comparison natively, and should
14775   swap_comparison in order to support it.  */
14776#define BUILTIN_DESC_SWAP_OPERANDS	1
14777
14778struct builtin_description
14779{
14780  const unsigned int mask;
14781  const enum insn_code icode;
14782  const char *const name;
14783  const enum ix86_builtins code;
14784  const enum rtx_code comparison;
14785  const unsigned int flag;
14786};
14787
14788static const struct builtin_description bdesc_comi[] =
14789{
14790  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14791  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14792  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14793  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14794  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14795  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14796  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14797  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14798  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14799  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14800  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14801  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14802  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14803  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14804  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14805  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14806  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14807  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14808  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14809  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14810  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14811  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14812  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14813  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14814};
14815
14816static const struct builtin_description bdesc_2arg[] =
14817{
14818  /* SSE */
14819  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14820  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14821  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14822  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14823  { MASK_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14824  { MASK_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14825  { MASK_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14826  { MASK_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14827
14828  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14829  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14830  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14831  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14832    BUILTIN_DESC_SWAP_OPERANDS },
14833  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14834    BUILTIN_DESC_SWAP_OPERANDS },
14835  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14836  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14837  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14838  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14839  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14840    BUILTIN_DESC_SWAP_OPERANDS },
14841  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14842    BUILTIN_DESC_SWAP_OPERANDS },
14843  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14844  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14845  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14846  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14847  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14848  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14849  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14850  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14851  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14852    BUILTIN_DESC_SWAP_OPERANDS },
14853  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14854    BUILTIN_DESC_SWAP_OPERANDS },
14855  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
14856
14857  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14858  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14859  { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14860  { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14861
14862  { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14863  { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14864  { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14865  { MASK_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14866
14867  { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14868  { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14869  { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14870  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14871  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14872
14873  /* MMX */
14874  { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14875  { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14876  { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14877  { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14878  { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14879  { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14880  { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14881  { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14882
14883  { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14884  { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14885  { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14886  { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14887  { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14888  { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14889  { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14890  { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14891
14892  { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14893  { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14894  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14895
14896  { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14897  { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14898  { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14899  { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14900
14901  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14902  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14903
14904  { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14905  { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14906  { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14907  { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14908  { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14909  { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14910
14911  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14912  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14913  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14914  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14915
14916  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14917  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14918  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14919  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14920  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14921  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14922
14923  /* Special.  */
14924  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14925  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14926  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14927
14928  { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14929  { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14930  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14931
14932  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14933  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14934  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14935  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14936  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14937  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14938
14939  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14940  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14941  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14942  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14943  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14944  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14945
14946  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14947  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14948  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14949  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14950
14951  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14952  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14953
14954  /* SSE2 */
14955  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14956  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14957  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14958  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14959  { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14960  { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14961  { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14962  { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14963
14964  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14965  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14966  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14967  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14968    BUILTIN_DESC_SWAP_OPERANDS },
14969  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14970    BUILTIN_DESC_SWAP_OPERANDS },
14971  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14972  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14973  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14974  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14975  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14976    BUILTIN_DESC_SWAP_OPERANDS },
14977  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14978    BUILTIN_DESC_SWAP_OPERANDS },
14979  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14980  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14981  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14982  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14983  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14984  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14985  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14986  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14987  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14988
14989  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14990  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14991  { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14992  { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14993
14994  { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14995  { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14996  { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14997  { MASK_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14998
14999  { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15000  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15001  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15002
15003  /* SSE2 MMX */
15004  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15005  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15006  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15007  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15008  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15009  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15010  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15011  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15012
15013  { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15014  { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15015  { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15016  { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15017  { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15018  { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15019  { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15020  { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15021
15022  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15023  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15024
15025  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15026  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15027  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15028  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15029
15030  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15031  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15032
15033  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15034  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15035  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15036  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15037  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15038  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15039
15040  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15041  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15042  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15043  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15044
15045  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15046  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15047  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15048  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15049  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15050  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15051  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15052  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15053
15054  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15055  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15056  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15057
15058  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15059  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15060
15061  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15062  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15063
15064  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15065  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15066  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15067
15068  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15069  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15070  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15071
15072  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15073  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15074
15075  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15076
15077  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15078  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15079  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15080  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15081
15082  /* SSE3 */
15083  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15084  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15085  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15086  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15087  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15088  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15089
15090  /* SSSE3 */
15091  { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15092  { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15093  { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15094  { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15095  { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15096  { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15097  { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15098  { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15099  { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15100  { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15101  { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15102  { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15103  { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15104  { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15105  { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15106  { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15107  { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15108  { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15109  { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15110  { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15111  { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15112  { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15113  { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15114  { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15115};
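
/* Note on the table above: each builtin_description entry pairs an ISA
   mask and an insn pattern with a builtin code.  Entries whose name field
   is 0 (the MMX shifts, packs and conversions, for instance) are skipped
   by the generic two-operand loop in ix86_init_mmx_sse_builtins and are
   instead defined there by hand with explicit prototypes.  The comparison
   entries also carry an RTX comparison code; the greater-than forms such
   as cmpgtpd reuse the less-than codes with BUILTIN_DESC_SWAP_OPERANDS
   set, so no separate GT patterns are needed.  */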
15116
15117static const struct builtin_description bdesc_1arg[] =
15118{
15119  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15120  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15121
15122  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15123  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15124  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15125
15126  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15127  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15128  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15129  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15130  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15131  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15132
15133  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15134  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15135
15136  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15137
15138  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15139  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15140
15141  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15142  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15143  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15144  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15145  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15146
15147  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15148
15149  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15150  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15151  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15152  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15153
15154  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15155  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15156  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15157
15158  /* SSE3 */
15159  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15160  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15161
15162  /* SSSE3 */
15163  { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15164  { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15165  { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15166  { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15167  { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15168  { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15169};
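
/* As with bdesc_2arg, only the named entries above are registered by the
   generic one-operand loop in ix86_init_mmx_sse_builtins; the mode of the
   insn's operand 1 selects the prototype, and the unnamed entries (the
   movmsk, sqrt and conversion patterns) are defined by hand with explicit
   types further below.  */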
15170
15171static void
15172ix86_init_builtins (void)
15173{
15174  if (TARGET_MMX)
15175    ix86_init_mmx_sse_builtins ();
15176}
15177
15178/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
15179   is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
15180   builtins are defined.  */
15181static void
15182ix86_init_mmx_sse_builtins (void)
15183{
15184  const struct builtin_description * d;
15185  size_t i;
15186
15187  tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15188  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15189  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15190  tree V2DI_type_node
15191    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15192  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15193  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15194  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15195  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15196  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15197  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15198
15199  tree pchar_type_node = build_pointer_type (char_type_node);
15200  tree pcchar_type_node = build_pointer_type (
15201			     build_type_variant (char_type_node, 1, 0));
15202  tree pfloat_type_node = build_pointer_type (float_type_node);
15203  tree pcfloat_type_node = build_pointer_type (
15204			     build_type_variant (float_type_node, 1, 0));
15205  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15206  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15207  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15208
15209  /* Comparisons.  */
15210  tree int_ftype_v4sf_v4sf
15211    = build_function_type_list (integer_type_node,
15212				V4SF_type_node, V4SF_type_node, NULL_TREE);
15213  tree v4si_ftype_v4sf_v4sf
15214    = build_function_type_list (V4SI_type_node,
15215				V4SF_type_node, V4SF_type_node, NULL_TREE);
15216  /* MMX/SSE/integer conversions.  */
15217  tree int_ftype_v4sf
15218    = build_function_type_list (integer_type_node,
15219				V4SF_type_node, NULL_TREE);
15220  tree int64_ftype_v4sf
15221    = build_function_type_list (long_long_integer_type_node,
15222				V4SF_type_node, NULL_TREE);
15223  tree int_ftype_v8qi
15224    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15225  tree v4sf_ftype_v4sf_int
15226    = build_function_type_list (V4SF_type_node,
15227				V4SF_type_node, integer_type_node, NULL_TREE);
15228  tree v4sf_ftype_v4sf_int64
15229    = build_function_type_list (V4SF_type_node,
15230				V4SF_type_node, long_long_integer_type_node,
15231				NULL_TREE);
15232  tree v4sf_ftype_v4sf_v2si
15233    = build_function_type_list (V4SF_type_node,
15234				V4SF_type_node, V2SI_type_node, NULL_TREE);
15235
15236  /* Miscellaneous.  */
15237  tree v8qi_ftype_v4hi_v4hi
15238    = build_function_type_list (V8QI_type_node,
15239				V4HI_type_node, V4HI_type_node, NULL_TREE);
15240  tree v4hi_ftype_v2si_v2si
15241    = build_function_type_list (V4HI_type_node,
15242				V2SI_type_node, V2SI_type_node, NULL_TREE);
15243  tree v4sf_ftype_v4sf_v4sf_int
15244    = build_function_type_list (V4SF_type_node,
15245				V4SF_type_node, V4SF_type_node,
15246				integer_type_node, NULL_TREE);
15247  tree v2si_ftype_v4hi_v4hi
15248    = build_function_type_list (V2SI_type_node,
15249				V4HI_type_node, V4HI_type_node, NULL_TREE);
15250  tree v4hi_ftype_v4hi_int
15251    = build_function_type_list (V4HI_type_node,
15252				V4HI_type_node, integer_type_node, NULL_TREE);
15253  tree v4hi_ftype_v4hi_di
15254    = build_function_type_list (V4HI_type_node,
15255				V4HI_type_node, long_long_unsigned_type_node,
15256				NULL_TREE);
15257  tree v2si_ftype_v2si_di
15258    = build_function_type_list (V2SI_type_node,
15259				V2SI_type_node, long_long_unsigned_type_node,
15260				NULL_TREE);
15261  tree void_ftype_void
15262    = build_function_type (void_type_node, void_list_node);
15263  tree void_ftype_unsigned
15264    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15265  tree void_ftype_unsigned_unsigned
15266    = build_function_type_list (void_type_node, unsigned_type_node,
15267				unsigned_type_node, NULL_TREE);
15268  tree void_ftype_pcvoid_unsigned_unsigned
15269    = build_function_type_list (void_type_node, const_ptr_type_node,
15270				unsigned_type_node, unsigned_type_node,
15271				NULL_TREE);
15272  tree unsigned_ftype_void
15273    = build_function_type (unsigned_type_node, void_list_node);
15274  tree v2si_ftype_v4sf
15275    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15276  /* Loads/stores.  */
15277  tree void_ftype_v8qi_v8qi_pchar
15278    = build_function_type_list (void_type_node,
15279				V8QI_type_node, V8QI_type_node,
15280				pchar_type_node, NULL_TREE);
15281  tree v4sf_ftype_pcfloat
15282    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15283  /* @@@ the type is bogus */
15284  tree v4sf_ftype_v4sf_pv2si
15285    = build_function_type_list (V4SF_type_node,
15286				V4SF_type_node, pv2si_type_node, NULL_TREE);
15287  tree void_ftype_pv2si_v4sf
15288    = build_function_type_list (void_type_node,
15289				pv2si_type_node, V4SF_type_node, NULL_TREE);
15290  tree void_ftype_pfloat_v4sf
15291    = build_function_type_list (void_type_node,
15292				pfloat_type_node, V4SF_type_node, NULL_TREE);
15293  tree void_ftype_pdi_di
15294    = build_function_type_list (void_type_node,
15295				pdi_type_node, long_long_unsigned_type_node,
15296				NULL_TREE);
15297  tree void_ftype_pv2di_v2di
15298    = build_function_type_list (void_type_node,
15299				pv2di_type_node, V2DI_type_node, NULL_TREE);
15300  /* Normal vector unops.  */
15301  tree v4sf_ftype_v4sf
15302    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15303  tree v16qi_ftype_v16qi
15304    = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15305  tree v8hi_ftype_v8hi
15306    = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15307  tree v4si_ftype_v4si
15308    = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15309  tree v8qi_ftype_v8qi
15310    = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15311  tree v4hi_ftype_v4hi
15312    = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15313
15314  /* Normal vector binops.  */
15315  tree v4sf_ftype_v4sf_v4sf
15316    = build_function_type_list (V4SF_type_node,
15317				V4SF_type_node, V4SF_type_node, NULL_TREE);
15318  tree v8qi_ftype_v8qi_v8qi
15319    = build_function_type_list (V8QI_type_node,
15320				V8QI_type_node, V8QI_type_node, NULL_TREE);
15321  tree v4hi_ftype_v4hi_v4hi
15322    = build_function_type_list (V4HI_type_node,
15323				V4HI_type_node, V4HI_type_node, NULL_TREE);
15324  tree v2si_ftype_v2si_v2si
15325    = build_function_type_list (V2SI_type_node,
15326				V2SI_type_node, V2SI_type_node, NULL_TREE);
15327  tree di_ftype_di_di
15328    = build_function_type_list (long_long_unsigned_type_node,
15329				long_long_unsigned_type_node,
15330				long_long_unsigned_type_node, NULL_TREE);
15331
15332  tree di_ftype_di_di_int
15333    = build_function_type_list (long_long_unsigned_type_node,
15334				long_long_unsigned_type_node,
15335				long_long_unsigned_type_node,
15336				integer_type_node, NULL_TREE);
15337
15338  tree v2si_ftype_v2sf
15339    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15340  tree v2sf_ftype_v2si
15341    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15342  tree v2si_ftype_v2si
15343    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15344  tree v2sf_ftype_v2sf
15345    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15346  tree v2sf_ftype_v2sf_v2sf
15347    = build_function_type_list (V2SF_type_node,
15348				V2SF_type_node, V2SF_type_node, NULL_TREE);
15349  tree v2si_ftype_v2sf_v2sf
15350    = build_function_type_list (V2SI_type_node,
15351				V2SF_type_node, V2SF_type_node, NULL_TREE);
15352  tree pint_type_node    = build_pointer_type (integer_type_node);
15353  tree pdouble_type_node = build_pointer_type (double_type_node);
15354  tree pcdouble_type_node = build_pointer_type (
15355				build_type_variant (double_type_node, 1, 0));
15356  tree int_ftype_v2df_v2df
15357    = build_function_type_list (integer_type_node,
15358				V2DF_type_node, V2DF_type_node, NULL_TREE);
15359
15360  tree void_ftype_pcvoid
15361    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15362  tree v4sf_ftype_v4si
15363    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15364  tree v4si_ftype_v4sf
15365    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15366  tree v2df_ftype_v4si
15367    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15368  tree v4si_ftype_v2df
15369    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15370  tree v2si_ftype_v2df
15371    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15372  tree v4sf_ftype_v2df
15373    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15374  tree v2df_ftype_v2si
15375    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15376  tree v2df_ftype_v4sf
15377    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15378  tree int_ftype_v2df
15379    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15380  tree int64_ftype_v2df
15381    = build_function_type_list (long_long_integer_type_node,
15382				V2DF_type_node, NULL_TREE);
15383  tree v2df_ftype_v2df_int
15384    = build_function_type_list (V2DF_type_node,
15385				V2DF_type_node, integer_type_node, NULL_TREE);
15386  tree v2df_ftype_v2df_int64
15387    = build_function_type_list (V2DF_type_node,
15388				V2DF_type_node, long_long_integer_type_node,
15389				NULL_TREE);
15390  tree v4sf_ftype_v4sf_v2df
15391    = build_function_type_list (V4SF_type_node,
15392				V4SF_type_node, V2DF_type_node, NULL_TREE);
15393  tree v2df_ftype_v2df_v4sf
15394    = build_function_type_list (V2DF_type_node,
15395				V2DF_type_node, V4SF_type_node, NULL_TREE);
15396  tree v2df_ftype_v2df_v2df_int
15397    = build_function_type_list (V2DF_type_node,
15398				V2DF_type_node, V2DF_type_node,
15399				integer_type_node,
15400				NULL_TREE);
15401  tree v2df_ftype_v2df_pcdouble
15402    = build_function_type_list (V2DF_type_node,
15403				V2DF_type_node, pcdouble_type_node, NULL_TREE);
15404  tree void_ftype_pdouble_v2df
15405    = build_function_type_list (void_type_node,
15406				pdouble_type_node, V2DF_type_node, NULL_TREE);
15407  tree void_ftype_pint_int
15408    = build_function_type_list (void_type_node,
15409				pint_type_node, integer_type_node, NULL_TREE);
15410  tree void_ftype_v16qi_v16qi_pchar
15411    = build_function_type_list (void_type_node,
15412				V16QI_type_node, V16QI_type_node,
15413				pchar_type_node, NULL_TREE);
15414  tree v2df_ftype_pcdouble
15415    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15416  tree v2df_ftype_v2df_v2df
15417    = build_function_type_list (V2DF_type_node,
15418				V2DF_type_node, V2DF_type_node, NULL_TREE);
15419  tree v16qi_ftype_v16qi_v16qi
15420    = build_function_type_list (V16QI_type_node,
15421				V16QI_type_node, V16QI_type_node, NULL_TREE);
15422  tree v8hi_ftype_v8hi_v8hi
15423    = build_function_type_list (V8HI_type_node,
15424				V8HI_type_node, V8HI_type_node, NULL_TREE);
15425  tree v4si_ftype_v4si_v4si
15426    = build_function_type_list (V4SI_type_node,
15427				V4SI_type_node, V4SI_type_node, NULL_TREE);
15428  tree v2di_ftype_v2di_v2di
15429    = build_function_type_list (V2DI_type_node,
15430				V2DI_type_node, V2DI_type_node, NULL_TREE);
15431  tree v2di_ftype_v2df_v2df
15432    = build_function_type_list (V2DI_type_node,
15433				V2DF_type_node, V2DF_type_node, NULL_TREE);
15434  tree v2df_ftype_v2df
15435    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15436  tree v2di_ftype_v2di_int
15437    = build_function_type_list (V2DI_type_node,
15438				V2DI_type_node, integer_type_node, NULL_TREE);
15439  tree v2di_ftype_v2di_v2di_int
15440    = build_function_type_list (V2DI_type_node, V2DI_type_node,
15441				V2DI_type_node, integer_type_node, NULL_TREE);
15442  tree v4si_ftype_v4si_int
15443    = build_function_type_list (V4SI_type_node,
15444				V4SI_type_node, integer_type_node, NULL_TREE);
15445  tree v8hi_ftype_v8hi_int
15446    = build_function_type_list (V8HI_type_node,
15447				V8HI_type_node, integer_type_node, NULL_TREE);
15448  tree v4si_ftype_v8hi_v8hi
15449    = build_function_type_list (V4SI_type_node,
15450				V8HI_type_node, V8HI_type_node, NULL_TREE);
15451  tree di_ftype_v8qi_v8qi
15452    = build_function_type_list (long_long_unsigned_type_node,
15453				V8QI_type_node, V8QI_type_node, NULL_TREE);
15454  tree di_ftype_v2si_v2si
15455    = build_function_type_list (long_long_unsigned_type_node,
15456				V2SI_type_node, V2SI_type_node, NULL_TREE);
15457  tree v2di_ftype_v16qi_v16qi
15458    = build_function_type_list (V2DI_type_node,
15459				V16QI_type_node, V16QI_type_node, NULL_TREE);
15460  tree v2di_ftype_v4si_v4si
15461    = build_function_type_list (V2DI_type_node,
15462				V4SI_type_node, V4SI_type_node, NULL_TREE);
15463  tree int_ftype_v16qi
15464    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15465  tree v16qi_ftype_pcchar
15466    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15467  tree void_ftype_pchar_v16qi
15468    = build_function_type_list (void_type_node,
15469			        pchar_type_node, V16QI_type_node, NULL_TREE);
15470
15471  tree float80_type;
15472  tree float128_type;
15473  tree ftype;
15474
15475  /* The __float80 type.  */
15476  if (TYPE_MODE (long_double_type_node) == XFmode)
15477    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15478					       "__float80");
15479  else
15480    {
15481      /* The __float80 type.  */
15482      float80_type = make_node (REAL_TYPE);
15483      TYPE_PRECISION (float80_type) = 80;
15484      layout_type (float80_type);
15485      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15486    }
15487
15488  if (TARGET_64BIT)
15489    {
15490      float128_type = make_node (REAL_TYPE);
15491      TYPE_PRECISION (float128_type) = 128;
15492      layout_type (float128_type);
15493      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15494    }
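
  /* Registering these nodes makes "__float80" and, on 64-bit targets,
     "__float128" available as type names in user code, e.g.
     (illustrative only):

	__float80 x = 1.0;

     When long double is not already XFmode, the layout of the fresh
     80-bit node comes from layout_type above.  */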
15495
15496  /* Add all builtins that are more or less simple operations on two
15497     operands.  */
15498  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15499    {
15500      /* Use the mode of one of the input operands; the result operand can
15501	 have a different mode for mask-generating compares.  */
15502      enum machine_mode mode;
15503      tree type;
15504
15505      if (d->name == 0)
15506	continue;
15507      mode = insn_data[d->icode].operand[1].mode;
15508
15509      switch (mode)
15510	{
15511	case V16QImode:
15512	  type = v16qi_ftype_v16qi_v16qi;
15513	  break;
15514	case V8HImode:
15515	  type = v8hi_ftype_v8hi_v8hi;
15516	  break;
15517	case V4SImode:
15518	  type = v4si_ftype_v4si_v4si;
15519	  break;
15520	case V2DImode:
15521	  type = v2di_ftype_v2di_v2di;
15522	  break;
15523	case V2DFmode:
15524	  type = v2df_ftype_v2df_v2df;
15525	  break;
15526	case V4SFmode:
15527	  type = v4sf_ftype_v4sf_v4sf;
15528	  break;
15529	case V8QImode:
15530	  type = v8qi_ftype_v8qi_v8qi;
15531	  break;
15532	case V4HImode:
15533	  type = v4hi_ftype_v4hi_v4hi;
15534	  break;
15535	case V2SImode:
15536	  type = v2si_ftype_v2si_v2si;
15537	  break;
15538	case DImode:
15539	  type = di_ftype_di_di;
15540	  break;
15541
15542	default:
15543	  gcc_unreachable ();
15544	}
15545
15546      /* Override for comparisons.  */
15547      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15548	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15549	type = v4si_ftype_v4sf_v4sf;
15550
15551      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15552	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15553	type = v2di_ftype_v2df_v2df;
15554
15555      def_builtin (d->mask, d->name, type, d->code);
15556    }
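
  /* def_builtin registers a builtin only when the ISA bits in d->mask
     are enabled for the target (see the comment above
     ix86_init_mmx_sse_builtins), so one table covers every
     -mmmx/-msse/-msse2/-mssse3 combination.  The comparison overrides
     are needed because the mask-generating compare patterns produce an
     integer vector of all-ones/all-zeros lanes rather than a
     floating-point vector.  */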
15557
15558  /* Add all builtins that are more or less simple operations on one operand.  */
15559  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15560    {
15561      enum machine_mode mode;
15562      tree type;
15563
15564      if (d->name == 0)
15565	continue;
15566      mode = insn_data[d->icode].operand[1].mode;
15567
15568      switch (mode)
15569	{
15570	case V16QImode:
15571	  type = v16qi_ftype_v16qi;
15572	  break;
15573	case V8HImode:
15574	  type = v8hi_ftype_v8hi;
15575	  break;
15576	case V4SImode:
15577	  type = v4si_ftype_v4si;
15578	  break;
15579	case V2DFmode:
15580	  type = v2df_ftype_v2df;
15581	  break;
15582	case V4SFmode:
15583	  type = v4sf_ftype_v4sf;
15584	  break;
15585	case V8QImode:
15586	  type = v8qi_ftype_v8qi;
15587	  break;
15588	case V4HImode:
15589	  type = v4hi_ftype_v4hi;
15590	  break;
15591	case V2SImode:
15592	  type = v2si_ftype_v2si;
15593	  break;
15594
15595	default:
15596	  gcc_unreachable ();
15597	}
15598
15599      def_builtin (d->mask, d->name, type, d->code);
15600    }
15601
15602  /* Add the remaining MMX insns with somewhat more complicated types.  */
15603  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15604  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15605  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15606  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15607
15608  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15609  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15610  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15611
15612  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15613  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15614
15615  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15616  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15617
15618  /* comi/ucomi insns.  */
15619  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15620    if (d->mask == MASK_SSE2)
15621      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15622    else
15623      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15624
15625  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15626  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15627  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15628
15629  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15630  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15631  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15632  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15633  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15634  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15635  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15636  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15637  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15638  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15639  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15640
15641  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15642
15643  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15644  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15645
15646  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15647  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15648  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15649  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15650
15651  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15652  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15653  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15654  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15655
15656  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15657
15658  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15659
15660  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15661  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15662  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15663  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15664  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15665  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15666
15667  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15668
15669  /* Original 3DNow!  */
15670  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15671  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15672  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15673  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15674  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15675  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15676  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15677  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15678  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15679  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15680  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15681  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15682  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15683  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15684  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15685  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15686  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15687  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15688  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15689  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15690
15691  /* 3DNow! extension as used in the Athlon CPU.  */
15692  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15693  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15694  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15695  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15696  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15697  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15698
15699  /* SSE2 */
15700  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15701
15702  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15703  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15704
15705  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15706  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15707
15708  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15709  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15710  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15711  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15712  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15713
15714  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15715  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15716  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15717  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15718
15719  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15720  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15721
15722  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15723
15724  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15725  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15726
15727  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15728  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15729  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15730  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15731  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15732
15733  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15734
15735  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15736  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15737  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15738  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15739
15740  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15741  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15742  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15743
15744  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15745  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15746  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15747  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15748
15749  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15750  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15751  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15752
15753  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15754  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15755
15756  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15757  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15758
15759  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
15760  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
15761  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15762
15763  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
15764  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
15765  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15766
15767  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
15768  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
15769
15770  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15771  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15772  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15773  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15774
15775  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15776  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15777  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15778  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15779
15780  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15781  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15782
15783  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15784
15785  /* Prescott New Instructions (SSE3).  */
15786  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15787	       void_ftype_pcvoid_unsigned_unsigned,
15788	       IX86_BUILTIN_MONITOR);
15789  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15790	       void_ftype_unsigned_unsigned,
15791	       IX86_BUILTIN_MWAIT);
15792  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15793	       v4sf_ftype_v4sf,
15794	       IX86_BUILTIN_MOVSHDUP);
15795  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15796	       v4sf_ftype_v4sf,
15797	       IX86_BUILTIN_MOVSLDUP);
15798  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15799	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15800
15801  /* SSSE3.  */
15802  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
15803	       v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
15804  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
15805	       IX86_BUILTIN_PALIGNR);
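
  /* Both palignr builtins take the byte shift count as their trailing
     integer argument; the 128-bit form is exposed on the V2DI view of
     the XMM register, hence the v2di_ftype_v2di_v2di_int prototype
     rather than a byte-vector type.  */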
15806
15807  /* Access to the vec_init patterns.  */
15808  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15809				    integer_type_node, NULL_TREE);
15810  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15811	       ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15812
15813  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15814				    short_integer_type_node,
15815				    short_integer_type_node,
15816				    short_integer_type_node, NULL_TREE);
15817  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15818	       ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15819
15820  ftype = build_function_type_list (V8QI_type_node, char_type_node,
15821				    char_type_node, char_type_node,
15822				    char_type_node, char_type_node,
15823				    char_type_node, char_type_node,
15824				    char_type_node, NULL_TREE);
15825  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15826	       ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15827
15828  /* Access to the vec_extract patterns.  */
15829  ftype = build_function_type_list (double_type_node, V2DF_type_node,
15830				    integer_type_node, NULL_TREE);
15831  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
15832	       ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15833
15834  ftype = build_function_type_list (long_long_integer_type_node,
15835				    V2DI_type_node, integer_type_node,
15836				    NULL_TREE);
15837  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
15838	       ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15839
15840  ftype = build_function_type_list (float_type_node, V4SF_type_node,
15841				    integer_type_node, NULL_TREE);
15842  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15843	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15844
15845  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15846				    integer_type_node, NULL_TREE);
15847  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
15848	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15849
15850  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15851				    integer_type_node, NULL_TREE);
15852  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
15853	       ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15854
15855  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15856				    integer_type_node, NULL_TREE);
15857  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15858	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15859
15860  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15861				    integer_type_node, NULL_TREE);
15862  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15863	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15864
15865  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15866				    integer_type_node, NULL_TREE);
15867  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
15868
15869  /* Access to the vec_set patterns.  */
15870  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15871				    intHI_type_node,
15872				    integer_type_node, NULL_TREE);
15873  def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
15874	       ftype, IX86_BUILTIN_VEC_SET_V8HI);
15875
15876  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15877				    intHI_type_node,
15878				    integer_type_node, NULL_TREE);
15879  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15880	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
15881}
15882
15883/* Errors in the source file can cause expand_expr to return const0_rtx
15884   where we expect a vector.  To avoid crashing, substitute a zero
15885   vector constant of the expected mode.  */
15886static rtx
15887safe_vector_operand (rtx x, enum machine_mode mode)
15888{
15889  if (x == const0_rtx)
15890    x = CONST0_RTX (mode);
15891  return x;
15892}
15893
15894/* Subroutine of ix86_expand_builtin to take care of binop insns.  */
15895
15896static rtx
15897ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15898{
15899  rtx pat, xops[3];
15900  tree arg0 = TREE_VALUE (arglist);
15901  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15902  rtx op0 = expand_normal (arg0);
15903  rtx op1 = expand_normal (arg1);
15904  enum machine_mode tmode = insn_data[icode].operand[0].mode;
15905  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15906  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15907
15908  if (VECTOR_MODE_P (mode0))
15909    op0 = safe_vector_operand (op0, mode0);
15910  if (VECTOR_MODE_P (mode1))
15911    op1 = safe_vector_operand (op1, mode1);
15912
15913  if (optimize || !target
15914      || GET_MODE (target) != tmode
15915      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15916    target = gen_reg_rtx (tmode);
15917
15918  if (GET_MODE (op1) == SImode && mode1 == TImode)
15919    {
15920      rtx x = gen_reg_rtx (V4SImode);
15921      emit_insn (gen_sse2_loadd (x, op1));
15922      op1 = gen_lowpart (TImode, x);
15923    }
15924
15925  /* The expanded operands must already have the modes the insn pattern
15926     expects (VOIDmode constants are allowed).  */
15927  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15928	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15929
15930  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15931    op0 = copy_to_mode_reg (mode0, op0);
15932  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15933    op1 = copy_to_mode_reg (mode1, op1);
15934
15935  /* ??? Using ix86_fixup_binary_operands is problematic when
15936     we've got mismatched modes.  Fake it.  */
15937
15938  xops[0] = target;
15939  xops[1] = op0;
15940  xops[2] = op1;
15941
15942  if (tmode == mode0 && tmode == mode1)
15943    {
15944      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15945      op0 = xops[1];
15946      op1 = xops[2];
15947    }
15948  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15949    {
15950      op0 = force_reg (mode0, op0);
15951      op1 = force_reg (mode1, op1);
15952      target = gen_reg_rtx (tmode);
15953    }
15954
15955  pat = GEN_FCN (icode) (target, op0, op1);
15956  if (! pat)
15957    return 0;
15958  emit_insn (pat);
15959  return target;
15960}
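
/* This expander is reached from ix86_expand_builtin for every named entry
   in bdesc_2arg; e.g. a call to __builtin_ia32_addpd (typically written
   via the _mm_add_pd intrinsic in emmintrin.h) arrives here with
   CODE_FOR_addv2df3 as ICODE.  */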
15961
15962/* Subroutine of ix86_expand_builtin to take care of stores.  */
15963
15964static rtx
15965ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15966{
15967  rtx pat;
15968  tree arg0 = TREE_VALUE (arglist);
15969  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15970  rtx op0 = expand_normal (arg0);
15971  rtx op1 = expand_normal (arg1);
15972  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15973  enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15974
15975  if (VECTOR_MODE_P (mode1))
15976    op1 = safe_vector_operand (op1, mode1);
15977
15978  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15979  op1 = copy_to_mode_reg (mode1, op1);
15980
15981  pat = GEN_FCN (icode) (op0, op1);
15982  if (pat)
15983    emit_insn (pat);
15984  return 0;
15985}
15986
15987/* Subroutine of ix86_expand_builtin to take care of unop insns.  */
15988
15989static rtx
15990ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15991			  rtx target, int do_load)
15992{
15993  rtx pat;
15994  tree arg0 = TREE_VALUE (arglist);
15995  rtx op0 = expand_normal (arg0);
15996  enum machine_mode tmode = insn_data[icode].operand[0].mode;
15997  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15998
15999  if (optimize || !target
16000      || GET_MODE (target) != tmode
16001      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16002    target = gen_reg_rtx (tmode);
16003  if (do_load)
16004    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16005  else
16006    {
16007      if (VECTOR_MODE_P (mode0))
16008	op0 = safe_vector_operand (op0, mode0);
16009
16010      if ((optimize && !register_operand (op0, mode0))
16011	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16012	op0 = copy_to_mode_reg (mode0, op0);
16013    }
16014
16015  pat = GEN_FCN (icode) (target, op0);
16016  if (! pat)
16017    return 0;
16018  emit_insn (pat);
16019  return target;
16020}
16021
16022/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16023   sqrtss, rsqrtss, rcpss.  */
16024
16025static rtx
16026ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16027{
16028  rtx pat;
16029  tree arg0 = TREE_VALUE (arglist);
16030  rtx op1, op0 = expand_normal (arg0);
16031  enum machine_mode tmode = insn_data[icode].operand[0].mode;
16032  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16033
16034  if (optimize || !target
16035      || GET_MODE (target) != tmode
16036      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16037    target = gen_reg_rtx (tmode);
16038
16039  if (VECTOR_MODE_P (mode0))
16040    op0 = safe_vector_operand (op0, mode0);
16041
16042  if ((optimize && !register_operand (op0, mode0))
16043      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16044    op0 = copy_to_mode_reg (mode0, op0);
16045
16046  op1 = op0;
16047  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16048    op1 = copy_to_mode_reg (mode0, op1);
16049
16050  pat = GEN_FCN (icode) (target, op0, op1);
16051  if (! pat)
16052    return 0;
16053  emit_insn (pat);
16054  return target;
16055}
16056
16057/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
16058
16059static rtx
16060ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16061			 rtx target)
16062{
16063  rtx pat;
16064  tree arg0 = TREE_VALUE (arglist);
16065  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16066  rtx op0 = expand_normal (arg0);
16067  rtx op1 = expand_normal (arg1);
16068  rtx op2;
16069  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16070  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16071  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16072  enum rtx_code comparison = d->comparison;
16073
16074  if (VECTOR_MODE_P (mode0))
16075    op0 = safe_vector_operand (op0, mode0);
16076  if (VECTOR_MODE_P (mode1))
16077    op1 = safe_vector_operand (op1, mode1);
16078
16079  /* Swap operands if we have a comparison that isn't available in
16080     hardware.  */
16081  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16082    {
16083      rtx tmp = gen_reg_rtx (mode1);
16084      emit_move_insn (tmp, op1);
16085      op1 = op0;
16086      op0 = tmp;
16087    }
16088
16089  if (optimize || !target
16090      || GET_MODE (target) != tmode
16091      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16092    target = gen_reg_rtx (tmode);
16093
16094  if ((optimize && !register_operand (op0, mode0))
16095      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16096    op0 = copy_to_mode_reg (mode0, op0);
16097  if ((optimize && !register_operand (op1, mode1))
16098      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16099    op1 = copy_to_mode_reg (mode1, op1);
16100
16101  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16102  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16103  if (! pat)
16104    return 0;
16105  emit_insn (pat);
16106  return target;
16107}
16108
16109/* Subroutine of ix86_expand_builtin to take care of comi insns.  */
16110
16111static rtx
16112ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16113		      rtx target)
16114{
16115  rtx pat;
16116  tree arg0 = TREE_VALUE (arglist);
16117  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16118  rtx op0 = expand_normal (arg0);
16119  rtx op1 = expand_normal (arg1);
16120  rtx op2;
16121  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16122  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16123  enum rtx_code comparison = d->comparison;
16124
16125  if (VECTOR_MODE_P (mode0))
16126    op0 = safe_vector_operand (op0, mode0);
16127  if (VECTOR_MODE_P (mode1))
16128    op1 = safe_vector_operand (op1, mode1);
16129
16130  /* Swap operands if we have a comparison that isn't available in
16131     hardware.  */
16132  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16133    {
16134      rtx tmp = op1;
16135      op1 = op0;
16136      op0 = tmp;
16137    }
16138
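  /* Materialize the result by clearing an SImode pseudo and then setting
     only its low QImode part from the flags; the upper bits are therefore
     already zero when we return the SImode register below.  */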
16139  target = gen_reg_rtx (SImode);
16140  emit_move_insn (target, const0_rtx);
16141  target = gen_rtx_SUBREG (QImode, target, 0);
16142
16143  if ((optimize && !register_operand (op0, mode0))
16144      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16145    op0 = copy_to_mode_reg (mode0, op0);
16146  if ((optimize && !register_operand (op1, mode1))
16147      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16148    op1 = copy_to_mode_reg (mode1, op1);
16149
16150  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16151  pat = GEN_FCN (d->icode) (op0, op1);
16152  if (! pat)
16153    return 0;
16154  emit_insn (pat);
16155  emit_insn (gen_rtx_SET (VOIDmode,
16156			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16157			  gen_rtx_fmt_ee (comparison, QImode,
16158					  SET_DEST (pat),
16159					  const0_rtx)));
16160
16161  return SUBREG_REG (target);
16162}
16163
16164/* Return the integer constant in ARG.  Constrain it to be in the range
16165   of the subparts of VEC_TYPE; issue an error if not.  */
16166
16167static int
16168get_element_number (tree vec_type, tree arg)
16169{
16170  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16171
16172  if (!host_integerp (arg, 1)
16173      || (elt = tree_low_cst (arg, 1), elt > max))
16174    {
16175      error ("selector must be an integer constant in the range 0..%wi", max);
16176      return 0;
16177    }
16178
16179  return elt;
16180}
16181
16182/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
16183   ix86_expand_vector_init.  We DO have language-level syntax for this, in
16184   the form of  (type){ init-list }.  Except that since we can't place emms
16185   instructions from inside the compiler, we can't allow the use of MMX
16186   registers unless the user explicitly asks for it.  So we do *not* define
16187   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
16188   we have builtins invoked by mmintrin.h that give us license to emit
16189   these sorts of instructions.  */
16190
16191static rtx
16192ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16193{
16194  enum machine_mode tmode = TYPE_MODE (type);
16195  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16196  int i, n_elt = GET_MODE_NUNITS (tmode);
16197  rtvec v = rtvec_alloc (n_elt);
16198
16199  gcc_assert (VECTOR_MODE_P (tmode));
16200
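  /* Expand each argument to an rtx in the element mode and collect the
     results into a PARALLEL for ix86_expand_vector_init.  */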
16201  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16202    {
16203      rtx x = expand_normal (TREE_VALUE (arglist));
16204      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16205    }
16206
16207  gcc_assert (arglist == NULL);
16208
16209  if (!target || !register_operand (target, tmode))
16210    target = gen_reg_rtx (tmode);
16211
16212  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16213  return target;
16214}
16215
16216/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
16217   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
16218   had a language-level syntax for referencing vector elements.  */
16219
16220static rtx
16221ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16222{
16223  enum machine_mode tmode, mode0;
16224  tree arg0, arg1;
16225  int elt;
16226  rtx op0;
16227
16228  arg0 = TREE_VALUE (arglist);
16229  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16230
16231  op0 = expand_normal (arg0);
16232  elt = get_element_number (TREE_TYPE (arg0), arg1);
16233
16234  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16235  mode0 = TYPE_MODE (TREE_TYPE (arg0));
16236  gcc_assert (VECTOR_MODE_P (mode0));
16237
16238  op0 = force_reg (mode0, op0);
16239
16240  if (optimize || !target || !register_operand (target, tmode))
16241    target = gen_reg_rtx (tmode);
16242
16243  ix86_expand_vector_extract (true, target, op0, elt);
16244
16245  return target;
16246}
16247
16248/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
16249   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
16250   a language-level syntax for referencing vector elements.  */
16251
16252static rtx
16253ix86_expand_vec_set_builtin (tree arglist)
16254{
16255  enum machine_mode tmode, mode1;
16256  tree arg0, arg1, arg2;
16257  int elt;
16258  rtx op0, op1, target;
16259
16260  arg0 = TREE_VALUE (arglist);
16261  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16262  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16263
16264  tmode = TYPE_MODE (TREE_TYPE (arg0));
16265  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16266  gcc_assert (VECTOR_MODE_P (tmode));
16267
16268  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16269  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16270  elt = get_element_number (TREE_TYPE (arg0), arg2);
16271
16272  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16273    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16274
16275  op0 = force_reg (tmode, op0);
16276  op1 = force_reg (mode1, op1);
16277
16278  /* OP0 is the source of these builtin functions and shouldn't be
16279     modified.  Create a copy, use it and return it as target.  */
16280  target = gen_reg_rtx (tmode);
16281  emit_move_insn (target, op0);
16282  ix86_expand_vector_set (true, target, op1, elt);
16283
16284  return target;
16285}
16286
16287/* Expand an expression EXP that calls a built-in function,
16288   with result going to TARGET if that's convenient
16289   (and in mode MODE if that's convenient).
16290   SUBTARGET may be used as the target for computing one of EXP's operands.
16291   IGNORE is nonzero if the value is to be ignored.  */
16292
16293static rtx
16294ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16295		     enum machine_mode mode ATTRIBUTE_UNUSED,
16296		     int ignore ATTRIBUTE_UNUSED)
16297{
16298  const struct builtin_description *d;
16299  size_t i;
16300  enum insn_code icode;
16301  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16302  tree arglist = TREE_OPERAND (exp, 1);
16303  tree arg0, arg1, arg2;
16304  rtx op0, op1, op2, pat;
16305  enum machine_mode tmode, mode0, mode1, mode2, mode3;
16306  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16307
16308  switch (fcode)
16309    {
16310    case IX86_BUILTIN_EMMS:
16311      emit_insn (gen_mmx_emms ());
16312      return 0;
16313
16314    case IX86_BUILTIN_SFENCE:
16315      emit_insn (gen_sse_sfence ());
16316      return 0;
16317
16318    case IX86_BUILTIN_MASKMOVQ:
16319    case IX86_BUILTIN_MASKMOVDQU:
16320      icode = (fcode == IX86_BUILTIN_MASKMOVQ
16321	       ? CODE_FOR_mmx_maskmovq
16322	       : CODE_FOR_sse2_maskmovdqu);
16323      /* Note the arg order is different from the operand order.  */
16324      arg1 = TREE_VALUE (arglist);
16325      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16326      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16327      op0 = expand_normal (arg0);
16328      op1 = expand_normal (arg1);
16329      op2 = expand_normal (arg2);
16330      mode0 = insn_data[icode].operand[0].mode;
16331      mode1 = insn_data[icode].operand[1].mode;
16332      mode2 = insn_data[icode].operand[2].mode;
16333
16334      op0 = force_reg (Pmode, op0);
16335      op0 = gen_rtx_MEM (mode1, op0);
16336
16337      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16338	op0 = copy_to_mode_reg (mode0, op0);
16339      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16340	op1 = copy_to_mode_reg (mode1, op1);
16341      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16342	op2 = copy_to_mode_reg (mode2, op2);
16343      pat = GEN_FCN (icode) (op0, op1, op2);
16344      if (! pat)
16345	return 0;
16346      emit_insn (pat);
16347      return 0;
16348
16349    case IX86_BUILTIN_SQRTSS:
16350      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16351    case IX86_BUILTIN_RSQRTSS:
16352      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16353    case IX86_BUILTIN_RCPSS:
16354      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16355
16356    case IX86_BUILTIN_LOADUPS:
16357      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16358
16359    case IX86_BUILTIN_STOREUPS:
16360      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16361
16362    case IX86_BUILTIN_LOADHPS:
16363    case IX86_BUILTIN_LOADLPS:
16364    case IX86_BUILTIN_LOADHPD:
16365    case IX86_BUILTIN_LOADLPD:
16366      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16367	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16368	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16369	       : CODE_FOR_sse2_loadlpd);
16370      arg0 = TREE_VALUE (arglist);
16371      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16372      op0 = expand_normal (arg0);
16373      op1 = expand_normal (arg1);
16374      tmode = insn_data[icode].operand[0].mode;
16375      mode0 = insn_data[icode].operand[1].mode;
16376      mode1 = insn_data[icode].operand[2].mode;
16377
16378      op0 = force_reg (mode0, op0);
16379      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16380      if (optimize || target == 0
16381	  || GET_MODE (target) != tmode
16382	  || !register_operand (target, tmode))
16383	target = gen_reg_rtx (tmode);
16384      pat = GEN_FCN (icode) (target, op0, op1);
16385      if (! pat)
16386	return 0;
16387      emit_insn (pat);
16388      return target;
16389
16390    case IX86_BUILTIN_STOREHPS:
16391    case IX86_BUILTIN_STORELPS:
16392      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16393	       : CODE_FOR_sse_storelps);
16394      arg0 = TREE_VALUE (arglist);
16395      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16396      op0 = expand_normal (arg0);
16397      op1 = expand_normal (arg1);
16398      mode0 = insn_data[icode].operand[0].mode;
16399      mode1 = insn_data[icode].operand[1].mode;
16400
16401      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16402      op1 = force_reg (mode1, op1);
16403
16404      pat = GEN_FCN (icode) (op0, op1);
16405      if (! pat)
16406	return 0;
16407      emit_insn (pat);
16408      return const0_rtx;
16409
16410    case IX86_BUILTIN_MOVNTPS:
16411      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16412    case IX86_BUILTIN_MOVNTQ:
16413      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16414
16415    case IX86_BUILTIN_LDMXCSR:
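      /* ldmxcsr takes a memory operand, so stage the value in a stack
	 slot first.  */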
16416      op0 = expand_normal (TREE_VALUE (arglist));
16417      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16418      emit_move_insn (target, op0);
16419      emit_insn (gen_sse_ldmxcsr (target));
16420      return 0;
16421
16422    case IX86_BUILTIN_STMXCSR:
16423      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16424      emit_insn (gen_sse_stmxcsr (target));
16425      return copy_to_mode_reg (SImode, target);
16426
16427    case IX86_BUILTIN_SHUFPS:
16428    case IX86_BUILTIN_SHUFPD:
16429      icode = (fcode == IX86_BUILTIN_SHUFPS
16430	       ? CODE_FOR_sse_shufps
16431	       : CODE_FOR_sse2_shufpd);
16432      arg0 = TREE_VALUE (arglist);
16433      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16434      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16435      op0 = expand_normal (arg0);
16436      op1 = expand_normal (arg1);
16437      op2 = expand_normal (arg2);
16438      tmode = insn_data[icode].operand[0].mode;
16439      mode0 = insn_data[icode].operand[1].mode;
16440      mode1 = insn_data[icode].operand[2].mode;
16441      mode2 = insn_data[icode].operand[3].mode;
16442
16443      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16444	op0 = copy_to_mode_reg (mode0, op0);
16445      if ((optimize && !register_operand (op1, mode1))
16446	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16447	op1 = copy_to_mode_reg (mode1, op1);
16448      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16449	{
16450	  /* @@@ better error message */
16451	  error ("mask must be an immediate");
16452	  return gen_reg_rtx (tmode);
16453	}
16454      if (optimize || target == 0
16455	  || GET_MODE (target) != tmode
16456	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16457	target = gen_reg_rtx (tmode);
16458      pat = GEN_FCN (icode) (target, op0, op1, op2);
16459      if (! pat)
16460	return 0;
16461      emit_insn (pat);
16462      return target;
16463
16464    case IX86_BUILTIN_PSHUFW:
16465    case IX86_BUILTIN_PSHUFD:
16466    case IX86_BUILTIN_PSHUFHW:
16467    case IX86_BUILTIN_PSHUFLW:
16468      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16469	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16470	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16471	       : CODE_FOR_mmx_pshufw);
16472      arg0 = TREE_VALUE (arglist);
16473      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16474      op0 = expand_normal (arg0);
16475      op1 = expand_normal (arg1);
16476      tmode = insn_data[icode].operand[0].mode;
16477      mode1 = insn_data[icode].operand[1].mode;
16478      mode2 = insn_data[icode].operand[2].mode;
16479
16480      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16481	op0 = copy_to_mode_reg (mode1, op0);
16482      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16483	{
16484	  /* @@@ better error message */
16485	  error ("mask must be an immediate");
16486	  return const0_rtx;
16487	}
16488      if (target == 0
16489	  || GET_MODE (target) != tmode
16490	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16491	target = gen_reg_rtx (tmode);
16492      pat = GEN_FCN (icode) (target, op0, op1);
16493      if (! pat)
16494	return 0;
16495      emit_insn (pat);
16496      return target;
16497
16498    case IX86_BUILTIN_PSLLWI128:
16499      icode = CODE_FOR_ashlv8hi3;
16500      goto do_pshifti;
16501    case IX86_BUILTIN_PSLLDI128:
16502      icode = CODE_FOR_ashlv4si3;
16503      goto do_pshifti;
16504    case IX86_BUILTIN_PSLLQI128:
16505      icode = CODE_FOR_ashlv2di3;
16506      goto do_pshifti;
16507    case IX86_BUILTIN_PSRAWI128:
16508      icode = CODE_FOR_ashrv8hi3;
16509      goto do_pshifti;
16510    case IX86_BUILTIN_PSRADI128:
16511      icode = CODE_FOR_ashrv4si3;
16512      goto do_pshifti;
16513    case IX86_BUILTIN_PSRLWI128:
16514      icode = CODE_FOR_lshrv8hi3;
16515      goto do_pshifti;
16516    case IX86_BUILTIN_PSRLDI128:
16517      icode = CODE_FOR_lshrv4si3;
16518      goto do_pshifti;
16519    case IX86_BUILTIN_PSRLQI128:
16520      icode = CODE_FOR_lshrv2di3;
16521      goto do_pshifti;
16522    do_pshifti:
16523      arg0 = TREE_VALUE (arglist);
16524      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16525      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16526      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16527
16528      if (GET_CODE (op1) != CONST_INT)
16529	{
16530	  error ("shift must be an immediate");
16531	  return const0_rtx;
16532	}
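      /* Counts outside 0..255 saturate: the hardware treats any count
	 larger than the element width the same way, so clamp to 255.  */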
16533      if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
16534	op1 = GEN_INT (255);
16535
16536      tmode = insn_data[icode].operand[0].mode;
16537      mode1 = insn_data[icode].operand[1].mode;
16538      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16539	op0 = copy_to_reg (op0);
16540
16541      target = gen_reg_rtx (tmode);
16542      pat = GEN_FCN (icode) (target, op0, op1);
16543      if (!pat)
16544	return 0;
16545      emit_insn (pat);
16546      return target;
16547
16548    case IX86_BUILTIN_PSLLW128:
16549      icode = CODE_FOR_ashlv8hi3;
16550      goto do_pshift;
16551    case IX86_BUILTIN_PSLLD128:
16552      icode = CODE_FOR_ashlv4si3;
16553      goto do_pshift;
16554    case IX86_BUILTIN_PSLLQ128:
16555      icode = CODE_FOR_ashlv2di3;
16556      goto do_pshift;
16557    case IX86_BUILTIN_PSRAW128:
16558      icode = CODE_FOR_ashrv8hi3;
16559      goto do_pshift;
16560    case IX86_BUILTIN_PSRAD128:
16561      icode = CODE_FOR_ashrv4si3;
16562      goto do_pshift;
16563    case IX86_BUILTIN_PSRLW128:
16564      icode = CODE_FOR_lshrv8hi3;
16565      goto do_pshift;
16566    case IX86_BUILTIN_PSRLD128:
16567      icode = CODE_FOR_lshrv4si3;
16568      goto do_pshift;
16569    case IX86_BUILTIN_PSRLQ128:
16570      icode = CODE_FOR_lshrv2di3;
16571      goto do_pshift;
16572    do_pshift:
16573      arg0 = TREE_VALUE (arglist);
16574      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16575      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16576      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16577
16578      tmode = insn_data[icode].operand[0].mode;
16579      mode1 = insn_data[icode].operand[1].mode;
16580
16581      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16582	op0 = copy_to_reg (op0);
16583
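      /* The count operand of these shift patterns is TImode; reinterpret
	 op1, whatever mode it came in, as a TImode subreg.  */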
16584      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
16585      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
16586	op1 = copy_to_reg (op1);
16587
16588      target = gen_reg_rtx (tmode);
16589      pat = GEN_FCN (icode) (target, op0, op1);
16590      if (!pat)
16591	return 0;
16592      emit_insn (pat);
16593      return target;
16594
16595    case IX86_BUILTIN_PSLLDQI128:
16596    case IX86_BUILTIN_PSRLDQI128:
16597      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16598	       : CODE_FOR_sse2_lshrti3);
16599      arg0 = TREE_VALUE (arglist);
16600      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16601      op0 = expand_normal (arg0);
16602      op1 = expand_normal (arg1);
16603      tmode = insn_data[icode].operand[0].mode;
16604      mode1 = insn_data[icode].operand[1].mode;
16605      mode2 = insn_data[icode].operand[2].mode;
16606
16607      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16608	{
16609	  op0 = copy_to_reg (op0);
16610	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16611	}
16612      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16613	{
16614	  error ("shift must be an immediate");
16615	  return const0_rtx;
16616	}
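      /* The TImode shift pattern produces a TImode value, while the builtin
	 returns V2DImode; emit the insn into a TImode view of the V2DImode
	 result register.  */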
16617      target = gen_reg_rtx (V2DImode);
16618      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
16619			     op0, op1);
16620      if (! pat)
16621	return 0;
16622      emit_insn (pat);
16623      return target;
16624
16625    case IX86_BUILTIN_FEMMS:
16626      emit_insn (gen_mmx_femms ());
16627      return NULL_RTX;
16628
16629    case IX86_BUILTIN_PAVGUSB:
16630      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16631
16632    case IX86_BUILTIN_PF2ID:
16633      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16634
16635    case IX86_BUILTIN_PFACC:
16636      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16637
16638    case IX86_BUILTIN_PFADD:
16639     return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16640
16641    case IX86_BUILTIN_PFCMPEQ:
16642      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16643
16644    case IX86_BUILTIN_PFCMPGE:
16645      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16646
16647    case IX86_BUILTIN_PFCMPGT:
16648      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16649
16650    case IX86_BUILTIN_PFMAX:
16651      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16652
16653    case IX86_BUILTIN_PFMIN:
16654      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16655
16656    case IX86_BUILTIN_PFMUL:
16657      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16658
16659    case IX86_BUILTIN_PFRCP:
16660      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16661
16662    case IX86_BUILTIN_PFRCPIT1:
16663      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16664
16665    case IX86_BUILTIN_PFRCPIT2:
16666      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16667
16668    case IX86_BUILTIN_PFRSQIT1:
16669      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16670
16671    case IX86_BUILTIN_PFRSQRT:
16672      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16673
16674    case IX86_BUILTIN_PFSUB:
16675      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16676
16677    case IX86_BUILTIN_PFSUBR:
16678      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16679
16680    case IX86_BUILTIN_PI2FD:
16681      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16682
16683    case IX86_BUILTIN_PMULHRW:
16684      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16685
16686    case IX86_BUILTIN_PF2IW:
16687      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16688
16689    case IX86_BUILTIN_PFNACC:
16690      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16691
16692    case IX86_BUILTIN_PFPNACC:
16693      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16694
16695    case IX86_BUILTIN_PI2FW:
16696      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16697
16698    case IX86_BUILTIN_PSWAPDSI:
16699      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16700
16701    case IX86_BUILTIN_PSWAPDSF:
16702      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16703
16704    case IX86_BUILTIN_SQRTSD:
16705      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16706    case IX86_BUILTIN_LOADUPD:
16707      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16708    case IX86_BUILTIN_STOREUPD:
16709      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16710
16711    case IX86_BUILTIN_MFENCE:
16712	emit_insn (gen_sse2_mfence ());
16713	return 0;
16714    case IX86_BUILTIN_LFENCE:
16715	emit_insn (gen_sse2_lfence ());
16716	return 0;
16717
16718    case IX86_BUILTIN_CLFLUSH:
16719	arg0 = TREE_VALUE (arglist);
16720	op0 = expand_normal (arg0);
16721	icode = CODE_FOR_sse2_clflush;
16722	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16723	    op0 = copy_to_mode_reg (Pmode, op0);
16724
16725	emit_insn (gen_sse2_clflush (op0));
16726	return 0;
16727
16728    case IX86_BUILTIN_MOVNTPD:
16729      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16730    case IX86_BUILTIN_MOVNTDQ:
16731      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16732    case IX86_BUILTIN_MOVNTI:
16733      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16734
16735    case IX86_BUILTIN_LOADDQU:
16736      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16737    case IX86_BUILTIN_STOREDQU:
16738      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16739
16740    case IX86_BUILTIN_MONITOR:
16741      arg0 = TREE_VALUE (arglist);
16742      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16743      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16744      op0 = expand_normal (arg0);
16745      op1 = expand_normal (arg1);
16746      op2 = expand_normal (arg2);
16747      if (!REG_P (op0))
16748	op0 = copy_to_mode_reg (Pmode, op0);
16749      if (!REG_P (op1))
16750	op1 = copy_to_mode_reg (SImode, op1);
16751      if (!REG_P (op2))
16752	op2 = copy_to_mode_reg (SImode, op2);
16753      if (!TARGET_64BIT)
16754	emit_insn (gen_sse3_monitor (op0, op1, op2));
16755      else
16756	emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16757      return 0;
16758
16759    case IX86_BUILTIN_MWAIT:
16760      arg0 = TREE_VALUE (arglist);
16761      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16762      op0 = expand_normal (arg0);
16763      op1 = expand_normal (arg1);
16764      if (!REG_P (op0))
16765	op0 = copy_to_mode_reg (SImode, op0);
16766      if (!REG_P (op1))
16767	op1 = copy_to_mode_reg (SImode, op1);
16768      emit_insn (gen_sse3_mwait (op0, op1));
16769      return 0;
16770
16771    case IX86_BUILTIN_LDDQU:
16772      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16773				       target, 1);
16774
16775    case IX86_BUILTIN_PALIGNR:
16776    case IX86_BUILTIN_PALIGNR128:
16777      if (fcode == IX86_BUILTIN_PALIGNR)
16778	{
16779	  icode = CODE_FOR_ssse3_palignrdi;
16780	  mode = DImode;
16781	}
16782      else
16783	{
16784	  icode = CODE_FOR_ssse3_palignrti;
16785	  mode = V2DImode;
16786	}
16787      arg0 = TREE_VALUE (arglist);
16788      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16789      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16790      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16791      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16792      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
16793      tmode = insn_data[icode].operand[0].mode;
16794      mode1 = insn_data[icode].operand[1].mode;
16795      mode2 = insn_data[icode].operand[2].mode;
16796      mode3 = insn_data[icode].operand[3].mode;
16797
16798      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16799	{
16800	  op0 = copy_to_reg (op0);
16801	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16802	}
16803      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16804	{
16805	  op1 = copy_to_reg (op1);
16806	  op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
16807	}
16808      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
16809	{
16810	  error ("shift must be an immediate");
16811	  return const0_rtx;
16812	}
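      /* The insn pattern works in TMODE (DImode or TImode) while the builtin
	 returns MODE (DImode or V2DImode), so emit the insn into a subreg of
	 the result register.  */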
16813      target = gen_reg_rtx (mode);
16814      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
16815			     op0, op1, op2);
16816      if (! pat)
16817	return 0;
16818      emit_insn (pat);
16819      return target;
16820
16821    case IX86_BUILTIN_VEC_INIT_V2SI:
16822    case IX86_BUILTIN_VEC_INIT_V4HI:
16823    case IX86_BUILTIN_VEC_INIT_V8QI:
16824      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16825
16826    case IX86_BUILTIN_VEC_EXT_V2DF:
16827    case IX86_BUILTIN_VEC_EXT_V2DI:
16828    case IX86_BUILTIN_VEC_EXT_V4SF:
16829    case IX86_BUILTIN_VEC_EXT_V4SI:
16830    case IX86_BUILTIN_VEC_EXT_V8HI:
16831    case IX86_BUILTIN_VEC_EXT_V16QI:
16832    case IX86_BUILTIN_VEC_EXT_V2SI:
16833    case IX86_BUILTIN_VEC_EXT_V4HI:
16834      return ix86_expand_vec_ext_builtin (arglist, target);
16835
16836    case IX86_BUILTIN_VEC_SET_V8HI:
16837    case IX86_BUILTIN_VEC_SET_V4HI:
16838      return ix86_expand_vec_set_builtin (arglist);
16839
16840    default:
16841      break;
16842    }
16843
16844  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16845    if (d->code == fcode)
16846      {
16847	/* Compares are treated specially.  */
16848	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16849	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16850	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
16851	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16852	  return ix86_expand_sse_compare (d, arglist, target);
16853
16854	return ix86_expand_binop_builtin (d->icode, arglist, target);
16855      }
16856
16857  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16858    if (d->code == fcode)
16859      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16860
16861  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16862    if (d->code == fcode)
16863      return ix86_expand_sse_comi (d, arglist, target);
16864
16865  gcc_unreachable ();
16866}
16867
16868/* Store OPERAND to memory after reload is completed.  This means
16869   that we can't easily use assign_stack_local.  */
16870rtx
16871ix86_force_to_memory (enum machine_mode mode, rtx operand)
16872{
16873  rtx result;
16874
16875  gcc_assert (reload_completed);
16876  if (TARGET_RED_ZONE)
16877    {
16878      result = gen_rtx_MEM (mode,
16879			    gen_rtx_PLUS (Pmode,
16880					  stack_pointer_rtx,
16881					  GEN_INT (-RED_ZONE_SIZE)));
16882      emit_move_insn (result, operand);
16883    }
16884  else if (!TARGET_RED_ZONE && TARGET_64BIT)
16885    {
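      /* Without a red zone, allocate the slot by pushing the value and then
	 address it at the new top of the stack.  */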
16886      switch (mode)
16887	{
16888	case HImode:
16889	case SImode:
16890	  operand = gen_lowpart (DImode, operand);
16891	  /* FALLTHRU */
16892	case DImode:
16893	  emit_insn (
16894		      gen_rtx_SET (VOIDmode,
16895				   gen_rtx_MEM (DImode,
16896						gen_rtx_PRE_DEC (DImode,
16897							stack_pointer_rtx)),
16898				   operand));
16899	  break;
16900	default:
16901	  gcc_unreachable ();
16902	}
16903      result = gen_rtx_MEM (mode, stack_pointer_rtx);
16904    }
16905  else
16906    {
16907      switch (mode)
16908	{
16909	case DImode:
16910	  {
16911	    rtx operands[2];
16912	    split_di (&operand, 1, operands, operands + 1);
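	    /* Push the high word first, then the low word, so the low word
	       ends up at the lower address and the slot holds the DImode
	       value in the usual little-endian layout.  */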
16913	    emit_insn (
16914			gen_rtx_SET (VOIDmode,
16915				     gen_rtx_MEM (SImode,
16916						  gen_rtx_PRE_DEC (Pmode,
16917							stack_pointer_rtx)),
16918				     operands[1]));
16919	    emit_insn (
16920			gen_rtx_SET (VOIDmode,
16921				     gen_rtx_MEM (SImode,
16922						  gen_rtx_PRE_DEC (Pmode,
16923							stack_pointer_rtx)),
16924				     operands[0]));
16925	  }
16926	  break;
16927	case HImode:
16928	  /* Store HImodes as SImodes.  */
16929	  operand = gen_lowpart (SImode, operand);
16930	  /* FALLTHRU */
16931	case SImode:
16932	  emit_insn (
16933		      gen_rtx_SET (VOIDmode,
16934				   gen_rtx_MEM (GET_MODE (operand),
16935						gen_rtx_PRE_DEC (SImode,
16936							stack_pointer_rtx)),
16937				   operand));
16938	  break;
16939	default:
16940	  gcc_unreachable ();
16941	}
16942      result = gen_rtx_MEM (mode, stack_pointer_rtx);
16943    }
16944  return result;
16945}
16946
16947/* Free the stack slot used by ix86_force_to_memory.  */
16948void
16949ix86_free_from_memory (enum machine_mode mode)
16950{
16951  if (!TARGET_RED_ZONE)
16952    {
16953      int size;
16954
16955      if (mode == DImode || TARGET_64BIT)
16956	size = 8;
16957      else
16958	size = 4;
16959      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
16960         to a pop or add instruction if registers are available.  */
16961      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16962			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16963					    GEN_INT (size))));
16964    }
16965}
16966
16967/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16968   QImode must go into class Q_REGS.
16969   Narrow ALL_REGS to GENERAL_REGS.  This lets movsf and
16970   movdf do mem-to-mem moves through integer regs.  */
16971enum reg_class
16972ix86_preferred_reload_class (rtx x, enum reg_class class)
16973{
16974  enum machine_mode mode = GET_MODE (x);
16975
16976  /* We're only allowed to return a subclass of CLASS.  Many of the
16977     following checks fail for NO_REGS, so eliminate that early.  */
16978  if (class == NO_REGS)
16979    return NO_REGS;
16980
16981  /* All classes can load zeros.  */
16982  if (x == CONST0_RTX (mode))
16983    return class;
16984
16985  /* Force constants into memory if we are loading a (nonzero) constant into
16986     an MMX or SSE register.  This is because there are no MMX/SSE instructions
16987     to load from a constant.  */
16988  if (CONSTANT_P (x)
16989      && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16990    return NO_REGS;
16991
16992  /* Prefer SSE regs only, if we can use them for math.  */
16993  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16994    return SSE_CLASS_P (class) ? class : NO_REGS;
16995
16996  /* Floating-point constants need more complex checks.  */
16997  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16998    {
16999      /* General regs can load everything.  */
17000      if (reg_class_subset_p (class, GENERAL_REGS))
17001        return class;
17002
17003      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
17004	 zero above.  We only want to wind up preferring 80387 registers if
17005	 we plan on doing computation with them.  */
17006      if (TARGET_80387
17007	  && standard_80387_constant_p (x))
17008	{
17009	  /* Limit class to non-sse.  */
17010	  if (class == FLOAT_SSE_REGS)
17011	    return FLOAT_REGS;
17012	  if (class == FP_TOP_SSE_REGS)
17013	    return FP_TOP_REG;
17014	  if (class == FP_SECOND_SSE_REGS)
17015	    return FP_SECOND_REG;
17016	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17017	    return class;
17018	}
17019
17020      return NO_REGS;
17021    }
17022
17023  /* Generally when we see PLUS here, it's the function invariant
17024     (plus soft-fp const_int), which can only be computed into general
17025     regs.  */
17026  if (GET_CODE (x) == PLUS)
17027    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17028
17029  /* QImode constants are easy to load, but non-constant QImode data
17030     must go into Q_REGS.  */
17031  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17032    {
17033      if (reg_class_subset_p (class, Q_REGS))
17034	return class;
17035      if (reg_class_subset_p (Q_REGS, class))
17036	return Q_REGS;
17037      return NO_REGS;
17038    }
17039
17040  return class;
17041}
17042
17043/* Discourage putting floating-point values in SSE registers unless
17044   SSE math is being used, and likewise for the 387 registers.  */
17045enum reg_class
17046ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17047{
17048  enum machine_mode mode = GET_MODE (x);
17049
17050  /* Restrict the output reload class to the register bank that we are doing
17051     math on.  If we would prefer not to return a subset of CLASS, reject this
17052     alternative by returning NO_REGS: reload will still use its own choice.  */
17053  mode = GET_MODE (x);
17054  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17055    return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17056
17057  if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17058    {
17059      if (class == FP_TOP_SSE_REGS)
17060	return FP_TOP_REG;
17061      else if (class == FP_SECOND_SSE_REGS)
17062	return FP_SECOND_REG;
17063      else
17064	return FLOAT_CLASS_P (class) ? class : NO_REGS;
17065    }
17066
17067  return class;
17068}
17069
17070/* If we are copying between general and FP registers, we need a memory
17071   location. The same is true for SSE and MMX registers.
17072
17073   The macro can't work reliably when one of the CLASSES is a class containing
17074   registers from multiple units (SSE, MMX, integer).  We avoid this by never
17075   combining those units in a single alternative in the machine description.
17076   Ensure that this constraint holds to avoid unexpected surprises.
17077
17078   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17079   enforce these sanity checks.  */
17080
17081int
17082ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17083			      enum machine_mode mode, int strict)
17084{
17085  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17086      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17087      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17088      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17089      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17090      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17091    {
17092      gcc_assert (!strict);
17093      return true;
17094    }
17095
17096  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17097    return true;
17098
17099  /* ??? This is a lie.  We do have moves between mmx/general, and for
17100     mmx/sse2.  But by saying we need secondary memory we discourage the
17101     register allocator from using the mmx registers unless needed.  */
17102  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17103    return true;
17104
17105  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17106    {
17107      /* SSE1 doesn't have any direct moves from other classes.  */
17108      if (!TARGET_SSE2)
17109	return true;
17110
17111      /* If the target says that inter-unit moves are more expensive
17112	 than moving through memory, then don't generate them.  */
17113      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17114	return true;
17115
17116      /* Between SSE and general, we have moves no larger than word size.  */
17117      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17118	return true;
17119
17120      /* ??? For the cost of one register reformat penalty, we could use
17121	 the same instructions to move SFmode and DFmode data, but the
17122	 relevant move patterns don't support those alternatives.  */
17123      if (mode == SFmode || mode == DFmode)
17124	return true;
17125    }
17126
17127  return false;
17128}
17129
17130/* Return true if the registers in CLASS cannot represent the change from
17131   modes FROM to TO.  */
17132
17133bool
17134ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17135			       enum reg_class class)
17136{
17137  if (from == to)
17138    return false;
17139
17140  /* x87 registers can't do subreg at all, as all values are reformatted
17141     to extended precision.  */
17142  if (MAYBE_FLOAT_CLASS_P (class))
17143    return true;
17144
17145  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17146    {
17147      /* Vector registers do not support QI or HImode loads.  If we don't
17148	 disallow a change to these modes, reload will assume it's ok to
17149	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
17150	 the vec_dupv4hi pattern.  */
17151      if (GET_MODE_SIZE (from) < 4)
17152	return true;
17153
17154      /* Vector registers do not support subreg with nonzero offsets, which
17155	 are otherwise valid for integer registers.  Since we can't see
17156	 whether we have a nonzero offset from here, prohibit all
17157         nonparadoxical subregs changing size.  */
17158      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17159	return true;
17160    }
17161
17162  return false;
17163}
17164
17165/* Return the cost of moving data from a register in class CLASS1 to
17166   one in class CLASS2.
17167
17168   It is not required that the cost always equal 2 when FROM is the same as TO;
17169   on some machines it is expensive to move between registers if they are not
17170   general registers.  */
17171
17172int
17173ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17174			 enum reg_class class2)
17175{
17176  /* In case we require secondary memory, compute the cost of the store followed
17177     by the load.  In order to avoid bad register allocation choices, we need
17178     this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
17179
17180  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17181    {
17182      int cost = 1;
17183
17184      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17185		   MEMORY_MOVE_COST (mode, class1, 1));
17186      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17187		   MEMORY_MOVE_COST (mode, class2, 1));
17188
17189      /* When copying from a general purpose register we may emit multiple
17190         stores followed by a single load, causing a memory size mismatch stall.
17191         Count this as an arbitrarily high cost of 20.  */
17192      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17193	cost += 20;
17194
17195      /* In the case of FP/MMX moves, the registers actually overlap, and we
17196	 have to switch modes in order to treat them differently.  */
17197      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17198          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17199	cost += 20;
17200
17201      return cost;
17202    }
17203
17204  /* Moves between SSE/MMX and integer unit are expensive.  */
17205  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17206      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17207    return ix86_cost->mmxsse_to_integer;
17208  if (MAYBE_FLOAT_CLASS_P (class1))
17209    return ix86_cost->fp_move;
17210  if (MAYBE_SSE_CLASS_P (class1))
17211    return ix86_cost->sse_move;
17212  if (MAYBE_MMX_CLASS_P (class1))
17213    return ix86_cost->mmx_move;
17214  return 2;
17215}
17216
17217/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
17218
17219bool
17220ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17221{
17222  /* Only the flags registers can hold CCmode values, and they hold nothing else.  */
17223  if (CC_REGNO_P (regno))
17224    return GET_MODE_CLASS (mode) == MODE_CC;
17225  if (GET_MODE_CLASS (mode) == MODE_CC
17226      || GET_MODE_CLASS (mode) == MODE_RANDOM
17227      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17228    return 0;
17229  if (FP_REGNO_P (regno))
17230    return VALID_FP_MODE_P (mode);
17231  if (SSE_REGNO_P (regno))
17232    {
17233      /* We implement the move patterns for all vector modes into and
17234	 out of SSE registers, even when no operation instructions
17235	 are available.  */
17236      return (VALID_SSE_REG_MODE (mode)
17237	      || VALID_SSE2_REG_MODE (mode)
17238	      || VALID_MMX_REG_MODE (mode)
17239	      || VALID_MMX_REG_MODE_3DNOW (mode));
17240    }
17241  if (MMX_REGNO_P (regno))
17242    {
17243      /* We implement the move patterns for 3DNOW modes even in MMX mode,
17244	 so if the register is available at all, then we can move data of
17245	 the given mode into or out of it.  */
17246      return (VALID_MMX_REG_MODE (mode)
17247	      || VALID_MMX_REG_MODE_3DNOW (mode));
17248    }
17249
17250  if (mode == QImode)
17251    {
17252      /* Take care with QImode values - they can be in non-QI regs,
17253	 but then they do cause partial register stalls.  */
17254      if (regno < 4 || TARGET_64BIT)
17255	return 1;
17256      if (!TARGET_PARTIAL_REG_STALL)
17257	return 1;
17258      return reload_in_progress || reload_completed;
17259    }
17260  /* We handle both integers and floats in the general purpose registers.  */
17261  else if (VALID_INT_MODE_P (mode))
17262    return 1;
17263  else if (VALID_FP_MODE_P (mode))
17264    return 1;
17265  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
17266     on to use that value in smaller contexts, this can easily force a
17267     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
17268     supporting DImode, allow it.  */
17269  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17270    return 1;
17271
17272  return 0;
17273}
17274
17275/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
17276   tieable integer mode.  */
17277
17278static bool
17279ix86_tieable_integer_mode_p (enum machine_mode mode)
17280{
17281  switch (mode)
17282    {
17283    case HImode:
17284    case SImode:
17285      return true;
17286
17287    case QImode:
17288      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17289
17290    case DImode:
17291      return TARGET_64BIT;
17292
17293    default:
17294      return false;
17295    }
17296}
17297
17298/* Return true if MODE1 is accessible in a register that can hold MODE2
17299   without copying.  That is, all register classes that can hold MODE2
17300   can also hold MODE1.  */
17301
17302bool
17303ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17304{
17305  if (mode1 == mode2)
17306    return true;
17307
17308  if (ix86_tieable_integer_mode_p (mode1)
17309      && ix86_tieable_integer_mode_p (mode2))
17310    return true;
17311
17312  /* MODE2 being XFmode implies fp stack or general regs, which means we
17313     can tie any smaller floating point modes to it.  Note that we do not
17314     tie this with TFmode.  */
17315  if (mode2 == XFmode)
17316    return mode1 == SFmode || mode1 == DFmode;
17317
17318  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17319     that we can tie it with SFmode.  */
17320  if (mode2 == DFmode)
17321    return mode1 == SFmode;
17322
17323  /* If MODE2 is only appropriate for an SSE register, then tie with
17324     any other mode acceptable to SSE registers.  */
17325  if (GET_MODE_SIZE (mode2) >= 8
17326      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17327    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17328
17329  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17330     with any other mode acceptable to MMX registers.  */
17331  if (GET_MODE_SIZE (mode2) == 8
17332      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17333    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17334
17335  return false;
17336}
17337
17338/* Return the cost of moving data of mode M between a
17339   register and memory.  A value of 2 is the default; this cost is
17340   relative to those in `REGISTER_MOVE_COST'.
17341
17342   If moving between registers and memory is more expensive than
17343   between two registers, you should define this macro to express the
17344   relative cost.
17345
17346   Also model the increased cost of moving QImode values in non-Q_REGS
17347   classes.
17348 */
17349int
17350ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17351{
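  /* The FLOAT/SSE/MMX cost tables below are indexed by a small size class
     derived from MODE; sizes they do not model get an arbitrary cost of 100.  */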
17352  if (FLOAT_CLASS_P (class))
17353    {
17354      int index;
17355      switch (mode)
17356	{
17357	  case SFmode:
17358	    index = 0;
17359	    break;
17360	  case DFmode:
17361	    index = 1;
17362	    break;
17363	  case XFmode:
17364	    index = 2;
17365	    break;
17366	  default:
17367	    return 100;
17368	}
17369      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17370    }
17371  if (SSE_CLASS_P (class))
17372    {
17373      int index;
17374      switch (GET_MODE_SIZE (mode))
17375	{
17376	  case 4:
17377	    index = 0;
17378	    break;
17379	  case 8:
17380	    index = 1;
17381	    break;
17382	  case 16:
17383	    index = 2;
17384	    break;
17385	  default:
17386	    return 100;
17387	}
17388      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17389    }
17390  if (MMX_CLASS_P (class))
17391    {
17392      int index;
17393      switch (GET_MODE_SIZE (mode))
17394	{
17395	  case 4:
17396	    index = 0;
17397	    break;
17398	  case 8:
17399	    index = 1;
17400	    break;
17401	  default:
17402	    return 100;
17403	}
17404      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17405    }
17406  switch (GET_MODE_SIZE (mode))
17407    {
17408      case 1:
17409	if (in)
17410	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17411		  : ix86_cost->movzbl_load);
17412	else
17413	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17414		  : ix86_cost->int_store[0] + 4);
17415	break;
17416      case 2:
17417	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17418      default:
17419	/* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
17420	if (mode == TFmode)
17421	  mode = XFmode;
17422	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17423		* (((int) GET_MODE_SIZE (mode)
17424		    + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17425    }
17426}
17427
17428/* Compute a (partial) cost for rtx X.  Return true if the complete
17429   cost has been computed, and false if subexpressions should be
17430   scanned.  In either case, *TOTAL contains the cost result.  */
17431
17432static bool
17433ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17434{
17435  enum machine_mode mode = GET_MODE (x);
17436
17437  switch (code)
17438    {
17439    case CONST_INT:
17440    case CONST:
17441    case LABEL_REF:
17442    case SYMBOL_REF:
17443      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17444	*total = 3;
17445      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17446	*total = 2;
17447      else if (flag_pic && SYMBOLIC_CONST (x)
17448	       && (!TARGET_64BIT
17449		   || (GET_CODE (x) != LABEL_REF
17450		       && (GET_CODE (x) != SYMBOL_REF
17451		           || !SYMBOL_REF_LOCAL_P (x)))))
17452	*total = 1;
17453      else
17454	*total = 0;
17455      return true;
17456
17457    case CONST_DOUBLE:
17458      if (mode == VOIDmode)
17459	*total = 0;
17460      else
17461	switch (standard_80387_constant_p (x))
17462	  {
17463	  case 1: /* 0.0 */
17464	    *total = 1;
17465	    break;
17466	  default: /* Other constants */
17467	    *total = 2;
17468	    break;
17469	  case 0:
17470	  case -1:
17471	    /* Start with (MEM (SYMBOL_REF)), since that's where
17472	       it'll probably end up.  Add a penalty for size.  */
17473	    *total = (COSTS_N_INSNS (1)
17474		      + (flag_pic != 0 && !TARGET_64BIT)
17475		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17476	    break;
17477	  }
17478      return true;
17479
17480    case ZERO_EXTEND:
17481      /* The zero extension is often completely free on x86_64, so make
17482	 it as cheap as possible.  */
17483      if (TARGET_64BIT && mode == DImode
17484	  && GET_MODE (XEXP (x, 0)) == SImode)
17485	*total = 1;
17486      else if (TARGET_ZERO_EXTEND_WITH_AND)
17487	*total = ix86_cost->add;
17488      else
17489	*total = ix86_cost->movzx;
17490      return false;
17491
17492    case SIGN_EXTEND:
17493      *total = ix86_cost->movsx;
17494      return false;
17495
17496    case ASHIFT:
17497      if (GET_CODE (XEXP (x, 1)) == CONST_INT
17498	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17499	{
17500	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17501	  if (value == 1)
17502	    {
17503	      *total = ix86_cost->add;
17504	      return false;
17505	    }
17506	  if ((value == 2 || value == 3)
17507	      && ix86_cost->lea <= ix86_cost->shift_const)
17508	    {
17509	      *total = ix86_cost->lea;
17510	      return false;
17511	    }
17512	}
17513      /* FALLTHRU */
17514
17515    case ROTATE:
17516    case ASHIFTRT:
17517    case LSHIFTRT:
17518    case ROTATERT:
17519      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17520	{
17521	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17522	    {
17523	      if (INTVAL (XEXP (x, 1)) > 32)
17524		*total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17525	      else
17526		*total = ix86_cost->shift_const * 2;
17527	    }
17528	  else
17529	    {
17530	      if (GET_CODE (XEXP (x, 1)) == AND)
17531		*total = ix86_cost->shift_var * 2;
17532	      else
17533		*total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17534	    }
17535	}
17536      else
17537	{
17538	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17539	    *total = ix86_cost->shift_const;
17540	  else
17541	    *total = ix86_cost->shift_var;
17542	}
17543      return false;
17544
17545    case MULT:
17546      if (FLOAT_MODE_P (mode))
17547	{
17548	  *total = ix86_cost->fmul;
17549	  return false;
17550	}
17551      else
17552	{
17553	  rtx op0 = XEXP (x, 0);
17554	  rtx op1 = XEXP (x, 1);
17555	  int nbits;
17556	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17557	    {
17558	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17559	      for (nbits = 0; value != 0; value &= value - 1)
17560	        nbits++;
17561	    }
17562	  else
17563	    /* This is arbitrary.  */
17564	    nbits = 7;
17565
17566	  /* Compute costs correctly for widening multiplication.  */
17567	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17568	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17569	         == GET_MODE_SIZE (mode))
17570	    {
17571	      int is_mulwiden = 0;
17572	      enum machine_mode inner_mode = GET_MODE (op0);
17573
17574	      if (GET_CODE (op0) == GET_CODE (op1))
17575		is_mulwiden = 1, op1 = XEXP (op1, 0);
17576	      else if (GET_CODE (op1) == CONST_INT)
17577		{
17578		  if (GET_CODE (op0) == SIGN_EXTEND)
17579		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17580			          == INTVAL (op1);
17581		  else
17582		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17583	        }
17584
17585	      if (is_mulwiden)
17586	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17587	    }
17588
17589  	  *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17590		    + nbits * ix86_cost->mult_bit
17591	            + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17592
17593          return true;
17594	}
17595
17596    case DIV:
17597    case UDIV:
17598    case MOD:
17599    case UMOD:
17600      if (FLOAT_MODE_P (mode))
17601	*total = ix86_cost->fdiv;
17602      else
17603	*total = ix86_cost->divide[MODE_INDEX (mode)];
17604      return false;
17605
17606    case PLUS:
17607      if (FLOAT_MODE_P (mode))
17608	*total = ix86_cost->fadd;
17609      else if (GET_MODE_CLASS (mode) == MODE_INT
17610	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17611	{
17612	  if (GET_CODE (XEXP (x, 0)) == PLUS
17613	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17614	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17615	      && CONSTANT_P (XEXP (x, 1)))
17616	    {
17617	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17618	      if (val == 2 || val == 4 || val == 8)
17619		{
17620		  *total = ix86_cost->lea;
17621		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17622		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17623				      outer_code);
17624		  *total += rtx_cost (XEXP (x, 1), outer_code);
17625		  return true;
17626		}
17627	    }
17628	  else if (GET_CODE (XEXP (x, 0)) == MULT
17629		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17630	    {
17631	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17632	      if (val == 2 || val == 4 || val == 8)
17633		{
17634		  *total = ix86_cost->lea;
17635		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17636		  *total += rtx_cost (XEXP (x, 1), outer_code);
17637		  return true;
17638		}
17639	    }
17640	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
17641	    {
17642	      *total = ix86_cost->lea;
17643	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17644	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17645	      *total += rtx_cost (XEXP (x, 1), outer_code);
17646	      return true;
17647	    }
17648	}
17649      /* FALLTHRU */
17650
17651    case MINUS:
17652      if (FLOAT_MODE_P (mode))
17653	{
17654	  *total = ix86_cost->fadd;
17655	  return false;
17656	}
17657      /* FALLTHRU */
17658
17659    case AND:
17660    case IOR:
17661    case XOR:
17662      if (!TARGET_64BIT && mode == DImode)
17663	{
17664	  *total = (ix86_cost->add * 2
17665		    + (rtx_cost (XEXP (x, 0), outer_code)
17666		       << (GET_MODE (XEXP (x, 0)) != DImode))
17667		    + (rtx_cost (XEXP (x, 1), outer_code)
17668	               << (GET_MODE (XEXP (x, 1)) != DImode)));
17669	  return true;
17670	}
17671      /* FALLTHRU */
17672
17673    case NEG:
17674      if (FLOAT_MODE_P (mode))
17675	{
17676	  *total = ix86_cost->fchs;
17677	  return false;
17678	}
17679      /* FALLTHRU */
17680
17681    case NOT:
17682      if (!TARGET_64BIT && mode == DImode)
17683	*total = ix86_cost->add * 2;
17684      else
17685	*total = ix86_cost->add;
17686      return false;
17687
17688    case COMPARE:
17689      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17690	  && XEXP (XEXP (x, 0), 1) == const1_rtx
17691	  && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17692	  && XEXP (x, 1) == const0_rtx)
17693	{
17694	  /* This kind of construct is implemented using test[bwl].
17695	     Treat it as if we had an AND.  */
17696	  *total = (ix86_cost->add
17697		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17698		    + rtx_cost (const1_rtx, outer_code));
17699	  return true;
17700	}
17701      return false;
17702
17703    case FLOAT_EXTEND:
17704      if (!TARGET_SSE_MATH
17705	  || mode == XFmode
17706	  || (mode == DFmode && !TARGET_SSE2))
17707	/* For standard 80387 constants, raise the cost to prevent
17708	   compress_float_constant() from generating a load from memory.  */
17709	switch (standard_80387_constant_p (XEXP (x, 0)))
17710	  {
17711	  case -1:
17712	  case 0:
17713	    *total = 0;
17714	    break;
17715	  case 1: /* 0.0 */
17716	    *total = 1;
17717	    break;
17718	  default:
17719	    *total = (x86_ext_80387_constants & TUNEMASK
17720		      || optimize_size
17721		      ? 1 : 0);
17722	  }
17723      return false;
17724
17725    case ABS:
17726      if (FLOAT_MODE_P (mode))
17727	*total = ix86_cost->fabs;
17728      return false;
17729
17730    case SQRT:
17731      if (FLOAT_MODE_P (mode))
17732	*total = ix86_cost->fsqrt;
17733      return false;
17734
17735    case UNSPEC:
17736      if (XINT (x, 1) == UNSPEC_TP)
17737	*total = 0;
17738      return false;
17739
17740    default:
17741      return false;
17742    }
17743}
17744
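/* To make the MULT costing above concrete: a multiply by the constant 10
   (binary 1010, two bits set) in SImode is priced roughly as

     mult_init[MODE_INDEX (SImode)] + 2 * mult_bit
       + rtx_cost (op0) + rtx_cost (op1)

   while a multiply by a non-constant operand uses the arbitrary nbits = 7.
   The bit-counting loop is the usual clear-lowest-set-bit idiom; a
   stand-alone sketch of it, for a hypothetical unsigned value V, is

     int nbits = 0;
     while (v != 0)
       {
         v &= v - 1;
         nbits++;
       }
*/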
17745#if TARGET_MACHO
17746
17747static int current_machopic_label_num;
17748
17749/* Given a symbol name and its associated stub, write out the
17750   definition of the stub.  */
17751
17752void
17753machopic_output_stub (FILE *file, const char *symb, const char *stub)
17754{
17755  unsigned int length;
17756  char *binder_name, *symbol_name, lazy_ptr_name[32];
17757  int label = ++current_machopic_label_num;
17758
17759  /* For 64-bit we shouldn't get here.  */
17760  gcc_assert (!TARGET_64BIT);
17761
17762  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
17763  symb = (*targetm.strip_name_encoding) (symb);
17764
17765  length = strlen (stub);
17766  binder_name = alloca (length + 32);
17767  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17768
17769  length = strlen (symb);
17770  symbol_name = alloca (length + 32);
17771  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17772
17773  sprintf (lazy_ptr_name, "L%d$lz", label);
17774
17775  if (MACHOPIC_PURE)
17776    switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17777  else
17778    switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17779
17780  fprintf (file, "%s:\n", stub);
17781  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17782
17783  if (MACHOPIC_PURE)
17784    {
17785      fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17786      fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17787      fprintf (file, "\tjmp\t*%%edx\n");
17788    }
17789  else
17790    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17791
17792  fprintf (file, "%s:\n", binder_name);
17793
17794  if (MACHOPIC_PURE)
17795    {
17796      fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17797      fprintf (file, "\tpushl\t%%eax\n");
17798    }
17799  else
17800    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17801
17802  fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17803
17804  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17805  fprintf (file, "%s:\n", lazy_ptr_name);
17806  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17807  fprintf (file, "\t.long %s\n", binder_name);
17808}
17809
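/* As a rough illustration of the output above, a non-pure (non-PIC) stub
   for a symbol whose stub name is "L_foo$stub" would come out along these
   lines (all names here are made up for the example):

     L_foo$stub:
             .indirect_symbol _foo
             jmp     *L_foo$lazy_ptr
     L_foo$stub_binder:
             pushl   $L_foo$lazy_ptr
             jmp     dyld_stub_binding_helper
     L_foo$lazy_ptr:
             .indirect_symbol _foo
             .long   L_foo$stub_binder

   The MACHOPIC_PURE variant instead materializes the pic base with a
   call/pop pair and loads the lazy pointer relative to it.  */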
17810void
17811darwin_x86_file_end (void)
17812{
17813  darwin_file_end ();
17814  ix86_file_end ();
17815}
17816#endif /* TARGET_MACHO */
17817
17818/* Order the registers for register allocator.  */
17819
17820void
17821x86_order_regs_for_local_alloc (void)
17822{
17823   int pos = 0;
17824   int i;
17825
17826   /* First allocate the local general purpose registers.  */
17827   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17828     if (GENERAL_REGNO_P (i) && call_used_regs[i])
17829	reg_alloc_order [pos++] = i;
17830
17831   /* Global general purpose registers.  */
17832   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17833     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17834	reg_alloc_order [pos++] = i;
17835
17836   /* x87 registers come first in case we are doing FP math
17837      using them.  */
17838   if (!TARGET_SSE_MATH)
17839     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17840       reg_alloc_order [pos++] = i;
17841
17842   /* SSE registers.  */
17843   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17844     reg_alloc_order [pos++] = i;
17845   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17846     reg_alloc_order [pos++] = i;
17847
17848   /* x87 registers.  */
17849   if (TARGET_SSE_MATH)
17850     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17851       reg_alloc_order [pos++] = i;
17852
17853   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17854     reg_alloc_order [pos++] = i;
17855
17856   /* Initialize the rest of the array, as we do not allocate some
17857      registers at all.  */
17858   while (pos < FIRST_PSEUDO_REGISTER)
17859     reg_alloc_order [pos++] = 0;
17860}
17861
17862/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17863   struct attribute_spec.handler.  */
17864static tree
17865ix86_handle_struct_attribute (tree *node, tree name,
17866			      tree args ATTRIBUTE_UNUSED,
17867			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17868{
17869  tree *type = NULL;
17870  if (DECL_P (*node))
17871    {
17872      if (TREE_CODE (*node) == TYPE_DECL)
17873	type = &TREE_TYPE (*node);
17874    }
17875  else
17876    type = node;
17877
17878  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17879		 || TREE_CODE (*type) == UNION_TYPE)))
17880    {
17881      warning (OPT_Wattributes, "%qs attribute ignored",
17882	       IDENTIFIER_POINTER (name));
17883      *no_add_attrs = true;
17884    }
17885
17886  else if ((is_attribute_p ("ms_struct", name)
17887	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17888	   || ((is_attribute_p ("gcc_struct", name)
17889		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17890    {
17891      warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17892               IDENTIFIER_POINTER (name));
17893      *no_add_attrs = true;
17894    }
17895
17896  return NULL_TREE;
17897}
17898
17899static bool
17900ix86_ms_bitfield_layout_p (tree record_type)
17901{
17902  return (TARGET_MS_BITFIELD_LAYOUT &&
17903	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17904    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17905}
17906
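/* For reference, the attributes handled above are written in user code
   roughly like this (a minimal sketch; the type names are arbitrary):

     struct __attribute__ ((ms_struct)) s_ms   { char c; int i : 5; };
     struct __attribute__ ((gcc_struct)) s_gcc { char c; int i : 5; };

   Putting both attributes on the same type triggers the "incompatible
   attribute ignored" warning above, and ms_struct (or -mms-bitfields)
   selects the MSVC-compatible record layout that
   ix86_ms_bitfield_layout_p reports.  */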
17907/* Returns an expression indicating where the this parameter is
17908   located on entry to the FUNCTION.  */
17909
17910static rtx
17911x86_this_parameter (tree function)
17912{
17913  tree type = TREE_TYPE (function);
17914
17915  if (TARGET_64BIT)
17916    {
17917      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17918      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17919    }
17920
17921  if (ix86_function_regparm (type, function) > 0)
17922    {
17923      tree parm;
17924
17925      parm = TYPE_ARG_TYPES (type);
17926      /* Figure out whether or not the function has a variable number of
17927	 arguments.  */
17928      for (; parm; parm = TREE_CHAIN (parm))
17929	if (TREE_VALUE (parm) == void_type_node)
17930	  break;
17931      /* If not, the this parameter is in the first argument.  */
17932      if (parm)
17933	{
17934	  int regno = 0;
17935	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17936	    regno = 2;
17937	  return gen_rtx_REG (SImode, regno);
17938	}
17939    }
17940
17941  if (aggregate_value_p (TREE_TYPE (type), type))
17942    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17943  else
17944    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17945}
17946
17947/* Determine whether x86_output_mi_thunk can succeed.  */
17948
17949static bool
17950x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17951			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17952			 HOST_WIDE_INT vcall_offset, tree function)
17953{
17954  /* 64-bit can handle anything.  */
17955  if (TARGET_64BIT)
17956    return true;
17957
17958  /* For 32-bit, everything's fine if we have one free register.  */
17959  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17960    return true;
17961
17962  /* Need a free register for vcall_offset.  */
17963  if (vcall_offset)
17964    return false;
17965
17966  /* Need a free register for GOT references.  */
17967  if (flag_pic && !(*targetm.binds_local_p) (function))
17968    return false;
17969
17970  /* Otherwise ok.  */
17971  return true;
17972}
17973
17974/* Output the assembler code for a thunk function.  THUNK_DECL is the
17975   declaration for the thunk function itself, FUNCTION is the decl for
17976   the target function.  DELTA is an immediate constant offset to be
17977   added to THIS.  If VCALL_OFFSET is nonzero, the word at
17978   *(*this + vcall_offset) should be added to THIS.  */
17979
17980static void
17981x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17982		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17983		     HOST_WIDE_INT vcall_offset, tree function)
17984{
17985  rtx xops[3];
17986  rtx this = x86_this_parameter (function);
17987  rtx this_reg, tmp;
17988
17989  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
17990     pull it in now and let DELTA benefit.  */
17991  if (REG_P (this))
17992    this_reg = this;
17993  else if (vcall_offset)
17994    {
17995      /* Put the this parameter into %eax.  */
17996      xops[0] = this;
17997      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17998      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17999    }
18000  else
18001    this_reg = NULL_RTX;
18002
18003  /* Adjust the this parameter by a fixed constant.  */
18004  if (delta)
18005    {
18006      xops[0] = GEN_INT (delta);
18007      xops[1] = this_reg ? this_reg : this;
18008      if (TARGET_64BIT)
18009	{
18010	  if (!x86_64_general_operand (xops[0], DImode))
18011	    {
18012	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18013	      xops[1] = tmp;
18014	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18015	      xops[0] = tmp;
18016	      xops[1] = this;
18017	    }
18018	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18019	}
18020      else
18021	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18022    }
18023
18024  /* Adjust the this parameter by a value stored in the vtable.  */
18025  if (vcall_offset)
18026    {
18027      if (TARGET_64BIT)
18028	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18029      else
18030	{
18031	  int tmp_regno = 2 /* ECX */;
18032	  if (lookup_attribute ("fastcall",
18033	      TYPE_ATTRIBUTES (TREE_TYPE (function))))
18034	    tmp_regno = 0 /* EAX */;
18035	  tmp = gen_rtx_REG (SImode, tmp_regno);
18036	}
18037
18038      xops[0] = gen_rtx_MEM (Pmode, this_reg);
18039      xops[1] = tmp;
18040      if (TARGET_64BIT)
18041	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18042      else
18043	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18044
18045      /* Adjust the this parameter.  */
18046      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18047      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18048	{
18049	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
18050	  xops[0] = GEN_INT (vcall_offset);
18051	  xops[1] = tmp2;
18052	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18053	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18054	}
18055      xops[1] = this_reg;
18056      if (TARGET_64BIT)
18057	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18058      else
18059	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18060    }
18061
18062  /* If necessary, drop THIS back to its stack slot.  */
18063  if (this_reg && this_reg != this)
18064    {
18065      xops[0] = this_reg;
18066      xops[1] = this;
18067      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18068    }
18069
18070  xops[0] = XEXP (DECL_RTL (function), 0);
18071  if (TARGET_64BIT)
18072    {
18073      if (!flag_pic || (*targetm.binds_local_p) (function))
18074	output_asm_insn ("jmp\t%P0", xops);
18075      else
18076	{
18077	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18078	  tmp = gen_rtx_CONST (Pmode, tmp);
18079	  tmp = gen_rtx_MEM (QImode, tmp);
18080	  xops[0] = tmp;
18081	  output_asm_insn ("jmp\t%A0", xops);
18082	}
18083    }
18084  else
18085    {
18086      if (!flag_pic || (*targetm.binds_local_p) (function))
18087	output_asm_insn ("jmp\t%P0", xops);
18088      else
18089#if TARGET_MACHO
18090	if (TARGET_MACHO)
18091	  {
18092	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
18093	    tmp = (gen_rtx_SYMBOL_REF
18094		   (Pmode,
18095		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18096	    tmp = gen_rtx_MEM (QImode, tmp);
18097	    xops[0] = tmp;
18098	    output_asm_insn ("jmp\t%0", xops);
18099	  }
18100	else
18101#endif /* TARGET_MACHO */
18102	{
18103	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18104	  output_set_got (tmp, NULL_RTX);
18105
18106	  xops[1] = tmp;
18107	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18108	  output_asm_insn ("jmp\t{*}%1", xops);
18109	}
18110    }
18111}
18112
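/* For a simple 32-bit, non-PIC thunk with DELTA == 4, no VCALL_OFFSET and
   a stack-passed `this', the routine above emits roughly (the target name
   is made up for the example):

        addl    $4, 4(%esp)
        jmp     some_method

   i.e. `this' is adjusted in place in its stack slot and control is
   transferred to the real method with the caller's return address still
   on the stack.  */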
18113static void
18114x86_file_start (void)
18115{
18116  default_file_start ();
18117#if TARGET_MACHO
18118  darwin_file_start ();
18119#endif
18120  if (X86_FILE_START_VERSION_DIRECTIVE)
18121    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18122  if (X86_FILE_START_FLTUSED)
18123    fputs ("\t.global\t__fltused\n", asm_out_file);
18124  if (ix86_asm_dialect == ASM_INTEL)
18125    fputs ("\t.intel_syntax\n", asm_out_file);
18126}
18127
18128int
18129x86_field_alignment (tree field, int computed)
18130{
18131  enum machine_mode mode;
18132  tree type = TREE_TYPE (field);
18133
18134  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18135    return computed;
18136  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18137		    ? get_inner_array_type (type) : type);
18138  if (mode == DFmode || mode == DCmode
18139      || GET_MODE_CLASS (mode) == MODE_INT
18140      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18141    return MIN (32, computed);
18142  return computed;
18143}
18144
18145/* Output assembler code to FILE to increment profiler label # LABELNO
18146   for profiling a function entry.  */
18147void
18148x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18149{
18150  if (TARGET_64BIT)
18151    if (flag_pic)
18152      {
18153#ifndef NO_PROFILE_COUNTERS
18154	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18155#endif
18156	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18157      }
18158    else
18159      {
18160#ifndef NO_PROFILE_COUNTERS
18161	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18162#endif
18163	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18164      }
18165  else if (flag_pic)
18166    {
18167#ifndef NO_PROFILE_COUNTERS
18168      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18169	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18170#endif
18171      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18172    }
18173  else
18174    {
18175#ifndef NO_PROFILE_COUNTERS
18176      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18177	       PROFILE_COUNT_REGISTER);
18178#endif
18179      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18180    }
18181}
18182
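/* As an illustration, for 32-bit non-PIC code and assuming LPREFIX is
   ".L", MCOUNT_NAME is "mcount" and PROFILE_COUNT_REGISTER is "edx" (all
   of these are target-configurable), the output above is roughly:

        movl    $.LP0, %edx
        call    mcount
*/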
18183/* We don't have exact information about the insn sizes, but we may assume
18184   quite safely that we are informed about all 1 byte insns and memory
18185   address sizes.  This is enough to eliminate unnecessary padding in
18186   99% of cases.  */
18187
18188static int
18189min_insn_size (rtx insn)
18190{
18191  int l = 0;
18192
18193  if (!INSN_P (insn) || !active_insn_p (insn))
18194    return 0;
18195
18196  /* Discard alignments we've emitted, and jump instructions.  */
18197  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18198      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18199    return 0;
18200  if (GET_CODE (insn) == JUMP_INSN
18201      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18202	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18203    return 0;
18204
18205  /* Important case - calls are always 5 bytes.
18206     It is common to have many calls in a row.  */
18207  if (GET_CODE (insn) == CALL_INSN
18208      && symbolic_reference_mentioned_p (PATTERN (insn))
18209      && !SIBLING_CALL_P (insn))
18210    return 5;
18211  if (get_attr_length (insn) <= 1)
18212    return 1;
18213
18214  /* For normal instructions we may rely on the sizes of addresses
18215     and the presence of a symbol to require 4 bytes of encoding.
18216     This is not the case for jumps where references are PC relative.  */
18217  if (GET_CODE (insn) != JUMP_INSN)
18218    {
18219      l = get_attr_length_address (insn);
18220      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18221	l = 4;
18222    }
18223  if (l)
18224    return 1+l;
18225  else
18226    return 2;
18227}
18228
18229/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18230   window.  */
18231
18232static void
18233ix86_avoid_jump_misspredicts (void)
18234{
18235  rtx insn, start = get_insns ();
18236  int nbytes = 0, njumps = 0;
18237  int isjump = 0;
18238
18239  /* Look for all minimal intervals of instructions containing 4 jumps.
18240     The intervals are bounded by START and INSN.  NBYTES is the total
18241     size of instructions in the interval including INSN and not including
18242     START.  When the NBYTES is smaller than 16 bytes, it is possible
18243     START.  When NBYTES is smaller than 16 bytes, it is possible
18244     that the ends of START and INSN fall in the same 16-byte page.
18245
18246     The smallest offset in the page at which INSN can start is the case where
18247     START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
18248     We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
18249  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18250    {
18251
18252      nbytes += min_insn_size (insn);
18253      if (dump_file)
18254        fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18255		INSN_UID (insn), min_insn_size (insn));
18256      if ((GET_CODE (insn) == JUMP_INSN
18257	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
18258	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18259	  || GET_CODE (insn) == CALL_INSN)
18260	njumps++;
18261      else
18262	continue;
18263
18264      while (njumps > 3)
18265	{
18266	  start = NEXT_INSN (start);
18267	  if ((GET_CODE (start) == JUMP_INSN
18268	       && GET_CODE (PATTERN (start)) != ADDR_VEC
18269	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18270	      || GET_CODE (start) == CALL_INSN)
18271	    njumps--, isjump = 1;
18272	  else
18273	    isjump = 0;
18274	  nbytes -= min_insn_size (start);
18275	}
18276      gcc_assert (njumps >= 0);
18277      if (dump_file)
18278        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18279		INSN_UID (start), INSN_UID (insn), nbytes);
18280
18281      if (njumps == 3 && isjump && nbytes < 16)
18282	{
18283	  int padsize = 15 - nbytes + min_insn_size (insn);
18284
18285	  if (dump_file)
18286	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18287		     INSN_UID (insn), padsize);
18288          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18289	}
18290    }
18291}
18292
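/* A worked example of the padding arithmetic above (numbers are
   illustrative): if the window START..INSN holds four branches and its
   estimated size NBYTES is 13, with INSN itself estimated at 2 bytes,
   the pass emits an align of padsize = 15 - 13 + 2 = 4 bytes before
   INSN, nudging the fourth branch towards the next 16-byte window so
   that no window needs to predict more than three branches.  */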
18293/* The AMD Athlon works faster when RET is not the destination of a
18294   conditional jump or directly preceded by another jump instruction.
18295   We avoid the penalty by inserting a NOP just before the RET
18296   instruction in such cases.  */
18297static void
18298ix86_pad_returns (void)
18299{
18300  edge e;
18301  edge_iterator ei;
18302
18303  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18304    {
18305      basic_block bb = e->src;
18306      rtx ret = BB_END (bb);
18307      rtx prev;
18308      bool replace = false;
18309
18310      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18311	  || !maybe_hot_bb_p (bb))
18312	continue;
18313      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18314	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18315	  break;
18316      if (prev && GET_CODE (prev) == CODE_LABEL)
18317	{
18318	  edge e;
18319	  edge_iterator ei;
18320
18321	  FOR_EACH_EDGE (e, ei, bb->preds)
18322	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
18323		&& !(e->flags & EDGE_FALLTHRU))
18324	      replace = true;
18325	}
18326      if (!replace)
18327	{
18328	  prev = prev_active_insn (ret);
18329	  if (prev
18330	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18331		  || GET_CODE (prev) == CALL_INSN))
18332	    replace = true;
18333	  /* Empty functions get a branch mispredict even when the jump destination
18334	     is not visible to us.  */
18335	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18336	    replace = true;
18337	}
18338      if (replace)
18339	{
18340	  emit_insn_before (gen_return_internal_long (), ret);
18341	  delete_insn (ret);
18342	}
18343    }
18344}
18345
18346/* Implement machine specific optimizations.  We implement padding of returns
18347   for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
18348static void
18349ix86_reorg (void)
18350{
18351  if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18352    ix86_pad_returns ();
18353  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18354    ix86_avoid_jump_misspredicts ();
18355}
18356
18357/* Return nonzero when a QImode register that must be represented via a REX
18358   prefix is used.  */
18359bool
18360x86_extended_QIreg_mentioned_p (rtx insn)
18361{
18362  int i;
18363  extract_insn_cached (insn);
18364  for (i = 0; i < recog_data.n_operands; i++)
18365    if (REG_P (recog_data.operand[i])
18366	&& REGNO (recog_data.operand[i]) >= 4)
18367       return true;
18368  return false;
18369}
18370
18371/* Return nonzero when P points to a register encoded via a REX prefix.
18372   Called via for_each_rtx.  */
18373static int
18374extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18375{
18376   unsigned int regno;
18377   if (!REG_P (*p))
18378     return 0;
18379   regno = REGNO (*p);
18380   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18381}
18382
18383/* Return true when INSN mentions a register that must be encoded using a
18384   REX prefix.  */
18385bool
18386x86_extended_reg_mentioned_p (rtx insn)
18387{
18388  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18389}
18390
18391/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
18392   optabs would emit if we didn't have TFmode patterns.  */
18393
18394void
18395x86_emit_floatuns (rtx operands[2])
18396{
18397  rtx neglab, donelab, i0, i1, f0, in, out;
18398  enum machine_mode mode, inmode;
18399
18400  inmode = GET_MODE (operands[1]);
18401  gcc_assert (inmode == SImode || inmode == DImode);
18402
18403  out = operands[0];
18404  in = force_reg (inmode, operands[1]);
18405  mode = GET_MODE (out);
18406  neglab = gen_label_rtx ();
18407  donelab = gen_label_rtx ();
18408  i1 = gen_reg_rtx (Pmode);
18409  f0 = gen_reg_rtx (mode);
18410
18411  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18412
18413  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18414  emit_jump_insn (gen_jump (donelab));
18415  emit_barrier ();
18416
18417  emit_label (neglab);
18418
18419  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18420  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18421  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18422  expand_float (f0, i0, 0);
18423  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18424
18425  emit_label (donelab);
18426}
18427
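/* The expansion above corresponds to the following C-level sketch for a
   64-bit input (the variable names are arbitrary).  A nonnegative input
   is converted directly; a large input is halved with the low bit kept
   sticky, converted, and doubled back:

     double d;
     if ((long long) u >= 0)
       d = (double) u;
     else
       {
         unsigned long long h = (u >> 1) | (u & 1);
         d = (double) h;
         d = d + d;
       }
*/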
18428/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18429   with all elements equal to VAL.  Return true if successful.  */
18430
18431static bool
18432ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18433				   rtx target, rtx val)
18434{
18435  enum machine_mode smode, wsmode, wvmode;
18436  rtx x;
18437
18438  switch (mode)
18439    {
18440    case V2SImode:
18441    case V2SFmode:
18442      if (!mmx_ok)
18443	return false;
18444      /* FALLTHRU */
18445
18446    case V2DFmode:
18447    case V2DImode:
18448    case V4SFmode:
18449    case V4SImode:
18450      val = force_reg (GET_MODE_INNER (mode), val);
18451      x = gen_rtx_VEC_DUPLICATE (mode, val);
18452      emit_insn (gen_rtx_SET (VOIDmode, target, x));
18453      return true;
18454
18455    case V4HImode:
18456      if (!mmx_ok)
18457	return false;
18458      if (TARGET_SSE || TARGET_3DNOW_A)
18459	{
18460	  val = gen_lowpart (SImode, val);
18461	  x = gen_rtx_TRUNCATE (HImode, val);
18462	  x = gen_rtx_VEC_DUPLICATE (mode, x);
18463	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
18464	  return true;
18465	}
18466      else
18467	{
18468	  smode = HImode;
18469	  wsmode = SImode;
18470	  wvmode = V2SImode;
18471	  goto widen;
18472	}
18473
18474    case V8QImode:
18475      if (!mmx_ok)
18476	return false;
18477      smode = QImode;
18478      wsmode = HImode;
18479      wvmode = V4HImode;
18480      goto widen;
18481    case V8HImode:
18482      if (TARGET_SSE2)
18483	{
18484	  rtx tmp1, tmp2;
18485	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
18486	  tmp1 = gen_reg_rtx (SImode);
18487	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
18488	  /* Insert the SImode value as the low element of a V4SImode vector.  */
18489	  tmp2 = gen_reg_rtx (V4SImode);
18490	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18491				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18492				    CONST0_RTX (V4SImode),
18493				    const1_rtx);
18494	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18495	  /* Cast the V4SImode vector back to a V8HImode vector.  */
18496	  tmp1 = gen_reg_rtx (V8HImode);
18497	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18498	  /* Duplicate the low short through the whole low SImode word.  */
18499	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18500	  /* Cast the V8HImode vector back to a V4SImode vector.  */
18501	  tmp2 = gen_reg_rtx (V4SImode);
18502	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18503	  /* Replicate the low element of the V4SImode vector.  */
18504	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18505	  /* Cast the V4SImode vector back to a V8HImode vector, and store in target.  */
18506	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18507	  return true;
18508	}
18509      smode = HImode;
18510      wsmode = SImode;
18511      wvmode = V4SImode;
18512      goto widen;
18513    case V16QImode:
18514      if (TARGET_SSE2)
18515	{
18516	  rtx tmp1, tmp2;
18517	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
18518	  tmp1 = gen_reg_rtx (SImode);
18519	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
18520	  /* Insert the SImode value as the low element of a V4SImode vector.  */
18521	  tmp2 = gen_reg_rtx (V4SImode);
18522	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18523				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18524				    CONST0_RTX (V4SImode),
18525				    const1_rtx);
18526	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18527	  /* Cast the V4SImode vector back to a V16QImode vector.  */
18528	  tmp1 = gen_reg_rtx (V16QImode);
18529	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18530	  /* Duplicate the low byte through the whole low SImode word.  */
18531	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18532	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18533	  /* Cast the V16QImode vector back to a V4SImode vector.  */
18534	  tmp2 = gen_reg_rtx (V4SImode);
18535	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18536	  /* Replicate the low element of the V4SImode vector.  */
18537	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18538	  /* Cast the V4SImode vector back to a V16QImode vector, and store in target.  */
18539	  emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18540	  return true;
18541	}
18542      smode = QImode;
18543      wsmode = HImode;
18544      wvmode = V8HImode;
18545      goto widen;
18546    widen:
18547      /* Replicate the value once into the next wider mode and recurse.  */
18548      val = convert_modes (wsmode, smode, val, true);
18549      x = expand_simple_binop (wsmode, ASHIFT, val,
18550			       GEN_INT (GET_MODE_BITSIZE (smode)),
18551			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
18552      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18553
18554      x = gen_reg_rtx (wvmode);
18555      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18556	gcc_unreachable ();
18557      emit_move_insn (target, gen_lowpart (mode, x));
18558      return true;
18559
18560    default:
18561      return false;
18562    }
18563}
18564
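/* Sketch of the `widen' path above for a V8QImode broadcast without SSE2
   (names are illustrative): the QImode value V is first replicated into
   an HImode word, conceptually

     unsigned short w = ((unsigned short) v << 8) | v;

   after which the function recurses to broadcast W across V4HImode, and
   the resulting vector is reinterpreted as V8QImode via gen_lowpart.  */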
18565/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18566   whose ONE_VAR element is VAR, and other elements are zero.  Return true
18567   if successful.  */
18568
18569static bool
18570ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18571				     rtx target, rtx var, int one_var)
18572{
18573  enum machine_mode vsimode;
18574  rtx new_target;
18575  rtx x, tmp;
18576
18577  switch (mode)
18578    {
18579    case V2SFmode:
18580    case V2SImode:
18581      if (!mmx_ok)
18582	return false;
18583      /* FALLTHRU */
18584
18585    case V2DFmode:
18586    case V2DImode:
18587      if (one_var != 0)
18588	return false;
18589      var = force_reg (GET_MODE_INNER (mode), var);
18590      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18591      emit_insn (gen_rtx_SET (VOIDmode, target, x));
18592      return true;
18593
18594    case V4SFmode:
18595    case V4SImode:
18596      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18597	new_target = gen_reg_rtx (mode);
18598      else
18599	new_target = target;
18600      var = force_reg (GET_MODE_INNER (mode), var);
18601      x = gen_rtx_VEC_DUPLICATE (mode, var);
18602      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18603      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18604      if (one_var != 0)
18605	{
18606	  /* We need to shuffle the value to the correct position, so
18607	     create a new pseudo to store the intermediate result.  */
18608
18609	  /* With SSE2, we can use the integer shuffle insns.  */
18610	  if (mode != V4SFmode && TARGET_SSE2)
18611	    {
18612	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18613					    GEN_INT (1),
18614					    GEN_INT (one_var == 1 ? 0 : 1),
18615					    GEN_INT (one_var == 2 ? 0 : 1),
18616					    GEN_INT (one_var == 3 ? 0 : 1)));
18617	      if (target != new_target)
18618		emit_move_insn (target, new_target);
18619	      return true;
18620	    }
18621
18622	  /* Otherwise convert the intermediate result to V4SFmode and
18623	     use the SSE1 shuffle instructions.  */
18624	  if (mode != V4SFmode)
18625	    {
18626	      tmp = gen_reg_rtx (V4SFmode);
18627	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18628	    }
18629	  else
18630	    tmp = new_target;
18631
18632	  emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18633				       GEN_INT (1),
18634				       GEN_INT (one_var == 1 ? 0 : 1),
18635				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
18636				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18637
18638	  if (mode != V4SFmode)
18639	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18640	  else if (tmp != target)
18641	    emit_move_insn (target, tmp);
18642	}
18643      else if (target != new_target)
18644	emit_move_insn (target, new_target);
18645      return true;
18646
18647    case V8HImode:
18648    case V16QImode:
18649      vsimode = V4SImode;
18650      goto widen;
18651    case V4HImode:
18652    case V8QImode:
18653      if (!mmx_ok)
18654	return false;
18655      vsimode = V2SImode;
18656      goto widen;
18657    widen:
18658      if (one_var != 0)
18659	return false;
18660
18661      /* Zero extend the variable element to SImode and recurse.  */
18662      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18663
18664      x = gen_reg_rtx (vsimode);
18665      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18666						var, one_var))
18667	gcc_unreachable ();
18668
18669      emit_move_insn (target, gen_lowpart (mode, x));
18670      return true;
18671
18672    default:
18673      return false;
18674    }
18675}
18676
18677/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18678   consisting of the values in VALS.  It is known that all elements
18679   except ONE_VAR are constants.  Return true if successful.  */
18680
18681static bool
18682ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18683				 rtx target, rtx vals, int one_var)
18684{
18685  rtx var = XVECEXP (vals, 0, one_var);
18686  enum machine_mode wmode;
18687  rtx const_vec, x;
18688
18689  const_vec = copy_rtx (vals);
18690  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18691  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18692
18693  switch (mode)
18694    {
18695    case V2DFmode:
18696    case V2DImode:
18697    case V2SFmode:
18698    case V2SImode:
18699      /* For the two element vectors, it's just as easy to use
18700	 the general case.  */
18701      return false;
18702
18703    case V4SFmode:
18704    case V4SImode:
18705    case V8HImode:
18706    case V4HImode:
18707      break;
18708
18709    case V16QImode:
18710      wmode = V8HImode;
18711      goto widen;
18712    case V8QImode:
18713      wmode = V4HImode;
18714      goto widen;
18715    widen:
18716      /* There's no way to set one QImode entry easily.  Combine
18717	 the variable value with its adjacent constant value, and
18718	 promote to an HImode set.  */
18719      x = XVECEXP (vals, 0, one_var ^ 1);
18720      if (one_var & 1)
18721	{
18722	  var = convert_modes (HImode, QImode, var, true);
18723	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18724				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
18725	  x = GEN_INT (INTVAL (x) & 0xff);
18726	}
18727      else
18728	{
18729	  var = convert_modes (HImode, QImode, var, true);
18730	  x = gen_int_mode (INTVAL (x) << 8, HImode);
18731	}
18732      if (x != const0_rtx)
18733	var = expand_simple_binop (HImode, IOR, var, x, var,
18734				   1, OPTAB_LIB_WIDEN);
18735
18736      x = gen_reg_rtx (wmode);
18737      emit_move_insn (x, gen_lowpart (wmode, const_vec));
18738      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18739
18740      emit_move_insn (target, gen_lowpart (mode, x));
18741      return true;
18742
18743    default:
18744      return false;
18745    }
18746
18747  emit_move_insn (target, const_vec);
18748  ix86_expand_vector_set (mmx_ok, target, var, one_var);
18749  return true;
18750}
18751
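/* Example of the widen path above (values are illustrative): for a
   V8QImode vector whose only variable element is index 3, the variable
   byte is zero-extended to HImode, combined with its even neighbour as

     (var << 8) | (elt2 & 0xff)

   and the combined HImode value is stored into element 3 >> 1 == 1 of
   the V4HImode view of the constant vector before the final lowpart
   move back to V8QImode.  */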
18752/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
18753   all values variable, and none identical.  */
18754
18755static void
18756ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18757				 rtx target, rtx vals)
18758{
18759  enum machine_mode half_mode = GET_MODE_INNER (mode);
18760  rtx op0 = NULL, op1 = NULL;
18761  bool use_vec_concat = false;
18762
18763  switch (mode)
18764    {
18765    case V2SFmode:
18766    case V2SImode:
18767      if (!mmx_ok && !TARGET_SSE)
18768	break;
18769      /* FALLTHRU */
18770
18771    case V2DFmode:
18772    case V2DImode:
18773      /* For the two element vectors, we always implement VEC_CONCAT.  */
18774      op0 = XVECEXP (vals, 0, 0);
18775      op1 = XVECEXP (vals, 0, 1);
18776      use_vec_concat = true;
18777      break;
18778
18779    case V4SFmode:
18780      half_mode = V2SFmode;
18781      goto half;
18782    case V4SImode:
18783      half_mode = V2SImode;
18784      goto half;
18785    half:
18786      {
18787	rtvec v;
18788
18789	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
18790	   Recurse to load the two halves.  */
18791
18792	op0 = gen_reg_rtx (half_mode);
18793	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18794	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18795
18796	op1 = gen_reg_rtx (half_mode);
18797	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18798	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18799
18800	use_vec_concat = true;
18801      }
18802      break;
18803
18804    case V8HImode:
18805    case V16QImode:
18806    case V4HImode:
18807    case V8QImode:
18808      break;
18809
18810    default:
18811      gcc_unreachable ();
18812    }
18813
18814  if (use_vec_concat)
18815    {
18816      if (!register_operand (op0, half_mode))
18817	op0 = force_reg (half_mode, op0);
18818      if (!register_operand (op1, half_mode))
18819	op1 = force_reg (half_mode, op1);
18820
18821      emit_insn (gen_rtx_SET (VOIDmode, target,
18822			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
18823    }
18824  else
18825    {
18826      int i, j, n_elts, n_words, n_elt_per_word;
18827      enum machine_mode inner_mode;
18828      rtx words[4], shift;
18829
18830      inner_mode = GET_MODE_INNER (mode);
18831      n_elts = GET_MODE_NUNITS (mode);
18832      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18833      n_elt_per_word = n_elts / n_words;
18834      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18835
18836      for (i = 0; i < n_words; ++i)
18837	{
18838	  rtx word = NULL_RTX;
18839
18840	  for (j = 0; j < n_elt_per_word; ++j)
18841	    {
18842	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18843	      elt = convert_modes (word_mode, inner_mode, elt, true);
18844
18845	      if (j == 0)
18846		word = elt;
18847	      else
18848		{
18849		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18850					      word, 1, OPTAB_LIB_WIDEN);
18851		  word = expand_simple_binop (word_mode, IOR, word, elt,
18852					      word, 1, OPTAB_LIB_WIDEN);
18853		}
18854	    }
18855
18856	  words[i] = word;
18857	}
18858
18859      if (n_words == 1)
18860	emit_move_insn (target, gen_lowpart (mode, words[0]));
18861      else if (n_words == 2)
18862	{
18863	  rtx tmp = gen_reg_rtx (mode);
18864	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18865	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18866	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18867	  emit_move_insn (target, tmp);
18868	}
18869      else if (n_words == 4)
18870	{
18871	  rtx tmp = gen_reg_rtx (V4SImode);
18872	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18873	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18874	  emit_move_insn (target, gen_lowpart (mode, tmp));
18875	}
18876      else
18877	gcc_unreachable ();
18878    }
18879}
18880
18881/* Initialize vector TARGET via VALS.  Suppress the use of MMX
18882   instructions unless MMX_OK is true.  */
18883
18884void
18885ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18886{
18887  enum machine_mode mode = GET_MODE (target);
18888  enum machine_mode inner_mode = GET_MODE_INNER (mode);
18889  int n_elts = GET_MODE_NUNITS (mode);
18890  int n_var = 0, one_var = -1;
18891  bool all_same = true, all_const_zero = true;
18892  int i;
18893  rtx x;
18894
18895  for (i = 0; i < n_elts; ++i)
18896    {
18897      x = XVECEXP (vals, 0, i);
18898      if (!CONSTANT_P (x))
18899	n_var++, one_var = i;
18900      else if (x != CONST0_RTX (inner_mode))
18901	all_const_zero = false;
18902      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18903	all_same = false;
18904    }
18905
18906  /* Constants are best loaded from the constant pool.  */
18907  if (n_var == 0)
18908    {
18909      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18910      return;
18911    }
18912
18913  /* If all values are identical, broadcast the value.  */
18914  if (all_same
18915      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18916					    XVECEXP (vals, 0, 0)))
18917    return;
18918
18919  /* Values where only one field is non-constant are best loaded from
18920     the pool and overwritten via move later.  */
18921  if (n_var == 1)
18922    {
18923      if (all_const_zero
18924	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18925						  XVECEXP (vals, 0, one_var),
18926						  one_var))
18927	return;
18928
18929      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18930	return;
18931    }
18932
18933  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18934}
18935
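/* A typical call into the routine above (a minimal sketch; the operand
   names are made up) first wraps the element rtxes in a PARALLEL:

     rtx target = gen_reg_rtx (V4SImode);
     rtvec v = gen_rtvec (4, op0, op1, op2, op3);
     ix86_expand_vector_init (false, target,
                              gen_rtx_PARALLEL (V4SImode, v));

   With all four elements constant this collapses to a single constant
   pool load; with exactly one variable element the *_one_nonzero or
   *_one_var helpers are tried before the general expansion.  */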
18936void
18937ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18938{
18939  enum machine_mode mode = GET_MODE (target);
18940  enum machine_mode inner_mode = GET_MODE_INNER (mode);
18941  bool use_vec_merge = false;
18942  rtx tmp;
18943
18944  switch (mode)
18945    {
18946    case V2SFmode:
18947    case V2SImode:
18948      if (mmx_ok)
18949	{
18950	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18951	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18952	  if (elt == 0)
18953	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18954	  else
18955	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18956	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18957	  return;
18958	}
18959      break;
18960
18961    case V2DFmode:
18962    case V2DImode:
18963      {
18964	rtx op0, op1;
18965
18966	/* For the two element vectors, we implement a VEC_CONCAT with
18967	   the extraction of the other element.  */
18968
18969	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18970	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18971
18972	if (elt == 0)
18973	  op0 = val, op1 = tmp;
18974	else
18975	  op0 = tmp, op1 = val;
18976
18977	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18978	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18979      }
18980      return;
18981
18982    case V4SFmode:
18983      switch (elt)
18984	{
18985	case 0:
18986	  use_vec_merge = true;
18987	  break;
18988
18989	case 1:
18990	  /* tmp = target = A B C D */
18991	  tmp = copy_to_reg (target);
18992	  /* target = A A B B */
18993	  emit_insn (gen_sse_unpcklps (target, target, target));
18994	  /* target = X A B B */
18995	  ix86_expand_vector_set (false, target, val, 0);
18996	  /* target = A X C D  */
18997	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
18998				       GEN_INT (1), GEN_INT (0),
18999				       GEN_INT (2+4), GEN_INT (3+4)));
19000	  return;
19001
19002	case 2:
19003	  /* tmp = target = A B C D */
19004	  tmp = copy_to_reg (target);
19005	  /* tmp = X B C D */
19006	  ix86_expand_vector_set (false, tmp, val, 0);
19007	  /* target = A B X D */
19008	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
19009				       GEN_INT (0), GEN_INT (1),
19010				       GEN_INT (0+4), GEN_INT (3+4)));
19011	  return;
19012
19013	case 3:
19014	  /* tmp = target = A B C D */
19015	  tmp = copy_to_reg (target);
19016	  /* tmp = X B C D */
19017	  ix86_expand_vector_set (false, tmp, val, 0);
19018	  /* target = A B C X */
19019	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
19020				       GEN_INT (0), GEN_INT (1),
19021				       GEN_INT (2+4), GEN_INT (0+4)));
19022	  return;
19023
19024	default:
19025	  gcc_unreachable ();
19026	}
19027      break;
19028
19029    case V4SImode:
19030      /* Element 0 handled by vec_merge below.  */
19031      if (elt == 0)
19032	{
19033	  use_vec_merge = true;
19034	  break;
19035	}
19036
19037      if (TARGET_SSE2)
19038	{
19039	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
19040	     store into element 0, then shuffle them back.  */
19041
19042	  rtx order[4];
19043
19044	  order[0] = GEN_INT (elt);
19045	  order[1] = const1_rtx;
19046	  order[2] = const2_rtx;
19047	  order[3] = GEN_INT (3);
19048	  order[elt] = const0_rtx;
19049
19050	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19051					order[1], order[2], order[3]));
19052
19053	  ix86_expand_vector_set (false, target, val, 0);
19054
19055	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19056					order[1], order[2], order[3]));
19057	}
19058      else
19059	{
19060	  /* For SSE1, we have to reuse the V4SF code.  */
19061	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19062				  gen_lowpart (SFmode, val), elt);
19063	}
19064      return;
19065
19066    case V8HImode:
19067      use_vec_merge = TARGET_SSE2;
19068      break;
19069    case V4HImode:
19070      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19071      break;
19072
19073    case V16QImode:
19074    case V8QImode:
19075    default:
19076      break;
19077    }
19078
19079  if (use_vec_merge)
19080    {
19081      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19082      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19083      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19084    }
19085  else
19086    {
19087      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19088
19089      emit_move_insn (mem, target);
19090
19091      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19092      emit_move_insn (tmp, val);
19093
19094      emit_move_insn (target, mem);
19095    }
19096}
19097
19098void
19099ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19100{
19101  enum machine_mode mode = GET_MODE (vec);
19102  enum machine_mode inner_mode = GET_MODE_INNER (mode);
19103  bool use_vec_extr = false;
19104  rtx tmp;
19105
19106  switch (mode)
19107    {
19108    case V2SImode:
19109    case V2SFmode:
19110      if (!mmx_ok)
19111	break;
19112      /* FALLTHRU */
19113
19114    case V2DFmode:
19115    case V2DImode:
19116      use_vec_extr = true;
19117      break;
19118
19119    case V4SFmode:
19120      switch (elt)
19121	{
19122	case 0:
19123	  tmp = vec;
19124	  break;
19125
19126	case 1:
19127	case 3:
19128	  tmp = gen_reg_rtx (mode);
19129	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19130				       GEN_INT (elt), GEN_INT (elt),
19131				       GEN_INT (elt+4), GEN_INT (elt+4)));
19132	  break;
19133
19134	case 2:
19135	  tmp = gen_reg_rtx (mode);
19136	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19137	  break;
19138
19139	default:
19140	  gcc_unreachable ();
19141	}
19142      vec = tmp;
19143      use_vec_extr = true;
19144      elt = 0;
19145      break;
19146
19147    case V4SImode:
19148      if (TARGET_SSE2)
19149	{
19150	  switch (elt)
19151	    {
19152	    case 0:
19153	      tmp = vec;
19154	      break;
19155
19156	    case 1:
19157	    case 3:
19158	      tmp = gen_reg_rtx (mode);
19159	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19160					    GEN_INT (elt), GEN_INT (elt),
19161					    GEN_INT (elt), GEN_INT (elt)));
19162	      break;
19163
19164	    case 2:
19165	      tmp = gen_reg_rtx (mode);
19166	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19167	      break;
19168
19169	    default:
19170	      gcc_unreachable ();
19171	    }
19172	  vec = tmp;
19173	  use_vec_extr = true;
19174	  elt = 0;
19175	}
19176      else
19177	{
19178	  /* For SSE1, we have to reuse the V4SF code.  */
19179	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19180				      gen_lowpart (V4SFmode, vec), elt);
19181	  return;
19182	}
19183      break;
19184
19185    case V8HImode:
19186      use_vec_extr = TARGET_SSE2;
19187      break;
19188    case V4HImode:
19189      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19190      break;
19191
19192    case V16QImode:
19193    case V8QImode:
19194      /* ??? Could extract the appropriate HImode element and shift.  */
19195    default:
19196      break;
19197    }
19198
19199  if (use_vec_extr)
19200    {
19201      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19202      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19203
19204      /* Let the rtl optimizers know about the zero extension performed.  */
19205      if (inner_mode == HImode)
19206	{
19207	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19208	  target = gen_lowpart (SImode, target);
19209	}
19210
19211      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19212    }
19213  else
19214    {
19215      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19216
19217      emit_move_insn (mem, vec);
19218
19219      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19220      emit_move_insn (target, tmp);
19221    }
19222}
19223
19224/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
19225   pattern to reduce; DEST is the destination; IN is the input vector.  */
19226
19227void
19228ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19229{
19230  rtx tmp1, tmp2, tmp3;
19231
19232  tmp1 = gen_reg_rtx (V4SFmode);
19233  tmp2 = gen_reg_rtx (V4SFmode);
19234  tmp3 = gen_reg_rtx (V4SFmode);
19235
19236  emit_insn (gen_sse_movhlps (tmp1, in, in));
19237  emit_insn (fn (tmp2, tmp1, in));
19238
19239  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19240			       GEN_INT (1), GEN_INT (1),
19241			       GEN_INT (1+4), GEN_INT (1+4)));
19242  emit_insn (fn (dest, tmp2, tmp3));
19243}
19244
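/* Worked example for the reduction above with IN = [a b c d] and FN an
   addition pattern:

     movhlps:  tmp1 = [c d c d]
     fn:       tmp2 = [a+c b+d . .]
     shufps:   tmp3 = [b+d b+d b+d b+d]
     fn:       dest = [a+b+c+d . . .]

   Only element 0 of DEST is meaningful; the remaining lanes are
   don't-cares.  */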
19245/* Target hook for scalar_mode_supported_p.  */
19246static bool
19247ix86_scalar_mode_supported_p (enum machine_mode mode)
19248{
19249  if (DECIMAL_FLOAT_MODE_P (mode))
19250    return true;
19251  else
19252    return default_scalar_mode_supported_p (mode);
19253}
19254
19255/* Implements target hook vector_mode_supported_p.  */
19256static bool
19257ix86_vector_mode_supported_p (enum machine_mode mode)
19258{
19259  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19260    return true;
19261  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19262    return true;
19263  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19264    return true;
19265  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19266    return true;
19267  return false;
19268}
19269
19270/* Worker function for TARGET_MD_ASM_CLOBBERS.
19271
19272   We do this in the new i386 backend to maintain source compatibility
19273   with the old cc0-based compiler.  */
19274
19275static tree
19276ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19277		      tree inputs ATTRIBUTE_UNUSED,
19278		      tree clobbers)
19279{
19280  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19281			clobbers);
19282  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19283			clobbers);
19284  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19285			clobbers);
19286  return clobbers;
19287}
19288
19289/* Return true if this goes in large data/bss.  */
19290
19291static bool
19292ix86_in_large_data_p (tree exp)
19293{
19294  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19295    return false;
19296
19297  /* Functions are never large data.  */
19298  if (TREE_CODE (exp) == FUNCTION_DECL)
19299    return false;
19300
19301  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19302    {
19303      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19304      if (strcmp (section, ".ldata") == 0
19305	  || strcmp (section, ".lbss") == 0)
19306	return true;
19307      return false;
19308    }
19309  else
19310    {
19311      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19312
19313      /* If this is an incomplete type with size 0, then we can't put it
19314	 in data because it might be too big when completed.  */
19315      if (!size || size > ix86_section_threshold)
19316	return true;
19317    }
19318
19319  return false;
19320}
static void
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
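
/* For example, GT simply becomes LE for integer flags, but for CCFPmode
   and CCFPUmode comparisons it has to become UNLE, so that a NaN operand,
   which makes the original comparison false, still takes the reversed
   branch.  */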

/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
    {
      if (REGNO (operands[0]) == FIRST_STACK_REG)
	return output_387_ffreep (operands, 0);
      return "fstp\t%y0";
    }
  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
  return "fst\t%y0";
}
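
/* Roughly: a move whose source register dies here is emitted as a popping
   store (fstp, or an ffreep of %st(0) when that is also the destination),
   a move to the top of the stack is a load (fld), and anything else is a
   plain fst store.  */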

/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
}
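
/* The C2 condition code is bit 10 of the FP status word, i.e. bit 2 of
   the high byte that fnstsw leaves in %ah.  With SAHF that bit becomes
   the parity flag, hence the UNORDERED test on the flags register;
   without SAHF we simply test the 0x04 mask against the high byte.  */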

/* Output code to perform a log1p XFmode calculation.  */

void
ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode)));
  emit_jump_insn (gen_bge (label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
}
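
/* The comparison threshold is 1 - sqrt(2)/2, the bound below which the
   fyl2xp1 instruction is defined; in that range computing log2(1 + x)
   directly avoids the cancellation that forming 1 + x explicitly would
   cause for small x.  Outside the range we add 1 and use fyl2x instead.
   Both paths multiply by the fldln2 constant, so the result is
   ln(2) * log2(1 + x) = log1p(x).  */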

/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_fundamental_type (tree type)
{
  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
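
/* These codes come from the Itanium C++ ABI, where "e" denotes the
   80-bit extended type used for long double on x86 and "g" denotes
   __float128.  For instance, void f (long double) mangles as _Z1fe and
   void f (__float128) as _Z1fg.  */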

/* For 32-bit code we can avoid setting up the PIC register by calling
   the hidden function __stack_chk_fail_local instead of calling
   __stack_chk_fail directly.  64-bit code does not need to set up a PIC
   register at all, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
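
/* For example, 32-bit PIC code always gets DW_EH_PE_pcrel | DW_EH_PE_sdata4
   (with DW_EH_PE_indirect added when the symbol may be preempted at run
   time), while 64-bit medium-model PIC keeps the eight byte sdata8 form
   for local data references that could live far from the text segment.
   Without PIC, the small code model can use plain udata4 and the
   remaining cases fall back to an absolute pointer.  */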

#include "gt-i386.h"