1/* Subroutines used for code generation on IA-32.
2   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING.  If not, write to
19the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA.  */
21
22/* $FreeBSD: releng/9.3/contrib/gcc/config/i386/i386.c 260075 2013-12-30 03:40:16Z pfg $ */
23
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tree.h"
30#include "tm_p.h"
31#include "regs.h"
32#include "hard-reg-set.h"
33#include "real.h"
34#include "insn-config.h"
35#include "conditions.h"
36#include "output.h"
37#include "insn-codes.h"
38#include "insn-attr.h"
39#include "flags.h"
40#include "except.h"
41#include "function.h"
42#include "recog.h"
43#include "expr.h"
44#include "optabs.h"
45#include "toplev.h"
46#include "basic-block.h"
47#include "ggc.h"
48#include "target.h"
49#include "target-def.h"
50#include "langhooks.h"
51#include "cgraph.h"
52#include "tree-gimple.h"
53#include "dwarf2.h"
54#include "tm-constrs.h"
55
56#ifndef CHECK_STACK_LIMIT
57#define CHECK_STACK_LIMIT (-1)
58#endif
59
60/* Return the index of the given mode in the mult and division cost tables.  */
61#define MODE_INDEX(mode)					\
62  ((mode) == QImode ? 0						\
63   : (mode) == HImode ? 1					\
64   : (mode) == SImode ? 2					\
65   : (mode) == DImode ? 3					\
66   : 4)
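
/* Editor's illustration (not part of the original file): MODE_INDEX selects
   the per-mode entry of the 5-element multiply/divide cost arrays in struct
   processor_costs.  Assuming the field names mult_init[] and mult_bit used by
   this GCC version, a multiply cost query looks roughly like

       total = ix86_cost->mult_init[MODE_INDEX (mode)]
               + nbits * ix86_cost->mult_bit;

   QImode..DImode map to slots 0..3 and every other mode falls into the
   "other" slot 4.  */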
67
68/* Processor costs (relative to an add) */
69/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
70#define COSTS_N_BYTES(N) ((N) * 2)
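
/* Editor's note: under the assumption above (COSTS_N_INSNS (N) == (N) * 4 and
   a 2-byte add), the size-cost table below charges COSTS_N_BYTES (2) == 4 ==
   COSTS_N_INSNS (1) for an add, so byte counts and cycle counts end up on the
   same scale, with one add-sized insn as the unit.  */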
71
72static const
73struct processor_costs size_cost = {	/* costs for tuning for size */
74  COSTS_N_BYTES (2),			/* cost of an add instruction */
75  COSTS_N_BYTES (3),			/* cost of a lea instruction */
76  COSTS_N_BYTES (2),			/* variable shift costs */
77  COSTS_N_BYTES (3),			/* constant shift costs */
78  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
79   COSTS_N_BYTES (3),			/*                               HI */
80   COSTS_N_BYTES (3),			/*                               SI */
81   COSTS_N_BYTES (3),			/*                               DI */
82   COSTS_N_BYTES (5)},			/*                            other */
83  0,					/* cost of multiply per each bit set */
84  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
85   COSTS_N_BYTES (3),			/*                          HI */
86   COSTS_N_BYTES (3),			/*                          SI */
87   COSTS_N_BYTES (3),			/*                          DI */
88   COSTS_N_BYTES (5)},			/*                       other */
89  COSTS_N_BYTES (3),			/* cost of movsx */
90  COSTS_N_BYTES (3),			/* cost of movzx */
91  0,					/* "large" insn */
92  2,					/* MOVE_RATIO */
93  2,					/* cost for loading QImode using movzbl */
94  {2, 2, 2},				/* cost of loading integer registers
95					   in QImode, HImode and SImode.
96					   Relative to reg-reg move (2).  */
97  {2, 2, 2},				/* cost of storing integer registers */
98  2,					/* cost of reg,reg fld/fst */
99  {2, 2, 2},				/* cost of loading fp registers
100					   in SFmode, DFmode and XFmode */
101  {2, 2, 2},				/* cost of storing fp registers
102					   in SFmode, DFmode and XFmode */
103  3,					/* cost of moving MMX register */
104  {3, 3},				/* cost of loading MMX registers
105					   in SImode and DImode */
106  {3, 3},				/* cost of storing MMX registers
107					   in SImode and DImode */
108  3,					/* cost of moving SSE register */
109  {3, 3, 3},				/* cost of loading SSE registers
110					   in SImode, DImode and TImode */
111  {3, 3, 3},				/* cost of storing SSE registers
112					   in SImode, DImode and TImode */
113  3,					/* MMX or SSE register to integer */
114  0,					/* size of prefetch block */
115  0,					/* number of parallel prefetches */
116  2,					/* Branch cost */
117  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
118  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
119  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
120  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
121  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
122  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
123};
124
125/* Processor costs (relative to an add) */
126static const
127struct processor_costs i386_cost = {	/* 386 specific costs */
128  COSTS_N_INSNS (1),			/* cost of an add instruction */
129  COSTS_N_INSNS (1),			/* cost of a lea instruction */
130  COSTS_N_INSNS (3),			/* variable shift costs */
131  COSTS_N_INSNS (2),			/* constant shift costs */
132  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
133   COSTS_N_INSNS (6),			/*                               HI */
134   COSTS_N_INSNS (6),			/*                               SI */
135   COSTS_N_INSNS (6),			/*                               DI */
136   COSTS_N_INSNS (6)},			/*                               other */
137  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
138  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
139   COSTS_N_INSNS (23),			/*                          HI */
140   COSTS_N_INSNS (23),			/*                          SI */
141   COSTS_N_INSNS (23),			/*                          DI */
142   COSTS_N_INSNS (23)},			/*                          other */
143  COSTS_N_INSNS (3),			/* cost of movsx */
144  COSTS_N_INSNS (2),			/* cost of movzx */
145  15,					/* "large" insn */
146  3,					/* MOVE_RATIO */
147  4,					/* cost for loading QImode using movzbl */
148  {2, 4, 2},				/* cost of loading integer registers
149					   in QImode, HImode and SImode.
150					   Relative to reg-reg move (2).  */
151  {2, 4, 2},				/* cost of storing integer registers */
152  2,					/* cost of reg,reg fld/fst */
153  {8, 8, 8},				/* cost of loading fp registers
154					   in SFmode, DFmode and XFmode */
155  {8, 8, 8},				/* cost of storing fp registers
156					   in SFmode, DFmode and XFmode */
157  2,					/* cost of moving MMX register */
158  {4, 8},				/* cost of loading MMX registers
159					   in SImode and DImode */
160  {4, 8},				/* cost of storing MMX registers
161					   in SImode and DImode */
162  2,					/* cost of moving SSE register */
163  {4, 8, 16},				/* cost of loading SSE registers
164					   in SImode, DImode and TImode */
165  {4, 8, 16},				/* cost of storing SSE registers
166					   in SImode, DImode and TImode */
167  3,					/* MMX or SSE register to integer */
168  0,					/* size of prefetch block */
169  0,					/* number of parallel prefetches */
170  1,					/* Branch cost */
171  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
172  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
173  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
174  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
175  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
176  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
177};
178
179static const
180struct processor_costs i486_cost = {	/* 486 specific costs */
181  COSTS_N_INSNS (1),			/* cost of an add instruction */
182  COSTS_N_INSNS (1),			/* cost of a lea instruction */
183  COSTS_N_INSNS (3),			/* variable shift costs */
184  COSTS_N_INSNS (2),			/* constant shift costs */
185  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
186   COSTS_N_INSNS (12),			/*                               HI */
187   COSTS_N_INSNS (12),			/*                               SI */
188   COSTS_N_INSNS (12),			/*                               DI */
189   COSTS_N_INSNS (12)},			/*                               other */
190  1,					/* cost of multiply per each bit set */
191  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
192   COSTS_N_INSNS (40),			/*                          HI */
193   COSTS_N_INSNS (40),			/*                          SI */
194   COSTS_N_INSNS (40),			/*                          DI */
195   COSTS_N_INSNS (40)},			/*                          other */
196  COSTS_N_INSNS (3),			/* cost of movsx */
197  COSTS_N_INSNS (2),			/* cost of movzx */
198  15,					/* "large" insn */
199  3,					/* MOVE_RATIO */
200  4,					/* cost for loading QImode using movzbl */
201  {2, 4, 2},				/* cost of loading integer registers
202					   in QImode, HImode and SImode.
203					   Relative to reg-reg move (2).  */
204  {2, 4, 2},				/* cost of storing integer registers */
205  2,					/* cost of reg,reg fld/fst */
206  {8, 8, 8},				/* cost of loading fp registers
207					   in SFmode, DFmode and XFmode */
208  {8, 8, 8},				/* cost of storing fp registers
209					   in SFmode, DFmode and XFmode */
210  2,					/* cost of moving MMX register */
211  {4, 8},				/* cost of loading MMX registers
212					   in SImode and DImode */
213  {4, 8},				/* cost of storing MMX registers
214					   in SImode and DImode */
215  2,					/* cost of moving SSE register */
216  {4, 8, 16},				/* cost of loading SSE registers
217					   in SImode, DImode and TImode */
218  {4, 8, 16},				/* cost of storing SSE registers
219					   in SImode, DImode and TImode */
220  3,					/* MMX or SSE register to integer */
221  0,					/* size of prefetch block */
222  0,					/* number of parallel prefetches */
223  1,					/* Branch cost */
224  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
225  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
226  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
227  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
228  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
229  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
230};
231
232static const
233struct processor_costs pentium_cost = {
234  COSTS_N_INSNS (1),			/* cost of an add instruction */
235  COSTS_N_INSNS (1),			/* cost of a lea instruction */
236  COSTS_N_INSNS (4),			/* variable shift costs */
237  COSTS_N_INSNS (1),			/* constant shift costs */
238  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
239   COSTS_N_INSNS (11),			/*                               HI */
240   COSTS_N_INSNS (11),			/*                               SI */
241   COSTS_N_INSNS (11),			/*                               DI */
242   COSTS_N_INSNS (11)},			/*                               other */
243  0,					/* cost of multiply per each bit set */
244  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
245   COSTS_N_INSNS (25),			/*                          HI */
246   COSTS_N_INSNS (25),			/*                          SI */
247   COSTS_N_INSNS (25),			/*                          DI */
248   COSTS_N_INSNS (25)},			/*                          other */
249  COSTS_N_INSNS (3),			/* cost of movsx */
250  COSTS_N_INSNS (2),			/* cost of movzx */
251  8,					/* "large" insn */
252  6,					/* MOVE_RATIO */
253  6,					/* cost for loading QImode using movzbl */
254  {2, 4, 2},				/* cost of loading integer registers
255					   in QImode, HImode and SImode.
256					   Relative to reg-reg move (2).  */
257  {2, 4, 2},				/* cost of storing integer registers */
258  2,					/* cost of reg,reg fld/fst */
259  {2, 2, 6},				/* cost of loading fp registers
260					   in SFmode, DFmode and XFmode */
261  {4, 4, 6},				/* cost of storing fp registers
262					   in SFmode, DFmode and XFmode */
263  8,					/* cost of moving MMX register */
264  {8, 8},				/* cost of loading MMX registers
265					   in SImode and DImode */
266  {8, 8},				/* cost of storing MMX registers
267					   in SImode and DImode */
268  2,					/* cost of moving SSE register */
269  {4, 8, 16},				/* cost of loading SSE registers
270					   in SImode, DImode and TImode */
271  {4, 8, 16},				/* cost of storing SSE registers
272					   in SImode, DImode and TImode */
273  3,					/* MMX or SSE register to integer */
274  0,					/* size of prefetch block */
275  0,					/* number of parallel prefetches */
276  2,					/* Branch cost */
277  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
278  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
279  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
280  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
281  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
282  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
283};
284
285static const
286struct processor_costs pentiumpro_cost = {
287  COSTS_N_INSNS (1),			/* cost of an add instruction */
288  COSTS_N_INSNS (1),			/* cost of a lea instruction */
289  COSTS_N_INSNS (1),			/* variable shift costs */
290  COSTS_N_INSNS (1),			/* constant shift costs */
291  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
292   COSTS_N_INSNS (4),			/*                               HI */
293   COSTS_N_INSNS (4),			/*                               SI */
294   COSTS_N_INSNS (4),			/*                               DI */
295   COSTS_N_INSNS (4)},			/*                               other */
296  0,					/* cost of multiply per each bit set */
297  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
298   COSTS_N_INSNS (17),			/*                          HI */
299   COSTS_N_INSNS (17),			/*                          SI */
300   COSTS_N_INSNS (17),			/*                          DI */
301   COSTS_N_INSNS (17)},			/*                          other */
302  COSTS_N_INSNS (1),			/* cost of movsx */
303  COSTS_N_INSNS (1),			/* cost of movzx */
304  8,					/* "large" insn */
305  6,					/* MOVE_RATIO */
306  2,					/* cost for loading QImode using movzbl */
307  {4, 4, 4},				/* cost of loading integer registers
308					   in QImode, HImode and SImode.
309					   Relative to reg-reg move (2).  */
310  {2, 2, 2},				/* cost of storing integer registers */
311  2,					/* cost of reg,reg fld/fst */
312  {2, 2, 6},				/* cost of loading fp registers
313					   in SFmode, DFmode and XFmode */
314  {4, 4, 6},				/* cost of storing fp registers
315					   in SFmode, DFmode and XFmode */
316  2,					/* cost of moving MMX register */
317  {2, 2},				/* cost of loading MMX registers
318					   in SImode and DImode */
319  {2, 2},				/* cost of storing MMX registers
320					   in SImode and DImode */
321  2,					/* cost of moving SSE register */
322  {2, 2, 8},				/* cost of loading SSE registers
323					   in SImode, DImode and TImode */
324  {2, 2, 8},				/* cost of storing SSE registers
325					   in SImode, DImode and TImode */
326  3,					/* MMX or SSE register to integer */
327  32,					/* size of prefetch block */
328  6,					/* number of parallel prefetches */
329  2,					/* Branch cost */
330  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
331  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
332  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
333  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
334  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
335  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
336};
337
338static const
339struct processor_costs geode_cost = {
340  COSTS_N_INSNS (1),			/* cost of an add instruction */
341  COSTS_N_INSNS (1),			/* cost of a lea instruction */
342  COSTS_N_INSNS (2),			/* variable shift costs */
343  COSTS_N_INSNS (1),			/* constant shift costs */
344  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
345   COSTS_N_INSNS (4),			/*                               HI */
346   COSTS_N_INSNS (7),			/*                               SI */
347   COSTS_N_INSNS (7),			/*                               DI */
348   COSTS_N_INSNS (7)},			/*                               other */
349  0,					/* cost of multiply per each bit set */
350  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
351   COSTS_N_INSNS (23),			/*                          HI */
352   COSTS_N_INSNS (39),			/*                          SI */
353   COSTS_N_INSNS (39),			/*                          DI */
354   COSTS_N_INSNS (39)},			/*                          other */
355  COSTS_N_INSNS (1),			/* cost of movsx */
356  COSTS_N_INSNS (1),			/* cost of movzx */
357  8,					/* "large" insn */
358  4,					/* MOVE_RATIO */
359  1,					/* cost for loading QImode using movzbl */
360  {1, 1, 1},				/* cost of loading integer registers
361					   in QImode, HImode and SImode.
362					   Relative to reg-reg move (2).  */
363  {1, 1, 1},				/* cost of storing integer registers */
364  1,					/* cost of reg,reg fld/fst */
365  {1, 1, 1},				/* cost of loading fp registers
366					   in SFmode, DFmode and XFmode */
367  {4, 6, 6},				/* cost of storing fp registers
368					   in SFmode, DFmode and XFmode */
369
370  1,					/* cost of moving MMX register */
371  {1, 1},				/* cost of loading MMX registers
372					   in SImode and DImode */
373  {1, 1},				/* cost of storing MMX registers
374					   in SImode and DImode */
375  1,					/* cost of moving SSE register */
376  {1, 1, 1},				/* cost of loading SSE registers
377					   in SImode, DImode and TImode */
378  {1, 1, 1},				/* cost of storing SSE registers
379					   in SImode, DImode and TImode */
380  1,					/* MMX or SSE register to integer */
381  32,					/* size of prefetch block */
382  1,					/* number of parallel prefetches */
383  1,					/* Branch cost */
384  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
385  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
386  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
387  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
388  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
389  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
390};
391
392static const
393struct processor_costs k6_cost = {
394  COSTS_N_INSNS (1),			/* cost of an add instruction */
395  COSTS_N_INSNS (2),			/* cost of a lea instruction */
396  COSTS_N_INSNS (1),			/* variable shift costs */
397  COSTS_N_INSNS (1),			/* constant shift costs */
398  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
399   COSTS_N_INSNS (3),			/*                               HI */
400   COSTS_N_INSNS (3),			/*                               SI */
401   COSTS_N_INSNS (3),			/*                               DI */
402   COSTS_N_INSNS (3)},			/*                               other */
403  0,					/* cost of multiply per each bit set */
404  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
405   COSTS_N_INSNS (18),			/*                          HI */
406   COSTS_N_INSNS (18),			/*                          SI */
407   COSTS_N_INSNS (18),			/*                          DI */
408   COSTS_N_INSNS (18)},			/*                          other */
409  COSTS_N_INSNS (2),			/* cost of movsx */
410  COSTS_N_INSNS (2),			/* cost of movzx */
411  8,					/* "large" insn */
412  4,					/* MOVE_RATIO */
413  3,					/* cost for loading QImode using movzbl */
414  {4, 5, 4},				/* cost of loading integer registers
415					   in QImode, HImode and SImode.
416					   Relative to reg-reg move (2).  */
417  {2, 3, 2},				/* cost of storing integer registers */
418  4,					/* cost of reg,reg fld/fst */
419  {6, 6, 6},				/* cost of loading fp registers
420					   in SFmode, DFmode and XFmode */
421  {4, 4, 4},				/* cost of storing fp registers
422					   in SFmode, DFmode and XFmode */
423  2,					/* cost of moving MMX register */
424  {2, 2},				/* cost of loading MMX registers
425					   in SImode and DImode */
426  {2, 2},				/* cost of storing MMX registers
427					   in SImode and DImode */
428  2,					/* cost of moving SSE register */
429  {2, 2, 8},				/* cost of loading SSE registers
430					   in SImode, DImode and TImode */
431  {2, 2, 8},				/* cost of storing SSE registers
432					   in SImode, DImode and TImode */
433  6,					/* MMX or SSE register to integer */
434  32,					/* size of prefetch block */
435  1,					/* number of parallel prefetches */
436  1,					/* Branch cost */
437  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
438  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
439  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
440  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
441  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
442  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
443};
444
445static const
446struct processor_costs athlon_cost = {
447  COSTS_N_INSNS (1),			/* cost of an add instruction */
448  COSTS_N_INSNS (2),			/* cost of a lea instruction */
449  COSTS_N_INSNS (1),			/* variable shift costs */
450  COSTS_N_INSNS (1),			/* constant shift costs */
451  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
452   COSTS_N_INSNS (5),			/*                               HI */
453   COSTS_N_INSNS (5),			/*                               SI */
454   COSTS_N_INSNS (5),			/*                               DI */
455   COSTS_N_INSNS (5)},			/*                               other */
456  0,					/* cost of multiply per each bit set */
457  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
458   COSTS_N_INSNS (26),			/*                          HI */
459   COSTS_N_INSNS (42),			/*                          SI */
460   COSTS_N_INSNS (74),			/*                          DI */
461   COSTS_N_INSNS (74)},			/*                          other */
462  COSTS_N_INSNS (1),			/* cost of movsx */
463  COSTS_N_INSNS (1),			/* cost of movzx */
464  8,					/* "large" insn */
465  9,					/* MOVE_RATIO */
466  4,					/* cost for loading QImode using movzbl */
467  {3, 4, 3},				/* cost of loading integer registers
468					   in QImode, HImode and SImode.
469					   Relative to reg-reg move (2).  */
470  {3, 4, 3},				/* cost of storing integer registers */
471  4,					/* cost of reg,reg fld/fst */
472  {4, 4, 12},				/* cost of loading fp registers
473					   in SFmode, DFmode and XFmode */
474  {6, 6, 8},				/* cost of storing fp registers
475					   in SFmode, DFmode and XFmode */
476  2,					/* cost of moving MMX register */
477  {4, 4},				/* cost of loading MMX registers
478					   in SImode and DImode */
479  {4, 4},				/* cost of storing MMX registers
480					   in SImode and DImode */
481  2,					/* cost of moving SSE register */
482  {4, 4, 6},				/* cost of loading SSE registers
483					   in SImode, DImode and TImode */
484  {4, 4, 5},				/* cost of storing SSE registers
485					   in SImode, DImode and TImode */
486  5,					/* MMX or SSE register to integer */
487  64,					/* size of prefetch block */
488  6,					/* number of parallel prefetches */
489  5,					/* Branch cost */
490  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
491  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
492  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
493  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
494  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
495  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
496};
497
498static const
499struct processor_costs k8_cost = {
500  COSTS_N_INSNS (1),			/* cost of an add instruction */
501  COSTS_N_INSNS (2),			/* cost of a lea instruction */
502  COSTS_N_INSNS (1),			/* variable shift costs */
503  COSTS_N_INSNS (1),			/* constant shift costs */
504  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
505   COSTS_N_INSNS (4),			/*                               HI */
506   COSTS_N_INSNS (3),			/*                               SI */
507   COSTS_N_INSNS (4),			/*                               DI */
508   COSTS_N_INSNS (5)},			/*                               other */
509  0,					/* cost of multiply per each bit set */
510  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
511   COSTS_N_INSNS (26),			/*                          HI */
512   COSTS_N_INSNS (42),			/*                          SI */
513   COSTS_N_INSNS (74),			/*                          DI */
514   COSTS_N_INSNS (74)},			/*                          other */
515  COSTS_N_INSNS (1),			/* cost of movsx */
516  COSTS_N_INSNS (1),			/* cost of movzx */
517  8,					/* "large" insn */
518  9,					/* MOVE_RATIO */
519  4,					/* cost for loading QImode using movzbl */
520  {3, 4, 3},				/* cost of loading integer registers
521					   in QImode, HImode and SImode.
522					   Relative to reg-reg move (2).  */
523  {3, 4, 3},				/* cost of storing integer registers */
524  4,					/* cost of reg,reg fld/fst */
525  {4, 4, 12},				/* cost of loading fp registers
526					   in SFmode, DFmode and XFmode */
527  {6, 6, 8},				/* cost of storing fp registers
528					   in SFmode, DFmode and XFmode */
529  2,					/* cost of moving MMX register */
530  {3, 3},				/* cost of loading MMX registers
531					   in SImode and DImode */
532  {4, 4},				/* cost of storing MMX registers
533					   in SImode and DImode */
534  2,					/* cost of moving SSE register */
535  {4, 3, 6},				/* cost of loading SSE registers
536					   in SImode, DImode and TImode */
537  {4, 4, 5},				/* cost of storing SSE registers
538					   in SImode, DImode and TImode */
539  5,					/* MMX or SSE register to integer */
540  64,					/* size of prefetch block */
541  6,					/* number of parallel prefetches */
542  5,					/* Branch cost */
543  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
544  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
545  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
546  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
547  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
548  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
549};
550
551struct processor_costs amdfam10_cost = {
552  COSTS_N_INSNS (1),                    /* cost of an add instruction */
553  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
554  COSTS_N_INSNS (1),                    /* variable shift costs */
555  COSTS_N_INSNS (1),                    /* constant shift costs */
556  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
557   COSTS_N_INSNS (4),                   /*                               HI */
558   COSTS_N_INSNS (3),                   /*                               SI */
559   COSTS_N_INSNS (4),                   /*                               DI */
560   COSTS_N_INSNS (5)},                  /*                               other */
561  0,                                    /* cost of multiply per each bit set */
562  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
563   COSTS_N_INSNS (35),                  /*                          HI */
564   COSTS_N_INSNS (51),                  /*                          SI */
565   COSTS_N_INSNS (83),                  /*                          DI */
566   COSTS_N_INSNS (83)},                 /*                          other */
567  COSTS_N_INSNS (1),			/* cost of movsx */
568  COSTS_N_INSNS (1),			/* cost of movzx */
569  8,					/* "large" insn */
570  9,					/* MOVE_RATIO */
571  4,					/* cost for loading QImode using movzbl */
572  {3, 4, 3},				/* cost of loading integer registers
573					   in QImode, HImode and SImode.
574					   Relative to reg-reg move (2).  */
575  {3, 4, 3},				/* cost of storing integer registers */
576  4,					/* cost of reg,reg fld/fst */
577  {4, 4, 12},				/* cost of loading fp registers
578		   			   in SFmode, DFmode and XFmode */
579  {6, 6, 8},				/* cost of storing fp registers
580 		   			   in SFmode, DFmode and XFmode */
581  2,					/* cost of moving MMX register */
582  {3, 3},				/* cost of loading MMX registers
583					   in SImode and DImode */
584  {4, 4},				/* cost of storing MMX registers
585					   in SImode and DImode */
586  2,					/* cost of moving SSE register */
587  {4, 4, 3},				/* cost of loading SSE registers
588					   in SImode, DImode and TImode */
589  {4, 4, 5},				/* cost of storing SSE registers
590					   in SImode, DImode and TImode */
591  3,					/* MMX or SSE register to integer */
592  					/* On K8
593  					    MOVD reg64, xmmreg 	Double	FSTORE 4
594					    MOVD reg32, xmmreg 	Double	FSTORE 4
595					   On AMDFAM10
596					    MOVD reg64, xmmreg 	Double	FADD 3
597                                                                1/1  1/1
598					    MOVD reg32, xmmreg 	Double	FADD 3
599                                                                1/1  1/1 */
600  64,					/* size of prefetch block */
601  /* New AMD processors never drop prefetches; if they cannot be performed
602     immediately, they are queued.  We set the number of simultaneous
603     prefetches to a large constant to reflect this (leaving the number of
604     prefetches completely unlimited is probably not a good idea either, as
605     their execution also takes some time).  */
606  100,					/* number of parallel prefetches */
607  5,					/* Branch cost */
608  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
609  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
610  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
611  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
612  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
613  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
614};
615
616static const
617struct processor_costs pentium4_cost = {
618  COSTS_N_INSNS (1),			/* cost of an add instruction */
619  COSTS_N_INSNS (3),			/* cost of a lea instruction */
620  COSTS_N_INSNS (4),			/* variable shift costs */
621  COSTS_N_INSNS (4),			/* constant shift costs */
622  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
623   COSTS_N_INSNS (15),			/*                               HI */
624   COSTS_N_INSNS (15),			/*                               SI */
625   COSTS_N_INSNS (15),			/*                               DI */
626   COSTS_N_INSNS (15)},			/*                               other */
627  0,					/* cost of multiply per each bit set */
628  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
629   COSTS_N_INSNS (56),			/*                          HI */
630   COSTS_N_INSNS (56),			/*                          SI */
631   COSTS_N_INSNS (56),			/*                          DI */
632   COSTS_N_INSNS (56)},			/*                          other */
633  COSTS_N_INSNS (1),			/* cost of movsx */
634  COSTS_N_INSNS (1),			/* cost of movzx */
635  16,					/* "large" insn */
636  6,					/* MOVE_RATIO */
637  2,					/* cost for loading QImode using movzbl */
638  {4, 5, 4},				/* cost of loading integer registers
639					   in QImode, HImode and SImode.
640					   Relative to reg-reg move (2).  */
641  {2, 3, 2},				/* cost of storing integer registers */
642  2,					/* cost of reg,reg fld/fst */
643  {2, 2, 6},				/* cost of loading fp registers
644					   in SFmode, DFmode and XFmode */
645  {4, 4, 6},				/* cost of storing fp registers
646					   in SFmode, DFmode and XFmode */
647  2,					/* cost of moving MMX register */
648  {2, 2},				/* cost of loading MMX registers
649					   in SImode and DImode */
650  {2, 2},				/* cost of storing MMX registers
651					   in SImode and DImode */
652  12,					/* cost of moving SSE register */
653  {12, 12, 12},				/* cost of loading SSE registers
654					   in SImode, DImode and TImode */
655  {2, 2, 8},				/* cost of storing SSE registers
656					   in SImode, DImode and TImode */
657  10,					/* MMX or SSE register to integer */
658  64,					/* size of prefetch block */
659  6,					/* number of parallel prefetches */
660  2,					/* Branch cost */
661  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
662  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
663  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
664  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
665  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
666  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
667};
668
669static const
670struct processor_costs nocona_cost = {
671  COSTS_N_INSNS (1),			/* cost of an add instruction */
672  COSTS_N_INSNS (1),			/* cost of a lea instruction */
673  COSTS_N_INSNS (1),			/* variable shift costs */
674  COSTS_N_INSNS (1),			/* constant shift costs */
675  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
676   COSTS_N_INSNS (10),			/*                               HI */
677   COSTS_N_INSNS (10),			/*                               SI */
678   COSTS_N_INSNS (10),			/*                               DI */
679   COSTS_N_INSNS (10)},			/*                               other */
680  0,					/* cost of multiply per each bit set */
681  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
682   COSTS_N_INSNS (66),			/*                          HI */
683   COSTS_N_INSNS (66),			/*                          SI */
684   COSTS_N_INSNS (66),			/*                          DI */
685   COSTS_N_INSNS (66)},			/*                          other */
686  COSTS_N_INSNS (1),			/* cost of movsx */
687  COSTS_N_INSNS (1),			/* cost of movzx */
688  16,					/* "large" insn */
689  17,					/* MOVE_RATIO */
690  4,					/* cost for loading QImode using movzbl */
691  {4, 4, 4},				/* cost of loading integer registers
692					   in QImode, HImode and SImode.
693					   Relative to reg-reg move (2).  */
694  {4, 4, 4},				/* cost of storing integer registers */
695  3,					/* cost of reg,reg fld/fst */
696  {12, 12, 12},				/* cost of loading fp registers
697					   in SFmode, DFmode and XFmode */
698  {4, 4, 4},				/* cost of storing fp registers
699					   in SFmode, DFmode and XFmode */
700  6,					/* cost of moving MMX register */
701  {12, 12},				/* cost of loading MMX registers
702					   in SImode and DImode */
703  {12, 12},				/* cost of storing MMX registers
704					   in SImode and DImode */
705  6,					/* cost of moving SSE register */
706  {12, 12, 12},				/* cost of loading SSE registers
707					   in SImode, DImode and TImode */
708  {12, 12, 12},				/* cost of storing SSE registers
709					   in SImode, DImode and TImode */
710  8,					/* MMX or SSE register to integer */
711  128,					/* size of prefetch block */
712  8,					/* number of parallel prefetches */
713  1,					/* Branch cost */
714  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
715  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
716  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
717  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
718  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
719  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
720};
721
722static const
723struct processor_costs core2_cost = {
724  COSTS_N_INSNS (1),			/* cost of an add instruction */
725  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
726  COSTS_N_INSNS (1),			/* variable shift costs */
727  COSTS_N_INSNS (1),			/* constant shift costs */
728  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
729   COSTS_N_INSNS (3),			/*                               HI */
730   COSTS_N_INSNS (3),			/*                               SI */
731   COSTS_N_INSNS (3),			/*                               DI */
732   COSTS_N_INSNS (3)},			/*                               other */
733  0,					/* cost of multiply per each bit set */
734  {COSTS_N_INSNS (22),			/* cost of a divide/mod for QI */
735   COSTS_N_INSNS (22),			/*                          HI */
736   COSTS_N_INSNS (22),			/*                          SI */
737   COSTS_N_INSNS (22),			/*                          DI */
738   COSTS_N_INSNS (22)},			/*                          other */
739  COSTS_N_INSNS (1),			/* cost of movsx */
740  COSTS_N_INSNS (1),			/* cost of movzx */
741  8,					/* "large" insn */
742  16,					/* MOVE_RATIO */
743  2,					/* cost for loading QImode using movzbl */
744  {6, 6, 6},				/* cost of loading integer registers
745					   in QImode, HImode and SImode.
746					   Relative to reg-reg move (2).  */
747  {4, 4, 4},				/* cost of storing integer registers */
748  2,					/* cost of reg,reg fld/fst */
749  {6, 6, 6},				/* cost of loading fp registers
750					   in SFmode, DFmode and XFmode */
751  {4, 4, 4},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
752  2,					/* cost of moving MMX register */
753  {6, 6},				/* cost of loading MMX registers
754					   in SImode and DImode */
755  {4, 4},				/* cost of storing MMX registers
756					   in SImode and DImode */
757  2,					/* cost of moving SSE register */
758  {6, 6, 6},				/* cost of loading SSE registers
759					   in SImode, DImode and TImode */
760  {4, 4, 4},				/* cost of storing SSE registers
761					   in SImode, DImode and TImode */
762  2,					/* MMX or SSE register to integer */
763  128,					/* size of prefetch block */
764  8,					/* number of parallel prefetches */
765  3,					/* Branch cost */
766  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
767  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
768  COSTS_N_INSNS (32),			/* cost of FDIV instruction.  */
769  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
770  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
771  COSTS_N_INSNS (58),			/* cost of FSQRT instruction.  */
772};
773
774/* Generic64 should produce code tuned for Nocona and K8.  */
775static const
776struct processor_costs generic64_cost = {
777  COSTS_N_INSNS (1),			/* cost of an add instruction */
778  /* On all chips taken into consideration, lea takes 2 cycles or more.  Rating
779     it that high, however, makes our current implementation of synth_mult use
780     unnecessary temporary registers, causing regressions on several SPECfp
781     benchmarks.  */
782  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
783  COSTS_N_INSNS (1),			/* variable shift costs */
784  COSTS_N_INSNS (1),			/* constant shift costs */
785  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
786   COSTS_N_INSNS (4),			/*                               HI */
787   COSTS_N_INSNS (3),			/*                               SI */
788   COSTS_N_INSNS (4),			/*                               DI */
789   COSTS_N_INSNS (2)},			/*                               other */
790  0,					/* cost of multiply per each bit set */
791  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
792   COSTS_N_INSNS (26),			/*                          HI */
793   COSTS_N_INSNS (42),			/*                          SI */
794   COSTS_N_INSNS (74),			/*                          DI */
795   COSTS_N_INSNS (74)},			/*                          other */
796  COSTS_N_INSNS (1),			/* cost of movsx */
797  COSTS_N_INSNS (1),			/* cost of movzx */
798  8,					/* "large" insn */
799  17,					/* MOVE_RATIO */
800  4,					/* cost for loading QImode using movzbl */
801  {4, 4, 4},				/* cost of loading integer registers
802					   in QImode, HImode and SImode.
803					   Relative to reg-reg move (2).  */
804  {4, 4, 4},				/* cost of storing integer registers */
805  4,					/* cost of reg,reg fld/fst */
806  {12, 12, 12},				/* cost of loading fp registers
807					   in SFmode, DFmode and XFmode */
808  {6, 6, 8},				/* cost of storing fp registers
809					   in SFmode, DFmode and XFmode */
810  2,					/* cost of moving MMX register */
811  {8, 8},				/* cost of loading MMX registers
812					   in SImode and DImode */
813  {8, 8},				/* cost of storing MMX registers
814					   in SImode and DImode */
815  2,					/* cost of moving SSE register */
816  {8, 8, 8},				/* cost of loading SSE registers
817					   in SImode, DImode and TImode */
818  {8, 8, 8},				/* cost of storing SSE registers
819					   in SImode, DImode and TImode */
820  5,					/* MMX or SSE register to integer */
821  64,					/* size of prefetch block */
822  6,					/* number of parallel prefetches */
823  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
824     value is increased to the perhaps more appropriate value of 5.  */
825  3,					/* Branch cost */
826  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
827  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
828  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
829  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
830  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
831  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
832};
833
834/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8.  */
835static const
836struct processor_costs generic32_cost = {
837  COSTS_N_INSNS (1),			/* cost of an add instruction */
838  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
839  COSTS_N_INSNS (1),			/* variable shift costs */
840  COSTS_N_INSNS (1),			/* constant shift costs */
841  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
842   COSTS_N_INSNS (4),			/*                               HI */
843   COSTS_N_INSNS (3),			/*                               SI */
844   COSTS_N_INSNS (4),			/*                               DI */
845   COSTS_N_INSNS (2)},			/*                               other */
846  0,					/* cost of multiply per each bit set */
847  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
848   COSTS_N_INSNS (26),			/*                          HI */
849   COSTS_N_INSNS (42),			/*                          SI */
850   COSTS_N_INSNS (74),			/*                          DI */
851   COSTS_N_INSNS (74)},			/*                          other */
852  COSTS_N_INSNS (1),			/* cost of movsx */
853  COSTS_N_INSNS (1),			/* cost of movzx */
854  8,					/* "large" insn */
855  17,					/* MOVE_RATIO */
856  4,					/* cost for loading QImode using movzbl */
857  {4, 4, 4},				/* cost of loading integer registers
858					   in QImode, HImode and SImode.
859					   Relative to reg-reg move (2).  */
860  {4, 4, 4},				/* cost of storing integer registers */
861  4,					/* cost of reg,reg fld/fst */
862  {12, 12, 12},				/* cost of loading fp registers
863					   in SFmode, DFmode and XFmode */
864  {6, 6, 8},				/* cost of storing fp registers
865					   in SFmode, DFmode and XFmode */
866  2,					/* cost of moving MMX register */
867  {8, 8},				/* cost of loading MMX registers
868					   in SImode and DImode */
869  {8, 8},				/* cost of storing MMX registers
870					   in SImode and DImode */
871  2,					/* cost of moving SSE register */
872  {8, 8, 8},				/* cost of loading SSE registers
873					   in SImode, DImode and TImode */
874  {8, 8, 8},				/* cost of storing SSE registers
875					   in SImode, DImode and TImode */
876  5,					/* MMX or SSE register to integer */
877  64,					/* size of prefetch block */
878  6,					/* number of parallel prefetches */
879  3,					/* Branch cost */
880  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
881  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
882  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
883  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
884  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
885  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
886};
887
888const struct processor_costs *ix86_cost = &pentium_cost;
889
890/* Processor feature/optimization bitmasks.  */
891#define m_386 (1<<PROCESSOR_I386)
892#define m_486 (1<<PROCESSOR_I486)
893#define m_PENT (1<<PROCESSOR_PENTIUM)
894#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
895#define m_GEODE  (1<<PROCESSOR_GEODE)
896#define m_K6_GEODE  (m_K6 | m_GEODE)
897#define m_K6  (1<<PROCESSOR_K6)
898#define m_ATHLON  (1<<PROCESSOR_ATHLON)
899#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
900#define m_K8  (1<<PROCESSOR_K8)
901#define m_ATHLON_K8  (m_K8 | m_ATHLON)
902#define m_AMDFAM10  (1<<PROCESSOR_AMDFAM10)
903#define m_NOCONA  (1<<PROCESSOR_NOCONA)
904#define m_CORE2  (1<<PROCESSOR_CORE2)
905#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
906#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
907#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
908#define m_ATHLON_K8_AMDFAM10  (m_K8 | m_ATHLON | m_AMDFAM10)
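
/* Editor's sketch (macro names as in i386.h of this GCC version; treat the
   exact shape as an assumption): each feature word below is tested against
   the bit of the processor selected by -mtune, roughly

       #define TUNEMASK          (1 << ix86_tune)
       #define TARGET_USE_LEAVE  (x86_use_leave & TUNEMASK)

   so e.g. -mtune=core2 makes TARGET_USE_LEAVE nonzero because m_CORE2 is set
   in x86_use_leave below.  */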
909
910/* Generic instruction choice should be a common subset of the supported CPUs
911   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
912
913/* Using leave does not hurt Nocona SPEC2000 results, so enabling it for
914   Generic64 seems like a good code size tradeoff.  We can't enable it for
915   32bit generic because it does not work well with PPro based chips.  */
916const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
917                          | m_GENERIC64;
918const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
919                            | m_NOCONA | m_CORE2 | m_GENERIC;
920const int x86_zero_extend_with_and = m_486 | m_PENT;
921/* Enable zero-extending integer registers to avoid partial-register dependencies.  */
922const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
923                     | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
924const int x86_double_with_add = ~m_386;
925const int x86_use_bit_test = m_386;
926const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
927                              | m_K6 | m_CORE2 | m_GENERIC;
928const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
929                      | m_NOCONA;
930const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
931const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
932                            | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
933/* Branch hints were put into the P4 based on simulation results.  But after
934   the P4 shipped, no performance benefit was observed from branch hints, and
935   they also increase code size.  As a result, icc never generates branch
936   hints.  */
937const int x86_branch_hints = 0;
938const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
939                         /*m_GENERIC | m_ATHLON_K8 ? */
940/* We probably ought to watch for partial register stalls on the Generic32
941   compilation setting as well.  However, the current implementation does not
942   eliminate partial register stalls very well - they can be introduced via
943   subregs synthesized by combine and can happen in caller/callee saving
944   sequences.
945   Because this option pays back little on PPro based chips and conflicts with
946   the partial register dependencies used by Athlon/P4 based chips, it is
947   better to leave it off for generic32 for now.  */
948const int x86_partial_reg_stall = m_PPRO;
949const int x86_partial_flag_reg_stall =  m_CORE2 | m_GENERIC;
950const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
951const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
952                                  | m_CORE2 | m_GENERIC);
953const int x86_use_mov0 = m_K6;
954const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
955const int x86_read_modify_write = ~m_PENT;
956const int x86_read_modify = ~(m_PENT | m_PPRO);
957const int x86_split_long_moves = m_PPRO;
958const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
959                               | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
960                               /* m_PENT4 ? */
961const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
962const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
963const int x86_qimode_math = ~(0);
964const int x86_promote_qi_regs = 0;
965/* On PPro this flag is meant to avoid partial register stalls.  Just like
966   x86_partial_reg_stall, this option might be considered for Generic32 if
967   our scheme for avoiding partial stalls were more effective.  */
968const int x86_himode_math = ~(m_PPRO);
969const int x86_promote_hi_regs = m_PPRO;
970/* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
971const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
972                          | m_CORE2 | m_GENERIC;
973const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
974                          | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
975const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
976                          | m_CORE2 | m_GENERIC;
977const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
978                          | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
979/* Enable if integer moves are preferred for DFmode copies */
980const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
981                                       | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
982const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
983                                       | m_CORE2 | m_GENERIC;
984const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
985                                      | m_CORE2 | m_GENERIC;
986/* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
987   for outgoing arguments will be computed and placed into the variable
988   `current_function_outgoing_args_size'. No space will be pushed onto the stack
989   for each call; instead, the function prologue should increase the stack frame
990   size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
991   not proper. */
992const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
993                                         | m_NOCONA | m_PPRO | m_CORE2
994                                         | m_GENERIC;
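
/* Editor's illustration (hypothetical code, not taken from this file): with
   accumulated outgoing arguments the prologue reserves the argument area once
   and calls store into it,

       subl  $24, %esp           # prologue reserves outgoing-arg space
       ...
       movl  $1, (%esp)          # arguments are stored, not pushed
       movl  $2, 4(%esp)
       call  foo

   instead of emitting push/pop sequences around every call.  */
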
995const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
996const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
997const int x86_shift1 = ~m_486;
998const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
999                                           | m_ATHLON_K8_AMDFAM10 | m_PENT4
1000                                           | m_NOCONA | m_CORE2 | m_GENERIC;
1001/* In the Generic model we have a conflict here between PPro/Pentium4 based
1002   chips that treat 128bit SSE registers as single units and K8 based chips
1003   that split SSE registers into two 64bit halves.
1004   x86_sse_partial_reg_dependency promotes all store destinations to 128bit
1005   to allow register renaming on 128bit SSE units, but usually results in one
1006   extra microop on 64bit SSE units.  Experimental results show that disabling
1007   this option on P4 brings over a 20% SPECfp regression, while enabling it on
1008   K8 brings a roughly 2.4% regression that can be partly masked by careful
1009   scheduling of moves.  */
1010const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1011                                           | m_GENERIC | m_AMDFAM10;
1012/* Set for machines where types and dependencies are resolved on SSE
1013   register parts instead of whole registers, so we may keep just the
1014   lower part of scalar values in the proper format and leave the upper
1015   part undefined.  */
1016const int x86_sse_split_regs = m_ATHLON_K8;
1017/* Code generation for scalar reg-reg moves of single and double precision data:
1018     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
1019       movaps reg, reg
1020     else
1021       movss reg, reg
1022     if (x86_sse_partial_reg_dependency == true)
1023       movapd reg, reg
1024     else
1025       movsd reg, reg
1026
1027   Code generation for scalar loads of double precision data:
1028     if (x86_sse_split_regs == true)
1029       movlpd mem, reg      (gas syntax)
1030     else
1031       movsd mem, reg
1032
1033   Code generation for unaligned packed loads of single precision data
1034   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1035     if (x86_sse_unaligned_move_optimal)
1036       movups mem, reg
1037
1038     if (x86_sse_partial_reg_dependency == true)
1039       {
1040         xorps  reg, reg
1041         movlps mem, reg
1042         movhps mem+8, reg
1043       }
1044     else
1045       {
1046         movlps mem, reg
1047         movhps mem+8, reg
1048       }
1049
1050   Code generation for unaligned packed loads of double precision data
1051   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1052     if (x86_sse_unaligned_move_optimal)
1053       movupd mem, reg
1054
1055     if (x86_sse_split_regs == true)
1056       {
1057         movlpd mem, reg
1058         movhpd mem+8, reg
1059       }
1060     else
1061       {
1062         movsd  mem, reg
1063         movhpd mem+8, reg
1064       }
1065 */
1066const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
1067const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
1068const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1069const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
1070const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
1071const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1072
1073/* ??? Allowing interunit moves makes it all too easy for the compiler to put
1074   integer data in xmm registers, which results in pretty abysmal code.  */
1075const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
1076
1077const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
1078                                    | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1079/* Some CPU cores are not able to predict more than 4 branch instructions in
1080   the 16 byte window.  */
1081const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1082                                | m_NOCONA | m_CORE2 | m_GENERIC;
1083const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
1084                         | m_CORE2 | m_GENERIC;
1085const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
1086/* Compare and exchange was added for 80486.  */
1087const int x86_cmpxchg = ~m_386;
1088/* Compare and exchange 8 bytes was added for the Pentium.  */
1089const int x86_cmpxchg8b = ~(m_386 | m_486);
1090/* Exchange and add was added for 80486.  */
1091const int x86_xadd = ~m_386;
1092const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1093
1094/* In case the average insn count for a single function invocation is
1095   lower than this constant, emit fast (but longer) prologue and
1096   epilogue code.  */
1097#define FAST_PROLOGUE_INSN_COUNT 20
1098
1099/* Names for the 8-bit low, 8-bit high, and 16-bit registers, respectively.  */
1100static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1101static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1102static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1103
1104/* Array of the smallest class containing reg number REGNO, indexed by
1105   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
1106
1107enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1108{
1109  /* ax, dx, cx, bx */
1110  AREG, DREG, CREG, BREG,
1111  /* si, di, bp, sp */
1112  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1113  /* FP registers */
1114  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1115  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1116  /* arg pointer */
1117  NON_Q_REGS,
1118  /* flags, fpsr, dirflag, frame */
1119  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1120  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1121  SSE_REGS, SSE_REGS,
1122  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1123  MMX_REGS, MMX_REGS,
1124  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1125  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1126  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1127  SSE_REGS, SSE_REGS,
1128};
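
/* Editor's note: in this GCC version i386.h defines REGNO_REG_CLASS (REGNO)
   essentially as (regclass_map[REGNO]), so REGNO_REG_CLASS (0) is AREG (%eax)
   and REGNO_REG_CLASS (7) is NON_Q_REGS (%esp).  */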
1129
1130/* The "default" register map used in 32bit mode.  */
1131
1132int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1133{
1134  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
1135  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
1136  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
1137  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
1138  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
1139  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
1140  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
1141};
1142
1143static int const x86_64_int_parameter_registers[6] =
1144{
1145  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1146  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1147};
1148
1149static int const x86_64_int_return_registers[4] =
1150{
1151  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1152};
1153
1154/* The "default" register map used in 64bit mode.  */
1155int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1156{
1157  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
1158  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
1159  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
1160  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
1161  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
1162  8,9,10,11,12,13,14,15,		/* extended integer registers */
1163  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
1164};
1165
1166/* Define the register numbers to be used in Dwarf debugging information.
1167   The SVR4 reference port C compiler uses the following register numbers
1168   in its Dwarf output code:
1169	0 for %eax (gcc regno = 0)
1170	1 for %ecx (gcc regno = 2)
1171	2 for %edx (gcc regno = 1)
1172	3 for %ebx (gcc regno = 3)
1173	4 for %esp (gcc regno = 7)
1174	5 for %ebp (gcc regno = 6)
1175	6 for %esi (gcc regno = 4)
1176	7 for %edi (gcc regno = 5)
1177   The following three DWARF register numbers are never generated by
1178   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1179   believes these numbers have these meanings.
1180	8  for %eip    (no gcc equivalent)
1181	9  for %eflags (gcc regno = 17)
1182	10 for %trapno (no gcc equivalent)
1183   It is not at all clear how we should number the FP stack registers
1184   for the x86 architecture.  If the version of SDB on x86/svr4 were
1185   a bit less brain dead with respect to floating-point then we would
1186   have a precedent to follow with respect to DWARF register numbers
1187   for x86 FP registers, but the SDB on x86/svr4 is so completely
1188   broken with respect to FP registers that it is hardly worth thinking
1189   of it as something to strive for compatibility with.
1190   The version of x86/svr4 SDB I have at the moment does (partially)
1191   seem to believe that DWARF register number 11 is associated with
1192   the x86 register %st(0), but that's about all.  Higher DWARF
1193   register numbers don't seem to be associated with anything in
1194   particular, and even for DWARF regno 11, SDB only seems to under-
1195   stand that it should say that a variable lives in %st(0) (when
1196   asked via an `=' command) if we said it was in DWARF regno 11,
1197   but SDB still prints garbage when asked for the value of the
1198   variable in question (via a `/' command).
1199   (Also note that the labels SDB prints for various FP stack regs
1200   when doing an `x' command are all wrong.)
1201   Note that these problems generally don't affect the native SVR4
1202   C compiler because it doesn't allow the use of -O with -g and
1203   because when it is *not* optimizing, it allocates a memory
1204   location for each floating-point variable, and the memory
1205   location is what gets described in the DWARF AT_location
1206   attribute for the variable in question.
1207   Regardless of the severe mental illness of the x86/svr4 SDB, we
1208   do something sensible here and we use the following DWARF
1209   register numbers.  Note that these are all stack-top-relative
1210   numbers.
1211	11 for %st(0) (gcc regno = 8)
1212	12 for %st(1) (gcc regno = 9)
1213	13 for %st(2) (gcc regno = 10)
1214	14 for %st(3) (gcc regno = 11)
1215	15 for %st(4) (gcc regno = 12)
1216	16 for %st(5) (gcc regno = 13)
1217	17 for %st(6) (gcc regno = 14)
1218	18 for %st(7) (gcc regno = 15)
1219*/
1220int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1221{
1222  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
1223  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
1224  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
1225  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
1226  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
1227  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
1228  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
1229};
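/* As a cross-check of the table above: svr4_dbx_register_map[6] == 5 and
   svr4_dbx_register_map[7] == 4, i.e. %ebp maps to DWARF register 5 and
   %esp to DWARF register 4, matching the numbering listed in the long
   comment preceding the table.  */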
1230
1231/* Test and compare insns in i386.md store the information needed to
1232   generate branch and scc insns here.  */
1233
1234rtx ix86_compare_op0 = NULL_RTX;
1235rtx ix86_compare_op1 = NULL_RTX;
1236rtx ix86_compare_emitted = NULL_RTX;
1237
1238/* Size of the register save area.  */
1239#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
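/* With the usual x86-64 values (REGPARM_MAX == 6, SSE_REGPARM_MAX == 8,
   UNITS_PER_WORD == 8) this works out to 6 * 8 + 8 * 16 = 176 bytes:
   48 bytes for the six integer argument registers plus 128 bytes for the
   eight SSE argument registers saved for va_arg.  Other configurations
   may use different values.  */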
1240
1241/* Define the structure for the machine field in struct function.  */
1242
1243struct stack_local_entry GTY(())
1244{
1245  unsigned short mode;
1246  unsigned short n;
1247  rtx rtl;
1248  struct stack_local_entry *next;
1249};
1250
1251/* Structure describing stack frame layout.
1252   Stack grows downward:
1253
1254   [arguments]
1255					      <- ARG_POINTER
1256   saved pc
1257
1258   saved frame pointer if frame_pointer_needed
1259					      <- HARD_FRAME_POINTER
1260   [saved regs]
1261
1262   [padding1]          \
1263		        )
1264   [va_arg registers]  (
1265		        > to_allocate	      <- FRAME_POINTER
1266   [frame]	       (
1267		        )
1268   [padding2]	       /
1269  */
1270struct ix86_frame
1271{
1272  int nregs;
1273  int padding1;
1274  int va_arg_size;
1275  HOST_WIDE_INT frame;
1276  int padding2;
1277  int outgoing_arguments_size;
1278  int red_zone_size;
1279
1280  HOST_WIDE_INT to_allocate;
1281  /* The offsets relative to ARG_POINTER.  */
1282  HOST_WIDE_INT frame_pointer_offset;
1283  HOST_WIDE_INT hard_frame_pointer_offset;
1284  HOST_WIDE_INT stack_pointer_offset;
1285
1286  /* When save_regs_using_mov is set, emit prologue using
1287     move instead of push instructions.  */
1288  bool save_regs_using_mov;
1289};
1290
1291/* Code model option.  */
1292enum cmodel ix86_cmodel;
1293/* Asm dialect.  */
1294enum asm_dialect ix86_asm_dialect = ASM_ATT;
1295/* TLS dialects.  */
1296enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1297
1298/* Which unit we are generating floating point math for.  */
1299enum fpmath_unit ix86_fpmath;
1300
1301/* Which cpu are we scheduling for.  */
1302enum processor_type ix86_tune;
1303/* Which instruction set architecture to use.  */
1304enum processor_type ix86_arch;
1305
1306/* True if the SSE prefetch instruction is not a NOP.  */
1307int x86_prefetch_sse;
1308
1309/* True if cmpxchg16b is supported.  */
1310int x86_cmpxchg16b;
1311
1312/* ix86_regparm_string as a number */
1313static int ix86_regparm;
1314
1315/* -mstackrealign option */
1316extern int ix86_force_align_arg_pointer;
1317static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1318
1319/* Preferred alignment for stack boundary in bits.  */
1320unsigned int ix86_preferred_stack_boundary;
1321
1322/* Values 1-5: see jump.c */
1323int ix86_branch_cost;
1324
1325/* Variables this size or smaller are put in the regular data/bss sections;
1326   larger ones go in the ldata/lbss sections (medium code model).  */
1327
1328int ix86_section_threshold = 65536;
1329
1330/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
1331char internal_label_prefix[16];
1332int internal_label_prefix_len;
1333
1334static bool ix86_handle_option (size_t, const char *, int);
1335static void output_pic_addr_const (FILE *, rtx, int);
1336static void put_condition_code (enum rtx_code, enum machine_mode,
1337				int, int, FILE *);
1338static const char *get_some_local_dynamic_name (void);
1339static int get_some_local_dynamic_name_1 (rtx *, void *);
1340static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1341static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1342						   rtx *);
1343static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1344static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1345						   enum machine_mode);
1346static rtx get_thread_pointer (int);
1347static rtx legitimize_tls_address (rtx, enum tls_model, int);
1348static void get_pc_thunk_name (char [32], unsigned int);
1349static rtx gen_push (rtx);
1350static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1351static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1352static struct machine_function * ix86_init_machine_status (void);
1353static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1354static int ix86_nsaved_regs (void);
1355static void ix86_emit_save_regs (void);
1356static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1357static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1358static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1359static HOST_WIDE_INT ix86_GOT_alias_set (void);
1360static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1361static rtx ix86_expand_aligntest (rtx, int);
1362static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1363static int ix86_issue_rate (void);
1364static int ix86_adjust_cost (rtx, rtx, rtx, int);
1365static int ia32_multipass_dfa_lookahead (void);
1366static void ix86_init_mmx_sse_builtins (void);
1367static rtx x86_this_parameter (tree);
1368static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1369				 HOST_WIDE_INT, tree);
1370static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1371static void x86_file_start (void);
1372static void ix86_reorg (void);
1373static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1374static tree ix86_build_builtin_va_list (void);
1375static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1376					 tree, int *, int);
1377static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1378static bool ix86_scalar_mode_supported_p (enum machine_mode);
1379static bool ix86_vector_mode_supported_p (enum machine_mode);
1380
1381static int ix86_address_cost (rtx);
1382static bool ix86_cannot_force_const_mem (rtx);
1383static rtx ix86_delegitimize_address (rtx);
1384
1385static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1386
1387struct builtin_description;
1388static rtx ix86_expand_sse_comi (const struct builtin_description *,
1389				 tree, rtx);
1390static rtx ix86_expand_sse_compare (const struct builtin_description *,
1391				    tree, rtx);
1392static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1393static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1394static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1395static rtx ix86_expand_store_builtin (enum insn_code, tree);
1396static rtx safe_vector_operand (rtx, enum machine_mode);
1397static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1398static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1399static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1400static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1401static int ix86_fp_comparison_cost (enum rtx_code code);
1402static unsigned int ix86_select_alt_pic_regnum (void);
1403static int ix86_save_reg (unsigned int, int);
1404static void ix86_compute_frame_layout (struct ix86_frame *);
1405static int ix86_comp_type_attributes (tree, tree);
1406static int ix86_function_regparm (tree, tree);
1407const struct attribute_spec ix86_attribute_table[];
1408static bool ix86_function_ok_for_sibcall (tree, tree);
1409static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1410static int ix86_value_regno (enum machine_mode, tree, tree);
1411static bool contains_128bit_aligned_vector_p (tree);
1412static rtx ix86_struct_value_rtx (tree, int);
1413static bool ix86_ms_bitfield_layout_p (tree);
1414static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1415static int extended_reg_mentioned_1 (rtx *, void *);
1416static bool ix86_rtx_costs (rtx, int, int, int *);
1417static int min_insn_size (rtx);
1418static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1419static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1420static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1421				    tree, bool);
1422static void ix86_init_builtins (void);
1423static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1424static const char *ix86_mangle_fundamental_type (tree);
1425static tree ix86_stack_protect_fail (void);
1426static rtx ix86_internal_arg_pointer (void);
1427static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1428
1429/* This function is only used on Solaris.  */
1430static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1431  ATTRIBUTE_UNUSED;
1432
1433/* Register class used for passing a given 64-bit part of an argument.
1434   These represent the classes documented by the psABI, except that the
1435   SSESF and SSEDF classes are really the SSE class; GCC just uses SFmode
1436   or DFmode moves instead of DImode moves to avoid reformatting penalties.
1437
1438   Similarly, INTEGERSI_CLASS is used to get cheaper SImode moves whenever
1439   possible (the upper half then contains only padding).  See the
1440   illustrative comment following the enum below.  */
1441enum x86_64_reg_class
1442  {
1443    X86_64_NO_CLASS,
1444    X86_64_INTEGER_CLASS,
1445    X86_64_INTEGERSI_CLASS,
1446    X86_64_SSE_CLASS,
1447    X86_64_SSESF_CLASS,
1448    X86_64_SSEDF_CLASS,
1449    X86_64_SSEUP_CLASS,
1450    X86_64_X87_CLASS,
1451    X86_64_X87UP_CLASS,
1452    X86_64_COMPLEX_X87_CLASS,
1453    X86_64_MEMORY_CLASS
1454  };
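/* A rough illustration of the classification above, assuming the usual
   psABI rules: a hypothetical argument of type

       struct { double d; int i; }

   spans two eightbytes; the first is classified X86_64_SSEDF_CLASS (the
   double, moved in DFmode) and the second X86_64_INTEGERSI_CLASS (the
   int, whose upper half is only padding).  */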
1455static const char * const x86_64_reg_class_name[] = {
1456  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1457  "sseup", "x87", "x87up", "cplx87", "no"
1458};
1459
1460#define MAX_CLASSES 4
1461
1462/* Table of constants used by fldpi, fldln2, etc....  */
1463static REAL_VALUE_TYPE ext_80387_constants_table [5];
1464static bool ext_80387_constants_init = 0;
1465static void init_ext_80387_constants (void);
1466static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1467static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1468static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1469static section *x86_64_elf_select_section (tree decl, int reloc,
1470					   unsigned HOST_WIDE_INT align)
1471					     ATTRIBUTE_UNUSED;
1472
1473/* Initialize the GCC target structure.  */
1474#undef TARGET_ATTRIBUTE_TABLE
1475#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1476#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1477#  undef TARGET_MERGE_DECL_ATTRIBUTES
1478#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1479#endif
1480
1481#undef TARGET_COMP_TYPE_ATTRIBUTES
1482#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1483
1484#undef TARGET_INIT_BUILTINS
1485#define TARGET_INIT_BUILTINS ix86_init_builtins
1486#undef TARGET_EXPAND_BUILTIN
1487#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1488
1489#undef TARGET_ASM_FUNCTION_EPILOGUE
1490#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1491
1492#undef TARGET_ENCODE_SECTION_INFO
1493#ifndef SUBTARGET_ENCODE_SECTION_INFO
1494#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1495#else
1496#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1497#endif
1498
1499#undef TARGET_ASM_OPEN_PAREN
1500#define TARGET_ASM_OPEN_PAREN ""
1501#undef TARGET_ASM_CLOSE_PAREN
1502#define TARGET_ASM_CLOSE_PAREN ""
1503
1504#undef TARGET_ASM_ALIGNED_HI_OP
1505#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1506#undef TARGET_ASM_ALIGNED_SI_OP
1507#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1508#ifdef ASM_QUAD
1509#undef TARGET_ASM_ALIGNED_DI_OP
1510#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1511#endif
1512
1513#undef TARGET_ASM_UNALIGNED_HI_OP
1514#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1515#undef TARGET_ASM_UNALIGNED_SI_OP
1516#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1517#undef TARGET_ASM_UNALIGNED_DI_OP
1518#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1519
1520#undef TARGET_SCHED_ADJUST_COST
1521#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1522#undef TARGET_SCHED_ISSUE_RATE
1523#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1524#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1525#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1526  ia32_multipass_dfa_lookahead
1527
1528#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1529#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1530
1531#ifdef HAVE_AS_TLS
1532#undef TARGET_HAVE_TLS
1533#define TARGET_HAVE_TLS true
1534#endif
1535#undef TARGET_CANNOT_FORCE_CONST_MEM
1536#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1537#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1538#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1539
1540#undef TARGET_DELEGITIMIZE_ADDRESS
1541#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1542
1543#undef TARGET_MS_BITFIELD_LAYOUT_P
1544#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1545
1546#if TARGET_MACHO
1547#undef TARGET_BINDS_LOCAL_P
1548#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1549#endif
1550
1551#undef TARGET_ASM_OUTPUT_MI_THUNK
1552#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1553#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1554#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1555
1556#undef TARGET_ASM_FILE_START
1557#define TARGET_ASM_FILE_START x86_file_start
1558
1559#undef TARGET_DEFAULT_TARGET_FLAGS
1560#define TARGET_DEFAULT_TARGET_FLAGS	\
1561  (TARGET_DEFAULT			\
1562   | TARGET_64BIT_DEFAULT		\
1563   | TARGET_SUBTARGET_DEFAULT		\
1564   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1565
1566#undef TARGET_HANDLE_OPTION
1567#define TARGET_HANDLE_OPTION ix86_handle_option
1568
1569#undef TARGET_RTX_COSTS
1570#define TARGET_RTX_COSTS ix86_rtx_costs
1571#undef TARGET_ADDRESS_COST
1572#define TARGET_ADDRESS_COST ix86_address_cost
1573
1574#undef TARGET_FIXED_CONDITION_CODE_REGS
1575#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1576#undef TARGET_CC_MODES_COMPATIBLE
1577#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1578
1579#undef TARGET_MACHINE_DEPENDENT_REORG
1580#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1581
1582#undef TARGET_BUILD_BUILTIN_VA_LIST
1583#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1584
1585#undef TARGET_MD_ASM_CLOBBERS
1586#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1587
1588#undef TARGET_PROMOTE_PROTOTYPES
1589#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1590#undef TARGET_STRUCT_VALUE_RTX
1591#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1592#undef TARGET_SETUP_INCOMING_VARARGS
1593#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1594#undef TARGET_MUST_PASS_IN_STACK
1595#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1596#undef TARGET_PASS_BY_REFERENCE
1597#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1598#undef TARGET_INTERNAL_ARG_POINTER
1599#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1600#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1601#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1602
1603#undef TARGET_GIMPLIFY_VA_ARG_EXPR
1604#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1605
1606#undef TARGET_SCALAR_MODE_SUPPORTED_P
1607#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1608
1609#undef TARGET_VECTOR_MODE_SUPPORTED_P
1610#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1611
1612#ifdef HAVE_AS_TLS
1613#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1614#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1615#endif
1616
1617#ifdef SUBTARGET_INSERT_ATTRIBUTES
1618#undef TARGET_INSERT_ATTRIBUTES
1619#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1620#endif
1621
1622#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1623#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1624
1625#undef TARGET_STACK_PROTECT_FAIL
1626#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1627
1628#undef TARGET_FUNCTION_VALUE
1629#define TARGET_FUNCTION_VALUE ix86_function_value
1630
1631struct gcc_target targetm = TARGET_INITIALIZER;
1632
1633
1634/* The svr4 ABI for the i386 says that records and unions are returned
1635   in memory.  */
1636#ifndef DEFAULT_PCC_STRUCT_RETURN
1637#define DEFAULT_PCC_STRUCT_RETURN 1
1638#endif
1639
1640/* Implement TARGET_HANDLE_OPTION.  */
1641
1642static bool
1643ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1644{
1645  switch (code)
1646    {
1647    case OPT_m3dnow:
1648      if (!value)
1649	{
1650	  target_flags &= ~MASK_3DNOW_A;
1651	  target_flags_explicit |= MASK_3DNOW_A;
1652	}
1653      return true;
1654
1655    case OPT_mmmx:
1656      if (!value)
1657	{
1658	  target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1659	  target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1660	}
1661      return true;
1662
1663    case OPT_msse:
1664      if (!value)
1665	{
1666	  target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1667	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1668	}
1669      return true;
1670
1671    case OPT_msse2:
1672      if (!value)
1673	{
1674	  target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1675	  target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1676	}
1677      return true;
1678
1679    case OPT_msse3:
1680      if (!value)
1681	{
1682	  target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
1683	  target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A;
1684	}
1685      return true;
1686
1687    default:
1688      return true;
1689    }
1690}
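/* The net effect of the handlers above is that explicitly disabling a
   lower ISA level also clears the levels that depend on it; e.g. for a
   hypothetical command line "-msse3 -mno-sse2", the OPT_msse2 case clears
   MASK_SSE3, MASK_SSSE3 and MASK_SSE4A again, so SSE3 code generation is
   not left enabled without its SSE2 prerequisite.  */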
1691
1692/* Sometimes certain combinations of command options do not make
1693   sense on a particular target machine.  You can define a macro
1694   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
1695   defined, is executed once just after all the command options have
1696   been parsed.
1697
1698   Don't use this macro to turn on various extra optimizations for
1699   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
1700
1701void
1702override_options (void)
1703{
1704  int i;
1705  int ix86_tune_defaulted = 0;
1706
1707  /* Comes from final.c -- no real reason to change it.  */
1708#define MAX_CODE_ALIGN 16
1709
1710  static struct ptt
1711    {
1712      const struct processor_costs *cost;	/* Processor costs */
1713      const int target_enable;			/* Target flags to enable.  */
1714      const int target_disable;			/* Target flags to disable.  */
1715      const int align_loop;			/* Default alignments.  */
1716      const int align_loop_max_skip;
1717      const int align_jump;
1718      const int align_jump_max_skip;
1719      const int align_func;
1720    }
1721  const processor_target_table[PROCESSOR_max] =
1722    {
1723      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1724      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1725      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1726      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1727      {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1728      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1729      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1730      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1731      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1732      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1733      {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1734      {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1735      {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1736      {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32}
1737    };
1738
1739  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1740  static struct pta
1741    {
1742      const char *const name;		/* processor name or nickname.  */
1743      const enum processor_type processor;
1744      const enum pta_flags
1745	{
1746	  PTA_SSE = 1,
1747	  PTA_SSE2 = 2,
1748	  PTA_SSE3 = 4,
1749	  PTA_MMX = 8,
1750	  PTA_PREFETCH_SSE = 16,
1751	  PTA_3DNOW = 32,
1752	  PTA_3DNOW_A = 64,
1753	  PTA_64BIT = 128,
1754	  PTA_SSSE3 = 256,
1755	  PTA_CX16 = 512,
1756	  PTA_POPCNT = 1024,
1757	  PTA_ABM = 2048,
1758 	  PTA_SSE4A = 4096
1759	} flags;
1760    }
1761  const processor_alias_table[] =
1762    {
1763      {"i386", PROCESSOR_I386, 0},
1764      {"i486", PROCESSOR_I486, 0},
1765      {"i586", PROCESSOR_PENTIUM, 0},
1766      {"pentium", PROCESSOR_PENTIUM, 0},
1767      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1768      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1769      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1770      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1771      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1772      {"i686", PROCESSOR_PENTIUMPRO, 0},
1773      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1774      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1775      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1776      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1777      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1778      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1779				       | PTA_MMX | PTA_PREFETCH_SSE},
1780      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1781				        | PTA_MMX | PTA_PREFETCH_SSE},
1782      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1783				        | PTA_MMX | PTA_PREFETCH_SSE},
1784      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1785					| PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1786      {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1787                                        | PTA_64BIT | PTA_MMX
1788					| PTA_PREFETCH_SSE | PTA_CX16},
1789      {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1790				   | PTA_3DNOW_A},
1791      {"k6", PROCESSOR_K6, PTA_MMX},
1792      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1793      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1794      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1795				   | PTA_3DNOW_A},
1796      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1797					 | PTA_3DNOW | PTA_3DNOW_A},
1798      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1799				    | PTA_3DNOW_A | PTA_SSE},
1800      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1801				      | PTA_3DNOW_A | PTA_SSE},
1802      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1803				      | PTA_3DNOW_A | PTA_SSE},
1804      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1805			       | PTA_SSE | PTA_SSE2 },
1806      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1807				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1808      {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1809				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1810				      | PTA_SSE3 },
1811      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1812				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1813      {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1814				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1815				      | PTA_SSE3 },
1816      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1817				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1818      {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1819				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1820				      | PTA_SSE3 },
1821      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1822				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1823      {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1824                                       | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1825                                       | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1826                                       | PTA_ABM | PTA_SSE4A | PTA_CX16},
1827      {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1828                                       | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1829                                       | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1830                                       | PTA_ABM | PTA_SSE4A | PTA_CX16},
1831      {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
1832      {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
1833    };
1834
1835  int const pta_size = ARRAY_SIZE (processor_alias_table);
1836
1837#ifdef SUBTARGET_OVERRIDE_OPTIONS
1838  SUBTARGET_OVERRIDE_OPTIONS;
1839#endif
1840
1841#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1842  SUBSUBTARGET_OVERRIDE_OPTIONS;
1843#endif
1844
1845  /* -fPIC is the default for x86_64.  */
1846  if (TARGET_MACHO && TARGET_64BIT)
1847    flag_pic = 2;
1848
1849  /* Set the default values for switches whose default depends on TARGET_64BIT
1850     in case they weren't overwritten by command line options.  */
1851  if (TARGET_64BIT)
1852    {
1853      /* Mach-O doesn't support omitting the frame pointer for now.  */
1854      if (flag_omit_frame_pointer == 2)
1855	flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1856      if (flag_asynchronous_unwind_tables == 2)
1857	flag_asynchronous_unwind_tables = 1;
1858      if (flag_pcc_struct_return == 2)
1859	flag_pcc_struct_return = 0;
1860    }
1861  else
1862    {
1863      if (flag_omit_frame_pointer == 2)
1864	flag_omit_frame_pointer = 0;
1865      if (flag_asynchronous_unwind_tables == 2)
1866	flag_asynchronous_unwind_tables = 0;
1867      if (flag_pcc_struct_return == 2)
1868	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1869    }
1870
1871  /* Need to check -mtune=generic first.  */
1872  if (ix86_tune_string)
1873    {
1874      if (!strcmp (ix86_tune_string, "generic")
1875	  || !strcmp (ix86_tune_string, "i686")
1876	  /* As special support for cross compilers we read -mtune=native
1877	     as -mtune=generic.  With native compilers we won't see
1878	     -mtune=native, as the driver will already have rewritten it.  */
1879	  || !strcmp (ix86_tune_string, "native"))
1880	{
1881	  if (TARGET_64BIT)
1882	    ix86_tune_string = "generic64";
1883	  else
1884	    ix86_tune_string = "generic32";
1885	}
1886      else if (!strncmp (ix86_tune_string, "generic", 7))
1887	error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1888    }
1889  else
1890    {
1891      if (ix86_arch_string)
1892	ix86_tune_string = ix86_arch_string;
1893      if (!ix86_tune_string)
1894	{
1895	  ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1896	  ix86_tune_defaulted = 1;
1897	}
1898
1899      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
1900	 need to use a sensible tune option.  */
1901      if (!strcmp (ix86_tune_string, "generic")
1902	  || !strcmp (ix86_tune_string, "x86-64")
1903	  || !strcmp (ix86_tune_string, "i686"))
1904	{
1905	  if (TARGET_64BIT)
1906	    ix86_tune_string = "generic64";
1907	  else
1908	    ix86_tune_string = "generic32";
1909	}
1910    }
1911  if (!strcmp (ix86_tune_string, "x86-64"))
1912    warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated.  Use -mtune=k8 or "
1913	     "-mtune=generic instead as appropriate.");
1914
1915  if (!ix86_arch_string)
1916    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1917  if (!strcmp (ix86_arch_string, "generic"))
1918    error ("generic CPU can be used only for -mtune= switch");
1919  if (!strncmp (ix86_arch_string, "generic", 7))
1920    error ("bad value (%s) for -march= switch", ix86_arch_string);
1921
1922  if (ix86_cmodel_string != 0)
1923    {
1924      if (!strcmp (ix86_cmodel_string, "small"))
1925	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1926      else if (!strcmp (ix86_cmodel_string, "medium"))
1927	ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1928      else if (flag_pic)
1929	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1930      else if (!strcmp (ix86_cmodel_string, "32"))
1931	ix86_cmodel = CM_32;
1932      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1933	ix86_cmodel = CM_KERNEL;
1934      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1935	ix86_cmodel = CM_LARGE;
1936      else
1937	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1938    }
1939  else
1940    {
1941      ix86_cmodel = CM_32;
1942      if (TARGET_64BIT)
1943	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1944    }
1945  if (ix86_asm_string != 0)
1946    {
1947      if (! TARGET_MACHO
1948	  && !strcmp (ix86_asm_string, "intel"))
1949	ix86_asm_dialect = ASM_INTEL;
1950      else if (!strcmp (ix86_asm_string, "att"))
1951	ix86_asm_dialect = ASM_ATT;
1952      else
1953	error ("bad value (%s) for -masm= switch", ix86_asm_string);
1954    }
1955  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1956    error ("code model %qs not supported in the %s bit mode",
1957	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1958  if (ix86_cmodel == CM_LARGE)
1959    sorry ("code model %<large%> not supported yet");
1960  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1961    sorry ("%i-bit mode not compiled in",
1962	   (target_flags & MASK_64BIT) ? 64 : 32);
1963
1964  for (i = 0; i < pta_size; i++)
1965    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1966      {
1967	ix86_arch = processor_alias_table[i].processor;
1968	/* Default cpu tuning to the architecture.  */
1969	ix86_tune = ix86_arch;
1970	if (processor_alias_table[i].flags & PTA_MMX
1971	    && !(target_flags_explicit & MASK_MMX))
1972	  target_flags |= MASK_MMX;
1973	if (processor_alias_table[i].flags & PTA_3DNOW
1974	    && !(target_flags_explicit & MASK_3DNOW))
1975	  target_flags |= MASK_3DNOW;
1976	if (processor_alias_table[i].flags & PTA_3DNOW_A
1977	    && !(target_flags_explicit & MASK_3DNOW_A))
1978	  target_flags |= MASK_3DNOW_A;
1979	if (processor_alias_table[i].flags & PTA_SSE
1980	    && !(target_flags_explicit & MASK_SSE))
1981	  target_flags |= MASK_SSE;
1982	if (processor_alias_table[i].flags & PTA_SSE2
1983	    && !(target_flags_explicit & MASK_SSE2))
1984	  target_flags |= MASK_SSE2;
1985	if (processor_alias_table[i].flags & PTA_SSE3
1986	    && !(target_flags_explicit & MASK_SSE3))
1987	  target_flags |= MASK_SSE3;
1988	if (processor_alias_table[i].flags & PTA_SSSE3
1989	    && !(target_flags_explicit & MASK_SSSE3))
1990	  target_flags |= MASK_SSSE3;
1991	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1992	  x86_prefetch_sse = true;
1993	if (processor_alias_table[i].flags & PTA_CX16)
1994	  x86_cmpxchg16b = true;
1995	if (processor_alias_table[i].flags & PTA_POPCNT
1996	    && !(target_flags_explicit & MASK_POPCNT))
1997	  target_flags |= MASK_POPCNT;
1998	if (processor_alias_table[i].flags & PTA_ABM
1999	    && !(target_flags_explicit & MASK_ABM))
2000	  target_flags |= MASK_ABM;
2001	if (processor_alias_table[i].flags & PTA_SSE4A
2002	    && !(target_flags_explicit & MASK_SSE4A))
2003	  target_flags |= MASK_SSE4A;
2004	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2005	  error ("CPU you selected does not support x86-64 "
2006		 "instruction set");
2007	break;
2008      }
2009
2010  if (i == pta_size)
2011    error ("bad value (%s) for -march= switch", ix86_arch_string);
2012
2013  for (i = 0; i < pta_size; i++)
2014    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2015      {
2016	ix86_tune = processor_alias_table[i].processor;
2017	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2018	  {
2019	    if (ix86_tune_defaulted)
2020	      {
2021		ix86_tune_string = "x86-64";
2022		for (i = 0; i < pta_size; i++)
2023		  if (! strcmp (ix86_tune_string,
2024				processor_alias_table[i].name))
2025		    break;
2026		ix86_tune = processor_alias_table[i].processor;
2027	      }
2028	    else
2029	      error ("CPU you selected does not support x86-64 "
2030		     "instruction set");
2031	  }
2032        /* Intel CPUs have always interpreted SSE prefetch instructions as
2033	   NOPs; so, we can enable SSE prefetch instructions even when
2034	   -mtune (rather than -march) points us to a processor that has them.
2035	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2036	   higher processors.  */
2037	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2038	  x86_prefetch_sse = true;
2039	break;
2040      }
2041  if (i == pta_size)
2042    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2043
2044  if (optimize_size)
2045    ix86_cost = &size_cost;
2046  else
2047    ix86_cost = processor_target_table[ix86_tune].cost;
2048  target_flags |= processor_target_table[ix86_tune].target_enable;
2049  target_flags &= ~processor_target_table[ix86_tune].target_disable;
2050
2051  /* Arrange to set up i386_stack_locals for all functions.  */
2052  init_machine_status = ix86_init_machine_status;
2053
2054  /* Validate -mregparm= value.  */
2055  if (ix86_regparm_string)
2056    {
2057      i = atoi (ix86_regparm_string);
2058      if (i < 0 || i > REGPARM_MAX)
2059	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2060      else
2061	ix86_regparm = i;
2062    }
2063  else
2064   if (TARGET_64BIT)
2065     ix86_regparm = REGPARM_MAX;
2066
2067  /* If the user has provided any of the -malign-* options,
2068     warn and use that value only if -falign-* is not set.
2069     Remove this code in GCC 3.2 or later.  */
2070  if (ix86_align_loops_string)
2071    {
2072      warning (0, "-malign-loops is obsolete, use -falign-loops");
2073      if (align_loops == 0)
2074	{
2075	  i = atoi (ix86_align_loops_string);
2076	  if (i < 0 || i > MAX_CODE_ALIGN)
2077	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2078	  else
2079	    align_loops = 1 << i;
2080	}
2081    }
2082
2083  if (ix86_align_jumps_string)
2084    {
2085      warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2086      if (align_jumps == 0)
2087	{
2088	  i = atoi (ix86_align_jumps_string);
2089	  if (i < 0 || i > MAX_CODE_ALIGN)
2090	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2091	  else
2092	    align_jumps = 1 << i;
2093	}
2094    }
2095
2096  if (ix86_align_funcs_string)
2097    {
2098      warning (0, "-malign-functions is obsolete, use -falign-functions");
2099      if (align_functions == 0)
2100	{
2101	  i = atoi (ix86_align_funcs_string);
2102	  if (i < 0 || i > MAX_CODE_ALIGN)
2103	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2104	  else
2105	    align_functions = 1 << i;
2106	}
2107    }
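  /* For all three -malign-* options above the value is interpreted as a
     power of two; e.g. a hypothetical "-malign-functions=4" results in
     align_functions = 1 << 4, i.e. 16-byte function alignment.  */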
2108
2109  /* Default align_* from the processor table.  */
2110  if (align_loops == 0)
2111    {
2112      align_loops = processor_target_table[ix86_tune].align_loop;
2113      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2114    }
2115  if (align_jumps == 0)
2116    {
2117      align_jumps = processor_target_table[ix86_tune].align_jump;
2118      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2119    }
2120  if (align_functions == 0)
2121    {
2122      align_functions = processor_target_table[ix86_tune].align_func;
2123    }
2124
2125  /* Validate -mbranch-cost= value, or provide default.  */
2126  ix86_branch_cost = ix86_cost->branch_cost;
2127  if (ix86_branch_cost_string)
2128    {
2129      i = atoi (ix86_branch_cost_string);
2130      if (i < 0 || i > 5)
2131	error ("-mbranch-cost=%d is not between 0 and 5", i);
2132      else
2133	ix86_branch_cost = i;
2134    }
2135  if (ix86_section_threshold_string)
2136    {
2137      i = atoi (ix86_section_threshold_string);
2138      if (i < 0)
2139	error ("-mlarge-data-threshold=%d is negative", i);
2140      else
2141	ix86_section_threshold = i;
2142    }
2143
2144  if (ix86_tls_dialect_string)
2145    {
2146      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2147	ix86_tls_dialect = TLS_DIALECT_GNU;
2148      else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2149	ix86_tls_dialect = TLS_DIALECT_GNU2;
2150      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2151	ix86_tls_dialect = TLS_DIALECT_SUN;
2152      else
2153	error ("bad value (%s) for -mtls-dialect= switch",
2154	       ix86_tls_dialect_string);
2155    }
2156
2157  /* Keep nonleaf frame pointers.  */
2158  if (flag_omit_frame_pointer)
2159    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2160  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2161    flag_omit_frame_pointer = 1;
2162
2163  /* If we're doing fast math, we don't care about comparison order
2164     wrt NaNs.  This lets us use a shorter comparison sequence.  */
2165  if (flag_finite_math_only)
2166    target_flags &= ~MASK_IEEE_FP;
2167
2168  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2169     since the insns won't need emulation.  */
2170  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2171    target_flags &= ~MASK_NO_FANCY_MATH_387;
2172
2173  /* Likewise, if the target doesn't have a 387, or we've specified
2174     software floating point, don't use 387 inline intrinsics.  */
2175  if (!TARGET_80387)
2176    target_flags |= MASK_NO_FANCY_MATH_387;
2177
2178  /* Turn on SSE3 builtins for -mssse3.  */
2179  if (TARGET_SSSE3)
2180    target_flags |= MASK_SSE3;
2181
2182  /* Turn on SSE3 builtins for -msse4a.  */
2183  if (TARGET_SSE4A)
2184    target_flags |= MASK_SSE3;
2185
2186  /* Turn on SSE2 builtins for -msse3.  */
2187  if (TARGET_SSE3)
2188    target_flags |= MASK_SSE2;
2189
2190  /* Turn on SSE builtins for -msse2.  */
2191  if (TARGET_SSE2)
2192    target_flags |= MASK_SSE;
2193
2194  /* Turn on MMX builtins for -msse.  */
2195  if (TARGET_SSE)
2196    {
2197      target_flags |= MASK_MMX & ~target_flags_explicit;
2198      x86_prefetch_sse = true;
2199    }
2200
2201  /* Turn on MMX builtins for 3Dnow.  */
2202  if (TARGET_3DNOW)
2203    target_flags |= MASK_MMX;
2204
2205  /* Turn on POPCNT builtins for -mabm.  */
2206  if (TARGET_ABM)
2207    target_flags |= MASK_POPCNT;
2208
2209  if (TARGET_64BIT)
2210    {
2211      if (TARGET_ALIGN_DOUBLE)
2212	error ("-malign-double makes no sense in the 64bit mode");
2213      if (TARGET_RTD)
2214	error ("-mrtd calling convention not supported in the 64bit mode");
2215
2216      /* Enable by default the SSE and MMX builtins.  Do allow the user to
2217	 explicitly disable any of these.  In particular, disabling SSE and
2218	 MMX for kernel code is extremely useful.  */
2219      target_flags
2220	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2221	    & ~target_flags_explicit);
2222     }
2223  else
2224    {
2225      /* The i386 ABI does not specify a red zone.  It still makes sense to use
2226         one when the programmer keeps the stack below %esp from being clobbered.  */
2227      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2228        target_flags |= MASK_NO_RED_ZONE;
2229    }
2230
2231  /* Validate -mpreferred-stack-boundary= value, or provide default.
2232     The default of 128 bits is for Pentium III's SSE __m128.  We can't
2233     change it because of optimize_size.  Otherwise, we can't mix object
2234     files compiled with -Os and -On.  */
2235  ix86_preferred_stack_boundary = 128;
2236  if (ix86_preferred_stack_boundary_string)
2237    {
2238      i = atoi (ix86_preferred_stack_boundary_string);
2239      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2240	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2241	       TARGET_64BIT ? 4 : 2);
2242      else
2243	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2244    }
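  /* For example, "-mpreferred-stack-boundary=4" yields (1 << 4) * BITS_PER_UNIT
     == 128 bits, the 16-byte default; values below 2 (4 bytes) on ia32 or
     below 4 (16 bytes) on x86-64 are rejected above.  */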
2245
2246  /* Accept -msseregparm only if at least SSE support is enabled.  */
2247  if (TARGET_SSEREGPARM
2248      && ! TARGET_SSE)
2249    error ("-msseregparm used without SSE enabled");
2250
2251  ix86_fpmath = TARGET_FPMATH_DEFAULT;
2252
2253  if (ix86_fpmath_string != 0)
2254    {
2255      if (! strcmp (ix86_fpmath_string, "387"))
2256	ix86_fpmath = FPMATH_387;
2257      else if (! strcmp (ix86_fpmath_string, "sse"))
2258	{
2259	  if (!TARGET_SSE)
2260	    {
2261	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
2262	      ix86_fpmath = FPMATH_387;
2263	    }
2264	  else
2265	    ix86_fpmath = FPMATH_SSE;
2266	}
2267      else if (! strcmp (ix86_fpmath_string, "387,sse")
2268	       || ! strcmp (ix86_fpmath_string, "sse,387"))
2269	{
2270	  if (!TARGET_SSE)
2271	    {
2272	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
2273	      ix86_fpmath = FPMATH_387;
2274	    }
2275	  else if (!TARGET_80387)
2276	    {
2277	      warning (0, "387 instruction set disabled, using SSE arithmetics");
2278	      ix86_fpmath = FPMATH_SSE;
2279	    }
2280	  else
2281	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
2282	}
2283      else
2284	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2285    }
2286
2287  /* If the i387 is disabled, then do not return values in it. */
2288  if (!TARGET_80387)
2289    target_flags &= ~MASK_FLOAT_RETURNS;
2290
2291  if ((x86_accumulate_outgoing_args & TUNEMASK)
2292      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2293      && !optimize_size)
2294    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2295
2296  /* ??? Unwind info is not correct around the CFG unless either a frame
2297     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
2298     unwind info generation to be aware of the CFG and propagating states
2299     around edges.  */
2300  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2301       || flag_exceptions || flag_non_call_exceptions)
2302      && flag_omit_frame_pointer
2303      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2304    {
2305      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2306	warning (0, "unwind tables currently require either a frame pointer "
2307		 "or -maccumulate-outgoing-args for correctness");
2308      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2309    }
2310
2311  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
2312  {
2313    char *p;
2314    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2315    p = strchr (internal_label_prefix, 'X');
2316    internal_label_prefix_len = p - internal_label_prefix;
2317    *p = '\0';
2318  }
2319
2320  /* When scheduling description is not available, disable scheduler pass
2321     so it won't slow down the compilation and make x87 code slower.  */
2322  if (!TARGET_SCHEDULE)
2323    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2324}
2325
2326/* Switch to the appropriate section for output of DECL.
2327   DECL is either a `VAR_DECL' node or a constant of some sort.
2328   RELOC indicates whether forming the initial value of DECL requires
2329   link-time relocations.  */
2330
2331static section *
2332x86_64_elf_select_section (tree decl, int reloc,
2333			   unsigned HOST_WIDE_INT align)
2334{
2335  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2336      && ix86_in_large_data_p (decl))
2337    {
2338      const char *sname = NULL;
2339      unsigned int flags = SECTION_WRITE;
2340      switch (categorize_decl_for_section (decl, reloc))
2341	{
2342	case SECCAT_DATA:
2343	  sname = ".ldata";
2344	  break;
2345	case SECCAT_DATA_REL:
2346	  sname = ".ldata.rel";
2347	  break;
2348	case SECCAT_DATA_REL_LOCAL:
2349	  sname = ".ldata.rel.local";
2350	  break;
2351	case SECCAT_DATA_REL_RO:
2352	  sname = ".ldata.rel.ro";
2353	  break;
2354	case SECCAT_DATA_REL_RO_LOCAL:
2355	  sname = ".ldata.rel.ro.local";
2356	  break;
2357	case SECCAT_BSS:
2358	  sname = ".lbss";
2359	  flags |= SECTION_BSS;
2360	  break;
2361	case SECCAT_RODATA:
2362	case SECCAT_RODATA_MERGE_STR:
2363	case SECCAT_RODATA_MERGE_STR_INIT:
2364	case SECCAT_RODATA_MERGE_CONST:
2365	  sname = ".lrodata";
2366	  flags = 0;
2367	  break;
2368	case SECCAT_SRODATA:
2369	case SECCAT_SDATA:
2370	case SECCAT_SBSS:
2371	  gcc_unreachable ();
2372	case SECCAT_TEXT:
2373	case SECCAT_TDATA:
2374	case SECCAT_TBSS:
2375	  /* We don't split these for medium model.  Place them into
2376	     default sections and hope for the best.  */
2377	  break;
2378	}
2379      if (sname)
2380	{
2381	  /* We might get called with string constants, but get_named_section
2382	     doesn't like them as they are not DECLs.  Also, we need to set
2383	     flags in that case.  */
2384	  if (!DECL_P (decl))
2385	    return get_section (sname, flags, NULL);
2386	  return get_named_section (decl, sname, reloc);
2387	}
2388    }
2389  return default_elf_select_section (decl, reloc, align);
2390}
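/* A sketch of the effect, assuming ix86_in_large_data_p compares object
   sizes against ix86_section_threshold as set elsewhere in this file:
   with -mcmodel=medium, a writable object larger than the threshold
   (65536 bytes unless -mlarge-data-threshold= says otherwise) is placed
   in .ldata rather than .data so it need not fit in the small data area.  */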
2391
2392/* Build up a unique section name, expressed as a
2393   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2394   RELOC indicates whether the initial value of EXP requires
2395   link-time relocations.  */
2396
2397static void
2398x86_64_elf_unique_section (tree decl, int reloc)
2399{
2400  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2401      && ix86_in_large_data_p (decl))
2402    {
2403      const char *prefix = NULL;
2404      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
2405      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2406
2407      switch (categorize_decl_for_section (decl, reloc))
2408	{
2409	case SECCAT_DATA:
2410	case SECCAT_DATA_REL:
2411	case SECCAT_DATA_REL_LOCAL:
2412	case SECCAT_DATA_REL_RO:
2413	case SECCAT_DATA_REL_RO_LOCAL:
2414          prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2415	  break;
2416	case SECCAT_BSS:
2417          prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2418	  break;
2419	case SECCAT_RODATA:
2420	case SECCAT_RODATA_MERGE_STR:
2421	case SECCAT_RODATA_MERGE_STR_INIT:
2422	case SECCAT_RODATA_MERGE_CONST:
2423          prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2424	  break;
2425	case SECCAT_SRODATA:
2426	case SECCAT_SDATA:
2427	case SECCAT_SBSS:
2428	  gcc_unreachable ();
2429	case SECCAT_TEXT:
2430	case SECCAT_TDATA:
2431	case SECCAT_TBSS:
2432	  /* We don't split these for medium model.  Place them into
2433	     default sections and hope for the best.  */
2434	  break;
2435	}
2436      if (prefix)
2437	{
2438	  const char *name;
2439	  size_t nlen, plen;
2440	  char *string;
2441	  plen = strlen (prefix);
2442
2443	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2444	  name = targetm.strip_name_encoding (name);
2445	  nlen = strlen (name);
2446
2447	  string = alloca (nlen + plen + 1);
2448	  memcpy (string, prefix, plen);
2449	  memcpy (string + plen, name, nlen + 1);
2450
2451	  DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2452	  return;
2453	}
2454    }
2455  default_unique_section (decl, reloc);
2456}
2457
2458#ifdef COMMON_ASM_OP
2459/* This says how to output assembler code to declare an
2460   uninitialized external linkage data object.
2461
2462   For the x86-64 medium code model we need to use the .largecomm
2463   directive for large objects.  */
2464void
2465x86_elf_aligned_common (FILE *file,
2466			const char *name, unsigned HOST_WIDE_INT size,
2467			int align)
2468{
2469  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2470      && size > (unsigned int)ix86_section_threshold)
2471    fprintf (file, ".largecomm\t");
2472  else
2473    fprintf (file, "%s", COMMON_ASM_OP);
2474  assemble_name (file, name);
2475  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2476	   size, align / BITS_PER_UNIT);
2477}
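/* A plausible sample of the output, assuming COMMON_ASM_OP is "\t.comm\t"
   and a hypothetical 1 MB object "big" aligned to 32 bytes under the
   medium code model:

       .largecomm	big,1048576,32

   whereas a small 16-byte object "x" aligned to 4 bytes would get

       	.comm	x,16,4
*/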
2478
2479/* Utility function for targets to use in implementing
2480   ASM_OUTPUT_ALIGNED_BSS.  */
2481
2482void
2483x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2484			const char *name, unsigned HOST_WIDE_INT size,
2485			int align)
2486{
2487  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2488      && size > (unsigned int)ix86_section_threshold)
2489    switch_to_section (get_named_section (decl, ".lbss", 0));
2490  else
2491    switch_to_section (bss_section);
2492  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2493#ifdef ASM_DECLARE_OBJECT_NAME
2494  last_assemble_variable_decl = decl;
2495  ASM_DECLARE_OBJECT_NAME (file, name, decl);
2496#else
2497  /* The standard thing is just to output a label for the object.  */
2498  ASM_OUTPUT_LABEL (file, name);
2499#endif /* ASM_DECLARE_OBJECT_NAME */
2500  ASM_OUTPUT_SKIP (file, size ? size : 1);
2501}
2502#endif
2503
2504void
2505optimization_options (int level, int size ATTRIBUTE_UNUSED)
2506{
2507  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
2508     make the problem with not enough registers even worse.  */
2509#ifdef INSN_SCHEDULING
2510  if (level > 1)
2511    flag_schedule_insns = 0;
2512#endif
2513
2514  if (TARGET_MACHO)
2515    /* The Darwin libraries never set errno, so we might as well
2516       avoid calling them when that's the only reason we would.  */
2517    flag_errno_math = 0;
2518
2519  /* The default values of these switches depend on TARGET_64BIT,
2520     which is not known at this point.  Mark these values with 2 and
2521     let the user override them.  If no command line option specifies
2522     them, we will set the defaults in override_options.  */
2523  if (optimize >= 1)
2524    flag_omit_frame_pointer = 2;
2525  flag_pcc_struct_return = 2;
2526  flag_asynchronous_unwind_tables = 2;
2527#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2528  SUBTARGET_OPTIMIZATION_OPTIONS;
2529#endif
2530}
2531
2532/* Table of valid machine attributes.  */
2533const struct attribute_spec ix86_attribute_table[] =
2534{
2535  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2536  /* Stdcall attribute says callee is responsible for popping arguments
2537     if they are not variable.  */
2538  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2539  /* Fastcall attribute says callee is responsible for popping arguments
2540     if they are not variable.  */
2541  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2542  /* Cdecl attribute says the callee is a normal C declaration.  */
2543  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2544  /* Regparm attribute specifies how many integer arguments are to be
2545     passed in registers.  */
2546  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
2547  /* Sseregparm attribute says we are using x86_64 calling conventions
2548     for FP arguments.  */
2549  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2550  /* force_align_arg_pointer says this function realigns the stack at entry.  */
2551  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2552    false, true,  true, ix86_handle_cconv_attribute },
2553#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2554  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2555  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2556  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
2557#endif
2558  { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
2559  { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
2560#ifdef SUBTARGET_ATTRIBUTE_TABLE
2561  SUBTARGET_ATTRIBUTE_TABLE,
2562#endif
2563  { NULL,        0, 0, false, false, false, NULL }
2564};
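/* Typical uses of the calling-convention attributes handled above, shown
   as hypothetical ia32 declarations (on x86-64 these attributes are
   ignored with a warning, see ix86_handle_cconv_attribute below):

       int f (int a, int b, int c) __attribute__ ((regparm (3)));
       int g (int a, int b) __attribute__ ((fastcall));
       int h (int a, int b) __attribute__ ((stdcall));

   regparm passes up to three integer arguments in registers, fastcall
   passes the first two integer arguments in %ecx/%edx with the callee
   popping the stack, and stdcall keeps stack passing but makes the
   callee pop its arguments.  */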
2565
2566/* Decide whether we can make a sibling call to a function.  DECL is the
2567   declaration of the function being targeted by the call and EXP is the
2568   CALL_EXPR representing the call.  */
2569
2570static bool
2571ix86_function_ok_for_sibcall (tree decl, tree exp)
2572{
2573  tree func;
2574  rtx a, b;
2575
2576  /* If we are generating position-independent code, we cannot sibcall
2577     optimize any indirect call, or a direct call to a global function,
2578     as the PLT requires %ebx be live.  */
2579  if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2580    return false;
2581
2582  if (decl)
2583    func = decl;
2584  else
2585    {
2586      func = TREE_TYPE (TREE_OPERAND (exp, 0));
2587      if (POINTER_TYPE_P (func))
2588        func = TREE_TYPE (func);
2589    }
2590
2591  /* Check that the return value locations are the same.  For example,
2592     if we are returning floats on the 80387 register stack, we cannot
2593     make a sibcall from a function that doesn't return a float to a
2594     function that does or, conversely, from a function that does return
2595     a float to a function that doesn't; the necessary stack adjustment
2596     would not be executed.  This is also the place we notice
2597     differences in the return value ABI.  Note that it is ok for one
2598     of the functions to have void return type as long as the return
2599     value of the other is passed in a register.  */
2600  a = ix86_function_value (TREE_TYPE (exp), func, false);
2601  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2602			   cfun->decl, false);
2603  if (STACK_REG_P (a) || STACK_REG_P (b))
2604    {
2605      if (!rtx_equal_p (a, b))
2606	return false;
2607    }
2608  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2609    ;
2610  else if (!rtx_equal_p (a, b))
2611    return false;
2612
2613  /* If this call is indirect, we'll need to be able to use a call-clobbered
2614     register for the address of the target function.  Make sure that all
2615     such registers are not used for passing parameters.  */
2616  if (!decl && !TARGET_64BIT)
2617    {
2618      tree type;
2619
2620      /* We're looking at the CALL_EXPR, we need the type of the function.  */
2621      type = TREE_OPERAND (exp, 0);		/* pointer expression */
2622      type = TREE_TYPE (type);			/* pointer type */
2623      type = TREE_TYPE (type);			/* function type */
2624
2625      if (ix86_function_regparm (type, NULL) >= 3)
2626	{
2627	  /* ??? Need to count the actual number of registers to be used,
2628	     not the possible number of registers.  Fix later.  */
2629	  return false;
2630	}
2631    }
2632
2633#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2634  /* Dllimport'd functions are also called indirectly.  */
2635  if (decl && DECL_DLLIMPORT_P (decl)
2636      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2637    return false;
2638#endif
2639
2640  /* If we force-aligned the stack, then sibcalling would unalign the
2641     stack, which may break the called function.  */
2642  if (cfun->machine->force_align_arg_pointer)
2643    return false;
2644
2645  /* Otherwise okay.  That also includes certain types of indirect calls.  */
2646  return true;
2647}
2648
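/* Illustrative sketch, not part of GCC itself: under the PIC rule above,
   a tail call to a global function cannot become a sibcall on ia32 when
   compiling with -fPIC, because the call goes through the PLT and the PLT
   requires %ebx to hold the GOT pointer.  The function names are invented.  */
#if 0
extern void example_callee (int);

void
example_caller (int x)
{
  /* With -fPIC on ia32, example_callee does not bind locally, so
     ix86_function_ok_for_sibcall returns false and a normal call
     (which keeps %ebx live across it) is emitted instead.  */
  example_callee (x + 1);
}
#endif
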
2649/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2650   calling convention attributes;
2651   arguments as in struct attribute_spec.handler.  */
2652
2653static tree
2654ix86_handle_cconv_attribute (tree *node, tree name,
2655				   tree args,
2656				   int flags ATTRIBUTE_UNUSED,
2657				   bool *no_add_attrs)
2658{
2659  if (TREE_CODE (*node) != FUNCTION_TYPE
2660      && TREE_CODE (*node) != METHOD_TYPE
2661      && TREE_CODE (*node) != FIELD_DECL
2662      && TREE_CODE (*node) != TYPE_DECL)
2663    {
2664      warning (OPT_Wattributes, "%qs attribute only applies to functions",
2665	       IDENTIFIER_POINTER (name));
2666      *no_add_attrs = true;
2667      return NULL_TREE;
2668    }
2669
2670  /* Can combine regparm with all attributes but fastcall.  */
2671  if (is_attribute_p ("regparm", name))
2672    {
2673      tree cst;
2674
2675      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2676        {
2677	  error ("fastcall and regparm attributes are not compatible");
2678	}
2679
2680      cst = TREE_VALUE (args);
2681      if (TREE_CODE (cst) != INTEGER_CST)
2682	{
2683	  warning (OPT_Wattributes,
2684		   "%qs attribute requires an integer constant argument",
2685		   IDENTIFIER_POINTER (name));
2686	  *no_add_attrs = true;
2687	}
2688      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2689	{
2690	  warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2691		   IDENTIFIER_POINTER (name), REGPARM_MAX);
2692	  *no_add_attrs = true;
2693	}
2694
2695      if (!TARGET_64BIT
2696	  && lookup_attribute (ix86_force_align_arg_pointer_string,
2697			       TYPE_ATTRIBUTES (*node))
2698	  && compare_tree_int (cst, REGPARM_MAX-1))
2699	{
2700	  error ("%s functions limited to %d register parameters",
2701		 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2702	}
2703
2704      return NULL_TREE;
2705    }
2706
2707  if (TARGET_64BIT)
2708    {
2709      warning (OPT_Wattributes, "%qs attribute ignored",
2710	       IDENTIFIER_POINTER (name));
2711      *no_add_attrs = true;
2712      return NULL_TREE;
2713    }
2714
2715  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
2716  if (is_attribute_p ("fastcall", name))
2717    {
2718      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2719        {
2720	  error ("fastcall and cdecl attributes are not compatible");
2721	}
2722      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2723        {
2724	  error ("fastcall and stdcall attributes are not compatible");
2725	}
2726      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2727        {
2728	  error ("fastcall and regparm attributes are not compatible");
2729	}
2730    }
2731
2732  /* Can combine stdcall with fastcall (redundant), regparm and
2733     sseregparm.  */
2734  else if (is_attribute_p ("stdcall", name))
2735    {
2736      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2737        {
2738	  error ("stdcall and cdecl attributes are not compatible");
2739	}
2740      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2741        {
2742	  error ("stdcall and fastcall attributes are not compatible");
2743	}
2744    }
2745
2746  /* Can combine cdecl with regparm and sseregparm.  */
2747  else if (is_attribute_p ("cdecl", name))
2748    {
2749      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2750        {
2751	  error ("stdcall and cdecl attributes are not compatible");
2752	}
2753      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2754        {
2755	  error ("fastcall and cdecl attributes are not compatible");
2756	}
2757    }
2758
2759  /* Can combine sseregparm with all attributes.  */
2760
2761  return NULL_TREE;
2762}
2763
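/* Illustrative sketch, not part of GCC itself: example user declarations
   exercising the attribute checks above on ia32.  The names are invented.  */
#if 0
/* Accepted: up to three integer arguments arrive in %eax, %edx and %ecx.  */
extern int example_fast_add (int, int, int) __attribute__ ((regparm (3)));

/* Rejected by the handler above with
   "fastcall and regparm attributes are not compatible".  */
extern int example_bad (int, int) __attribute__ ((fastcall, regparm (2)));
#endif
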
2764/* Return 0 if the attributes for two types are incompatible, 1 if they
2765   are compatible, and 2 if they are nearly compatible (which causes a
2766   warning to be generated).  */
2767
2768static int
2769ix86_comp_type_attributes (tree type1, tree type2)
2770{
2771  /* Check for mismatch of non-default calling convention.  */
2772  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2773
2774  if (TREE_CODE (type1) != FUNCTION_TYPE)
2775    return 1;
2776
2777  /* Check for mismatched fastcall/regparm types.  */
2778  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2779       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2780      || (ix86_function_regparm (type1, NULL)
2781	  != ix86_function_regparm (type2, NULL)))
2782    return 0;
2783
2784  /* Check for mismatched sseregparm types.  */
2785  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2786      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2787    return 0;
2788
2789  /* Check for mismatched return types (cdecl vs stdcall).  */
2790  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2791      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2792    return 0;
2793
2794  return 1;
2795}
2796
2797/* Return the regparm value for a function with the indicated TYPE and DECL.
2798   DECL may be NULL when calling function indirectly
2799   or considering a libcall.  */
2800
2801static int
2802ix86_function_regparm (tree type, tree decl)
2803{
2804  tree attr;
2805  int regparm = ix86_regparm;
2806  bool user_convention = false;
2807
2808  if (!TARGET_64BIT)
2809    {
2810      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2811      if (attr)
2812	{
2813	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2814	  user_convention = true;
2815	}
2816
2817      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2818	{
2819	  regparm = 2;
2820	  user_convention = true;
2821	}
2822
2823      /* Use register calling convention for local functions when possible.  */
2824      if (!TARGET_64BIT && !user_convention && decl
2825	  && flag_unit_at_a_time && !profile_flag)
2826	{
2827	  struct cgraph_local_info *i = cgraph_local_info (decl);
2828	  if (i && i->local)
2829	    {
2830	      int local_regparm, globals = 0, regno;
2831
2832	      /* Make sure no regparm register is taken by a global register
2833		 variable.  */
2834	      for (local_regparm = 0; local_regparm < 3; local_regparm++)
2835		if (global_regs[local_regparm])
2836		  break;
2837	      /* We can't use regparm(3) for nested functions as these use
2838		 the static chain pointer in the third argument.  */
2839	      if (local_regparm == 3
2840		  && decl_function_context (decl)
2841		  && !DECL_NO_STATIC_CHAIN (decl))
2842		local_regparm = 2;
2843	      /* If the function realigns its stack pointer, the
2844		 prologue will clobber %ecx.  If we've already
2845		 generated code for the callee, the callee
2846		 DECL_STRUCT_FUNCTION is gone, so we fall back to
2847		 scanning the attributes for the self-realigning
2848		 property.  */
2849	      if ((DECL_STRUCT_FUNCTION (decl)
2850		   && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2851		  || (!DECL_STRUCT_FUNCTION (decl)
2852		      && lookup_attribute (ix86_force_align_arg_pointer_string,
2853					   TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2854		local_regparm = 2;
2855	      /* Each global register variable increases register pressure,
2856		 so the more global reg vars there are, the less useful the
2857		 regparm optimization is, unless requested by the user explicitly.  */
2858	      for (regno = 0; regno < 6; regno++)
2859		if (global_regs[regno])
2860		  globals++;
2861	      local_regparm
2862		= globals < local_regparm ? local_regparm - globals : 0;
2863
2864	      if (local_regparm > regparm)
2865		regparm = local_regparm;
2866	    }
2867	}
2868    }
2869  return regparm;
2870}
2871
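/* Illustrative sketch, not part of GCC itself: the local-function logic
   above may promote a purely local function to register-argument passing
   even without an explicit attribute.  The name is invented and the exact
   outcome depends on -funit-at-a-time, profiling and global register use.  */
#if 0
static int
example_local_helper (int a, int b, int c)
{
  /* If every caller is in this translation unit, the cgraph node is
     local and GCC may pass a, b and c in %eax, %edx and %ecx here.  */
  return a * b + c;
}
#endif
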
2872/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2873   DFmode (2) arguments in SSE registers for a function with the
2874   indicated TYPE and DECL.  DECL may be NULL when calling function
2875   indirectly or considering a libcall.  Otherwise return 0.  */
2876
2877static int
2878ix86_function_sseregparm (tree type, tree decl)
2879{
2880  /* Use SSE registers to pass SFmode and DFmode arguments if requested
2881     by the sseregparm attribute.  */
2882  if (TARGET_SSEREGPARM
2883      || (type
2884	  && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2885    {
2886      if (!TARGET_SSE)
2887	{
2888	  if (decl)
2889	    error ("Calling %qD with attribute sseregparm without "
2890		   "SSE/SSE2 enabled", decl);
2891	  else
2892	    error ("Calling %qT with attribute sseregparm without "
2893		   "SSE/SSE2 enabled", type);
2894	  return 0;
2895	}
2896
2897      return 2;
2898    }
2899
2900  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2901     (and DFmode for SSE2) arguments in SSE registers,
2902     even for 32-bit targets.  */
2903  if (!TARGET_64BIT && decl
2904      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2905    {
2906      struct cgraph_local_info *i = cgraph_local_info (decl);
2907      if (i && i->local)
2908	return TARGET_SSE2 ? 2 : 1;
2909    }
2910
2911  return 0;
2912}
2913
2914/* Return true if EAX is live at the start of the function.  Used by
2915   ix86_expand_prologue to determine if we need special help before
2916   calling allocate_stack_worker.  */
2917
2918static bool
2919ix86_eax_live_at_start_p (void)
2920{
2921  /* Cheat.  Don't bother working forward from ix86_function_regparm
2922     to the function type to whether an actual argument is located in
2923     eax.  Instead just look at cfg info, which is still close enough
2924     to correct at this point.  This gives false positives for broken
2925     functions that might use uninitialized data that happens to be
2926     allocated in eax, but who cares?  */
2927  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2928}
2929
2930/* Value is the number of bytes of arguments automatically
2931   popped when returning from a subroutine call.
2932   FUNDECL is the declaration node of the function (as a tree),
2933   FUNTYPE is the data type of the function (as a tree),
2934   or for a library call it is an identifier node for the subroutine name.
2935   SIZE is the number of bytes of arguments passed on the stack.
2936
2937   On the 80386, the RTD insn may be used to pop them if the number
2938     of args is fixed, but if the number is variable then the caller
2939     must pop them all.  RTD can't be used for library calls now
2940     because the library is compiled with the Unix compiler.
2941   Use of RTD is a selectable option, since it is incompatible with
2942   standard Unix calling sequences.  If the option is not selected,
2943   the caller must always pop the args.
2944
2945   The attribute stdcall is equivalent to RTD on a per module basis.  */
2946
2947int
2948ix86_return_pops_args (tree fundecl, tree funtype, int size)
2949{
2950  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2951
2952  /* Cdecl functions override -mrtd, and never pop the stack.  */
2953  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2954
2955    /* Stdcall and fastcall functions will pop the stack if not
2956       variable args.  */
2957    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2958        || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2959      rtd = 1;
2960
2961    if (rtd
2962        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2963	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2964		== void_type_node)))
2965      return size;
2966  }
2967
2968  /* Lose any fake structure return argument if it is passed on the stack.  */
2969  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2970      && !TARGET_64BIT
2971      && !KEEP_AGGREGATE_RETURN_POINTER)
2972    {
2973      int nregs = ix86_function_regparm (funtype, fundecl);
2974
2975      if (!nregs)
2976	return GET_MODE_SIZE (Pmode);
2977    }
2978
2979  return 0;
2980}
2981
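/* Illustrative sketch, not part of GCC itself: the callee-pop rule above
   means a fixed-argument stdcall function returns with "ret $8" for two
   int arguments, while a cdecl function returns with a plain "ret" and
   leaves the cleanup to its caller.  The declarations are invented.  */
#if 0
extern int example_std (int, int) __attribute__ ((stdcall));	/* ret $8 */
extern int example_cde (int, int) __attribute__ ((cdecl));	/* ret */
#endif
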
2982/* Argument support functions.  */
2983
2984/* Return true when register may be used to pass function parameters.  */
2985bool
2986ix86_function_arg_regno_p (int regno)
2987{
2988  int i;
2989  if (!TARGET_64BIT)
2990    {
2991      if (TARGET_MACHO)
2992        return (regno < REGPARM_MAX
2993                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2994      else
2995        return (regno < REGPARM_MAX
2996	        || (TARGET_MMX && MMX_REGNO_P (regno)
2997	  	    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2998	        || (TARGET_SSE && SSE_REGNO_P (regno)
2999		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3000    }
3001
3002  if (TARGET_MACHO)
3003    {
3004      if (SSE_REGNO_P (regno) && TARGET_SSE)
3005        return true;
3006    }
3007  else
3008    {
3009      if (TARGET_SSE && SSE_REGNO_P (regno)
3010          && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3011        return true;
3012    }
3013  /* RAX is used as a hidden argument to varargs functions.  */
3014  if (!regno)
3015    return true;
3016  for (i = 0; i < REGPARM_MAX; i++)
3017    if (regno == x86_64_int_parameter_registers[i])
3018      return true;
3019  return false;
3020}
3021
3022/* Return true if we do not know how to pass TYPE solely in registers.  */
3023
3024static bool
3025ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3026{
3027  if (must_pass_in_stack_var_size_or_pad (mode, type))
3028    return true;
3029
3030  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
3031     The layout_type routine is crafty and tries to trick us into passing
3032     currently unsupported vector types on the stack by using TImode.  */
3033  return (!TARGET_64BIT && mode == TImode
3034	  && type && TREE_CODE (type) != VECTOR_TYPE);
3035}
3036
3037/* Initialize a variable CUM of type CUMULATIVE_ARGS
3038   for a call to a function whose data type is FNTYPE.
3039   For a library call, FNTYPE is 0.  */
3040
3041void
3042init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
3043		      tree fntype,	/* tree ptr for function decl */
3044		      rtx libname,	/* SYMBOL_REF of library name or 0 */
3045		      tree fndecl)
3046{
3047  static CUMULATIVE_ARGS zero_cum;
3048  tree param, next_param;
3049
3050  if (TARGET_DEBUG_ARG)
3051    {
3052      fprintf (stderr, "\ninit_cumulative_args (");
3053      if (fntype)
3054	fprintf (stderr, "fntype code = %s, ret code = %s",
3055		 tree_code_name[(int) TREE_CODE (fntype)],
3056		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3057      else
3058	fprintf (stderr, "no fntype");
3059
3060      if (libname)
3061	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3062    }
3063
3064  *cum = zero_cum;
3065
3066  /* Set up the number of registers to use for passing arguments.  */
3067  cum->nregs = ix86_regparm;
3068  if (TARGET_SSE)
3069    cum->sse_nregs = SSE_REGPARM_MAX;
3070  if (TARGET_MMX)
3071    cum->mmx_nregs = MMX_REGPARM_MAX;
3072  cum->warn_sse = true;
3073  cum->warn_mmx = true;
3074  cum->maybe_vaarg = false;
3075
3076  /* Use ecx and edx registers if function has fastcall attribute,
3077     else look for regparm information.  */
3078  if (fntype && !TARGET_64BIT)
3079    {
3080      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3081	{
3082	  cum->nregs = 2;
3083	  cum->fastcall = 1;
3084	}
3085      else
3086	cum->nregs = ix86_function_regparm (fntype, fndecl);
3087    }
3088
3089  /* Set up the number of SSE registers used for passing SFmode
3090     and DFmode arguments.  Warn for mismatching ABI.  */
3091  cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3092
3093  /* Determine if this function has variable arguments.  This is
3094     indicated by the last argument being 'void_type_node' if there
3095     are no variable arguments.  If there are variable arguments, then
3096     we won't pass anything in registers in 32-bit mode. */
3097
3098  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3099    {
3100      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3101	   param != 0; param = next_param)
3102	{
3103	  next_param = TREE_CHAIN (param);
3104	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3105	    {
3106	      if (!TARGET_64BIT)
3107		{
3108		  cum->nregs = 0;
3109		  cum->sse_nregs = 0;
3110		  cum->mmx_nregs = 0;
3111		  cum->warn_sse = 0;
3112		  cum->warn_mmx = 0;
3113		  cum->fastcall = 0;
3114		  cum->float_in_sse = 0;
3115		}
3116	      cum->maybe_vaarg = true;
3117	    }
3118	}
3119    }
3120  if ((!fntype && !libname)
3121      || (fntype && !TYPE_ARG_TYPES (fntype)))
3122    cum->maybe_vaarg = true;
3123
3124  if (TARGET_DEBUG_ARG)
3125    fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3126
3127  return;
3128}
3129
3130/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
3131   But in the case of vector types, it is some vector mode.
3132
3133   When we have only some of our vector ISA extensions enabled, there
3134   are some modes for which vector_mode_supported_p is false.  For these
3135   modes, the generic vector support in gcc will choose some non-vector mode
3136   in order to implement the type.  By computing the natural mode, we'll
3137   select the proper ABI location for the operand and not depend on whatever
3138   the middle-end decides to do with these vector types.  */
3139
3140static enum machine_mode
3141type_natural_mode (tree type)
3142{
3143  enum machine_mode mode = TYPE_MODE (type);
3144
3145  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3146    {
3147      HOST_WIDE_INT size = int_size_in_bytes (type);
3148      if ((size == 8 || size == 16)
3149	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
3150	  && TYPE_VECTOR_SUBPARTS (type) > 1)
3151	{
3152	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3153
3154	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3155	    mode = MIN_MODE_VECTOR_FLOAT;
3156	  else
3157	    mode = MIN_MODE_VECTOR_INT;
3158
3159	  /* Get the mode which has this inner mode and number of units.  */
3160	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3161	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3162		&& GET_MODE_INNER (mode) == innermode)
3163	      return mode;
3164
3165	  gcc_unreachable ();
3166	}
3167    }
3168
3169  return mode;
3170}
3171
3172/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
3173   this may not agree with the mode that the type system has chosen for the
3174   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
3175   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
3176
3177static rtx
3178gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3179		     unsigned int regno)
3180{
3181  rtx tmp;
3182
3183  if (orig_mode != BLKmode)
3184    tmp = gen_rtx_REG (orig_mode, regno);
3185  else
3186    {
3187      tmp = gen_rtx_REG (mode, regno);
3188      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3189      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3190    }
3191
3192  return tmp;
3193}
3194
3195/* x86-64 register passing implementation.  See the x86-64 ABI for details.
3196   The goal of this code is to classify each eightbyte of the incoming argument
3197   by register class and assign registers accordingly.  */
3198
3199/* Return the union class of CLASS1 and CLASS2.
3200   See the x86-64 PS ABI for details.  */
3201
3202static enum x86_64_reg_class
3203merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3204{
3205  /* Rule #1: If both classes are equal, this is the resulting class.  */
3206  if (class1 == class2)
3207    return class1;
3208
3209  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3210     the other class.  */
3211  if (class1 == X86_64_NO_CLASS)
3212    return class2;
3213  if (class2 == X86_64_NO_CLASS)
3214    return class1;
3215
3216  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
3217  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3218    return X86_64_MEMORY_CLASS;
3219
3220  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
3221  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3222      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3223    return X86_64_INTEGERSI_CLASS;
3224  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3225      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3226    return X86_64_INTEGER_CLASS;
3227
3228  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3229     MEMORY is used.  */
3230  if (class1 == X86_64_X87_CLASS
3231      || class1 == X86_64_X87UP_CLASS
3232      || class1 == X86_64_COMPLEX_X87_CLASS
3233      || class2 == X86_64_X87_CLASS
3234      || class2 == X86_64_X87UP_CLASS
3235      || class2 == X86_64_COMPLEX_X87_CLASS)
3236    return X86_64_MEMORY_CLASS;
3237
3238  /* Rule #6: Otherwise class SSE is used.  */
3239  return X86_64_SSE_CLASS;
3240}
3241
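/* Illustrative sketch, not part of GCC itself: applying the merge rules
   above to a single mixed eightbyte.  The type is invented.  */
#if 0
struct example_mixed { int i; float f; };	/* 8 bytes, one eightbyte */

extern void example_take (struct example_mixed);

static void
example_call (void)
{
  struct example_mixed m = { 1, 2.0f };
  /* The int half classifies as INTEGERSI and the float half (not 64-bit
     aligned within the eightbyte) as SSE; rule #4 merges them to INTEGER,
     so the whole struct travels in one general-purpose register
     (%rdi when it is the first argument).  */
  example_take (m);
}
#endif
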
3242/* Classify the argument of type TYPE and mode MODE.
3243   CLASSES will be filled by the register class used to pass each word
3244   of the operand.  The number of words is returned.  In case the parameter
3245   should be passed in memory, 0 is returned. As a special case for zero
3246   sized containers, classes[0] will be NO_CLASS and 1 is returned.
3247
3248   BIT_OFFSET is used internally for handling records and specifies the
3249   offset in bits modulo 256, to avoid overflow cases.
3250
3251   See the x86-64 PS ABI for details.
3252*/
3253
3254static int
3255classify_argument (enum machine_mode mode, tree type,
3256		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3257{
3258  HOST_WIDE_INT bytes =
3259    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3260  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3261
3262  /* Variable sized entities are always passed/returned in memory.  */
3263  if (bytes < 0)
3264    return 0;
3265
3266  if (mode != VOIDmode
3267      && targetm.calls.must_pass_in_stack (mode, type))
3268    return 0;
3269
3270  if (type && AGGREGATE_TYPE_P (type))
3271    {
3272      int i;
3273      tree field;
3274      enum x86_64_reg_class subclasses[MAX_CLASSES];
3275
3276      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
3277      if (bytes > 16)
3278	return 0;
3279
3280      for (i = 0; i < words; i++)
3281	classes[i] = X86_64_NO_CLASS;
3282
3283      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
3284	 signal the memory class, so handle this as a special case.  */
3285      if (!words)
3286	{
3287	  classes[0] = X86_64_NO_CLASS;
3288	  return 1;
3289	}
3290
3291      /* Classify each field of record and merge classes.  */
3292      switch (TREE_CODE (type))
3293	{
3294	case RECORD_TYPE:
3295	  /* For C++ classes, first merge in the fields of the base classes.  */
3296	  if (TYPE_BINFO (type))
3297	    {
3298	      tree binfo, base_binfo;
3299	      int basenum;
3300
3301	      for (binfo = TYPE_BINFO (type), basenum = 0;
3302		   BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
3303		{
3304		   int num;
3305		   int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
3306		   tree type = BINFO_TYPE (base_binfo);
3307
3308		   num = classify_argument (TYPE_MODE (type),
3309					    type, subclasses,
3310					    (offset + bit_offset) % 256);
3311		   if (!num)
3312		     return 0;
3313		   for (i = 0; i < num; i++)
3314		     {
3315		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
3316		       classes[i + pos] =
3317			 merge_classes (subclasses[i], classes[i + pos]);
3318		     }
3319		}
3320	    }
3321	  /* And now merge the fields of structure.  */
3322	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3323	    {
3324	      if (TREE_CODE (field) == FIELD_DECL)
3325		{
3326		  int num;
3327
3328		  if (TREE_TYPE (field) == error_mark_node)
3329		    continue;
3330
3331		  /* Bitfields are always classified as integer.  Handle them
3332		     early, since later code would consider them to be
3333		     misaligned integers.  */
3334		  if (DECL_BIT_FIELD (field))
3335		    {
3336		      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3337			   i < ((int_bit_position (field) + (bit_offset % 64))
3338			        + tree_low_cst (DECL_SIZE (field), 0)
3339				+ 63) / 8 / 8; i++)
3340			classes[i] =
3341			  merge_classes (X86_64_INTEGER_CLASS,
3342					 classes[i]);
3343		    }
3344		  else
3345		    {
3346		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3347					       TREE_TYPE (field), subclasses,
3348					       (int_bit_position (field)
3349						+ bit_offset) % 256);
3350		      if (!num)
3351			return 0;
3352		      for (i = 0; i < num; i++)
3353			{
3354			  int pos =
3355			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3356			  classes[i + pos] =
3357			    merge_classes (subclasses[i], classes[i + pos]);
3358			}
3359		    }
3360		}
3361	    }
3362	  break;
3363
3364	case ARRAY_TYPE:
3365	  /* Arrays are handled as small records.  */
3366	  {
3367	    int num;
3368	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3369				     TREE_TYPE (type), subclasses, bit_offset);
3370	    if (!num)
3371	      return 0;
3372
3373	    /* The partial classes are now full classes.  */
3374	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3375	      subclasses[0] = X86_64_SSE_CLASS;
3376	    if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3377	      subclasses[0] = X86_64_INTEGER_CLASS;
3378
3379	    for (i = 0; i < words; i++)
3380	      classes[i] = subclasses[i % num];
3381
3382	    break;
3383	  }
3384	case UNION_TYPE:
3385	case QUAL_UNION_TYPE:
3386	  /* Unions are similar to RECORD_TYPE but offset is always 0.
3387	     */
3388
3389	  /* Unions are not derived.  */
3390	  gcc_assert (!TYPE_BINFO (type)
3391		      || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3392	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3393	    {
3394	      if (TREE_CODE (field) == FIELD_DECL)
3395		{
3396		  int num;
3397
3398		  if (TREE_TYPE (field) == error_mark_node)
3399		    continue;
3400
3401		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3402					   TREE_TYPE (field), subclasses,
3403					   bit_offset);
3404		  if (!num)
3405		    return 0;
3406		  for (i = 0; i < num; i++)
3407		    classes[i] = merge_classes (subclasses[i], classes[i]);
3408		}
3409	    }
3410	  break;
3411
3412	default:
3413	  gcc_unreachable ();
3414	}
3415
3416      /* Final merger cleanup.  */
3417      for (i = 0; i < words; i++)
3418	{
3419	  /* If one class is MEMORY, everything should be passed in
3420	     memory.  */
3421	  if (classes[i] == X86_64_MEMORY_CLASS)
3422	    return 0;
3423
3424	  /* The X86_64_SSEUP_CLASS should always be preceded by
3425	     X86_64_SSE_CLASS.  */
3426	  if (classes[i] == X86_64_SSEUP_CLASS
3427	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3428	    classes[i] = X86_64_SSE_CLASS;
3429
3430	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
3431	  if (classes[i] == X86_64_X87UP_CLASS
3432	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3433	    classes[i] = X86_64_SSE_CLASS;
3434	}
3435      return words;
3436    }
3437
3438  /* Compute the alignment needed.  We align all types to their natural
3439     boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
3440  if (mode != VOIDmode && mode != BLKmode)
3441    {
3442      int mode_alignment = GET_MODE_BITSIZE (mode);
3443
3444      if (mode == XFmode)
3445	mode_alignment = 128;
3446      else if (mode == XCmode)
3447	mode_alignment = 256;
3448      if (COMPLEX_MODE_P (mode))
3449	mode_alignment /= 2;
3450      /* Misaligned fields are always returned in memory.  */
3451      if (bit_offset % mode_alignment)
3452	return 0;
3453    }
3454
3455  /* For V1xx modes, just use the base mode.  */
3456  if (VECTOR_MODE_P (mode)
3457      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3458    mode = GET_MODE_INNER (mode);
3459
3460  /* Classification of atomic types.  */
3461  switch (mode)
3462    {
3463    case SDmode:
3464    case DDmode:
3465      classes[0] = X86_64_SSE_CLASS;
3466      return 1;
3467    case TDmode:
3468      classes[0] = X86_64_SSE_CLASS;
3469      classes[1] = X86_64_SSEUP_CLASS;
3470      return 2;
3471    case DImode:
3472    case SImode:
3473    case HImode:
3474    case QImode:
3475    case CSImode:
3476    case CHImode:
3477    case CQImode:
3478      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3479	classes[0] = X86_64_INTEGERSI_CLASS;
3480      else
3481	classes[0] = X86_64_INTEGER_CLASS;
3482      return 1;
3483    case CDImode:
3484    case TImode:
3485      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3486      return 2;
3487    case CTImode:
3488      return 0;
3489    case SFmode:
3490      if (!(bit_offset % 64))
3491	classes[0] = X86_64_SSESF_CLASS;
3492      else
3493	classes[0] = X86_64_SSE_CLASS;
3494      return 1;
3495    case DFmode:
3496      classes[0] = X86_64_SSEDF_CLASS;
3497      return 1;
3498    case XFmode:
3499      classes[0] = X86_64_X87_CLASS;
3500      classes[1] = X86_64_X87UP_CLASS;
3501      return 2;
3502    case TFmode:
3503      classes[0] = X86_64_SSE_CLASS;
3504      classes[1] = X86_64_SSEUP_CLASS;
3505      return 2;
3506    case SCmode:
3507      classes[0] = X86_64_SSE_CLASS;
3508      return 1;
3509    case DCmode:
3510      classes[0] = X86_64_SSEDF_CLASS;
3511      classes[1] = X86_64_SSEDF_CLASS;
3512      return 2;
3513    case XCmode:
3514      classes[0] = X86_64_COMPLEX_X87_CLASS;
3515      return 1;
3516    case TCmode:
3517      /* This mode is larger than 16 bytes.  */
3518      return 0;
3519    case V4SFmode:
3520    case V4SImode:
3521    case V16QImode:
3522    case V8HImode:
3523    case V2DFmode:
3524    case V2DImode:
3525      classes[0] = X86_64_SSE_CLASS;
3526      classes[1] = X86_64_SSEUP_CLASS;
3527      return 2;
3528    case V2SFmode:
3529    case V2SImode:
3530    case V4HImode:
3531    case V8QImode:
3532      classes[0] = X86_64_SSE_CLASS;
3533      return 1;
3534    case BLKmode:
3535    case VOIDmode:
3536      return 0;
3537    default:
3538      gcc_assert (VECTOR_MODE_P (mode));
3539
3540      if (bytes > 16)
3541	return 0;
3542
3543      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3544
3545      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3546	classes[0] = X86_64_INTEGERSI_CLASS;
3547      else
3548	classes[0] = X86_64_INTEGER_CLASS;
3549      classes[1] = X86_64_INTEGER_CLASS;
3550      return 1 + (bytes > 8);
3551    }
3552}
3553
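/* Illustrative sketch, not part of GCC itself: classification of a
   16-byte aggregate made of two doubles.  The type is invented.  */
#if 0
struct example_pair { double x; double y; };	/* two SSEDF eightbytes */

extern double example_norm (struct example_pair);

static double
example_use (void)
{
  struct example_pair p = { 3.0, 4.0 };
  /* classify_argument reports SSEDF for both eightbytes, so the struct
     is passed in %xmm0 and %xmm1 rather than in memory.  */
  return example_norm (p);
}
#endif
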
3554/* Examine the argument and set the number of registers required in each
3555   class.  Return 0 iff the parameter should be passed in memory.  */
3556static int
3557examine_argument (enum machine_mode mode, tree type, int in_return,
3558		  int *int_nregs, int *sse_nregs)
3559{
3560  enum x86_64_reg_class class[MAX_CLASSES];
3561  int n = classify_argument (mode, type, class, 0);
3562
3563  *int_nregs = 0;
3564  *sse_nregs = 0;
3565  if (!n)
3566    return 0;
3567  for (n--; n >= 0; n--)
3568    switch (class[n])
3569      {
3570      case X86_64_INTEGER_CLASS:
3571      case X86_64_INTEGERSI_CLASS:
3572	(*int_nregs)++;
3573	break;
3574      case X86_64_SSE_CLASS:
3575      case X86_64_SSESF_CLASS:
3576      case X86_64_SSEDF_CLASS:
3577	(*sse_nregs)++;
3578	break;
3579      case X86_64_NO_CLASS:
3580      case X86_64_SSEUP_CLASS:
3581	break;
3582      case X86_64_X87_CLASS:
3583      case X86_64_X87UP_CLASS:
3584	if (!in_return)
3585	  return 0;
3586	break;
3587      case X86_64_COMPLEX_X87_CLASS:
3588	return in_return ? 2 : 0;
3589      case X86_64_MEMORY_CLASS:
3590	gcc_unreachable ();
3591      }
3592  return 1;
3593}
3594
3595/* Construct container for the argument used by GCC interface.  See
3596   FUNCTION_ARG for the detailed description.  */
3597
3598static rtx
3599construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3600		     tree type, int in_return, int nintregs, int nsseregs,
3601		     const int *intreg, int sse_regno)
3602{
3603  /* The following variables hold the static issued_error state.  */
3604  static bool issued_sse_arg_error;
3605  static bool issued_sse_ret_error;
3606  static bool issued_x87_ret_error;
3607
3608  enum machine_mode tmpmode;
3609  int bytes =
3610    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3611  enum x86_64_reg_class class[MAX_CLASSES];
3612  int n;
3613  int i;
3614  int nexps = 0;
3615  int needed_sseregs, needed_intregs;
3616  rtx exp[MAX_CLASSES];
3617  rtx ret;
3618
3619  n = classify_argument (mode, type, class, 0);
3620  if (TARGET_DEBUG_ARG)
3621    {
3622      if (!n)
3623	fprintf (stderr, "Memory class\n");
3624      else
3625	{
3626	  fprintf (stderr, "Classes:");
3627	  for (i = 0; i < n; i++)
3628	    {
3629	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3630	    }
3631	   fprintf (stderr, "\n");
3632	}
3633    }
3634  if (!n)
3635    return NULL;
3636  if (!examine_argument (mode, type, in_return, &needed_intregs,
3637			 &needed_sseregs))
3638    return NULL;
3639  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3640    return NULL;
3641
3642  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
3643     some less clueful developer tries to use floating-point anyway.  */
3644  if (needed_sseregs && !TARGET_SSE)
3645    {
3646      if (in_return)
3647	{
3648	  if (!issued_sse_ret_error)
3649	    {
3650	      error ("SSE register return with SSE disabled");
3651	      issued_sse_ret_error = true;
3652	    }
3653	}
3654      else if (!issued_sse_arg_error)
3655	{
3656	  error ("SSE register argument with SSE disabled");
3657	  issued_sse_arg_error = true;
3658	}
3659      return NULL;
3660    }
3661
3662  /* Likewise, error if the ABI requires us to return values in the
3663     x87 registers and the user specified -mno-80387.  */
3664  if (!TARGET_80387 && in_return)
3665    for (i = 0; i < n; i++)
3666      if (class[i] == X86_64_X87_CLASS
3667	  || class[i] == X86_64_X87UP_CLASS
3668	  || class[i] == X86_64_COMPLEX_X87_CLASS)
3669	{
3670	  if (!issued_x87_ret_error)
3671	    {
3672	      error ("x87 register return with x87 disabled");
3673	      issued_x87_ret_error = true;
3674	    }
3675	  return NULL;
3676	}
3677
3678  /* First construct simple cases.  Avoid SCmode, since we want to use
3679     single register to pass this type.  */
3680  if (n == 1 && mode != SCmode)
3681    switch (class[0])
3682      {
3683      case X86_64_INTEGER_CLASS:
3684      case X86_64_INTEGERSI_CLASS:
3685	return gen_rtx_REG (mode, intreg[0]);
3686      case X86_64_SSE_CLASS:
3687      case X86_64_SSESF_CLASS:
3688      case X86_64_SSEDF_CLASS:
3689	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3690      case X86_64_X87_CLASS:
3691      case X86_64_COMPLEX_X87_CLASS:
3692	return gen_rtx_REG (mode, FIRST_STACK_REG);
3693      case X86_64_NO_CLASS:
3694	/* Zero sized array, struct or class.  */
3695	return NULL;
3696      default:
3697	gcc_unreachable ();
3698      }
3699  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3700      && mode != BLKmode)
3701    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3702  if (n == 2
3703      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3704    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3705  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3706      && class[1] == X86_64_INTEGER_CLASS
3707      && (mode == CDImode || mode == TImode || mode == TFmode)
3708      && intreg[0] + 1 == intreg[1])
3709    return gen_rtx_REG (mode, intreg[0]);
3710
3711  /* Otherwise figure out the entries of the PARALLEL.  */
3712  for (i = 0; i < n; i++)
3713    {
3714      switch (class[i])
3715        {
3716	  case X86_64_NO_CLASS:
3717	    break;
3718	  case X86_64_INTEGER_CLASS:
3719	  case X86_64_INTEGERSI_CLASS:
3720	    /* Merge TImodes on aligned occasions here too.  */
3721	    if (i * 8 + 8 > bytes)
3722	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3723	    else if (class[i] == X86_64_INTEGERSI_CLASS)
3724	      tmpmode = SImode;
3725	    else
3726	      tmpmode = DImode;
3727	    /* We've requested 24 bytes we don't have a mode for.  Use DImode.  */
3728	    if (tmpmode == BLKmode)
3729	      tmpmode = DImode;
3730	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3731					       gen_rtx_REG (tmpmode, *intreg),
3732					       GEN_INT (i*8));
3733	    intreg++;
3734	    break;
3735	  case X86_64_SSESF_CLASS:
3736	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3737					       gen_rtx_REG (SFmode,
3738							    SSE_REGNO (sse_regno)),
3739					       GEN_INT (i*8));
3740	    sse_regno++;
3741	    break;
3742	  case X86_64_SSEDF_CLASS:
3743	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3744					       gen_rtx_REG (DFmode,
3745							    SSE_REGNO (sse_regno)),
3746					       GEN_INT (i*8));
3747	    sse_regno++;
3748	    break;
3749	  case X86_64_SSE_CLASS:
3750	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3751	      tmpmode = TImode;
3752	    else
3753	      tmpmode = DImode;
3754	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3755					       gen_rtx_REG (tmpmode,
3756							    SSE_REGNO (sse_regno)),
3757					       GEN_INT (i*8));
3758	    if (tmpmode == TImode)
3759	      i++;
3760	    sse_regno++;
3761	    break;
3762	  default:
3763	    gcc_unreachable ();
3764	}
3765    }
3766
3767  /* Empty aligned struct, union or class.  */
3768  if (nexps == 0)
3769    return NULL;
3770
3771  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3772  for (i = 0; i < nexps; i++)
3773    XVECEXP (ret, 0, i) = exp [i];
3774  return ret;
3775}
3776
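/* Illustrative sketch, not part of GCC itself: a mixed 16-byte aggregate
   whose eightbytes land in different register files.  The type is invented.  */
#if 0
struct example_mix { long tag; double val; };

extern void example_sink (struct example_mix);

static void
example_send (void)
{
  struct example_mix m = { 42, 1.5 };
  /* classify_argument yields INTEGER for the first eightbyte and SSEDF
     for the second, so construct_container builds a PARALLEL that places
     the halves in %rdi and %xmm0 respectively.  */
  example_sink (m);
}
#endif
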
3777/* Update the data in CUM to advance over an argument
3778   of mode MODE and data type TYPE.
3779   (TYPE is null for libcalls where that information may not be available.)  */
3780
3781void
3782function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3783		      tree type, int named)
3784{
3785  int bytes =
3786    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3787  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3788
3789  if (type)
3790    mode = type_natural_mode (type);
3791
3792  if (TARGET_DEBUG_ARG)
3793    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3794	     "mode=%s, named=%d)\n\n",
3795	     words, cum->words, cum->nregs, cum->sse_nregs,
3796	     GET_MODE_NAME (mode), named);
3797
3798  if (TARGET_64BIT)
3799    {
3800      int int_nregs, sse_nregs;
3801      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3802	cum->words += words;
3803      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3804	{
3805	  cum->nregs -= int_nregs;
3806	  cum->sse_nregs -= sse_nregs;
3807	  cum->regno += int_nregs;
3808	  cum->sse_regno += sse_nregs;
3809	}
3810      else
3811	cum->words += words;
3812    }
3813  else
3814    {
3815      switch (mode)
3816	{
3817	default:
3818	  break;
3819
3820	case BLKmode:
3821	  if (bytes < 0)
3822	    break;
3823	  /* FALLTHRU */
3824
3825	case DImode:
3826	case SImode:
3827	case HImode:
3828	case QImode:
3829	  cum->words += words;
3830	  cum->nregs -= words;
3831	  cum->regno += words;
3832
3833	  if (cum->nregs <= 0)
3834	    {
3835	      cum->nregs = 0;
3836	      cum->regno = 0;
3837	    }
3838	  break;
3839
3840	case DFmode:
3841	  if (cum->float_in_sse < 2)
3842	    break;
3843	case SFmode:
3844	  if (cum->float_in_sse < 1)
3845	    break;
3846	  /* FALLTHRU */
3847
3848	case TImode:
3849	case V16QImode:
3850	case V8HImode:
3851	case V4SImode:
3852	case V2DImode:
3853	case V4SFmode:
3854	case V2DFmode:
3855	  if (!type || !AGGREGATE_TYPE_P (type))
3856	    {
3857	      cum->sse_words += words;
3858	      cum->sse_nregs -= 1;
3859	      cum->sse_regno += 1;
3860	      if (cum->sse_nregs <= 0)
3861		{
3862		  cum->sse_nregs = 0;
3863		  cum->sse_regno = 0;
3864		}
3865	    }
3866	  break;
3867
3868	case V8QImode:
3869	case V4HImode:
3870	case V2SImode:
3871	case V2SFmode:
3872	  if (!type || !AGGREGATE_TYPE_P (type))
3873	    {
3874	      cum->mmx_words += words;
3875	      cum->mmx_nregs -= 1;
3876	      cum->mmx_regno += 1;
3877	      if (cum->mmx_nregs <= 0)
3878		{
3879		  cum->mmx_nregs = 0;
3880		  cum->mmx_regno = 0;
3881		}
3882	    }
3883	  break;
3884	}
3885    }
3886}
3887
3888/* Define where to put the arguments to a function.
3889   Value is zero to push the argument on the stack,
3890   or a hard register in which to store the argument.
3891
3892   MODE is the argument's machine mode.
3893   TYPE is the data type of the argument (as a tree).
3894    This is null for libcalls where that information may
3895    not be available.
3896   CUM is a variable of type CUMULATIVE_ARGS which gives info about
3897    the preceding args and about the function being called.
3898   NAMED is nonzero if this argument is a named parameter
3899    (otherwise it is an extra parameter matching an ellipsis).  */
3900
3901rtx
3902function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3903	      tree type, int named)
3904{
3905  enum machine_mode mode = orig_mode;
3906  rtx ret = NULL_RTX;
3907  int bytes =
3908    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3909  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3910  static bool warnedsse, warnedmmx;
3911
3912  /* To simplify the code below, represent vector types with a vector mode
3913     even if MMX/SSE are not active.  */
3914  if (type && TREE_CODE (type) == VECTOR_TYPE)
3915    mode = type_natural_mode (type);
3916
3917  /* Handle a hidden AL argument containing number of registers for varargs
3918     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
3919     any AL settings.  */
3920  if (mode == VOIDmode)
3921    {
3922      if (TARGET_64BIT)
3923	return GEN_INT (cum->maybe_vaarg
3924			? (cum->sse_nregs < 0
3925			   ? SSE_REGPARM_MAX
3926			   : cum->sse_regno)
3927			: -1);
3928      else
3929	return constm1_rtx;
3930    }
3931  if (TARGET_64BIT)
3932    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3933			       cum->sse_nregs,
3934			       &x86_64_int_parameter_registers [cum->regno],
3935			       cum->sse_regno);
3936  else
3937    switch (mode)
3938      {
3939	/* For now, pass fp/complex values on the stack.  */
3940      default:
3941	break;
3942
3943      case BLKmode:
3944	if (bytes < 0)
3945	  break;
3946	/* FALLTHRU */
3947      case DImode:
3948      case SImode:
3949      case HImode:
3950      case QImode:
3951	if (words <= cum->nregs)
3952	  {
3953	    int regno = cum->regno;
3954
3955	    /* Fastcall allocates the first two DWORD (SImode) or
3956	       smaller arguments to ECX and EDX.  */
3957	    if (cum->fastcall)
3958	      {
3959	        if (mode == BLKmode || mode == DImode)
3960	          break;
3961
3962	        /* ECX, not EAX, is the first allocated register.  */
3963	        if (regno == 0)
3964		  regno = 2;
3965	      }
3966	    ret = gen_rtx_REG (mode, regno);
3967	  }
3968	break;
3969      case DFmode:
3970	if (cum->float_in_sse < 2)
3971	  break;
3972      case SFmode:
3973	if (cum->float_in_sse < 1)
3974	  break;
3975	/* FALLTHRU */
3976      case TImode:
3977      case V16QImode:
3978      case V8HImode:
3979      case V4SImode:
3980      case V2DImode:
3981      case V4SFmode:
3982      case V2DFmode:
3983	if (!type || !AGGREGATE_TYPE_P (type))
3984	  {
3985	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3986	      {
3987		warnedsse = true;
3988		warning (0, "SSE vector argument without SSE enabled "
3989			 "changes the ABI");
3990	      }
3991	    if (cum->sse_nregs)
3992	      ret = gen_reg_or_parallel (mode, orig_mode,
3993					 cum->sse_regno + FIRST_SSE_REG);
3994	  }
3995	break;
3996      case V8QImode:
3997      case V4HImode:
3998      case V2SImode:
3999      case V2SFmode:
4000	if (!type || !AGGREGATE_TYPE_P (type))
4001	  {
4002	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4003	      {
4004		warnedmmx = true;
4005		warning (0, "MMX vector argument without MMX enabled "
4006			 "changes the ABI");
4007	      }
4008	    if (cum->mmx_nregs)
4009	      ret = gen_reg_or_parallel (mode, orig_mode,
4010					 cum->mmx_regno + FIRST_MMX_REG);
4011	  }
4012	break;
4013      }
4014
4015  if (TARGET_DEBUG_ARG)
4016    {
4017      fprintf (stderr,
4018	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4019	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4020
4021      if (ret)
4022	print_simple_rtl (stderr, ret);
4023      else
4024	fprintf (stderr, ", stack");
4025
4026      fprintf (stderr, " )\n");
4027    }
4028
4029  return ret;
4030}
4031
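/* Illustrative sketch, not part of GCC itself: with the fastcall handling
   above, the first two DWORD-or-smaller arguments land in %ecx and %edx
   on ia32.  The declaration is invented.  */
#if 0
extern int example_fc (int a, int b, int c) __attribute__ ((fastcall));
/* a -> %ecx, b -> %edx, c -> pushed on the stack.  */
#endif
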
4032/* A C expression that indicates when an argument must be passed by
4033   reference.  If nonzero for an argument, a copy of that argument is
4034   made in memory and a pointer to the argument is passed instead of
4035   the argument itself.  The pointer is passed in whatever way is
4036   appropriate for passing a pointer to that type.  */
4037
4038static bool
4039ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4040			enum machine_mode mode ATTRIBUTE_UNUSED,
4041			tree type, bool named ATTRIBUTE_UNUSED)
4042{
4043  if (!TARGET_64BIT)
4044    return 0;
4045
4046  if (type && int_size_in_bytes (type) == -1)
4047    {
4048      if (TARGET_DEBUG_ARG)
4049	fprintf (stderr, "function_arg_pass_by_reference\n");
4050      return 1;
4051    }
4052
4053  return 0;
4054}
4055
4056/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
4057   passing ABI.  Only called if TARGET_SSE.  */
4058static bool
4059contains_128bit_aligned_vector_p (tree type)
4060{
4061  enum machine_mode mode = TYPE_MODE (type);
4062  if (SSE_REG_MODE_P (mode)
4063      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4064    return true;
4065  if (TYPE_ALIGN (type) < 128)
4066    return false;
4067
4068  if (AGGREGATE_TYPE_P (type))
4069    {
4070      /* Walk the aggregates recursively.  */
4071      switch (TREE_CODE (type))
4072	{
4073	case RECORD_TYPE:
4074	case UNION_TYPE:
4075	case QUAL_UNION_TYPE:
4076	  {
4077	    tree field;
4078
4079	    if (TYPE_BINFO (type))
4080	      {
4081		tree binfo, base_binfo;
4082		int i;
4083
4084		for (binfo = TYPE_BINFO (type), i = 0;
4085		     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
4086		  if (contains_128bit_aligned_vector_p
4087		      (BINFO_TYPE (base_binfo)))
4088		    return true;
4089	      }
4090	    /* And now merge the fields of structure.  */
4091	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4092	      {
4093		if (TREE_CODE (field) == FIELD_DECL
4094		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4095		  return true;
4096	      }
4097	    break;
4098	  }
4099
4100	case ARRAY_TYPE:
4101	  /* Just in case some language passes arrays by value.  */
4102	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4103	    return true;
4104	  break;
4105
4106	default:
4107	  gcc_unreachable ();
4108	}
4109    }
4110  return false;
4111}
4112
4113/* Gives the alignment boundary, in bits, of an argument with the
4114   specified mode and type.  */
4115
4116int
4117ix86_function_arg_boundary (enum machine_mode mode, tree type)
4118{
4119  int align;
4120  if (type)
4121    align = TYPE_ALIGN (type);
4122  else
4123    align = GET_MODE_ALIGNMENT (mode);
4124  if (align < PARM_BOUNDARY)
4125    align = PARM_BOUNDARY;
4126  if (!TARGET_64BIT)
4127    {
4128      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
4129	 make an exception for SSE modes since these require 128bit
4130	 alignment.
4131
4132	 The handling here differs from field_alignment.  ICC aligns MMX
4133	 arguments to 4 byte boundaries, while structure fields are aligned
4134	 to 8 byte boundaries.  */
4135      if (!TARGET_SSE)
4136	align = PARM_BOUNDARY;
4137      else if (!type)
4138	{
4139	  if (!SSE_REG_MODE_P (mode))
4140	    align = PARM_BOUNDARY;
4141	}
4142      else
4143	{
4144	  if (!contains_128bit_aligned_vector_p (type))
4145	    align = PARM_BOUNDARY;
4146	}
4147    }
4148  if (align > 128)
4149    align = 128;
4150  return align;
4151}
4152
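/* Illustrative sketch, not part of GCC itself: on ia32 with SSE enabled,
   an argument whose type contains a 128-bit vector is pushed on a 16-byte
   boundary, while ordinary scalars keep the default 4-byte parameter
   alignment.  The types are invented (__m128 assumes <xmmintrin.h>).  */
#if 0
struct example_vec { __m128 v; };	/* boundary = 128 bits */
struct example_dbl { double d; };	/* boundary = 32 bits on ia32 */
#endif
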
4153/* Return true if N is a possible register number of function value.  */
4154bool
4155ix86_function_value_regno_p (int regno)
4156{
4157  if (TARGET_MACHO)
4158    {
4159      if (!TARGET_64BIT)
4160        {
4161          return ((regno) == 0
4162                  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4163                  || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4164        }
4165      return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4166              || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4167              || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4168      }
4169  else
4170    {
4171      if (regno == 0
4172          || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4173          || (regno == FIRST_SSE_REG && TARGET_SSE))
4174        return true;
4175
4176      if (!TARGET_64BIT
4177          && (regno == FIRST_MMX_REG && TARGET_MMX))
4178	    return true;
4179
4180      return false;
4181    }
4182}
4183
4184/* Define how to find the value returned by a function.
4185   VALTYPE is the data type of the value (as a tree).
4186   If the precise function being called is known, FUNC is its FUNCTION_DECL;
4187   otherwise, FUNC is 0.  */
4188rtx
4189ix86_function_value (tree valtype, tree fntype_or_decl,
4190		     bool outgoing ATTRIBUTE_UNUSED)
4191{
4192  enum machine_mode natmode = type_natural_mode (valtype);
4193
4194  if (TARGET_64BIT)
4195    {
4196      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4197				     1, REGPARM_MAX, SSE_REGPARM_MAX,
4198				     x86_64_int_return_registers, 0);
4199      /* For zero sized structures, construct_container returns NULL, but we
4200	 need to keep the rest of the compiler happy by returning a meaningful value.  */
4201      if (!ret)
4202	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4203      return ret;
4204    }
4205  else
4206    {
4207      tree fn = NULL_TREE, fntype;
4208      if (fntype_or_decl
4209	  && DECL_P (fntype_or_decl))
4210        fn = fntype_or_decl;
4211      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4212      return gen_rtx_REG (TYPE_MODE (valtype),
4213			  ix86_value_regno (natmode, fn, fntype));
4214    }
4215}
4216
4217/* Return true iff type is returned in memory.  */
4218int
4219ix86_return_in_memory (tree type)
4220{
4221  int needed_intregs, needed_sseregs, size;
4222  enum machine_mode mode = type_natural_mode (type);
4223
4224  if (TARGET_64BIT)
4225    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4226
4227  if (mode == BLKmode)
4228    return 1;
4229
4230  size = int_size_in_bytes (type);
4231
4232  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4233    return 0;
4234
4235  if (VECTOR_MODE_P (mode) || mode == TImode)
4236    {
4237      /* User-created vectors small enough to fit in EAX.  */
4238      if (size < 8)
4239	return 0;
4240
4241      /* MMX/3dNow values are returned in MM0,
4242	 except when MMX doesn't exist.  */
4243      if (size == 8)
4244	return (TARGET_MMX ? 0 : 1);
4245
4246      /* SSE values are returned in XMM0, except when it doesn't exist.  */
4247      if (size == 16)
4248	return (TARGET_SSE ? 0 : 1);
4249    }
4250
4251  if (mode == XFmode)
4252    return 0;
4253
4254  if (mode == TDmode)
4255    return 1;
4256
4257  if (size > 12)
4258    return 1;
4259  return 0;
4260}
4261
4262/* When returning SSE vector types, we have a choice of either
4263     (1) being ABI incompatible with a -march switch, or
4264     (2) generating an error.
4265   Given no good solution, I think the safest thing is one warning.
4266   The user won't be able to use -Werror, but....
4267
4268   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4269   called in response to actually generating a caller or callee that
4270   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
4271   via aggregate_value_p for general type probing from tree-ssa.  */
4272
4273static rtx
4274ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4275{
4276  static bool warnedsse, warnedmmx;
4277
4278  if (type)
4279    {
4280      /* Look at the return type of the function, not the function type.  */
4281      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4282
4283      if (!TARGET_SSE && !warnedsse)
4284	{
4285	  if (mode == TImode
4286	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4287	    {
4288	      warnedsse = true;
4289	      warning (0, "SSE vector return without SSE enabled "
4290		       "changes the ABI");
4291	    }
4292	}
4293
4294      if (!TARGET_MMX && !warnedmmx)
4295	{
4296	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4297	    {
4298	      warnedmmx = true;
4299	      warning (0, "MMX vector return without MMX enabled "
4300		       "changes the ABI");
4301	    }
4302	}
4303    }
4304
4305  return NULL;
4306}
4307
4308/* Define how to find the value returned by a library function
4309   assuming the value has mode MODE.  */
4310rtx
4311ix86_libcall_value (enum machine_mode mode)
4312{
4313  if (TARGET_64BIT)
4314    {
4315      switch (mode)
4316	{
4317	case SFmode:
4318	case SCmode:
4319	case DFmode:
4320	case DCmode:
4321	case TFmode:
4322	case SDmode:
4323	case DDmode:
4324	case TDmode:
4325	  return gen_rtx_REG (mode, FIRST_SSE_REG);
4326	case XFmode:
4327	case XCmode:
4328	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4329	case TCmode:
4330	  return NULL;
4331	default:
4332	  return gen_rtx_REG (mode, 0);
4333	}
4334    }
4335  else
4336    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4337}
4338
4339/* Given a mode, return the register to use for a return value.  */
4340
4341static int
4342ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4343{
4344  gcc_assert (!TARGET_64BIT);
4345
4346  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4347     we normally prevent this case when mmx is not available.  However
4348     some ABIs may require the result to be returned like DImode.  */
4349  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4350    return TARGET_MMX ? FIRST_MMX_REG : 0;
4351
4352  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
4353     we prevent this case when sse is not available.  However some ABIs
4354     may require the result to be returned like integer TImode.  */
4355  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4356    return TARGET_SSE ? FIRST_SSE_REG : 0;
4357
4358  /* Decimal floating point values can go in %eax, unlike other float modes.  */
4359  if (DECIMAL_FLOAT_MODE_P (mode))
4360    return 0;
4361
4362  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
4363  if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4364    return 0;
4365
4366  /* Floating point return values in %st(0), except for local functions when
4367     SSE math is enabled or for functions with sseregparm attribute.  */
4368  if ((func || fntype)
4369      && (mode == SFmode || mode == DFmode))
4370    {
4371      int sse_level = ix86_function_sseregparm (fntype, func);
4372      if ((sse_level >= 1 && mode == SFmode)
4373	  || (sse_level == 2 && mode == DFmode))
4374        return FIRST_SSE_REG;
4375    }
4376
4377  return FIRST_FLOAT_REG;
4378}
4379
4380/* Create the va_list data type.  */
4381
4382static tree
4383ix86_build_builtin_va_list (void)
4384{
4385  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4386
4387  /* For i386 we use plain pointer to argument area.  */
4388  if (!TARGET_64BIT)
4389    return build_pointer_type (char_type_node);
4390
4391  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4392  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4393
4394  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4395		      unsigned_type_node);
4396  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4397		      unsigned_type_node);
4398  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4399		      ptr_type_node);
4400  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4401		      ptr_type_node);
4402
4403  va_list_gpr_counter_field = f_gpr;
4404  va_list_fpr_counter_field = f_fpr;
4405
4406  DECL_FIELD_CONTEXT (f_gpr) = record;
4407  DECL_FIELD_CONTEXT (f_fpr) = record;
4408  DECL_FIELD_CONTEXT (f_ovf) = record;
4409  DECL_FIELD_CONTEXT (f_sav) = record;
4410
4411  TREE_CHAIN (record) = type_decl;
4412  TYPE_NAME (record) = type_decl;
4413  TYPE_FIELDS (record) = f_gpr;
4414  TREE_CHAIN (f_gpr) = f_fpr;
4415  TREE_CHAIN (f_fpr) = f_ovf;
4416  TREE_CHAIN (f_ovf) = f_sav;
4417
4418  layout_type (record);
4419
4420  /* The correct type is an array type of one element.  */
4421  return build_array_type (record, build_index_type (size_zero_node));
4422}
4423
4424/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
4425
4426static void
4427ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4428			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
4429			     int no_rtl)
4430{
4431  CUMULATIVE_ARGS next_cum;
4432  rtx save_area = NULL_RTX, mem;
4433  rtx label;
4434  rtx label_ref;
4435  rtx tmp_reg;
4436  rtx nsse_reg;
4437  int set;
4438  tree fntype;
4439  int stdarg_p;
4440  int i;
4441
4442  if (!TARGET_64BIT)
4443    return;
4444
4445  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4446    return;
4447
4448  /* Indicate to allocate space on the stack for varargs save area.  */
4449  ix86_save_varrargs_registers = 1;
4450
4451  cfun->stack_alignment_needed = 128;
4452
4453  fntype = TREE_TYPE (current_function_decl);
4454  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4455	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4456		  != void_type_node));
4457
4458  /* For varargs, we do not want to skip the dummy va_dcl argument.
4459     For stdargs, we do want to skip the last named argument.  */
4460  next_cum = *cum;
4461  if (stdarg_p)
4462    function_arg_advance (&next_cum, mode, type, 1);
4463
4464  if (!no_rtl)
4465    save_area = frame_pointer_rtx;
4466
4467  set = get_varargs_alias_set ();
4468
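  /* Illustrative sketch of the register save area filled in below, assuming
     the 64-bit values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8:

       bytes   0.. 47   the six integer argument registers, 8 bytes each
       bytes  48..175   %xmm0..%xmm7, 16 bytes each

     ix86_va_start later seeds gp_offset and fp_offset as offsets into
     this block.  */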
4469  for (i = next_cum.regno;
4470       i < ix86_regparm
4471       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4472       i++)
4473    {
4474      mem = gen_rtx_MEM (Pmode,
4475			 plus_constant (save_area, i * UNITS_PER_WORD));
4476      MEM_NOTRAP_P (mem) = 1;
4477      set_mem_alias_set (mem, set);
4478      emit_move_insn (mem, gen_rtx_REG (Pmode,
4479					x86_64_int_parameter_registers[i]));
4480    }
4481
4482  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4483    {
4484      /* Now emit code to save SSE registers.  The AX parameter contains the
4485	 number of SSE parameter registers used to call this function.  We use
4486	 the sse_prologue_save insn template, which produces a computed jump
4487	 across the SSE saves.  We need some preparation work to get this working.  */
4488
4489      label = gen_label_rtx ();
4490      label_ref = gen_rtx_LABEL_REF (Pmode, label);
4491
4492      /* Compute the address to jump to:
4493         label - 4*eax + nnamed_sse_arguments*4  */
4494      tmp_reg = gen_reg_rtx (Pmode);
4495      nsse_reg = gen_reg_rtx (Pmode);
4496      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4497      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4498			      gen_rtx_MULT (Pmode, nsse_reg,
4499					    GEN_INT (4))));
4500      if (next_cum.sse_regno)
4501	emit_move_insn
4502	  (nsse_reg,
4503	   gen_rtx_CONST (DImode,
4504			  gen_rtx_PLUS (DImode,
4505					label_ref,
4506					GEN_INT (next_cum.sse_regno * 4))));
4507      else
4508	emit_move_insn (nsse_reg, label_ref);
4509      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4510
4511      /* Compute the address of the memory block we save into.  We always use
4512	 a pointer pointing 127 bytes past the first byte to store - this is
4513	 needed to keep the instruction size limited to 4 bytes (signed 8-bit displacements).  */
4514      tmp_reg = gen_reg_rtx (Pmode);
4515      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4516			      plus_constant (save_area,
4517					     8 * REGPARM_MAX + 127)));
4518      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4519      MEM_NOTRAP_P (mem) = 1;
4520      set_mem_alias_set (mem, set);
4521      set_mem_align (mem, BITS_PER_WORD);
4522
4523      /* And finally do the dirty job!  */
4524      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4525					GEN_INT (next_cum.sse_regno), label));
4526    }
4527
4528}
4529
4530/* Implement va_start.  */
4531
4532void
4533ix86_va_start (tree valist, rtx nextarg)
4534{
4535  HOST_WIDE_INT words, n_gpr, n_fpr;
4536  tree f_gpr, f_fpr, f_ovf, f_sav;
4537  tree gpr, fpr, ovf, sav, t;
4538  tree type;
4539
4540  /* Only 64bit target needs something special.  */
4541  if (!TARGET_64BIT)
4542    {
4543      std_expand_builtin_va_start (valist, nextarg);
4544      return;
4545    }
4546
4547  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4548  f_fpr = TREE_CHAIN (f_gpr);
4549  f_ovf = TREE_CHAIN (f_fpr);
4550  f_sav = TREE_CHAIN (f_ovf);
4551
4552  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4553  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4554  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4555  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4556  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4557
4558  /* Count number of gp and fp argument registers used.  */
4559  words = current_function_args_info.words;
4560  n_gpr = current_function_args_info.regno;
4561  n_fpr = current_function_args_info.sse_regno;
4562
4563  if (TARGET_DEBUG_ARG)
4564    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4565	     (int) words, (int) n_gpr, (int) n_fpr);
4566
4567  if (cfun->va_list_gpr_size)
4568    {
4569      type = TREE_TYPE (gpr);
4570      t = build2 (MODIFY_EXPR, type, gpr,
4571		  build_int_cst (type, n_gpr * 8));
4572      TREE_SIDE_EFFECTS (t) = 1;
4573      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4574    }
4575
4576  if (cfun->va_list_fpr_size)
4577    {
4578      type = TREE_TYPE (fpr);
4579      t = build2 (MODIFY_EXPR, type, fpr,
4580		  build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4581      TREE_SIDE_EFFECTS (t) = 1;
4582      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4583    }
4584
4585  /* Find the overflow area.  */
4586  type = TREE_TYPE (ovf);
4587  t = make_tree (type, virtual_incoming_args_rtx);
4588  if (words != 0)
4589    t = build2 (PLUS_EXPR, type, t,
4590	        build_int_cst (type, words * UNITS_PER_WORD));
4591  t = build2 (MODIFY_EXPR, type, ovf, t);
4592  TREE_SIDE_EFFECTS (t) = 1;
4593  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4594
4595  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4596    {
4597      /* Find the register save area.
4598	 The function prologue saves it right above the stack frame.  */
4599      type = TREE_TYPE (sav);
4600      t = make_tree (type, frame_pointer_rtx);
4601      t = build2 (MODIFY_EXPR, type, sav, t);
4602      TREE_SIDE_EFFECTS (t) = 1;
4603      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4604    }
4605}
4606
4607/* Implement va_arg.  */
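/* Roughly, for an argument that can live in registers the expansion below
   corresponds to this illustrative pseudo-code (not the exact GIMPLE):

     if (gp_offset >= (REGPARM_MAX - needed_intregs + 1) * 8
	 || fp_offset >= (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			 + REGPARM_MAX * 8)
       goto overflow;
     addr = reg_save_area + gp_offset (and/or + fp_offset);
     gp_offset += needed_intregs * 8;  fp_offset += needed_sseregs * 16;
     goto done;
   overflow:
     addr = align (overflow_arg_area);
     overflow_arg_area = addr + rsize * UNITS_PER_WORD;
   done:
     result = *(type *) addr;   (one more dereference if the argument is
     passed by reference)  */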
4608
4609tree
4610ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4611{
4612  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4613  tree f_gpr, f_fpr, f_ovf, f_sav;
4614  tree gpr, fpr, ovf, sav, t;
4615  int size, rsize;
4616  tree lab_false, lab_over = NULL_TREE;
4617  tree addr, t2;
4618  rtx container;
4619  int indirect_p = 0;
4620  tree ptrtype;
4621  enum machine_mode nat_mode;
4622
4623  /* Only 64bit target needs something special.  */
4624  if (!TARGET_64BIT)
4625    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4626
4627  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4628  f_fpr = TREE_CHAIN (f_gpr);
4629  f_ovf = TREE_CHAIN (f_fpr);
4630  f_sav = TREE_CHAIN (f_ovf);
4631
4632  valist = build_va_arg_indirect_ref (valist);
4633  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4634  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4635  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4636  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4637
4638  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4639  if (indirect_p)
4640    type = build_pointer_type (type);
4641  size = int_size_in_bytes (type);
4642  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4643
4644  nat_mode = type_natural_mode (type);
4645  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4646				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4647
4648  /* Pull the value out of the saved registers.  */
4649
4650  addr = create_tmp_var (ptr_type_node, "addr");
4651  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4652
4653  if (container)
4654    {
4655      int needed_intregs, needed_sseregs;
4656      bool need_temp;
4657      tree int_addr, sse_addr;
4658
4659      lab_false = create_artificial_label ();
4660      lab_over = create_artificial_label ();
4661
4662      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4663
4664      need_temp = (!REG_P (container)
4665		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
4666		       || TYPE_ALIGN (type) > 128));
4667
4668      /* In case we are passing a structure, verify that it is a consecutive
4669         block in the register save area.  If not, we need to do moves.  */
4670      if (!need_temp && !REG_P (container))
4671	{
4672	  /* Verify that all registers are strictly consecutive  */
4673	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4674	    {
4675	      int i;
4676
4677	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4678		{
4679		  rtx slot = XVECEXP (container, 0, i);
4680		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4681		      || INTVAL (XEXP (slot, 1)) != i * 16)
4682		    need_temp = 1;
4683		}
4684	    }
4685	  else
4686	    {
4687	      int i;
4688
4689	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4690		{
4691		  rtx slot = XVECEXP (container, 0, i);
4692		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4693		      || INTVAL (XEXP (slot, 1)) != i * 8)
4694		    need_temp = 1;
4695		}
4696	    }
4697	}
4698      if (!need_temp)
4699	{
4700	  int_addr = addr;
4701	  sse_addr = addr;
4702	}
4703      else
4704	{
4705	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
4706	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4707	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4708	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4709	}
4710
4711      /* First ensure that we fit completely in registers.  */
4712      if (needed_intregs)
4713	{
4714	  t = build_int_cst (TREE_TYPE (gpr),
4715			     (REGPARM_MAX - needed_intregs + 1) * 8);
4716	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4717	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4718	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4719	  gimplify_and_add (t, pre_p);
4720	}
4721      if (needed_sseregs)
4722	{
4723	  t = build_int_cst (TREE_TYPE (fpr),
4724			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4725			     + REGPARM_MAX * 8);
4726	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4727	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4728	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4729	  gimplify_and_add (t, pre_p);
4730	}
4731
4732      /* Compute index to start of area used for integer regs.  */
4733      if (needed_intregs)
4734	{
4735	  /* int_addr = gpr + sav; */
4736	  t = fold_convert (ptr_type_node, gpr);
4737	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4738	  t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4739	  gimplify_and_add (t, pre_p);
4740	}
4741      if (needed_sseregs)
4742	{
4743	  /* sse_addr = fpr + sav; */
4744	  t = fold_convert (ptr_type_node, fpr);
4745	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4746	  t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4747	  gimplify_and_add (t, pre_p);
4748	}
4749      if (need_temp)
4750	{
4751	  int i;
4752	  tree temp = create_tmp_var (type, "va_arg_tmp");
4753
4754	  /* addr = &temp; */
4755	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4756	  t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4757	  gimplify_and_add (t, pre_p);
4758
4759	  for (i = 0; i < XVECLEN (container, 0); i++)
4760	    {
4761	      rtx slot = XVECEXP (container, 0, i);
4762	      rtx reg = XEXP (slot, 0);
4763	      enum machine_mode mode = GET_MODE (reg);
4764	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4765	      tree addr_type = build_pointer_type (piece_type);
4766	      tree src_addr, src;
4767	      int src_offset;
4768	      tree dest_addr, dest;
4769
4770	      if (SSE_REGNO_P (REGNO (reg)))
4771		{
4772		  src_addr = sse_addr;
4773		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4774		}
4775	      else
4776		{
4777		  src_addr = int_addr;
4778		  src_offset = REGNO (reg) * 8;
4779		}
4780	      src_addr = fold_convert (addr_type, src_addr);
4781	      src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4782				       size_int (src_offset)));
4783	      src = build_va_arg_indirect_ref (src_addr);
4784
4785	      dest_addr = fold_convert (addr_type, addr);
4786	      dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4787					size_int (INTVAL (XEXP (slot, 1)))));
4788	      dest = build_va_arg_indirect_ref (dest_addr);
4789
4790	      t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4791	      gimplify_and_add (t, pre_p);
4792	    }
4793	}
4794
4795      if (needed_intregs)
4796	{
4797	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4798		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4799	  t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4800	  gimplify_and_add (t, pre_p);
4801	}
4802      if (needed_sseregs)
4803	{
4804	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4805		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4806	  t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4807	  gimplify_and_add (t, pre_p);
4808	}
4809
4810      t = build1 (GOTO_EXPR, void_type_node, lab_over);
4811      gimplify_and_add (t, pre_p);
4812
4813      t = build1 (LABEL_EXPR, void_type_node, lab_false);
4814      append_to_statement_list (t, pre_p);
4815    }
4816
4817  /* ... otherwise out of the overflow area.  */
4818
4819  /* Care for on-stack alignment if needed.  */
4820  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4821      || integer_zerop (TYPE_SIZE (type)))
4822    t = ovf;
4823  else
4824    {
4825      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4826      t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4827		  build_int_cst (TREE_TYPE (ovf), align - 1));
4828      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4829		  build_int_cst (TREE_TYPE (t), -align));
4830    }
4831  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4832
4833  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4834  gimplify_and_add (t2, pre_p);
4835
4836  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4837	      build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4838  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4839  gimplify_and_add (t, pre_p);
4840
4841  if (container)
4842    {
4843      t = build1 (LABEL_EXPR, void_type_node, lab_over);
4844      append_to_statement_list (t, pre_p);
4845    }
4846
4847  ptrtype = build_pointer_type (type);
4848  addr = fold_convert (ptrtype, addr);
4849
4850  if (indirect_p)
4851    addr = build_va_arg_indirect_ref (addr);
4852  return build_va_arg_indirect_ref (addr);
4853}
4854
4855/* Return nonzero if OPNUM's MEM should be matched
4856   in movabs* patterns.  */
4857
4858int
4859ix86_check_movabs (rtx insn, int opnum)
4860{
4861  rtx set, mem;
4862
4863  set = PATTERN (insn);
4864  if (GET_CODE (set) == PARALLEL)
4865    set = XVECEXP (set, 0, 0);
4866  gcc_assert (GET_CODE (set) == SET);
4867  mem = XEXP (set, opnum);
4868  while (GET_CODE (mem) == SUBREG)
4869    mem = SUBREG_REG (mem);
4870  gcc_assert (GET_CODE (mem) == MEM);
4871  return (volatile_ok || !MEM_VOLATILE_P (mem));
4872}
4873
4874/* Initialize the table of extra 80387 mathematical constants.  */
4875
4876static void
4877init_ext_80387_constants (void)
4878{
4879  static const char * cst[5] =
4880  {
4881    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
4882    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
4883    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
4884    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
4885    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
4886  };
4887  int i;
4888
4889  for (i = 0; i < 5; i++)
4890    {
4891      real_from_string (&ext_80387_constants_table[i], cst[i]);
4892      /* Ensure each constant is rounded to XFmode precision.  */
4893      real_convert (&ext_80387_constants_table[i],
4894		    XFmode, &ext_80387_constants_table[i]);
4895    }
4896
4897  ext_80387_constants_init = 1;
4898}
4899
4900/* Return true if the constant is something that can be loaded with
4901   a special instruction.  */
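/* The return value encodes the instruction: -1 if X is not a floating point
   CONST_DOUBLE, 0 if no special instruction applies, 1 for fldz, 2 for fld1
   and 3..7 for an entry of ext_80387_constants_table (decoded by
   standard_80387_constant_opcode below).  */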
4902
4903int
4904standard_80387_constant_p (rtx x)
4905{
4906  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4907    return -1;
4908
4909  if (x == CONST0_RTX (GET_MODE (x)))
4910    return 1;
4911  if (x == CONST1_RTX (GET_MODE (x)))
4912    return 2;
4913
4914  /* For XFmode constants, try to find a special 80387 instruction when
4915     optimizing for size or on those CPUs that benefit from them.  */
4916  if (GET_MODE (x) == XFmode
4917      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4918    {
4919      REAL_VALUE_TYPE r;
4920      int i;
4921
4922      if (! ext_80387_constants_init)
4923	init_ext_80387_constants ();
4924
4925      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4926      for (i = 0; i < 5; i++)
4927        if (real_identical (&r, &ext_80387_constants_table[i]))
4928	  return i + 3;
4929    }
4930
4931  return 0;
4932}
4933
4934/* Return the opcode of the special instruction to be used to load
4935   the constant X.  */
4936
4937const char *
4938standard_80387_constant_opcode (rtx x)
4939{
4940  switch (standard_80387_constant_p (x))
4941    {
4942    case 1:
4943      return "fldz";
4944    case 2:
4945      return "fld1";
4946    case 3:
4947      return "fldlg2";
4948    case 4:
4949      return "fldln2";
4950    case 5:
4951      return "fldl2e";
4952    case 6:
4953      return "fldl2t";
4954    case 7:
4955      return "fldpi";
4956    default:
4957      gcc_unreachable ();
4958    }
4959}
4960
4961/* Return the CONST_DOUBLE representing the 80387 constant that is
4962   loaded by the specified special instruction.  The argument IDX
4963   matches the return value from standard_80387_constant_p.  */
4964
4965rtx
4966standard_80387_constant_rtx (int idx)
4967{
4968  int i;
4969
4970  if (! ext_80387_constants_init)
4971    init_ext_80387_constants ();
4972
4973  switch (idx)
4974    {
4975    case 3:
4976    case 4:
4977    case 5:
4978    case 6:
4979    case 7:
4980      i = idx - 3;
4981      break;
4982
4983    default:
4984      gcc_unreachable ();
4985    }
4986
4987  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4988				       XFmode);
4989}
4990
4991/* Return 1 if mode is a valid mode for sse.  */
4992static int
4993standard_sse_mode_p (enum machine_mode mode)
4994{
4995  switch (mode)
4996    {
4997    case V16QImode:
4998    case V8HImode:
4999    case V4SImode:
5000    case V2DImode:
5001    case V4SFmode:
5002    case V2DFmode:
5003      return 1;
5004
5005    default:
5006      return 0;
5007    }
5008}
5009
5010/* Return 1 if X is an FP constant we can load into an SSE register
5011   without using memory.  */
5012int
5013standard_sse_constant_p (rtx x)
5014{
5015  enum machine_mode mode = GET_MODE (x);
5016
5017  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5018    return 1;
5019  if (vector_all_ones_operand (x, mode)
5020      && standard_sse_mode_p (mode))
5021    return TARGET_SSE2 ? 2 : -1;
5022
5023  return 0;
5024}
5025
5026/* Return the opcode of the special instruction to be used to load
5027   the constant X.  */
5028
5029const char *
5030standard_sse_constant_opcode (rtx insn, rtx x)
5031{
5032  switch (standard_sse_constant_p (x))
5033    {
5034    case 1:
5035      if (get_attr_mode (insn) == MODE_V4SF)
5036        return "xorps\t%0, %0";
5037      else if (get_attr_mode (insn) == MODE_V2DF)
5038        return "xorpd\t%0, %0";
5039      else
5040        return "pxor\t%0, %0";
5041    case 2:
5042      return "pcmpeqd\t%0, %0";
5043    }
5044  gcc_unreachable ();
5045}
5046
5047/* Returns 1 if OP contains a symbol reference.  */
5048
5049int
5050symbolic_reference_mentioned_p (rtx op)
5051{
5052  const char *fmt;
5053  int i;
5054
5055  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5056    return 1;
5057
5058  fmt = GET_RTX_FORMAT (GET_CODE (op));
5059  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5060    {
5061      if (fmt[i] == 'E')
5062	{
5063	  int j;
5064
5065	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5066	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5067	      return 1;
5068	}
5069
5070      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5071	return 1;
5072    }
5073
5074  return 0;
5075}
5076
5077/* Return 1 if it is appropriate to emit `ret' instructions in the
5078   body of a function.  Do this only if the epilogue is simple, needing a
5079   couple of insns.  Prior to reloading, we can't tell how many registers
5080   must be saved, so return 0 then.  Return 0 if there is no frame
5081   marker to de-allocate.  */
5082
5083int
5084ix86_can_use_return_insn_p (void)
5085{
5086  struct ix86_frame frame;
5087
5088  if (! reload_completed || frame_pointer_needed)
5089    return 0;
5090
5091  /* Don't allow popping more than 32768 bytes of arguments, since
5092     that's all we handle with one instruction.  */
5093  if (current_function_pops_args
5094      && current_function_args_size >= 32768)
5095    return 0;
5096
5097  ix86_compute_frame_layout (&frame);
5098  return frame.to_allocate == 0 && frame.nregs == 0;
5099}
5100
5101/* Value should be nonzero if functions must have frame pointers.
5102   Zero means the frame pointer need not be set up (and parms may
5103   be accessed via the stack pointer) in functions that seem suitable.  */
5104
5105int
5106ix86_frame_pointer_required (void)
5107{
5108  /* If we accessed previous frames, then the generated code expects
5109     to be able to access the saved ebp value in our frame.  */
5110  if (cfun->machine->accesses_prev_frame)
5111    return 1;
5112
5113  /* Several x86 os'es need a frame pointer for other reasons,
5114     usually pertaining to setjmp.  */
5115  if (SUBTARGET_FRAME_POINTER_REQUIRED)
5116    return 1;
5117
5118  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5119     the frame pointer by default.  Turn it back on now if we've not
5120     got a leaf function.  */
5121  if (TARGET_OMIT_LEAF_FRAME_POINTER
5122      && (!current_function_is_leaf
5123	  || ix86_current_function_calls_tls_descriptor))
5124    return 1;
5125
5126  if (current_function_profile)
5127    return 1;
5128
5129  return 0;
5130}
5131
5132/* Record that the current function accesses previous call frames.  */
5133
5134void
5135ix86_setup_frame_addresses (void)
5136{
5137  cfun->machine->accesses_prev_frame = 1;
5138}
5139
5140#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5141# define USE_HIDDEN_LINKONCE 1
5142#else
5143# define USE_HIDDEN_LINKONCE 0
5144#endif
5145
5146static int pic_labels_used;
5147
5148/* Fills in the label name that should be used for a pc thunk for
5149   the given register.  */
5150
5151static void
5152get_pc_thunk_name (char name[32], unsigned int regno)
5153{
5154  gcc_assert (!TARGET_64BIT);
5155
5156  if (USE_HIDDEN_LINKONCE)
5157    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5158  else
5159    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5160}
5161
5162
5163/* For each PIC register that was used, this generates a -fpic thunk that
5164   loads the register with the return address of the caller and then returns.  */
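/* For illustration, the thunk emitted below for %ebx looks roughly like

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   and callers pair it with a "call" to the thunk followed by an add of
   _GLOBAL_OFFSET_TABLE_ (see output_set_got).  */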
5165
5166void
5167ix86_file_end (void)
5168{
5169  rtx xops[2];
5170  int regno;
5171
5172  for (regno = 0; regno < 8; ++regno)
5173    {
5174      char name[32];
5175
5176      if (! ((pic_labels_used >> regno) & 1))
5177	continue;
5178
5179      get_pc_thunk_name (name, regno);
5180
5181#if TARGET_MACHO
5182      if (TARGET_MACHO)
5183	{
5184	  switch_to_section (darwin_sections[text_coal_section]);
5185	  fputs ("\t.weak_definition\t", asm_out_file);
5186	  assemble_name (asm_out_file, name);
5187	  fputs ("\n\t.private_extern\t", asm_out_file);
5188	  assemble_name (asm_out_file, name);
5189	  fputs ("\n", asm_out_file);
5190	  ASM_OUTPUT_LABEL (asm_out_file, name);
5191	}
5192      else
5193#endif
5194      if (USE_HIDDEN_LINKONCE)
5195	{
5196	  tree decl;
5197
5198	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
5199			     error_mark_node);
5200	  TREE_PUBLIC (decl) = 1;
5201	  TREE_STATIC (decl) = 1;
5202	  DECL_ONE_ONLY (decl) = 1;
5203
5204	  (*targetm.asm_out.unique_section) (decl, 0);
5205	  switch_to_section (get_named_section (decl, NULL, 0));
5206
5207	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
5208	  fputs ("\t.hidden\t", asm_out_file);
5209	  assemble_name (asm_out_file, name);
5210	  fputc ('\n', asm_out_file);
5211	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5212	}
5213      else
5214	{
5215	  switch_to_section (text_section);
5216	  ASM_OUTPUT_LABEL (asm_out_file, name);
5217	}
5218
5219      xops[0] = gen_rtx_REG (SImode, regno);
5220      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5221      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5222      output_asm_insn ("ret", xops);
5223    }
5224
5225  if (NEED_INDICATE_EXEC_STACK)
5226    file_end_indicate_exec_stack ();
5227}
5228
5229/* Emit code for the SET_GOT patterns.  */
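/* With deep branch prediction the GOT pointer is set up via the pc thunk,
   i.e. roughly "call __i686.get_pc_thunk.reg" followed by
   "addl $_GLOBAL_OFFSET_TABLE_, %reg"; otherwise the inline form is,
   illustratively:

	call	1f
   1:	popl	%reg
	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %reg

   (illustrative only).  */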
5230
5231const char *
5232output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5233{
5234  rtx xops[3];
5235
5236  xops[0] = dest;
5237  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5238
5239  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5240    {
5241      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5242
5243      if (!flag_pic)
5244	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5245      else
5246	output_asm_insn ("call\t%a2", xops);
5247
5248#if TARGET_MACHO
5249      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
5250         is what will be referenced by the Mach-O PIC subsystem.  */
5251      if (!label)
5252	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5253#endif
5254
5255      (*targetm.asm_out.internal_label) (asm_out_file, "L",
5256				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5257
5258      if (flag_pic)
5259	output_asm_insn ("pop{l}\t%0", xops);
5260    }
5261  else
5262    {
5263      char name[32];
5264      get_pc_thunk_name (name, REGNO (dest));
5265      pic_labels_used |= 1 << REGNO (dest);
5266
5267      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5268      xops[2] = gen_rtx_MEM (QImode, xops[2]);
5269      output_asm_insn ("call\t%X2", xops);
5270      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
5271         is what will be referenced by the Mach-O PIC subsystem.  */
5272#if TARGET_MACHO
5273      if (!label)
5274	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5275      else
5276        targetm.asm_out.internal_label (asm_out_file, "L",
5277					   CODE_LABEL_NUMBER (label));
5278#endif
5279    }
5280
5281  if (TARGET_MACHO)
5282    return "";
5283
5284  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5285    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5286  else
5287    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5288
5289  return "";
5290}
5291
5292/* Generate a "push" pattern for input ARG.  */
5293
5294static rtx
5295gen_push (rtx arg)
5296{
5297  return gen_rtx_SET (VOIDmode,
5298		      gen_rtx_MEM (Pmode,
5299				   gen_rtx_PRE_DEC (Pmode,
5300						    stack_pointer_rtx)),
5301		      arg);
5302}
5303
5304/* Return >= 0 if there is an unused call-clobbered register available
5305   for the entire function.  */
5306
5307static unsigned int
5308ix86_select_alt_pic_regnum (void)
5309{
5310  if (current_function_is_leaf && !current_function_profile
5311      && !ix86_current_function_calls_tls_descriptor)
5312    {
5313      int i;
5314      for (i = 2; i >= 0; --i)
5315        if (!regs_ever_live[i])
5316	  return i;
5317    }
5318
5319  return INVALID_REGNUM;
5320}
5321
5322/* Return 1 if we need to save REGNO.  */
5323static int
5324ix86_save_reg (unsigned int regno, int maybe_eh_return)
5325{
5326  if (pic_offset_table_rtx
5327      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5328      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5329	  || current_function_profile
5330	  || current_function_calls_eh_return
5331	  || current_function_uses_const_pool))
5332    {
5333      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5334	return 0;
5335      return 1;
5336    }
5337
5338  if (current_function_calls_eh_return && maybe_eh_return)
5339    {
5340      unsigned i;
5341      for (i = 0; ; i++)
5342	{
5343	  unsigned test = EH_RETURN_DATA_REGNO (i);
5344	  if (test == INVALID_REGNUM)
5345	    break;
5346	  if (test == regno)
5347	    return 1;
5348	}
5349    }
5350
5351  if (cfun->machine->force_align_arg_pointer
5352      && regno == REGNO (cfun->machine->force_align_arg_pointer))
5353    return 1;
5354
5355  return (regs_ever_live[regno]
5356	  && !call_used_regs[regno]
5357	  && !fixed_regs[regno]
5358	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5359}
5360
5361/* Return number of registers to be saved on the stack.  */
5362
5363static int
5364ix86_nsaved_regs (void)
5365{
5366  int nregs = 0;
5367  int regno;
5368
5369  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5370    if (ix86_save_reg (regno, true))
5371      nregs++;
5372  return nregs;
5373}
5374
5375/* Return the offset between two registers, one to be eliminated, and the other
5376   its replacement, at the start of a routine.  */
5377
5378HOST_WIDE_INT
5379ix86_initial_elimination_offset (int from, int to)
5380{
5381  struct ix86_frame frame;
5382  ix86_compute_frame_layout (&frame);
5383
5384  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5385    return frame.hard_frame_pointer_offset;
5386  else if (from == FRAME_POINTER_REGNUM
5387	   && to == HARD_FRAME_POINTER_REGNUM)
5388    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5389  else
5390    {
5391      gcc_assert (to == STACK_POINTER_REGNUM);
5392
5393      if (from == ARG_POINTER_REGNUM)
5394	return frame.stack_pointer_offset;
5395
5396      gcc_assert (from == FRAME_POINTER_REGNUM);
5397      return frame.stack_pointer_offset - frame.frame_pointer_offset;
5398    }
5399}
5400
5401/* Fill structure ix86_frame about frame of currently computed function.  */
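/* A rough, illustrative sketch of the layout computed here, from the incoming
   return address downwards:

     return address
     saved %ebp (if frame_pointer_needed)	<- hard_frame_pointer_offset
     saved registers (nregs words)
     va-arg register save area
     padding1
     local variables (get_frame_size ())	<- frame_pointer_offset
     outgoing arguments area
     padding2					<- stack_pointer_offset  */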
5402
5403static void
5404ix86_compute_frame_layout (struct ix86_frame *frame)
5405{
5406  HOST_WIDE_INT total_size;
5407  unsigned int stack_alignment_needed;
5408  HOST_WIDE_INT offset;
5409  unsigned int preferred_alignment;
5410  HOST_WIDE_INT size = get_frame_size ();
5411
5412  frame->nregs = ix86_nsaved_regs ();
5413  total_size = size;
5414
5415  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5416  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5417
5418  /* During reload iteration the number of registers saved can change.
5419     Recompute the value as needed.  Do not recompute when the number of
5420     registers didn't change, as reload makes multiple calls to this function
5421     and does not expect the decision to change within a single iteration.  */
5422  if (!optimize_size
5423      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5424    {
5425      int count = frame->nregs;
5426
5427      cfun->machine->use_fast_prologue_epilogue_nregs = count;
5428      /* The fast prologue uses moves instead of pushes to save registers.  This
5429         is significantly longer, but it also executes faster, as modern hardware
5430         can execute the moves in parallel but can't do that for push/pop.
5431
5432	 Be careful about choosing which prologue to emit:  when the function takes
5433	 many instructions to execute we may as well use the slow version, and the
5434	 same holds when the function is known to be outside a hot spot (known with
5435	 feedback only).  Weight the size of the function by the number of registers
5436	 to save, as it is cheap to use one or two push instructions but very
5437	 slow to use many of them.  */
5438      if (count)
5439	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5440      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5441	  || (flag_branch_probabilities
5442	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5443        cfun->machine->use_fast_prologue_epilogue = false;
5444      else
5445        cfun->machine->use_fast_prologue_epilogue
5446	   = !expensive_function_p (count);
5447    }
5448  if (TARGET_PROLOGUE_USING_MOVE
5449      && cfun->machine->use_fast_prologue_epilogue)
5450    frame->save_regs_using_mov = true;
5451  else
5452    frame->save_regs_using_mov = false;
5453
5454
5455  /* Skip return address and saved base pointer.  */
5456  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5457
5458  frame->hard_frame_pointer_offset = offset;
5459
5460  /* Do some sanity checking of stack_alignment_needed and
5461     preferred_alignment, since the i386 port is the only one using these
5462     features, which may break easily.  */
5463
5464  gcc_assert (!size || stack_alignment_needed);
5465  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5466  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5467  gcc_assert (stack_alignment_needed
5468	      <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5469
5470  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5471    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5472
5473  /* Register save area */
5474  offset += frame->nregs * UNITS_PER_WORD;
5475
5476  /* Va-arg area */
5477  if (ix86_save_varrargs_registers)
5478    {
5479      offset += X86_64_VARARGS_SIZE;
5480      frame->va_arg_size = X86_64_VARARGS_SIZE;
5481    }
5482  else
5483    frame->va_arg_size = 0;
5484
5485  /* Align start of frame for local function.  */
5486  frame->padding1 = ((offset + stack_alignment_needed - 1)
5487		     & -stack_alignment_needed) - offset;
5488
5489  offset += frame->padding1;
5490
5491  /* Frame pointer points here.  */
5492  frame->frame_pointer_offset = offset;
5493
5494  offset += size;
5495
5496  /* Add the outgoing arguments area.  It can be skipped if we eliminated
5497     all the function calls as dead code.
5498     Skipping is however impossible when the function calls alloca.  The alloca
5499     expander assumes that the last current_function_outgoing_args_size bytes
5500     of the stack frame are unused.  */
5501  if (ACCUMULATE_OUTGOING_ARGS
5502      && (!current_function_is_leaf || current_function_calls_alloca
5503	  || ix86_current_function_calls_tls_descriptor))
5504    {
5505      offset += current_function_outgoing_args_size;
5506      frame->outgoing_arguments_size = current_function_outgoing_args_size;
5507    }
5508  else
5509    frame->outgoing_arguments_size = 0;
5510
5511  /* Align stack boundary.  Only needed if we're calling another function
5512     or using alloca.  */
5513  if (!current_function_is_leaf || current_function_calls_alloca
5514      || ix86_current_function_calls_tls_descriptor)
5515    frame->padding2 = ((offset + preferred_alignment - 1)
5516		       & -preferred_alignment) - offset;
5517  else
5518    frame->padding2 = 0;
5519
5520  offset += frame->padding2;
5521
5522  /* We've reached end of stack frame.  */
5523  frame->stack_pointer_offset = offset;
5524
5525  /* Size the prologue needs to allocate.  */
5526  frame->to_allocate =
5527    (size + frame->padding1 + frame->padding2
5528     + frame->outgoing_arguments_size + frame->va_arg_size);
5529
5530  if ((!frame->to_allocate && frame->nregs <= 1)
5531      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5532    frame->save_regs_using_mov = false;
5533
5534  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5535      && current_function_is_leaf
5536      && !ix86_current_function_calls_tls_descriptor)
5537    {
5538      frame->red_zone_size = frame->to_allocate;
5539      if (frame->save_regs_using_mov)
5540	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5541      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5542	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5543    }
5544  else
5545    frame->red_zone_size = 0;
5546  frame->to_allocate -= frame->red_zone_size;
5547  frame->stack_pointer_offset -= frame->red_zone_size;
5548#if 0
5549  fprintf (stderr, "nregs: %i\n", frame->nregs);
5550  fprintf (stderr, "size: %i\n", size);
5551  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5552  fprintf (stderr, "padding1: %i\n", frame->padding1);
5553  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5554  fprintf (stderr, "padding2: %i\n", frame->padding2);
5555  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5556  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5557  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5558  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5559	   frame->hard_frame_pointer_offset);
5560  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5561#endif
5562}
5563
5564/* Emit code to save registers in the prologue.  */
5565
5566static void
5567ix86_emit_save_regs (void)
5568{
5569  unsigned int regno;
5570  rtx insn;
5571
5572  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5573    if (ix86_save_reg (regno, true))
5574      {
5575	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5576	RTX_FRAME_RELATED_P (insn) = 1;
5577      }
5578}
5579
5580/* Emit code to save registers using MOV insns.  The first register
5581   is saved at POINTER + OFFSET.  */
5582static void
5583ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5584{
5585  unsigned int regno;
5586  rtx insn;
5587
5588  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5589    if (ix86_save_reg (regno, true))
5590      {
5591	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5592					       Pmode, offset),
5593			       gen_rtx_REG (Pmode, regno));
5594	RTX_FRAME_RELATED_P (insn) = 1;
5595	offset += UNITS_PER_WORD;
5596      }
5597}
5598
5599/* Expand a prologue or epilogue stack adjustment.
5600   The pattern exists to put a dependency on all ebp-based memory accesses.
5601   STYLE should be negative if instructions should be marked as frame related,
5602   zero if the %r11 register is live and cannot be freely used, and positive
5603   otherwise.  */
5604
5605static void
5606pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5607{
5608  rtx insn;
5609
5610  if (! TARGET_64BIT)
5611    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5612  else if (x86_64_immediate_operand (offset, DImode))
5613    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5614  else
5615    {
5616      rtx r11;
5617      /* r11 is used by indirect sibcall return as well, set before the
5618	 epilogue and used after the epilogue.  ATM indirect sibcall
5619	 shouldn't be used together with huge frame sizes in one
5620	 function because of the frame_size check in sibcall.c.  */
5621      gcc_assert (style);
5622      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5623      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5624      if (style < 0)
5625	RTX_FRAME_RELATED_P (insn) = 1;
5626      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5627							       offset));
5628    }
5629  if (style < 0)
5630    RTX_FRAME_RELATED_P (insn) = 1;
5631}
5632
5633/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
5634
5635static rtx
5636ix86_internal_arg_pointer (void)
5637{
5638  bool has_force_align_arg_pointer =
5639    (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5640			    TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5641  if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5642       && DECL_NAME (current_function_decl)
5643       && MAIN_NAME_P (DECL_NAME (current_function_decl))
5644       && DECL_FILE_SCOPE_P (current_function_decl))
5645      || ix86_force_align_arg_pointer
5646      || has_force_align_arg_pointer)
5647    {
5648      /* Nested functions can't realign the stack due to a register
5649	 conflict.  */
5650      if (DECL_CONTEXT (current_function_decl)
5651	  && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5652	{
5653	  if (ix86_force_align_arg_pointer)
5654	    warning (0, "-mstackrealign ignored for nested functions");
5655	  if (has_force_align_arg_pointer)
5656	    error ("%s not supported for nested functions",
5657		   ix86_force_align_arg_pointer_string);
5658	  return virtual_incoming_args_rtx;
5659	}
5660      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5661      return copy_to_reg (cfun->machine->force_align_arg_pointer);
5662    }
5663  else
5664    return virtual_incoming_args_rtx;
5665}
5666
5667/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5668   This is called from dwarf2out.c to emit call frame instructions
5669   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5670static void
5671ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5672{
5673  rtx unspec = SET_SRC (pattern);
5674  gcc_assert (GET_CODE (unspec) == UNSPEC);
5675
5676  switch (index)
5677    {
5678    case UNSPEC_REG_SAVE:
5679      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5680			      SET_DEST (pattern));
5681      break;
5682    case UNSPEC_DEF_CFA:
5683      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5684			 INTVAL (XVECEXP (unspec, 0, 0)));
5685      break;
5686    default:
5687      gcc_unreachable ();
5688    }
5689}
5690
5691/* Expand the prologue into a bunch of separate insns.  */
5692
5693void
5694ix86_expand_prologue (void)
5695{
5696  rtx insn;
5697  bool pic_reg_used;
5698  struct ix86_frame frame;
5699  HOST_WIDE_INT allocate;
5700
5701  ix86_compute_frame_layout (&frame);
5702
5703  if (cfun->machine->force_align_arg_pointer)
5704    {
5705      rtx x, y;
5706
5707      /* Grab the argument pointer.  */
5708      x = plus_constant (stack_pointer_rtx, 4);
5709      y = cfun->machine->force_align_arg_pointer;
5710      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5711      RTX_FRAME_RELATED_P (insn) = 1;
5712
5713      /* The unwind info consists of two parts: install the fafp as the cfa,
5714	 and record the fafp as the "save register" of the stack pointer.
5715	 The latter is there so that the unwinder can see where it
5716	 should restore the stack pointer across the "and" insn.  */
5717      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5718      x = gen_rtx_SET (VOIDmode, y, x);
5719      RTX_FRAME_RELATED_P (x) = 1;
5720      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5721			  UNSPEC_REG_SAVE);
5722      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5723      RTX_FRAME_RELATED_P (y) = 1;
5724      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5725      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5726      REG_NOTES (insn) = x;
5727
5728      /* Align the stack.  */
5729      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5730			     GEN_INT (-16)));
5731
5732      /* And here we cheat like madmen with the unwind info.  We force the
5733	 cfa register back to sp+4, which is exactly what it was at the
5734	 start of the function.  Re-pushing the return address results in
5735	 the return at the same spot relative to the cfa, and thus is
5736	 correct wrt the unwind info.  */
5737      x = cfun->machine->force_align_arg_pointer;
5738      x = gen_frame_mem (Pmode, plus_constant (x, -4));
5739      insn = emit_insn (gen_push (x));
5740      RTX_FRAME_RELATED_P (insn) = 1;
5741
5742      x = GEN_INT (4);
5743      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5744      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5745      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5746      REG_NOTES (insn) = x;
5747    }
5748
5749  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
5750     slower on all targets.  Also sdb doesn't like it.  */
5751
5752  if (frame_pointer_needed)
5753    {
5754      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5755      RTX_FRAME_RELATED_P (insn) = 1;
5756
5757      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5758      RTX_FRAME_RELATED_P (insn) = 1;
5759    }
5760
5761  allocate = frame.to_allocate;
5762
5763  if (!frame.save_regs_using_mov)
5764    ix86_emit_save_regs ();
5765  else
5766    allocate += frame.nregs * UNITS_PER_WORD;
5767
5768  /* When using the red zone we may start saving registers before allocating
5769     the stack frame, saving one cycle of the prologue.  */
5770  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5771    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5772				   : stack_pointer_rtx,
5773				   -frame.nregs * UNITS_PER_WORD);
5774
5775  if (allocate == 0)
5776    ;
5777  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5778    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5779			       GEN_INT (-allocate), -1);
5780  else
5781    {
5782      /* Only valid for Win32.  */
5783      rtx eax = gen_rtx_REG (SImode, 0);
5784      bool eax_live = ix86_eax_live_at_start_p ();
5785      rtx t;
5786
5787      gcc_assert (!TARGET_64BIT);
5788
5789      if (eax_live)
5790	{
5791	  emit_insn (gen_push (eax));
5792	  allocate -= 4;
5793	}
5794
5795      emit_move_insn (eax, GEN_INT (allocate));
5796
5797      insn = emit_insn (gen_allocate_stack_worker (eax));
5798      RTX_FRAME_RELATED_P (insn) = 1;
5799      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5800      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5801      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5802					    t, REG_NOTES (insn));
5803
5804      if (eax_live)
5805	{
5806	  if (frame_pointer_needed)
5807	    t = plus_constant (hard_frame_pointer_rtx,
5808			       allocate
5809			       - frame.to_allocate
5810			       - frame.nregs * UNITS_PER_WORD);
5811	  else
5812	    t = plus_constant (stack_pointer_rtx, allocate);
5813	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5814	}
5815    }
5816
5817  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5818    {
5819      if (!frame_pointer_needed || !frame.to_allocate)
5820        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5821      else
5822        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5823				       -frame.nregs * UNITS_PER_WORD);
5824    }
5825
5826  pic_reg_used = false;
5827  if (pic_offset_table_rtx
5828      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5829	  || current_function_profile))
5830    {
5831      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5832
5833      if (alt_pic_reg_used != INVALID_REGNUM)
5834	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5835
5836      pic_reg_used = true;
5837    }
5838
5839  if (pic_reg_used)
5840    {
5841      if (TARGET_64BIT)
5842        insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5843      else
5844        insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5845
5846      /* Even with accurate pre-reload life analysis, we can wind up
5847	 deleting all references to the pic register after reload.
5848	 Consider if cross-jumping unifies two sides of a branch
5849	 controlled by a comparison vs the only read from a global.
5850	 In which case, allow the set_got to be deleted, though we're
5851	 too late to do anything about the ebx save in the prologue.  */
5852      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5853    }
5854
5855  /* Prevent function calls from being scheduled before the call to mcount.
5856     In the pic_reg_used case, make sure that the got load isn't deleted.  */
5857  if (current_function_profile)
5858    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5859}
5860
5861/* Emit code to restore saved registers using MOV insns.  First register
5862   is restored from POINTER + OFFSET.  */
5863static void
5864ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5865				  int maybe_eh_return)
5866{
5867  int regno;
5868  rtx base_address = gen_rtx_MEM (Pmode, pointer);
5869
5870  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5871    if (ix86_save_reg (regno, maybe_eh_return))
5872      {
5873	/* Ensure that adjust_address won't be forced to produce a pointer
5874	   out of the range allowed by the x86-64 instruction set.  */
5875	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5876	  {
5877	    rtx r11;
5878
5879	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5880	    emit_move_insn (r11, GEN_INT (offset));
5881	    emit_insn (gen_adddi3 (r11, r11, pointer));
5882	    base_address = gen_rtx_MEM (Pmode, r11);
5883	    offset = 0;
5884	  }
5885	emit_move_insn (gen_rtx_REG (Pmode, regno),
5886			adjust_address (base_address, Pmode, offset));
5887	offset += UNITS_PER_WORD;
5888      }
5889}
5890
5891/* Restore function stack, frame, and registers.  */
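/* Roughly the reverse of the prologue: either restore the saved registers
   with moves and finish with "leave" (or its discrete equivalent), or adjust
   the stack with an add/lea and pop the registers, depending on the
   heuristics below.  (Illustrative summary only.)  */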
5892
5893void
5894ix86_expand_epilogue (int style)
5895{
5896  int regno;
5897  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5898  struct ix86_frame frame;
5899  HOST_WIDE_INT offset;
5900
5901  ix86_compute_frame_layout (&frame);
5902
5903  /* Calculate start of saved registers relative to ebp.  Special care
5904     must be taken for the normal return case of a function using
5905     eh_return: the eax and edx registers are marked as saved, but not
5906     restored along this path.  */
5907  offset = frame.nregs;
5908  if (current_function_calls_eh_return && style != 2)
5909    offset -= 2;
5910  offset *= -UNITS_PER_WORD;
5911
5912  /* If we're only restoring one register and sp is not valid, then
5913     use a move instruction to restore the register, since it's
5914     less work than reloading sp and popping the register.
5915
5916     The default code results in a stack adjustment using an add/lea instruction,
5917     while this code results in a LEAVE instruction (or discrete equivalent),
5918     so it is profitable in some other cases as well, especially when there
5919     are no registers to restore.  We also use this code when TARGET_USE_LEAVE
5920     and there is exactly one register to pop.  This heuristic may need some
5921     tuning in the future.  */
5922  if ((!sp_valid && frame.nregs <= 1)
5923      || (TARGET_EPILOGUE_USING_MOVE
5924	  && cfun->machine->use_fast_prologue_epilogue
5925	  && (frame.nregs > 1 || frame.to_allocate))
5926      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5927      || (frame_pointer_needed && TARGET_USE_LEAVE
5928	  && cfun->machine->use_fast_prologue_epilogue
5929	  && frame.nregs == 1)
5930      || current_function_calls_eh_return)
5931    {
5932      /* Restore registers.  We can use ebp or esp to address the memory
5933	 locations.  If both are available, default to ebp, since offsets
5934	 are known to be small.  The only exception is esp pointing directly to
5935	 the end of the block of saved registers, where we may simplify the
5936	 addressing mode.  */
5937
5938      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5939	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5940					  frame.to_allocate, style == 2);
5941      else
5942	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5943					  offset, style == 2);
5944
5945      /* eh_return epilogues need %ecx added to the stack pointer.  */
5946      if (style == 2)
5947	{
5948	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5949
5950	  if (frame_pointer_needed)
5951	    {
5952	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5953	      tmp = plus_constant (tmp, UNITS_PER_WORD);
5954	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5955
5956	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5957	      emit_move_insn (hard_frame_pointer_rtx, tmp);
5958
5959	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5960					 const0_rtx, style);
5961	    }
5962	  else
5963	    {
5964	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5965	      tmp = plus_constant (tmp, (frame.to_allocate
5966                                         + frame.nregs * UNITS_PER_WORD));
5967	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5968	    }
5969	}
5970      else if (!frame_pointer_needed)
5971	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5972				   GEN_INT (frame.to_allocate
5973					    + frame.nregs * UNITS_PER_WORD),
5974				   style);
5975      /* If not an i386, mov & pop is faster than "leave".  */
5976      else if (TARGET_USE_LEAVE || optimize_size
5977	       || !cfun->machine->use_fast_prologue_epilogue)
5978	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5979      else
5980	{
5981	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5982				     hard_frame_pointer_rtx,
5983				     const0_rtx, style);
5984	  if (TARGET_64BIT)
5985	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5986	  else
5987	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5988	}
5989    }
5990  else
5991    {
5992      /* First step is to deallocate the stack frame so that we can
5993	 pop the registers.  */
5994      if (!sp_valid)
5995	{
5996	  gcc_assert (frame_pointer_needed);
5997	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5998				     hard_frame_pointer_rtx,
5999				     GEN_INT (offset), style);
6000	}
6001      else if (frame.to_allocate)
6002	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6003				   GEN_INT (frame.to_allocate), style);
6004
6005      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6006	if (ix86_save_reg (regno, false))
6007	  {
6008	    if (TARGET_64BIT)
6009	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6010	    else
6011	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6012	  }
6013      if (frame_pointer_needed)
6014	{
6015	  /* Leave results in shorter dependency chains on CPUs that are
6016	     able to grok it fast.  */
6017	  if (TARGET_USE_LEAVE)
6018	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6019	  else if (TARGET_64BIT)
6020	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6021	  else
6022	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6023	}
6024    }
6025
6026  if (cfun->machine->force_align_arg_pointer)
6027    {
6028      emit_insn (gen_addsi3 (stack_pointer_rtx,
6029			     cfun->machine->force_align_arg_pointer,
6030			     GEN_INT (-4)));
6031    }
6032
6033  /* Sibcall epilogues don't want a return instruction.  */
6034  if (style == 0)
6035    return;
6036
6037  if (current_function_pops_args && current_function_args_size)
6038    {
6039      rtx popc = GEN_INT (current_function_pops_args);
6040
6041      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
6042	 return address, do an explicit add, and jump indirectly to the
6043	 caller.  */
6044
6045      if (current_function_pops_args >= 65536)
6046	{
6047	  rtx ecx = gen_rtx_REG (SImode, 2);
6048
6049	  /* There is no "pascal" calling convention in the 64bit ABI.  */
6050	  gcc_assert (!TARGET_64BIT);
6051
6052	  emit_insn (gen_popsi1 (ecx));
6053	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6054	  emit_jump_insn (gen_return_indirect_internal (ecx));
6055	}
6056      else
6057	emit_jump_insn (gen_return_pop_internal (popc));
6058    }
6059  else
6060    emit_jump_insn (gen_return_internal ());
6061}
6062
6063/* Reset from the function's potential modifications.  */
6064
6065static void
6066ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6067			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6068{
6069  if (pic_offset_table_rtx)
6070    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6071#if TARGET_MACHO
6072  /* Mach-O doesn't support labels at the end of objects, so if
6073     it looks like we might want one, insert a NOP.  */
6074  {
6075    rtx insn = get_last_insn ();
6076    while (insn
6077	   && NOTE_P (insn)
6078	   && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6079      insn = PREV_INSN (insn);
6080    if (insn
6081	&& (LABEL_P (insn)
6082	    || (NOTE_P (insn)
6083		&& NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6084      fputs ("\tnop\n", file);
6085  }
6086#endif
6087
6088}
6089
6090/* Extract the parts of an RTL expression that is a valid memory address
6091   for an instruction.  Return 0 if the structure of the address is
6092   grossly off.  Return -1 if the address contains ASHIFT, so it is not
6093   strictly valid, but still used for computing the length of an lea instruction.  */
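/* For instance, an address of the hardware form base + index*scale + disp,
   written in RTL roughly as
     (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 16)),
   fills OUT with the register inside the MULT as index, scale == 4, the
   other register as base, and disp == 16.  */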
6094
6095int
6096ix86_decompose_address (rtx addr, struct ix86_address *out)
6097{
6098  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6099  rtx base_reg, index_reg;
6100  HOST_WIDE_INT scale = 1;
6101  rtx scale_rtx = NULL_RTX;
6102  int retval = 1;
6103  enum ix86_address_seg seg = SEG_DEFAULT;
6104
6105  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
6106    base = addr;
6107  else if (GET_CODE (addr) == PLUS)
6108    {
6109      rtx addends[4], op;
6110      int n = 0, i;
6111
6112      op = addr;
6113      do
6114	{
6115	  if (n >= 4)
6116	    return 0;
6117	  addends[n++] = XEXP (op, 1);
6118	  op = XEXP (op, 0);
6119	}
6120      while (GET_CODE (op) == PLUS);
6121      if (n >= 4)
6122	return 0;
6123      addends[n] = op;
6124
6125      for (i = n; i >= 0; --i)
6126	{
6127	  op = addends[i];
6128	  switch (GET_CODE (op))
6129	    {
6130	    case MULT:
6131	      if (index)
6132		return 0;
6133	      index = XEXP (op, 0);
6134	      scale_rtx = XEXP (op, 1);
6135	      break;
6136
6137	    case UNSPEC:
6138	      if (XINT (op, 1) == UNSPEC_TP
6139	          && TARGET_TLS_DIRECT_SEG_REFS
6140	          && seg == SEG_DEFAULT)
6141		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6142	      else
6143		return 0;
6144	      break;
6145
6146	    case REG:
6147	    case SUBREG:
6148	      if (!base)
6149		base = op;
6150	      else if (!index)
6151		index = op;
6152	      else
6153		return 0;
6154	      break;
6155
6156	    case CONST:
6157	    case CONST_INT:
6158	    case SYMBOL_REF:
6159	    case LABEL_REF:
6160	      if (disp)
6161		return 0;
6162	      disp = op;
6163	      break;
6164
6165	    default:
6166	      return 0;
6167	    }
6168	}
6169    }
6170  else if (GET_CODE (addr) == MULT)
6171    {
6172      index = XEXP (addr, 0);		/* index*scale */
6173      scale_rtx = XEXP (addr, 1);
6174    }
6175  else if (GET_CODE (addr) == ASHIFT)
6176    {
6177      rtx tmp;
6178
6179      /* We're called for lea too, which implements ashift on occasion.  */
6180      index = XEXP (addr, 0);
6181      tmp = XEXP (addr, 1);
6182      if (GET_CODE (tmp) != CONST_INT)
6183	return 0;
6184      scale = INTVAL (tmp);
6185      if ((unsigned HOST_WIDE_INT) scale > 3)
6186	return 0;
6187      scale = 1 << scale;
6188      retval = -1;
6189    }
6190  else
6191    disp = addr;			/* displacement */
6192
6193  /* Extract the integral value of scale.  */
6194  if (scale_rtx)
6195    {
6196      if (GET_CODE (scale_rtx) != CONST_INT)
6197	return 0;
6198      scale = INTVAL (scale_rtx);
6199    }
6200
6201  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6202  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6203
6204  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
6205  if (base_reg && index_reg && scale == 1
6206      && (index_reg == arg_pointer_rtx
6207	  || index_reg == frame_pointer_rtx
6208	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6209    {
6210      rtx tmp;
6211      tmp = base, base = index, index = tmp;
6212      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6213    }
6214
6215  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
6216  if ((base_reg == hard_frame_pointer_rtx
6217       || base_reg == frame_pointer_rtx
6218       || base_reg == arg_pointer_rtx) && !disp)
6219    disp = const0_rtx;
6220
6221  /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
6222     Avoid this by transforming it to [%esi+0].  */
6223  if (ix86_tune == PROCESSOR_K6 && !optimize_size
6224      && base_reg && !index_reg && !disp
6225      && REG_P (base_reg)
6226      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6227    disp = const0_rtx;
6228
6229  /* Special case: encode reg+reg instead of reg*2.  */
6230  if (!base && index && scale && scale == 2)
6231    base = index, base_reg = index_reg, scale = 1;
6232
6233  /* Special case: scaling cannot be encoded without base or displacement.  */
6234  if (!base && !disp && index && scale != 1)
6235    disp = const0_rtx;
6236
6237  out->base = base;
6238  out->index = index;
6239  out->disp = disp;
6240  out->scale = scale;
6241  out->seg = seg;
6242
6243  return retval;
6244}
6245
6246/* Return the cost of the memory address X.
6247   For i386, it is better to use a complex address than to let gcc copy
6248   the address into a reg and make a new pseudo.  But not if the address
6249   requires two regs - that would mean more pseudos with longer
6250   lifetimes.  */
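/* For instance, with hard registers a bare (reg) address costs 1, while
   (plus (reg) (const_int 4)) costs 0, reflecting the preference for the
   more complex reference.  */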
6251static int
6252ix86_address_cost (rtx x)
6253{
6254  struct ix86_address parts;
6255  int cost = 1;
6256  int ok = ix86_decompose_address (x, &parts);
6257
6258  gcc_assert (ok);
6259
6260  if (parts.base && GET_CODE (parts.base) == SUBREG)
6261    parts.base = SUBREG_REG (parts.base);
6262  if (parts.index && GET_CODE (parts.index) == SUBREG)
6263    parts.index = SUBREG_REG (parts.index);
6264
6265  /* More complex memory references are better.  */
6266  if (parts.disp && parts.disp != const0_rtx)
6267    cost--;
6268  if (parts.seg != SEG_DEFAULT)
6269    cost--;
6270
6271  /* Attempt to minimize the number of registers in the address.  */
6272  if ((parts.base
6273       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6274      || (parts.index
6275	  && (!REG_P (parts.index)
6276	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6277    cost++;
6278
6279  if (parts.base
6280      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6281      && parts.index
6282      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6283      && parts.base != parts.index)
6284    cost++;
6285
6286  /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
6287     since its predecode logic can't detect the length of such instructions
6288     and decoding degenerates to the vector decoder.  Increase the cost of such
6289     addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
6290     to split such addresses or even to refuse them altogether.
6291
6292     The following addressing modes are affected:
6293      [base+scale*index]
6294      [scale*index+disp]
6295      [base+index]
6296
6297     The first and last cases may be avoidable by explicitly encoding a zero
6298     displacement in the memory address, but I don't have an AMD K6 machine
6299     handy to check this theory.  */
6300
6301  if (TARGET_K6
6302      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6303	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6304	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6305    cost += 10;
6306
6307  return cost;
6308}
6309
6310/* If X is a machine specific address (i.e. a symbol or label being
6311   referenced as a displacement from the GOT implemented using an
6312   UNSPEC), then return the base term.  Otherwise return X.  */
6313
6314rtx
6315ix86_find_base_term (rtx x)
6316{
6317  rtx term;
6318
6319  if (TARGET_64BIT)
6320    {
6321      if (GET_CODE (x) != CONST)
6322	return x;
6323      term = XEXP (x, 0);
6324      if (GET_CODE (term) == PLUS
6325	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
6326	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6327	term = XEXP (term, 0);
6328      if (GET_CODE (term) != UNSPEC
6329	  || XINT (term, 1) != UNSPEC_GOTPCREL)
6330	return x;
6331
6332      term = XVECEXP (term, 0, 0);
6333
6334      if (GET_CODE (term) != SYMBOL_REF
6335	  && GET_CODE (term) != LABEL_REF)
6336	return x;
6337
6338      return term;
6339    }
6340
6341  term = ix86_delegitimize_address (x);
6342
6343  if (GET_CODE (term) != SYMBOL_REF
6344      && GET_CODE (term) != LABEL_REF)
6345    return x;
6346
6347  return term;
6348}
6349
6350/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6351   this is used to form addresses of local data when -fPIC is in
6352   use.  */
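/* For instance, a displacement of the rough form
     (minus (symbol_ref "local_data") (symbol_ref "<pic base>"))
   is accepted below; "local_data" stands for any local symbol, while
   "<pic base>" is the literal name the Mach-O machinery uses for the
   per-function pic base label.  */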
6353
6354static bool
6355darwin_local_data_pic (rtx disp)
6356{
6357  if (GET_CODE (disp) == MINUS)
6358    {
6359      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6360          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6361        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6362          {
6363            const char *sym_name = XSTR (XEXP (disp, 1), 0);
6364            if (! strcmp (sym_name, "<pic base>"))
6365              return true;
6366          }
6367    }
6368
6369  return false;
6370}
6371
6372/* Determine if a given RTX is a valid constant.  We already know this
6373   satisfies CONSTANT_P.  */
6374
6375bool
6376legitimate_constant_p (rtx x)
6377{
6378  switch (GET_CODE (x))
6379    {
6380    case CONST:
6381      x = XEXP (x, 0);
6382
6383      if (GET_CODE (x) == PLUS)
6384	{
6385	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6386	    return false;
6387	  x = XEXP (x, 0);
6388	}
6389
6390      if (TARGET_MACHO && darwin_local_data_pic (x))
6391	return true;
6392
6393      /* Only some unspecs are valid as "constants".  */
6394      if (GET_CODE (x) == UNSPEC)
6395	switch (XINT (x, 1))
6396	  {
6397	  case UNSPEC_GOTOFF:
6398	    return TARGET_64BIT;
6399	  case UNSPEC_TPOFF:
6400	  case UNSPEC_NTPOFF:
6401	    x = XVECEXP (x, 0, 0);
6402	    return (GET_CODE (x) == SYMBOL_REF
6403		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6404	  case UNSPEC_DTPOFF:
6405	    x = XVECEXP (x, 0, 0);
6406	    return (GET_CODE (x) == SYMBOL_REF
6407		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6408	  default:
6409	    return false;
6410	  }
6411
6412      /* We must have drilled down to a symbol.  */
6413      if (GET_CODE (x) == LABEL_REF)
6414	return true;
6415      if (GET_CODE (x) != SYMBOL_REF)
6416	return false;
6417      /* FALLTHRU */
6418
6419    case SYMBOL_REF:
6420      /* TLS symbols are never valid.  */
6421      if (SYMBOL_REF_TLS_MODEL (x))
6422	return false;
6423      break;
6424
6425    case CONST_DOUBLE:
6426      if (GET_MODE (x) == TImode
6427	  && x != CONST0_RTX (TImode)
6428          && !TARGET_64BIT)
6429	return false;
6430      break;
6431
6432    case CONST_VECTOR:
6433      if (x == CONST0_RTX (GET_MODE (x)))
6434	return true;
6435      return false;
6436
6437    default:
6438      break;
6439    }
6440
6441  /* Otherwise we handle everything else in the move patterns.  */
6442  return true;
6443}
6444
6445/* Determine if it's legal to put X into the constant pool.  This
6446   is not possible for the address of thread-local symbols, which
6447   is checked above.  */
6448
6449static bool
6450ix86_cannot_force_const_mem (rtx x)
6451{
6452  /* We can always put integral constants and vectors in memory.  */
6453  switch (GET_CODE (x))
6454    {
6455    case CONST_INT:
6456    case CONST_DOUBLE:
6457    case CONST_VECTOR:
6458      return false;
6459
6460    default:
6461      break;
6462    }
6463  return !legitimate_constant_p (x);
6464}
6465
6466/* Determine if a given RTX is a valid constant address.  */
6467
6468bool
6469constant_address_p (rtx x)
6470{
6471  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6472}
6473
6474/* Nonzero if the constant value X is a legitimate general operand
6475   when generating PIC code.  It is given that flag_pic is on and
6476   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
6477
6478bool
6479legitimate_pic_operand_p (rtx x)
6480{
6481  rtx inner;
6482
6483  switch (GET_CODE (x))
6484    {
6485    case CONST:
6486      inner = XEXP (x, 0);
6487      if (GET_CODE (inner) == PLUS
6488	  && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6489	inner = XEXP (inner, 0);
6490
6491      /* Only some unspecs are valid as "constants".  */
6492      if (GET_CODE (inner) == UNSPEC)
6493	switch (XINT (inner, 1))
6494	  {
6495	  case UNSPEC_GOTOFF:
6496	    return TARGET_64BIT;
6497	  case UNSPEC_TPOFF:
6498	    x = XVECEXP (inner, 0, 0);
6499	    return (GET_CODE (x) == SYMBOL_REF
6500		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6501	  default:
6502	    return false;
6503	  }
6504      /* FALLTHRU */
6505
6506    case SYMBOL_REF:
6507    case LABEL_REF:
6508      return legitimate_pic_address_disp_p (x);
6509
6510    default:
6511      return true;
6512    }
6513}
6514
6515/* Determine if a given CONST RTX is a valid memory displacement
6516   in PIC mode.  */
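/* For instance, in 32bit PIC code a reference to local data is represented
   as (const (unspec [(symbol_ref)] UNSPEC_GOTOFF)) and a reference through
   the GOT as (const (unspec [(symbol_ref)] UNSPEC_GOT)); both shapes are
   among those accepted below.  */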
6517
6518int
6519legitimate_pic_address_disp_p (rtx disp)
6520{
6521  bool saw_plus;
6522
6523  /* In 64bit mode we can allow direct addresses of symbols and labels
6524     when they are not dynamic symbols.  */
6525  if (TARGET_64BIT)
6526    {
6527      rtx op0 = disp, op1;
6528
6529      switch (GET_CODE (disp))
6530	{
6531	case LABEL_REF:
6532	  return true;
6533
6534	case CONST:
6535	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
6536	    break;
6537	  op0 = XEXP (XEXP (disp, 0), 0);
6538	  op1 = XEXP (XEXP (disp, 0), 1);
6539	  if (GET_CODE (op1) != CONST_INT
6540	      || INTVAL (op1) >= 16*1024*1024
6541	      || INTVAL (op1) < -16*1024*1024)
6542            break;
6543	  if (GET_CODE (op0) == LABEL_REF)
6544	    return true;
6545	  if (GET_CODE (op0) != SYMBOL_REF)
6546	    break;
6547	  /* FALLTHRU */
6548
6549	case SYMBOL_REF:
6550	  /* TLS references should always be enclosed in UNSPEC.  */
6551	  if (SYMBOL_REF_TLS_MODEL (op0))
6552	    return false;
6553	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6554	    return true;
6555	  break;
6556
6557	default:
6558	  break;
6559	}
6560    }
6561  if (GET_CODE (disp) != CONST)
6562    return 0;
6563  disp = XEXP (disp, 0);
6564
6565  if (TARGET_64BIT)
6566    {
6567      /* It is not safe to allow PLUS expressions; this would limit the
6568         allowed distance of GOT tables.  We should not need these anyway.  */
6569      if (GET_CODE (disp) != UNSPEC
6570	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
6571	      && XINT (disp, 1) != UNSPEC_GOTOFF))
6572	return 0;
6573
6574      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6575	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6576	return 0;
6577      return 1;
6578    }
6579
6580  saw_plus = false;
6581  if (GET_CODE (disp) == PLUS)
6582    {
6583      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6584	return 0;
6585      disp = XEXP (disp, 0);
6586      saw_plus = true;
6587    }
6588
6589  if (TARGET_MACHO && darwin_local_data_pic (disp))
6590    return 1;
6591
6592  if (GET_CODE (disp) != UNSPEC)
6593    return 0;
6594
6595  switch (XINT (disp, 1))
6596    {
6597    case UNSPEC_GOT:
6598      if (saw_plus)
6599	return false;
6600      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6601    case UNSPEC_GOTOFF:
6602      /* Refuse GOTOFF in 64bit mode since it is always 64 bits wide when used.
6603	 The ABI also specifies a 32bit relocation, but we don't produce it in
6604	 the small PIC model at all.  */
6605      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6606	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6607	  && !TARGET_64BIT)
6608        return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6609      return false;
6610    case UNSPEC_GOTTPOFF:
6611    case UNSPEC_GOTNTPOFF:
6612    case UNSPEC_INDNTPOFF:
6613      if (saw_plus)
6614	return false;
6615      disp = XVECEXP (disp, 0, 0);
6616      return (GET_CODE (disp) == SYMBOL_REF
6617	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6618    case UNSPEC_NTPOFF:
6619      disp = XVECEXP (disp, 0, 0);
6620      return (GET_CODE (disp) == SYMBOL_REF
6621	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6622    case UNSPEC_DTPOFF:
6623      disp = XVECEXP (disp, 0, 0);
6624      return (GET_CODE (disp) == SYMBOL_REF
6625	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6626    }
6627
6628  return 0;
6629}
6630
6631/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6632   memory address for an instruction.  The MODE argument is the machine mode
6633   for the MEM expression that wants to use this address.
6634
6635   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
6636   convert common non-canonical forms to canonical form so that they will
6637   be recognized.  */
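/* A canonical address here has the rough shape
     (plus (plus (mult (reg) (const_int scale)) (reg)) (const_int disp)),
   i.e. base + index*scale + disp; legitimize_address below rewrites common
   variants, such as a shift in place of the multiply, into this form.  */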
6638
6639int
6640legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6641{
6642  struct ix86_address parts;
6643  rtx base, index, disp;
6644  HOST_WIDE_INT scale;
6645  const char *reason = NULL;
6646  rtx reason_rtx = NULL_RTX;
6647
6648  if (TARGET_DEBUG_ADDR)
6649    {
6650      fprintf (stderr,
6651	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6652	       GET_MODE_NAME (mode), strict);
6653      debug_rtx (addr);
6654    }
6655
6656  if (ix86_decompose_address (addr, &parts) <= 0)
6657    {
6658      reason = "decomposition failed";
6659      goto report_error;
6660    }
6661
6662  base = parts.base;
6663  index = parts.index;
6664  disp = parts.disp;
6665  scale = parts.scale;
6666
6667  /* Validate base register.
6668
6669     Don't allow SUBREGs that span more than a word here.  They can lead to spill
6670     failures when the base is one word out of a two-word structure, which is
6671     represented internally as a DImode int.  */
6672
6673  if (base)
6674    {
6675      rtx reg;
6676      reason_rtx = base;
6677
6678      if (REG_P (base))
6679  	reg = base;
6680      else if (GET_CODE (base) == SUBREG
6681	       && REG_P (SUBREG_REG (base))
6682	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6683		  <= UNITS_PER_WORD)
6684  	reg = SUBREG_REG (base);
6685      else
6686	{
6687	  reason = "base is not a register";
6688	  goto report_error;
6689	}
6690
6691      if (GET_MODE (base) != Pmode)
6692	{
6693	  reason = "base is not in Pmode";
6694	  goto report_error;
6695	}
6696
6697      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6698	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6699	{
6700	  reason = "base is not valid";
6701	  goto report_error;
6702	}
6703    }
6704
6705  /* Validate index register.
6706
6707     Don't allow SUBREGs that span more than a word here -- same as above.  */
6708
6709  if (index)
6710    {
6711      rtx reg;
6712      reason_rtx = index;
6713
6714      if (REG_P (index))
6715  	reg = index;
6716      else if (GET_CODE (index) == SUBREG
6717	       && REG_P (SUBREG_REG (index))
6718	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6719		  <= UNITS_PER_WORD)
6720  	reg = SUBREG_REG (index);
6721      else
6722	{
6723	  reason = "index is not a register";
6724	  goto report_error;
6725	}
6726
6727      if (GET_MODE (index) != Pmode)
6728	{
6729	  reason = "index is not in Pmode";
6730	  goto report_error;
6731	}
6732
6733      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6734	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6735	{
6736	  reason = "index is not valid";
6737	  goto report_error;
6738	}
6739    }
6740
6741  /* Validate scale factor.  */
6742  if (scale != 1)
6743    {
6744      reason_rtx = GEN_INT (scale);
6745      if (!index)
6746	{
6747	  reason = "scale without index";
6748	  goto report_error;
6749	}
6750
6751      if (scale != 2 && scale != 4 && scale != 8)
6752	{
6753	  reason = "scale is not a valid multiplier";
6754	  goto report_error;
6755	}
6756    }
6757
6758  /* Validate displacement.  */
6759  if (disp)
6760    {
6761      reason_rtx = disp;
6762
6763      if (GET_CODE (disp) == CONST
6764	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6765	switch (XINT (XEXP (disp, 0), 1))
6766	  {
6767	  /* Refuse GOTOFF and GOT in 64bit mode since they are always 64 bits wide
6768	     when used.  The ABI also specifies 32bit relocations, but we don't
6769	     produce them at all and use IP-relative addressing instead.  */
6770	  case UNSPEC_GOT:
6771	  case UNSPEC_GOTOFF:
6772	    gcc_assert (flag_pic);
6773	    if (!TARGET_64BIT)
6774	      goto is_legitimate_pic;
6775	    reason = "64bit address unspec";
6776	    goto report_error;
6777
6778	  case UNSPEC_GOTPCREL:
6779	    gcc_assert (flag_pic);
6780	    goto is_legitimate_pic;
6781
6782	  case UNSPEC_GOTTPOFF:
6783	  case UNSPEC_GOTNTPOFF:
6784	  case UNSPEC_INDNTPOFF:
6785	  case UNSPEC_NTPOFF:
6786	  case UNSPEC_DTPOFF:
6787	    break;
6788
6789	  default:
6790	    reason = "invalid address unspec";
6791	    goto report_error;
6792	  }
6793
6794      else if (SYMBOLIC_CONST (disp)
6795	       && (flag_pic
6796		   || (TARGET_MACHO
6797#if TARGET_MACHO
6798		       && MACHOPIC_INDIRECT
6799		       && !machopic_operand_p (disp)
6800#endif
6801	       )))
6802	{
6803
6804	is_legitimate_pic:
6805	  if (TARGET_64BIT && (index || base))
6806	    {
6807	      /* foo@dtpoff(%rX) is ok.  */
6808	      if (GET_CODE (disp) != CONST
6809		  || GET_CODE (XEXP (disp, 0)) != PLUS
6810		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6811		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6812		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6813		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6814		{
6815		  reason = "non-constant pic memory reference";
6816		  goto report_error;
6817		}
6818	    }
6819	  else if (! legitimate_pic_address_disp_p (disp))
6820	    {
6821	      reason = "displacement is an invalid pic construct";
6822	      goto report_error;
6823	    }
6824
6825          /* This code used to verify that a symbolic pic displacement
6826	     includes the pic_offset_table_rtx register.
6827
6828	     While this is a good idea, unfortunately these constructs may
6829	     be created by the "adds using lea" optimization for incorrect
6830	     code like:
6831
6832	     int a;
6833	     int foo(int i)
6834	       {
6835	         return *(&a+i);
6836	       }
6837
6838	     This code is nonsensical, but results in addressing the
6839	     GOT table with pic_offset_table_rtx as the base.  We can't
6840	     easily refuse it, since it gets matched by the
6841	     "addsi3" pattern, which later gets split to an lea when the
6842	     output register differs from the input.  While this
6843	     could be handled by a separate addsi pattern for this case
6844	     that never results in an lea, disabling this test seems to
6845	     be the easier and correct fix for the crash.  */
6846	}
6847      else if (GET_CODE (disp) != LABEL_REF
6848	       && GET_CODE (disp) != CONST_INT
6849	       && (GET_CODE (disp) != CONST
6850		   || !legitimate_constant_p (disp))
6851	       && (GET_CODE (disp) != SYMBOL_REF
6852		   || !legitimate_constant_p (disp)))
6853	{
6854	  reason = "displacement is not constant";
6855	  goto report_error;
6856	}
6857      else if (TARGET_64BIT
6858	       && !x86_64_immediate_operand (disp, VOIDmode))
6859	{
6860	  reason = "displacement is out of range";
6861	  goto report_error;
6862	}
6863    }
6864
6865  /* Everything looks valid.  */
6866  if (TARGET_DEBUG_ADDR)
6867    fprintf (stderr, "Success.\n");
6868  return TRUE;
6869
6870 report_error:
6871  if (TARGET_DEBUG_ADDR)
6872    {
6873      fprintf (stderr, "Error: %s\n", reason);
6874      debug_rtx (reason_rtx);
6875    }
6876  return FALSE;
6877}
6878
6879/* Return a unique alias set for the GOT.  */
6880
6881static HOST_WIDE_INT
6882ix86_GOT_alias_set (void)
6883{
6884  static HOST_WIDE_INT set = -1;
6885  if (set == -1)
6886    set = new_alias_set ();
6887  return set;
6888}
6889
6890/* Return a legitimate reference for ORIG (an address) using the
6891   register REG.  If REG is 0, a new pseudo is generated.
6892
6893   There are two types of references that must be handled:
6894
6895   1. Global data references must load the address from the GOT, via
6896      the PIC reg.  An insn is emitted to do this load, and the reg is
6897      returned.
6898
6899   2. Static data references, constant pool addresses, and code labels
6900      compute the address as an offset from the GOT, whose base is in
6901      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
6902      differentiate them from global data objects.  The returned
6903      address is the PIC reg + an unspec constant.
6904
6905   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6906   reg also appears in the address.  */
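/* Illustration for the 32bit case: a global data reference is rewritten to
   a load through the GOT,
     (mem (plus (reg pic) (const (unspec [(symbol_ref)] UNSPEC_GOT)))),
   while a local reference becomes a plain PIC-relative sum,
     (plus (reg pic) (const (unspec [(symbol_ref)] UNSPEC_GOTOFF))),
   as generated by the code below.  */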
6907
6908static rtx
6909legitimize_pic_address (rtx orig, rtx reg)
6910{
6911  rtx addr = orig;
6912  rtx new = orig;
6913  rtx base;
6914
6915#if TARGET_MACHO
6916  if (TARGET_MACHO && !TARGET_64BIT)
6917    {
6918      if (reg == 0)
6919	reg = gen_reg_rtx (Pmode);
6920      /* Use the generic Mach-O PIC machinery.  */
6921      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6922    }
6923#endif
6924
6925  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6926    new = addr;
6927  else if (TARGET_64BIT
6928	   && ix86_cmodel != CM_SMALL_PIC
6929	   && local_symbolic_operand (addr, Pmode))
6930    {
6931      rtx tmpreg;
6932      /* This symbol may be referenced via a displacement from the PIC
6933	 base address (@GOTOFF).  */
6934
6935      if (reload_in_progress)
6936	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6937      if (GET_CODE (addr) == CONST)
6938	addr = XEXP (addr, 0);
6939      if (GET_CODE (addr) == PLUS)
6940	  {
6941            new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6942	    new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6943	  }
6944	else
6945          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6946      new = gen_rtx_CONST (Pmode, new);
6947      if (!reg)
6948        tmpreg = gen_reg_rtx (Pmode);
6949      else
6950	tmpreg = reg;
6951      emit_move_insn (tmpreg, new);
6952
6953      if (reg != 0)
6954	{
6955	  new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6956				     tmpreg, 1, OPTAB_DIRECT);
6957	  new = reg;
6958	}
6959      else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6960    }
6961  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6962    {
6963      /* This symbol may be referenced via a displacement from the PIC
6964	 base address (@GOTOFF).  */
6965
6966      if (reload_in_progress)
6967	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6968      if (GET_CODE (addr) == CONST)
6969	addr = XEXP (addr, 0);
6970      if (GET_CODE (addr) == PLUS)
6971	  {
6972            new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6973	    new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6974	  }
6975	else
6976          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6977      new = gen_rtx_CONST (Pmode, new);
6978      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6979
6980      if (reg != 0)
6981	{
6982	  emit_move_insn (reg, new);
6983	  new = reg;
6984	}
6985    }
6986  else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6987    {
6988      if (TARGET_64BIT)
6989	{
6990	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6991	  new = gen_rtx_CONST (Pmode, new);
6992	  new = gen_const_mem (Pmode, new);
6993	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6994
6995	  if (reg == 0)
6996	    reg = gen_reg_rtx (Pmode);
6997	  /* Use gen_movsi directly, otherwise the address is loaded
6998	     into a register for CSE.  We don't want to CSE these addresses;
6999	     instead we CSE the addresses loaded from the GOT table, so skip this.  */
7000	  emit_insn (gen_movsi (reg, new));
7001	  new = reg;
7002	}
7003      else
7004	{
7005	  /* This symbol must be referenced via a load from the
7006	     Global Offset Table (@GOT).  */
7007
7008	  if (reload_in_progress)
7009	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7010	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7011	  new = gen_rtx_CONST (Pmode, new);
7012	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7013	  new = gen_const_mem (Pmode, new);
7014	  set_mem_alias_set (new, ix86_GOT_alias_set ());
7015
7016	  if (reg == 0)
7017	    reg = gen_reg_rtx (Pmode);
7018	  emit_move_insn (reg, new);
7019	  new = reg;
7020	}
7021    }
7022  else
7023    {
7024      if (GET_CODE (addr) == CONST_INT
7025	  && !x86_64_immediate_operand (addr, VOIDmode))
7026	{
7027	  if (reg)
7028	    {
7029	      emit_move_insn (reg, addr);
7030	      new = reg;
7031	    }
7032	  else
7033	    new = force_reg (Pmode, addr);
7034	}
7035      else if (GET_CODE (addr) == CONST)
7036	{
7037	  addr = XEXP (addr, 0);
7038
7039	  /* We must match stuff we generate before.  Assume the only
7040	     unspecs that can get here are ours.  Not that we could do
7041	     anything with them anyway....  */
7042	  if (GET_CODE (addr) == UNSPEC
7043	      || (GET_CODE (addr) == PLUS
7044		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7045	    return orig;
7046	  gcc_assert (GET_CODE (addr) == PLUS);
7047	}
7048      if (GET_CODE (addr) == PLUS)
7049	{
7050	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7051
7052	  /* Check first to see if this is a constant offset from a @GOTOFF
7053	     symbol reference.  */
7054	  if (local_symbolic_operand (op0, Pmode)
7055	      && GET_CODE (op1) == CONST_INT)
7056	    {
7057	      if (!TARGET_64BIT)
7058		{
7059		  if (reload_in_progress)
7060		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7061		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7062					UNSPEC_GOTOFF);
7063		  new = gen_rtx_PLUS (Pmode, new, op1);
7064		  new = gen_rtx_CONST (Pmode, new);
7065		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7066
7067		  if (reg != 0)
7068		    {
7069		      emit_move_insn (reg, new);
7070		      new = reg;
7071		    }
7072		}
7073	      else
7074		{
7075		  if (INTVAL (op1) < -16*1024*1024
7076		      || INTVAL (op1) >= 16*1024*1024)
7077		    {
7078		      if (!x86_64_immediate_operand (op1, Pmode))
7079			op1 = force_reg (Pmode, op1);
7080		      new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7081		    }
7082		}
7083	    }
7084	  else
7085	    {
7086	      base = legitimize_pic_address (XEXP (addr, 0), reg);
7087	      new  = legitimize_pic_address (XEXP (addr, 1),
7088					     base == reg ? NULL_RTX : reg);
7089
7090	      if (GET_CODE (new) == CONST_INT)
7091		new = plus_constant (base, INTVAL (new));
7092	      else
7093		{
7094		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7095		    {
7096		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7097		      new = XEXP (new, 1);
7098		    }
7099		  new = gen_rtx_PLUS (Pmode, base, new);
7100		}
7101	    }
7102	}
7103    }
7104  return new;
7105}
7106
7107/* Load the thread pointer.  If TO_REG is true, force it into a register.  */
7108
7109static rtx
7110get_thread_pointer (int to_reg)
7111{
7112  rtx tp, reg, insn;
7113
7114  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7115  if (!to_reg)
7116    return tp;
7117
7118  reg = gen_reg_rtx (Pmode);
7119  insn = gen_rtx_SET (VOIDmode, reg, tp);
7120  insn = emit_insn (insn);
7121
7122  return reg;
7123}
7124
7125/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
7126   false if we expect this to be used for a memory address and true if
7127   we expect to load the address into a register.  */
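/* Rough shape of what is generated below: the global- and local-dynamic
   models emit calls to the tls_global_dynamic / tls_local_dynamic_base
   patterns to resolve the address at run time, the initial-exec model
   loads the symbol's offset from the GOT and combines it with the thread
   pointer, and the local-exec model folds a constant @TPOFF/@NTPOFF
   offset directly into the thread-pointer address.  */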
7128
7129static rtx
7130legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7131{
7132  rtx dest, base, off, pic, tp;
7133  int type;
7134
7135  switch (model)
7136    {
7137    case TLS_MODEL_GLOBAL_DYNAMIC:
7138      dest = gen_reg_rtx (Pmode);
7139      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7140
7141      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7142	{
7143	  rtx rax = gen_rtx_REG (Pmode, 0), insns;
7144
7145	  start_sequence ();
7146	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7147	  insns = get_insns ();
7148	  end_sequence ();
7149
7150	  emit_libcall_block (insns, dest, rax, x);
7151	}
7152      else if (TARGET_64BIT && TARGET_GNU2_TLS)
7153	emit_insn (gen_tls_global_dynamic_64 (dest, x));
7154      else
7155	emit_insn (gen_tls_global_dynamic_32 (dest, x));
7156
7157      if (TARGET_GNU2_TLS)
7158	{
7159	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7160
7161	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7162	}
7163      break;
7164
7165    case TLS_MODEL_LOCAL_DYNAMIC:
7166      base = gen_reg_rtx (Pmode);
7167      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7168
7169      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7170	{
7171	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7172
7173	  start_sequence ();
7174	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7175	  insns = get_insns ();
7176	  end_sequence ();
7177
7178	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7179	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7180	  emit_libcall_block (insns, base, rax, note);
7181	}
7182      else if (TARGET_64BIT && TARGET_GNU2_TLS)
7183	emit_insn (gen_tls_local_dynamic_base_64 (base));
7184      else
7185	emit_insn (gen_tls_local_dynamic_base_32 (base));
7186
7187      if (TARGET_GNU2_TLS)
7188	{
7189	  rtx x = ix86_tls_module_base ();
7190
7191	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
7192			       gen_rtx_MINUS (Pmode, x, tp));
7193	}
7194
7195      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7196      off = gen_rtx_CONST (Pmode, off);
7197
7198      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7199
7200      if (TARGET_GNU2_TLS)
7201	{
7202	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7203
7204	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7205	}
7206
7207      break;
7208
7209    case TLS_MODEL_INITIAL_EXEC:
7210      if (TARGET_64BIT)
7211	{
7212	  pic = NULL;
7213	  type = UNSPEC_GOTNTPOFF;
7214	}
7215      else if (flag_pic)
7216	{
7217	  if (reload_in_progress)
7218	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7219	  pic = pic_offset_table_rtx;
7220	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7221	}
7222      else if (!TARGET_ANY_GNU_TLS)
7223	{
7224	  pic = gen_reg_rtx (Pmode);
7225	  emit_insn (gen_set_got (pic));
7226	  type = UNSPEC_GOTTPOFF;
7227	}
7228      else
7229	{
7230	  pic = NULL;
7231	  type = UNSPEC_INDNTPOFF;
7232	}
7233
7234      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7235      off = gen_rtx_CONST (Pmode, off);
7236      if (pic)
7237	off = gen_rtx_PLUS (Pmode, pic, off);
7238      off = gen_const_mem (Pmode, off);
7239      set_mem_alias_set (off, ix86_GOT_alias_set ());
7240
7241      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7242	{
7243          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7244	  off = force_reg (Pmode, off);
7245	  return gen_rtx_PLUS (Pmode, base, off);
7246	}
7247      else
7248	{
7249	  base = get_thread_pointer (true);
7250	  dest = gen_reg_rtx (Pmode);
7251	  emit_insn (gen_subsi3 (dest, base, off));
7252	}
7253      break;
7254
7255    case TLS_MODEL_LOCAL_EXEC:
7256      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7257			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7258			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7259      off = gen_rtx_CONST (Pmode, off);
7260
7261      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7262	{
7263	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7264	  return gen_rtx_PLUS (Pmode, base, off);
7265	}
7266      else
7267	{
7268	  base = get_thread_pointer (true);
7269	  dest = gen_reg_rtx (Pmode);
7270	  emit_insn (gen_subsi3 (dest, base, off));
7271	}
7272      break;
7273
7274    default:
7275      gcc_unreachable ();
7276    }
7277
7278  return dest;
7279}
7280
7281/* Try machine-dependent ways of modifying an illegitimate address
7282   to be legitimate.  If we find one, return the new, valid address.
7283   This macro is used in only one place: `memory_address' in explow.c.
7284
7285   OLDX is the address as it was before break_out_memory_refs was called.
7286   In some cases it is useful to look at this to decide what needs to be done.
7287
7288   MODE and WIN are passed so that this macro can use
7289   GO_IF_LEGITIMATE_ADDRESS.
7290
7291   It is always safe for this macro to do nothing.  It exists to recognize
7292   opportunities to optimize the output.
7293
7294   For the 80386, we handle X+REG by loading X into a register R and
7295   using R+REG.  R will go in a general reg and indexing will be used.
7296   However, if REG is a broken-out memory address or multiplication,
7297   nothing needs to be done because REG can certainly go in a general reg.
7298
7299   When -fpic is used, special handling is needed for symbolic references.
7300   See comments by legitimize_pic_address in i386.c for details.  */
7301
7302rtx
7303legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7304{
7305  int changed = 0;
7306  unsigned log;
7307
7308  if (TARGET_DEBUG_ADDR)
7309    {
7310      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7311	       GET_MODE_NAME (mode));
7312      debug_rtx (x);
7313    }
7314
7315  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7316  if (log)
7317    return legitimize_tls_address (x, log, false);
7318  if (GET_CODE (x) == CONST
7319      && GET_CODE (XEXP (x, 0)) == PLUS
7320      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7321      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7322    {
7323      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7324      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7325    }
7326
7327  if (flag_pic && SYMBOLIC_CONST (x))
7328    return legitimize_pic_address (x, 0);
7329
7330  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7331  if (GET_CODE (x) == ASHIFT
7332      && GET_CODE (XEXP (x, 1)) == CONST_INT
7333      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7334    {
7335      changed = 1;
7336      log = INTVAL (XEXP (x, 1));
7337      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7338			GEN_INT (1 << log));
7339    }
7340
7341  if (GET_CODE (x) == PLUS)
7342    {
7343      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7344
7345      if (GET_CODE (XEXP (x, 0)) == ASHIFT
7346	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7347	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7348	{
7349	  changed = 1;
7350	  log = INTVAL (XEXP (XEXP (x, 0), 1));
7351	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
7352				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7353				      GEN_INT (1 << log));
7354	}
7355
7356      if (GET_CODE (XEXP (x, 1)) == ASHIFT
7357	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7358	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7359	{
7360	  changed = 1;
7361	  log = INTVAL (XEXP (XEXP (x, 1), 1));
7362	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
7363				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7364				      GEN_INT (1 << log));
7365	}
7366
7367      /* Put multiply first if it isn't already.  */
7368      if (GET_CODE (XEXP (x, 1)) == MULT)
7369	{
7370	  rtx tmp = XEXP (x, 0);
7371	  XEXP (x, 0) = XEXP (x, 1);
7372	  XEXP (x, 1) = tmp;
7373	  changed = 1;
7374	}
7375
7376      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7377	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
7378	 created by virtual register instantiation, register elimination, and
7379	 similar optimizations.  */
7380      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7381	{
7382	  changed = 1;
7383	  x = gen_rtx_PLUS (Pmode,
7384			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
7385					  XEXP (XEXP (x, 1), 0)),
7386			    XEXP (XEXP (x, 1), 1));
7387	}
7388
7389      /* Canonicalize
7390	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7391	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
7392      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7393	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7394	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7395	       && CONSTANT_P (XEXP (x, 1)))
7396	{
7397	  rtx constant;
7398	  rtx other = NULL_RTX;
7399
7400	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7401	    {
7402	      constant = XEXP (x, 1);
7403	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7404	    }
7405	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7406	    {
7407	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7408	      other = XEXP (x, 1);
7409	    }
7410	  else
7411	    constant = 0;
7412
7413	  if (constant)
7414	    {
7415	      changed = 1;
7416	      x = gen_rtx_PLUS (Pmode,
7417				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7418					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
7419				plus_constant (other, INTVAL (constant)));
7420	    }
7421	}
7422
7423      if (changed && legitimate_address_p (mode, x, FALSE))
7424	return x;
7425
7426      if (GET_CODE (XEXP (x, 0)) == MULT)
7427	{
7428	  changed = 1;
7429	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7430	}
7431
7432      if (GET_CODE (XEXP (x, 1)) == MULT)
7433	{
7434	  changed = 1;
7435	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7436	}
7437
7438      if (changed
7439	  && GET_CODE (XEXP (x, 1)) == REG
7440	  && GET_CODE (XEXP (x, 0)) == REG)
7441	return x;
7442
7443      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7444	{
7445	  changed = 1;
7446	  x = legitimize_pic_address (x, 0);
7447	}
7448
7449      if (changed && legitimate_address_p (mode, x, FALSE))
7450	return x;
7451
7452      if (GET_CODE (XEXP (x, 0)) == REG)
7453	{
7454	  rtx temp = gen_reg_rtx (Pmode);
7455	  rtx val  = force_operand (XEXP (x, 1), temp);
7456	  if (val != temp)
7457	    emit_move_insn (temp, val);
7458
7459	  XEXP (x, 1) = temp;
7460	  return x;
7461	}
7462
7463      else if (GET_CODE (XEXP (x, 1)) == REG)
7464	{
7465	  rtx temp = gen_reg_rtx (Pmode);
7466	  rtx val  = force_operand (XEXP (x, 0), temp);
7467	  if (val != temp)
7468	    emit_move_insn (temp, val);
7469
7470	  XEXP (x, 0) = temp;
7471	  return x;
7472	}
7473    }
7474
7475  return x;
7476}
7477
7478/* Print an integer constant expression in assembler syntax.  Addition
7479   and subtraction are the only arithmetic that may appear in these
7480   expressions.  FILE is the stdio stream to write to, X is the rtx, and
7481   CODE is the operand print code from the output string.  */
7482
7483static void
7484output_pic_addr_const (FILE *file, rtx x, int code)
7485{
7486  char buf[256];
7487
7488  switch (GET_CODE (x))
7489    {
7490    case PC:
7491      gcc_assert (flag_pic);
7492      putc ('.', file);
7493      break;
7494
7495    case SYMBOL_REF:
7496      if (! TARGET_MACHO || TARGET_64BIT)
7497	output_addr_const (file, x);
7498      else
7499	{
7500	  const char *name = XSTR (x, 0);
7501
7502	  /* Mark the decl as referenced so that cgraph will output the function.  */
7503	  if (SYMBOL_REF_DECL (x))
7504	    mark_decl_referenced (SYMBOL_REF_DECL (x));
7505
7506#if TARGET_MACHO
7507	  if (MACHOPIC_INDIRECT
7508	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7509	    name = machopic_indirection_name (x, /*stub_p=*/true);
7510#endif
7511	  assemble_name (file, name);
7512	}
7513      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7514	fputs ("@PLT", file);
7515      break;
7516
7517    case LABEL_REF:
7518      x = XEXP (x, 0);
7519      /* FALLTHRU */
7520    case CODE_LABEL:
7521      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7522      assemble_name (asm_out_file, buf);
7523      break;
7524
7525    case CONST_INT:
7526      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7527      break;
7528
7529    case CONST:
7530      /* This used to output parentheses around the expression,
7531	 but that does not work on the 386 (either ATT or BSD assembler).  */
7532      output_pic_addr_const (file, XEXP (x, 0), code);
7533      break;
7534
7535    case CONST_DOUBLE:
7536      if (GET_MODE (x) == VOIDmode)
7537	{
7538	  /* We can use %d if the number is <32 bits and positive.  */
7539	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7540	    fprintf (file, "0x%lx%08lx",
7541		     (unsigned long) CONST_DOUBLE_HIGH (x),
7542		     (unsigned long) CONST_DOUBLE_LOW (x));
7543	  else
7544	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7545	}
7546      else
7547	/* We can't handle floating point constants;
7548	   PRINT_OPERAND must handle them.  */
7549	output_operand_lossage ("floating constant misused");
7550      break;
7551
7552    case PLUS:
7553      /* Some assemblers need integer constants to appear first.  */
7554      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7555	{
7556	  output_pic_addr_const (file, XEXP (x, 0), code);
7557	  putc ('+', file);
7558	  output_pic_addr_const (file, XEXP (x, 1), code);
7559	}
7560      else
7561	{
7562	  gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7563	  output_pic_addr_const (file, XEXP (x, 1), code);
7564	  putc ('+', file);
7565	  output_pic_addr_const (file, XEXP (x, 0), code);
7566	}
7567      break;
7568
7569    case MINUS:
7570      if (!TARGET_MACHO)
7571	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7572      output_pic_addr_const (file, XEXP (x, 0), code);
7573      putc ('-', file);
7574      output_pic_addr_const (file, XEXP (x, 1), code);
7575      if (!TARGET_MACHO)
7576	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7577      break;
7578
7579     case UNSPEC:
7580       gcc_assert (XVECLEN (x, 0) == 1);
7581       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7582       switch (XINT (x, 1))
7583	{
7584	case UNSPEC_GOT:
7585	  fputs ("@GOT", file);
7586	  break;
7587	case UNSPEC_GOTOFF:
7588	  fputs ("@GOTOFF", file);
7589	  break;
7590	case UNSPEC_GOTPCREL:
7591	  fputs ("@GOTPCREL(%rip)", file);
7592	  break;
7593	case UNSPEC_GOTTPOFF:
7594	  /* FIXME: This might be @TPOFF in Sun ld too.  */
7595	  fputs ("@GOTTPOFF", file);
7596	  break;
7597	case UNSPEC_TPOFF:
7598	  fputs ("@TPOFF", file);
7599	  break;
7600	case UNSPEC_NTPOFF:
7601	  if (TARGET_64BIT)
7602	    fputs ("@TPOFF", file);
7603	  else
7604	    fputs ("@NTPOFF", file);
7605	  break;
7606	case UNSPEC_DTPOFF:
7607	  fputs ("@DTPOFF", file);
7608	  break;
7609	case UNSPEC_GOTNTPOFF:
7610	  if (TARGET_64BIT)
7611	    fputs ("@GOTTPOFF(%rip)", file);
7612	  else
7613	    fputs ("@GOTNTPOFF", file);
7614	  break;
7615	case UNSPEC_INDNTPOFF:
7616	  fputs ("@INDNTPOFF", file);
7617	  break;
7618	default:
7619	  output_operand_lossage ("invalid UNSPEC as operand");
7620	  break;
7621	}
7622       break;
7623
7624    default:
7625      output_operand_lossage ("invalid expression as operand");
7626    }
7627}
7628
7629/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7630   We need to emit DTP-relative relocations.  */
7631
7632static void
7633i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7634{
7635  fputs (ASM_LONG, file);
7636  output_addr_const (file, x);
7637  fputs ("@DTPOFF", file);
7638  switch (size)
7639    {
7640    case 4:
7641      break;
7642    case 8:
7643      fputs (", 0", file);
7644      break;
7645    default:
7646      gcc_unreachable ();
7647   }
7648}
7649
7650/* In the name of slightly smaller debug output, and to cater to
7651   general assembler lossage, recognize PIC+GOTOFF and turn it back
7652   into a direct symbol reference.
7653
7654   On Darwin, this is necessary to avoid a crash, because Darwin
7655   has a different PIC label for each routine but the DWARF debugging
7656   information is not associated with any particular routine, so it's
7657   necessary to remove references to the PIC label from RTL stored by
7658   the DWARF output code.  */
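/* For example, (plus (reg pic) (const (unspec [(symbol_ref)] UNSPEC_GOTOFF)))
   is turned back into the bare symbol_ref when the original rtx is not a MEM;
   @GOT references are stripped the same way when the original rtx is a MEM.  */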
7659
7660static rtx
7661ix86_delegitimize_address (rtx orig_x)
7662{
7663  rtx x = orig_x;
7664  /* reg_addend is NULL or a multiple of some register.  */
7665  rtx reg_addend = NULL_RTX;
7666  /* const_addend is NULL or a const_int.  */
7667  rtx const_addend = NULL_RTX;
7668  /* This is the result, or NULL.  */
7669  rtx result = NULL_RTX;
7670
7671  if (GET_CODE (x) == MEM)
7672    x = XEXP (x, 0);
7673
7674  if (TARGET_64BIT)
7675    {
7676      if (GET_CODE (x) != CONST
7677	  || GET_CODE (XEXP (x, 0)) != UNSPEC
7678	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7679	  || GET_CODE (orig_x) != MEM)
7680	return orig_x;
7681      return XVECEXP (XEXP (x, 0), 0, 0);
7682    }
7683
7684  if (GET_CODE (x) != PLUS
7685      || GET_CODE (XEXP (x, 1)) != CONST)
7686    return orig_x;
7687
7688  if (GET_CODE (XEXP (x, 0)) == REG
7689      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7690    /* %ebx + GOT/GOTOFF */
7691    ;
7692  else if (GET_CODE (XEXP (x, 0)) == PLUS)
7693    {
7694      /* %ebx + %reg * scale + GOT/GOTOFF */
7695      reg_addend = XEXP (x, 0);
7696      if (GET_CODE (XEXP (reg_addend, 0)) == REG
7697	  && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7698	reg_addend = XEXP (reg_addend, 1);
7699      else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7700	       && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7701	reg_addend = XEXP (reg_addend, 0);
7702      else
7703	return orig_x;
7704      if (GET_CODE (reg_addend) != REG
7705	  && GET_CODE (reg_addend) != MULT
7706	  && GET_CODE (reg_addend) != ASHIFT)
7707	return orig_x;
7708    }
7709  else
7710    return orig_x;
7711
7712  x = XEXP (XEXP (x, 1), 0);
7713  if (GET_CODE (x) == PLUS
7714      && GET_CODE (XEXP (x, 1)) == CONST_INT)
7715    {
7716      const_addend = XEXP (x, 1);
7717      x = XEXP (x, 0);
7718    }
7719
7720  if (GET_CODE (x) == UNSPEC
7721      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7722	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7723    result = XVECEXP (x, 0, 0);
7724
7725  if (TARGET_MACHO && darwin_local_data_pic (x)
7726      && GET_CODE (orig_x) != MEM)
7727    result = XEXP (x, 0);
7728
7729  if (! result)
7730    return orig_x;
7731
7732  if (const_addend)
7733    result = gen_rtx_PLUS (Pmode, result, const_addend);
7734  if (reg_addend)
7735    result = gen_rtx_PLUS (Pmode, reg_addend, result);
7736  return result;
7737}
7738
7739static void
7740put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7741		    int fp, FILE *file)
7742{
7743  const char *suffix;
7744
7745  if (mode == CCFPmode || mode == CCFPUmode)
7746    {
7747      enum rtx_code second_code, bypass_code;
7748      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7749      gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7750      code = ix86_fp_compare_code_to_integer (code);
7751      mode = CCmode;
7752    }
7753  if (reverse)
7754    code = reverse_condition (code);
7755
7756  switch (code)
7757    {
7758    case EQ:
7759      suffix = "e";
7760      break;
7761    case NE:
7762      suffix = "ne";
7763      break;
7764    case GT:
7765      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7766      suffix = "g";
7767      break;
7768    case GTU:
7769      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7770	 Those same assemblers have the same but opposite lossage on cmov.  */
7771      gcc_assert (mode == CCmode);
7772      suffix = fp ? "nbe" : "a";
7773      break;
7774    case LT:
7775      switch (mode)
7776	{
7777	case CCNOmode:
7778	case CCGOCmode:
7779	  suffix = "s";
7780	  break;
7781
7782	case CCmode:
7783	case CCGCmode:
7784	  suffix = "l";
7785	  break;
7786
7787	default:
7788	  gcc_unreachable ();
7789	}
7790      break;
7791    case LTU:
7792      gcc_assert (mode == CCmode);
7793      suffix = "b";
7794      break;
7795    case GE:
7796      switch (mode)
7797	{
7798	case CCNOmode:
7799	case CCGOCmode:
7800	  suffix = "ns";
7801	  break;
7802
7803	case CCmode:
7804	case CCGCmode:
7805	  suffix = "ge";
7806	  break;
7807
7808	default:
7809	  gcc_unreachable ();
7810	}
7811      break;
7812    case GEU:
7813      /* ??? As above.  */
7814      gcc_assert (mode == CCmode);
7815      suffix = fp ? "nb" : "ae";
7816      break;
7817    case LE:
7818      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7819      suffix = "le";
7820      break;
7821    case LEU:
7822      gcc_assert (mode == CCmode);
7823      suffix = "be";
7824      break;
7825    case UNORDERED:
7826      suffix = fp ? "u" : "p";
7827      break;
7828    case ORDERED:
7829      suffix = fp ? "nu" : "np";
7830      break;
7831    default:
7832      gcc_unreachable ();
7833    }
7834  fputs (suffix, file);
7835}
7836
7837/* Print the name of register X to FILE based on its machine mode and number.
7838   If CODE is 'w', pretend the mode is HImode.
7839   If CODE is 'b', pretend the mode is QImode.
7840   If CODE is 'k', pretend the mode is SImode.
7841   If CODE is 'q', pretend the mode is DImode.
7842   If CODE is 'h', pretend the reg is the 'high' byte register.
7843   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
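/* For instance, if X is the ax register, code 'k' prints "eax", 'w' prints
   "ax", 'b' prints "al" and 'h' prints "ah", each preceded by '%' when the
   assembler dialect calls for it.  */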
7844
7845void
7846print_reg (rtx x, int code, FILE *file)
7847{
7848  gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7849	      && REGNO (x) != FRAME_POINTER_REGNUM
7850	      && REGNO (x) != FLAGS_REG
7851	      && REGNO (x) != FPSR_REG);
7852
7853  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7854    putc ('%', file);
7855
7856  if (code == 'w' || MMX_REG_P (x))
7857    code = 2;
7858  else if (code == 'b')
7859    code = 1;
7860  else if (code == 'k')
7861    code = 4;
7862  else if (code == 'q')
7863    code = 8;
7864  else if (code == 'y')
7865    code = 3;
7866  else if (code == 'h')
7867    code = 0;
7868  else
7869    code = GET_MODE_SIZE (GET_MODE (x));
7870
7871  /* Irritatingly, AMD extended registers use a different naming convention
7872     from the normal registers.  */
7873  if (REX_INT_REG_P (x))
7874    {
7875      gcc_assert (TARGET_64BIT);
7876      switch (code)
7877	{
7878	  case 0:
7879	    error ("extended registers have no high halves");
7880	    break;
7881	  case 1:
7882	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7883	    break;
7884	  case 2:
7885	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7886	    break;
7887	  case 4:
7888	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7889	    break;
7890	  case 8:
7891	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7892	    break;
7893	  default:
7894	    error ("unsupported operand size for extended register");
7895	    break;
7896	}
7897      return;
7898    }
7899  switch (code)
7900    {
7901    case 3:
7902      if (STACK_TOP_P (x))
7903	{
7904	  fputs ("st(0)", file);
7905	  break;
7906	}
7907      /* FALLTHRU */
7908    case 8:
7909    case 4:
7910    case 12:
7911      if (! ANY_FP_REG_P (x))
7912	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7913      /* FALLTHRU */
7914    case 16:
7915    case 2:
7916    normal:
7917      fputs (hi_reg_name[REGNO (x)], file);
7918      break;
7919    case 1:
7920      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7921	goto normal;
7922      fputs (qi_reg_name[REGNO (x)], file);
7923      break;
7924    case 0:
7925      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7926	goto normal;
7927      fputs (qi_high_reg_name[REGNO (x)], file);
7928      break;
7929    default:
7930      gcc_unreachable ();
7931    }
7932}
7933
7934/* Locate some local-dynamic symbol still in use by this function
7935   so that we can print its name in some tls_local_dynamic_base
7936   pattern.  */
7937
7938static const char *
7939get_some_local_dynamic_name (void)
7940{
7941  rtx insn;
7942
7943  if (cfun->machine->some_ld_name)
7944    return cfun->machine->some_ld_name;
7945
7946  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7947    if (INSN_P (insn)
7948	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7949      return cfun->machine->some_ld_name;
7950
7951  gcc_unreachable ();
7952}
7953
7954static int
7955get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7956{
7957  rtx x = *px;
7958
7959  if (GET_CODE (x) == SYMBOL_REF
7960      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7961    {
7962      cfun->machine->some_ld_name = XSTR (x, 0);
7963      return 1;
7964    }
7965
7966  return 0;
7967}
7968
7969/* Meaning of CODE:
7970   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7971   C -- print opcode suffix for set/cmov insn.
7972   c -- like C, but print reversed condition
7973   F,f -- likewise, but for floating-point.
7974   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7975        otherwise nothing
7976   R -- print the prefix for register names.
7977   z -- print the opcode suffix for the size of the current operand.
7978   * -- print a star (in certain assembler syntax)
7979   A -- print an absolute memory reference.
7980   w -- print the operand as if it's a "word" (HImode) even if it isn't.
7981   s -- print a shift double count, followed by the assembler's argument
7982	delimiter.
7983   b -- print the QImode name of the register for the indicated operand.
7984	%b0 would print %al if operands[0] is reg 0.
7985   w --  likewise, print the HImode name of the register.
7986   k --  likewise, print the SImode name of the register.
7987   q --  likewise, print the DImode name of the register.
7988   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7989   y -- print "st(0)" instead of "st" as a register.
7990   D -- print condition for SSE cmp instruction.
7991   P -- if PIC, print an @PLT suffix.
7992   X -- don't print any sort of PIC '@' suffix for a symbol.
7993   & -- print some in-use local-dynamic symbol name.
7994   H -- print a memory address offset by 8; used for sse high-parts
7995 */
7996
7997void
7998print_operand (FILE *file, rtx x, int code)
7999{
8000  if (code)
8001    {
8002      switch (code)
8003	{
8004	case '*':
8005	  if (ASSEMBLER_DIALECT == ASM_ATT)
8006	    putc ('*', file);
8007	  return;
8008
8009	case '&':
8010	  assemble_name (file, get_some_local_dynamic_name ());
8011	  return;
8012
8013	case 'A':
8014	  switch (ASSEMBLER_DIALECT)
8015	    {
8016	    case ASM_ATT:
8017	      putc ('*', file);
8018	      break;
8019
8020	    case ASM_INTEL:
8021	      /* Intel syntax.  For absolute addresses, registers should not
8022		 be surrounded by brackets.  */
8023	      if (GET_CODE (x) != REG)
8024		{
8025		  putc ('[', file);
8026		  PRINT_OPERAND (file, x, 0);
8027		  putc (']', file);
8028		  return;
8029		}
8030	      break;
8031
8032	    default:
8033	      gcc_unreachable ();
8034	    }
8035
8036	  PRINT_OPERAND (file, x, 0);
8037	  return;
8038
8039
8040	case 'L':
8041	  if (ASSEMBLER_DIALECT == ASM_ATT)
8042	    putc ('l', file);
8043	  return;
8044
8045	case 'W':
8046	  if (ASSEMBLER_DIALECT == ASM_ATT)
8047	    putc ('w', file);
8048	  return;
8049
8050	case 'B':
8051	  if (ASSEMBLER_DIALECT == ASM_ATT)
8052	    putc ('b', file);
8053	  return;
8054
8055	case 'Q':
8056	  if (ASSEMBLER_DIALECT == ASM_ATT)
8057	    putc ('l', file);
8058	  return;
8059
8060	case 'S':
8061	  if (ASSEMBLER_DIALECT == ASM_ATT)
8062	    putc ('s', file);
8063	  return;
8064
8065	case 'T':
8066	  if (ASSEMBLER_DIALECT == ASM_ATT)
8067	    putc ('t', file);
8068	  return;
8069
8070	case 'z':
8071	  /* 387 opcodes don't get size suffixes if the operands are
8072	     registers.  */
8073	  if (STACK_REG_P (x))
8074	    return;
8075
8076	  /* Likewise if using Intel opcodes.  */
8077	  if (ASSEMBLER_DIALECT == ASM_INTEL)
8078	    return;
8079
8080	  /* Derive the opcode size suffix from the size of the operand.  */
8081	  switch (GET_MODE_SIZE (GET_MODE (x)))
8082	    {
8083	    case 2:
8084#ifdef HAVE_GAS_FILDS_FISTS
8085	      putc ('s', file);
8086#endif
8087	      return;
8088
8089	    case 4:
8090	      if (GET_MODE (x) == SFmode)
8091		{
8092		  putc ('s', file);
8093		  return;
8094		}
8095	      else
8096		putc ('l', file);
8097	      return;
8098
8099	    case 12:
8100	    case 16:
8101	      putc ('t', file);
8102	      return;
8103
8104	    case 8:
8105	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8106		{
8107#ifdef GAS_MNEMONICS
8108		  putc ('q', file);
8109#else
8110		  putc ('l', file);
8111		  putc ('l', file);
8112#endif
8113		}
8114	      else
8115	        putc ('l', file);
8116	      return;
8117
8118	    default:
8119	      gcc_unreachable ();
8120	    }
8121
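	  /* These codes only affect how the register or memory operand is
	     printed; fall through to the generic operand handling below.  */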
8122	case 'b':
8123	case 'w':
8124	case 'k':
8125	case 'q':
8126	case 'h':
8127	case 'y':
8128	case 'X':
8129	case 'P':
8130	  break;
8131
8132	case 's':
8133	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
8134	    {
8135	      PRINT_OPERAND (file, x, 0);
8136	      putc (',', file);
8137	    }
8138	  return;
8139
8140	case 'D':
8141	  /* Little bit of braindamage here.  The SSE compare instructions
8142	     use completely different names for the comparisons than the
8143	     fp conditional moves do.  */
8144	  switch (GET_CODE (x))
8145	    {
8146	    case EQ:
8147	    case UNEQ:
8148	      fputs ("eq", file);
8149	      break;
8150	    case LT:
8151	    case UNLT:
8152	      fputs ("lt", file);
8153	      break;
8154	    case LE:
8155	    case UNLE:
8156	      fputs ("le", file);
8157	      break;
8158	    case UNORDERED:
8159	      fputs ("unord", file);
8160	      break;
8161	    case NE:
8162	    case LTGT:
8163	      fputs ("neq", file);
8164	      break;
8165	    case UNGE:
8166	    case GE:
8167	      fputs ("nlt", file);
8168	      break;
8169	    case UNGT:
8170	    case GT:
8171	      fputs ("nle", file);
8172	      break;
8173	    case ORDERED:
8174	      fputs ("ord", file);
8175	      break;
8176	    default:
8177	      gcc_unreachable ();
8178	    }
8179	  return;
8180	case 'O':
8181#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8182	  if (ASSEMBLER_DIALECT == ASM_ATT)
8183	    {
8184	      switch (GET_MODE (x))
8185		{
8186		case HImode: putc ('w', file); break;
8187		case SImode:
8188		case SFmode: putc ('l', file); break;
8189		case DImode:
8190		case DFmode: putc ('q', file); break;
8191		default: gcc_unreachable ();
8192		}
8193	      putc ('.', file);
8194	    }
8195#endif
8196	  return;
8197	case 'C':
8198	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8199	  return;
8200	case 'F':
8201#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8202	  if (ASSEMBLER_DIALECT == ASM_ATT)
8203	    putc ('.', file);
8204#endif
8205	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8206	  return;
8207
8208	  /* Like above, but reverse condition */
8209	case 'c':
8210	  /* Check to see if argument to %c is really a constant
8211	     and not a condition code which needs to be reversed.  */
8212	  if (!COMPARISON_P (x))
8213	  {
8214	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8215	     return;
8216	  }
8217	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8218	  return;
8219	case 'f':
8220#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8221	  if (ASSEMBLER_DIALECT == ASM_ATT)
8222	    putc ('.', file);
8223#endif
8224	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8225	  return;
8226
8227	case 'H':
8228	  /* It doesn't actually matter what mode we use here, as we're
8229	     only going to use this for printing.  */
8230	  x = adjust_address_nv (x, DImode, 8);
8231	  break;
8232
8233	case '+':
8234	  {
8235	    rtx x;
8236
8237	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8238	      return;
8239
8240	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8241	    if (x)
8242	      {
8243		int pred_val = INTVAL (XEXP (x, 0));
8244
8245		if (pred_val < REG_BR_PROB_BASE * 45 / 100
8246		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
8247		  {
8248		    int taken = pred_val > REG_BR_PROB_BASE / 2;
8249		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
8250
8251		    /* Emit hints only when the default branch prediction
8252		       heuristics would fail.  */
8253		    if (taken != cputaken)
8254		      {
8255			/* We use 3e (DS) prefix for taken branches and
8256			   2e (CS) prefix for not taken branches.  */
8257			if (taken)
8258			  fputs ("ds ; ", file);
8259			else
8260			  fputs ("cs ; ", file);
8261		      }
8262		  }
8263	      }
8264	    return;
8265	  }
8266	default:
8267	    output_operand_lossage ("invalid operand code '%c'", code);
8268	}
8269    }
8270
8271  if (GET_CODE (x) == REG)
8272    print_reg (x, code, file);
8273
8274  else if (GET_CODE (x) == MEM)
8275    {
8276      /* No `byte ptr' prefix for call instructions.  */
8277      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8278	{
8279	  const char * size;
8280	  switch (GET_MODE_SIZE (GET_MODE (x)))
8281	    {
8282	    case 1: size = "BYTE"; break;
8283	    case 2: size = "WORD"; break;
8284	    case 4: size = "DWORD"; break;
8285	    case 8: size = "QWORD"; break;
8286	    case 12: size = "XWORD"; break;
8287	    case 16: size = "XMMWORD"; break;
8288	    default:
8289	      gcc_unreachable ();
8290	    }
8291
8292	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
8293	  if (code == 'b')
8294	    size = "BYTE";
8295	  else if (code == 'w')
8296	    size = "WORD";
8297	  else if (code == 'k')
8298	    size = "DWORD";
8299
8300	  fputs (size, file);
8301	  fputs (" PTR ", file);
8302	}
8303
8304      x = XEXP (x, 0);
8305      /* Avoid (%rip) for call operands.  */
8306      if (CONSTANT_ADDRESS_P (x) && code == 'P'
8307	       && GET_CODE (x) != CONST_INT)
8308	output_addr_const (file, x);
8309      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8310	output_operand_lossage ("invalid constraints for operand");
8311      else
8312	output_address (x);
8313    }
8314
8315  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8316    {
8317      REAL_VALUE_TYPE r;
8318      long l;
8319
8320      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8321      REAL_VALUE_TO_TARGET_SINGLE (r, l);
8322
8323      if (ASSEMBLER_DIALECT == ASM_ATT)
8324	putc ('$', file);
8325      fprintf (file, "0x%08lx", l);
8326    }
8327
8328  /* These float cases don't actually occur as immediate operands.  */
8329  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8330    {
8331      char dstr[30];
8332
8333      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8334      fprintf (file, "%s", dstr);
8335    }
8336
8337  else if (GET_CODE (x) == CONST_DOUBLE
8338	   && GET_MODE (x) == XFmode)
8339    {
8340      char dstr[30];
8341
8342      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8343      fprintf (file, "%s", dstr);
8344    }
8345
8346  else
8347    {
8348      /* We have patterns that allow zero sets of memory, for instance.
8349	 In 64-bit mode, we should probably support all 8-byte vectors,
8350	 since we can in fact encode that into an immediate.  */
8351      if (GET_CODE (x) == CONST_VECTOR)
8352	{
8353	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8354	  x = const0_rtx;
8355	}
8356
8357      if (code != 'P')
8358	{
8359	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8360	    {
8361	      if (ASSEMBLER_DIALECT == ASM_ATT)
8362		putc ('$', file);
8363	    }
8364	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8365		   || GET_CODE (x) == LABEL_REF)
8366	    {
8367	      if (ASSEMBLER_DIALECT == ASM_ATT)
8368		putc ('$', file);
8369	      else
8370		fputs ("OFFSET FLAT:", file);
8371	    }
8372	}
8373      if (GET_CODE (x) == CONST_INT)
8374	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8375      else if (flag_pic)
8376	output_pic_addr_const (file, x, code);
8377      else
8378	output_addr_const (file, x);
8379    }
8380}
8381
8382/* Print a memory operand whose address is ADDR.  */
8383
8384void
8385print_operand_address (FILE *file, rtx addr)
8386{
8387  struct ix86_address parts;
8388  rtx base, index, disp;
8389  int scale;
8390  int ok = ix86_decompose_address (addr, &parts);
8391
8392  gcc_assert (ok);
8393
8394  base = parts.base;
8395  index = parts.index;
8396  disp = parts.disp;
8397  scale = parts.scale;
8398
8399  switch (parts.seg)
8400    {
8401    case SEG_DEFAULT:
8402      break;
8403    case SEG_FS:
8404    case SEG_GS:
8405      if (USER_LABEL_PREFIX[0] == 0)
8406	putc ('%', file);
8407      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8408      break;
8409    default:
8410      gcc_unreachable ();
8411    }
8412
8413  if (!base && !index)
8414    {
8415      /* A displacement-only address requires special attention.  */
8416
8417      if (GET_CODE (disp) == CONST_INT)
8418	{
8419	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8420	    {
8421	      if (USER_LABEL_PREFIX[0] == 0)
8422		putc ('%', file);
8423	      fputs ("ds:", file);
8424	    }
8425	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8426	}
8427      else if (flag_pic)
8428	output_pic_addr_const (file, disp, 0);
8429      else
8430	output_addr_const (file, disp);
8431
8432      /* Use the one byte shorter RIP-relative addressing for 64-bit mode.  */
8433      if (TARGET_64BIT)
8434	{
8435	  if (GET_CODE (disp) == CONST
8436	      && GET_CODE (XEXP (disp, 0)) == PLUS
8437	      && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8438	    disp = XEXP (XEXP (disp, 0), 0);
8439	  if (GET_CODE (disp) == LABEL_REF
8440	      || (GET_CODE (disp) == SYMBOL_REF
8441		  && SYMBOL_REF_TLS_MODEL (disp) == 0))
8442	    fputs ("(%rip)", file);
8443	}
8444    }
8445  else
8446    {
8447      if (ASSEMBLER_DIALECT == ASM_ATT)
8448	{
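	  /* AT&T syntax: print "disp(base,index,scale)".  */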
8449	  if (disp)
8450	    {
8451	      if (flag_pic)
8452		output_pic_addr_const (file, disp, 0);
8453	      else if (GET_CODE (disp) == LABEL_REF)
8454		output_asm_label (disp);
8455	      else
8456		output_addr_const (file, disp);
8457	    }
8458
8459	  putc ('(', file);
8460	  if (base)
8461	    print_reg (base, 0, file);
8462	  if (index)
8463	    {
8464	      putc (',', file);
8465	      print_reg (index, 0, file);
8466	      if (scale != 1)
8467		fprintf (file, ",%d", scale);
8468	    }
8469	  putc (')', file);
8470	}
8471      else
8472	{
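	  /* Intel syntax: print "[base+disp+index*scale]".  */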
8473	  rtx offset = NULL_RTX;
8474
8475	  if (disp)
8476	    {
8477	      /* Pull out the offset of a symbol; print any symbol itself.  */
8478	      if (GET_CODE (disp) == CONST
8479		  && GET_CODE (XEXP (disp, 0)) == PLUS
8480		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8481		{
8482		  offset = XEXP (XEXP (disp, 0), 1);
8483		  disp = gen_rtx_CONST (VOIDmode,
8484					XEXP (XEXP (disp, 0), 0));
8485		}
8486
8487	      if (flag_pic)
8488		output_pic_addr_const (file, disp, 0);
8489	      else if (GET_CODE (disp) == LABEL_REF)
8490		output_asm_label (disp);
8491	      else if (GET_CODE (disp) == CONST_INT)
8492		offset = disp;
8493	      else
8494		output_addr_const (file, disp);
8495	    }
8496
8497	  putc ('[', file);
8498	  if (base)
8499	    {
8500	      print_reg (base, 0, file);
8501	      if (offset)
8502		{
8503		  if (INTVAL (offset) >= 0)
8504		    putc ('+', file);
8505		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8506		}
8507	    }
8508	  else if (offset)
8509	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8510	  else
8511	    putc ('0', file);
8512
8513	  if (index)
8514	    {
8515	      putc ('+', file);
8516	      print_reg (index, 0, file);
8517	      if (scale != 1)
8518		fprintf (file, "*%d", scale);
8519	    }
8520	  putc (']', file);
8521	}
8522    }
8523}
8524
8525bool
8526output_addr_const_extra (FILE *file, rtx x)
8527{
8528  rtx op;
8529
8530  if (GET_CODE (x) != UNSPEC)
8531    return false;
8532
8533  op = XVECEXP (x, 0, 0);
8534  switch (XINT (x, 1))
8535    {
8536    case UNSPEC_GOTTPOFF:
8537      output_addr_const (file, op);
8538      /* FIXME: This might be @TPOFF in Sun ld.  */
8539      fputs ("@GOTTPOFF", file);
8540      break;
8541    case UNSPEC_TPOFF:
8542      output_addr_const (file, op);
8543      fputs ("@TPOFF", file);
8544      break;
8545    case UNSPEC_NTPOFF:
8546      output_addr_const (file, op);
8547      if (TARGET_64BIT)
8548	fputs ("@TPOFF", file);
8549      else
8550	fputs ("@NTPOFF", file);
8551      break;
8552    case UNSPEC_DTPOFF:
8553      output_addr_const (file, op);
8554      fputs ("@DTPOFF", file);
8555      break;
8556    case UNSPEC_GOTNTPOFF:
8557      output_addr_const (file, op);
8558      if (TARGET_64BIT)
8559	fputs ("@GOTTPOFF(%rip)", file);
8560      else
8561	fputs ("@GOTNTPOFF", file);
8562      break;
8563    case UNSPEC_INDNTPOFF:
8564      output_addr_const (file, op);
8565      fputs ("@INDNTPOFF", file);
8566      break;
8567
8568    default:
8569      return false;
8570    }
8571
8572  return true;
8573}
8574
8575/* Split one or more DImode RTL references into pairs of SImode
8576   references.  The RTL can be REG, offsettable MEM, integer constant, or
8577   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
8578   split and "num" is its length.  lo_half and hi_half are output arrays
8579   that parallel "operands".  */
8580
8581void
8582split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8583{
8584  while (num--)
8585    {
8586      rtx op = operands[num];
8587
8588      /* simplify_subreg refuses to split volatile memory addresses,
8589         but we still have to handle them.  */
8590      if (GET_CODE (op) == MEM)
8591	{
8592	  lo_half[num] = adjust_address (op, SImode, 0);
8593	  hi_half[num] = adjust_address (op, SImode, 4);
8594	}
8595      else
8596	{
8597	  lo_half[num] = simplify_gen_subreg (SImode, op,
8598					      GET_MODE (op) == VOIDmode
8599					      ? DImode : GET_MODE (op), 0);
8600	  hi_half[num] = simplify_gen_subreg (SImode, op,
8601					      GET_MODE (op) == VOIDmode
8602					      ? DImode : GET_MODE (op), 4);
8603	}
8604    }
8605}
8606/* Split one or more TImode RTL references into pairs of DImode
8607   references.  The RTL can be REG, offsettable MEM, integer constant, or
8608   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
8609   split and "num" is its length.  lo_half and hi_half are output arrays
8610   that parallel "operands".  */
8611
8612void
8613split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8614{
8615  while (num--)
8616    {
8617      rtx op = operands[num];
8618
8619      /* simplify_subreg refuses to split volatile memory addresses, but we
8620         still have to handle them.  */
8621      if (GET_CODE (op) == MEM)
8622	{
8623	  lo_half[num] = adjust_address (op, DImode, 0);
8624	  hi_half[num] = adjust_address (op, DImode, 8);
8625	}
8626      else
8627	{
8628	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8629	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8630	}
8631    }
8632}
8633
8634/* Output code to perform a 387 binary operation in INSN, one of PLUS,
8635   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
8636   is the expression of the binary operation.  The output may either be
8637   emitted here, or returned to the caller, like all output_* functions.
8638
8639   There is no guarantee that the operands are the same mode, as they
8640   might be within FLOAT or FLOAT_EXTEND expressions.  */
8641
8642#ifndef SYSV386_COMPAT
8643/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
8644   wants to fix the assemblers because that causes incompatibility
8645   with gcc.  No-one wants to fix gcc because that causes
8646   incompatibility with assemblers...  You can use the option of
8647   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
8648#define SYSV386_COMPAT 1
8649#endif
8650
8651const char *
8652output_387_binary_op (rtx insn, rtx *operands)
8653{
8654  static char buf[30];
8655  const char *p;
8656  const char *ssep;
8657  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8658
8659#ifdef ENABLE_CHECKING
8660  /* Even if we do not want to check the inputs, this documents the input
8661     constraints, which helps in understanding the following code.  */
8662  if (STACK_REG_P (operands[0])
8663      && ((REG_P (operands[1])
8664	   && REGNO (operands[0]) == REGNO (operands[1])
8665	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8666	  || (REG_P (operands[2])
8667	      && REGNO (operands[0]) == REGNO (operands[2])
8668	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8669      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8670    ; /* ok */
8671  else
8672    gcc_assert (is_sse);
8673#endif
8674
8675  switch (GET_CODE (operands[3]))
8676    {
8677    case PLUS:
8678      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8679	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8680	p = "fiadd";
8681      else
8682	p = "fadd";
8683      ssep = "add";
8684      break;
8685
8686    case MINUS:
8687      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8688	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8689	p = "fisub";
8690      else
8691	p = "fsub";
8692      ssep = "sub";
8693      break;
8694
8695    case MULT:
8696      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8697	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8698	p = "fimul";
8699      else
8700	p = "fmul";
8701      ssep = "mul";
8702      break;
8703
8704    case DIV:
8705      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8706	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8707	p = "fidiv";
8708      else
8709	p = "fdiv";
8710      ssep = "div";
8711      break;
8712
8713    default:
8714      gcc_unreachable ();
8715    }
8716
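  /* For SSE, the whole instruction is just the scalar operation name
     plus an "ss" or "sd" suffix and a two-operand template.  */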
8717  if (is_sse)
8718   {
8719      strcpy (buf, ssep);
8720      if (GET_MODE (operands[0]) == SFmode)
8721	strcat (buf, "ss\t{%2, %0|%0, %2}");
8722      else
8723	strcat (buf, "sd\t{%2, %0|%0, %2}");
8724      return buf;
8725   }
8726  strcpy (buf, p);
8727
8728  switch (GET_CODE (operands[3]))
8729    {
8730    case MULT:
8731    case PLUS:
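      /* For these commutative operators, swap the sources if necessary so
	 that the destination matches operands[1].  */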
8732      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8733	{
8734	  rtx temp = operands[2];
8735	  operands[2] = operands[1];
8736	  operands[1] = temp;
8737	}
8738
8739      /* We know operands[0] == operands[1].  */
8740
8741      if (GET_CODE (operands[2]) == MEM)
8742	{
8743	  p = "%z2\t%2";
8744	  break;
8745	}
8746
8747      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8748	{
8749	  if (STACK_TOP_P (operands[0]))
8750	    /* How is it that we are storing to a dead operand[2]?
8751	       Well, presumably operands[1] is dead too.  We can't
8752	       store the result to st(0) as st(0) gets popped on this
8753	       instruction.  Instead store to operands[2] (which I
8754	       think has to be st(1)).  st(1) will be popped later.
8755	       gcc <= 2.8.1 didn't have this check and generated
8756	       assembly code that the Unixware assembler rejected.  */
8757	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8758	  else
8759	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8760	  break;
8761	}
8762
8763      if (STACK_TOP_P (operands[0]))
8764	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8765      else
8766	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8767      break;
8768
8769    case MINUS:
8770    case DIV:
8771      if (GET_CODE (operands[1]) == MEM)
8772	{
8773	  p = "r%z1\t%1";
8774	  break;
8775	}
8776
8777      if (GET_CODE (operands[2]) == MEM)
8778	{
8779	  p = "%z2\t%2";
8780	  break;
8781	}
8782
8783      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8784	{
8785#if SYSV386_COMPAT
8786	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8787	     derived assemblers, confusingly reverse the direction of
8788	     the operation for fsub{r} and fdiv{r} when the
8789	     destination register is not st(0).  The Intel assembler
8790	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
8791	     figure out what the hardware really does.  */
8792	  if (STACK_TOP_P (operands[0]))
8793	    p = "{p\t%0, %2|rp\t%2, %0}";
8794	  else
8795	    p = "{rp\t%2, %0|p\t%0, %2}";
8796#else
8797	  if (STACK_TOP_P (operands[0]))
8798	    /* As above for fmul/fadd, we can't store to st(0).  */
8799	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8800	  else
8801	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8802#endif
8803	  break;
8804	}
8805
8806      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8807	{
8808#if SYSV386_COMPAT
8809	  if (STACK_TOP_P (operands[0]))
8810	    p = "{rp\t%0, %1|p\t%1, %0}";
8811	  else
8812	    p = "{p\t%1, %0|rp\t%0, %1}";
8813#else
8814	  if (STACK_TOP_P (operands[0]))
8815	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
8816	  else
8817	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
8818#endif
8819	  break;
8820	}
8821
8822      if (STACK_TOP_P (operands[0]))
8823	{
8824	  if (STACK_TOP_P (operands[1]))
8825	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8826	  else
8827	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
8828	  break;
8829	}
8830      else if (STACK_TOP_P (operands[1]))
8831	{
8832#if SYSV386_COMPAT
8833	  p = "{\t%1, %0|r\t%0, %1}";
8834#else
8835	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
8836#endif
8837	}
8838      else
8839	{
8840#if SYSV386_COMPAT
8841	  p = "{r\t%2, %0|\t%0, %2}";
8842#else
8843	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8844#endif
8845	}
8846      break;
8847
8848    default:
8849      gcc_unreachable ();
8850    }
8851
8852  strcat (buf, p);
8853  return buf;
8854}
8855
8856/* Return needed mode for entity in optimize_mode_switching pass.  */
8857
8858int
8859ix86_mode_needed (int entity, rtx insn)
8860{
8861  enum attr_i387_cw mode;
8862
8863  /* The mode UNINITIALIZED is used to store the control word after a
8864     function call or ASM pattern.  The mode ANY specifies that the function
8865     has no requirements on the control word and makes no changes in the
8866     bits we are interested in.  */
8867
8868  if (CALL_P (insn)
8869      || (NONJUMP_INSN_P (insn)
8870	  && (asm_noperands (PATTERN (insn)) >= 0
8871	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8872    return I387_CW_UNINITIALIZED;
8873
8874  if (recog_memoized (insn) < 0)
8875    return I387_CW_ANY;
8876
8877  mode = get_attr_i387_cw (insn);
8878
8879  switch (entity)
8880    {
8881    case I387_TRUNC:
8882      if (mode == I387_CW_TRUNC)
8883	return mode;
8884      break;
8885
8886    case I387_FLOOR:
8887      if (mode == I387_CW_FLOOR)
8888	return mode;
8889      break;
8890
8891    case I387_CEIL:
8892      if (mode == I387_CW_CEIL)
8893	return mode;
8894      break;
8895
8896    case I387_MASK_PM:
8897      if (mode == I387_CW_MASK_PM)
8898	return mode;
8899      break;
8900
8901    default:
8902      gcc_unreachable ();
8903    }
8904
8905  return I387_CW_ANY;
8906}
8907
8908/* Output code to initialize the control word copies used by the trunc?f?i
8909   and rounding patterns.  MODE selects which modified control word
8910   (truncation, floor, ceiling, or precision-mask) to set up.  */
8911
8912void
8913emit_i387_cw_initialization (int mode)
8914{
8915  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8916  rtx new_mode;
8917
8918  int slot;
8919
8920  rtx reg = gen_reg_rtx (HImode);
8921
8922  emit_insn (gen_x86_fnstcw_1 (stored_mode));
8923  emit_move_insn (reg, stored_mode);
8924
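  /* Adjust the in-register copy of the control word for the requested
     rounding mode or exception mask.  */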
8925  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8926    {
8927      switch (mode)
8928	{
8929	case I387_CW_TRUNC:
8930	  /* round toward zero (truncate) */
8931	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8932	  slot = SLOT_CW_TRUNC;
8933	  break;
8934
8935	case I387_CW_FLOOR:
8936	  /* round down toward -oo */
8937	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8938	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8939	  slot = SLOT_CW_FLOOR;
8940	  break;
8941
8942	case I387_CW_CEIL:
8943	  /* round up toward +oo */
8944	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8945	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8946	  slot = SLOT_CW_CEIL;
8947	  break;
8948
8949	case I387_CW_MASK_PM:
8950	  /* mask precision exception for nearbyint() */
8951	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8952	  slot = SLOT_CW_MASK_PM;
8953	  break;
8954
8955	default:
8956	  gcc_unreachable ();
8957	}
8958    }
8959  else
8960    {
8961      switch (mode)
8962	{
8963	case I387_CW_TRUNC:
8964	  /* round toward zero (truncate) */
8965	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8966	  slot = SLOT_CW_TRUNC;
8967	  break;
8968
8969	case I387_CW_FLOOR:
8970	  /* round down toward -oo */
8971	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8972	  slot = SLOT_CW_FLOOR;
8973	  break;
8974
8975	case I387_CW_CEIL:
8976	  /* round up toward +oo */
8977	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8978	  slot = SLOT_CW_CEIL;
8979	  break;
8980
8981	case I387_CW_MASK_PM:
8982	  /* mask precision exception for nearbyint() */
8983	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8984	  slot = SLOT_CW_MASK_PM;
8985	  break;
8986
8987	default:
8988	  gcc_unreachable ();
8989	}
8990    }
8991
8992  gcc_assert (slot < MAX_386_STACK_LOCALS);
8993
8994  new_mode = assign_386_stack_local (HImode, slot);
8995  emit_move_insn (new_mode, reg);
8996}
8997
8998/* Output code for INSN to convert a float to a signed int.  OPERANDS
8999   are the insn operands.  The output may be [HSD]Imode and the input
9000   operand may be [SDX]Fmode.  */
9001
9002const char *
9003output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9004{
9005  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9006  int dimode_p = GET_MODE (operands[0]) == DImode;
9007  int round_mode = get_attr_i387_cw (insn);
9008
9009  /* Jump through a hoop or two for DImode, since the hardware has no
9010     non-popping instruction.  We used to do this a different way, but
9011     that was somewhat fragile and broke with post-reload splitters.  */
9012  if ((dimode_p || fisttp) && !stack_top_dies)
9013    output_asm_insn ("fld\t%y1", operands);
9014
9015  gcc_assert (STACK_TOP_P (operands[1]));
9016  gcc_assert (GET_CODE (operands[0]) == MEM);
9017
9018  if (fisttp)
9019      output_asm_insn ("fisttp%z0\t%0", operands);
9020  else
9021    {
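      /* Switch to the required rounding mode, store (and possibly pop)
	 the value, then restore the original control word.  */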
9022      if (round_mode != I387_CW_ANY)
9023	output_asm_insn ("fldcw\t%3", operands);
9024      if (stack_top_dies || dimode_p)
9025	output_asm_insn ("fistp%z0\t%0", operands);
9026      else
9027	output_asm_insn ("fist%z0\t%0", operands);
9028      if (round_mode != I387_CW_ANY)
9029	output_asm_insn ("fldcw\t%2", operands);
9030    }
9031
9032  return "";
9033}
9034
9035/* Output code for x87 ffreep insn.  The OPNO argument, which may only
9036   have the values zero or one, indicates the ffreep insn's operand
9037   from the OPERANDS array.  */
9038
9039static const char *
9040output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9041{
9042  if (TARGET_USE_FFREEP)
9043#if HAVE_AS_IX86_FFREEP
9044    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9045#else
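    /* The assembler does not know the ffreep mnemonic; emit the raw
       opcode bytes (0xdf 0xc0+i, stored little-endian) directly.  */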
9046    switch (REGNO (operands[opno]))
9047      {
9048      case FIRST_STACK_REG + 0: return ".word\t0xc0df";
9049      case FIRST_STACK_REG + 1: return ".word\t0xc1df";
9050      case FIRST_STACK_REG + 2: return ".word\t0xc2df";
9051      case FIRST_STACK_REG + 3: return ".word\t0xc3df";
9052      case FIRST_STACK_REG + 4: return ".word\t0xc4df";
9053      case FIRST_STACK_REG + 5: return ".word\t0xc5df";
9054      case FIRST_STACK_REG + 6: return ".word\t0xc6df";
9055      case FIRST_STACK_REG + 7: return ".word\t0xc7df";
9056      }
9057#endif
9058
9059  return opno ? "fstp\t%y1" : "fstp\t%y0";
9060}
9061
9062
9063/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
9064   should be used.  UNORDERED_P is true when fucom should be used.  */
9065
9066const char *
9067output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9068{
9069  int stack_top_dies;
9070  rtx cmp_op0, cmp_op1;
9071  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9072
9073  if (eflags_p)
9074    {
9075      cmp_op0 = operands[0];
9076      cmp_op1 = operands[1];
9077    }
9078  else
9079    {
9080      cmp_op0 = operands[1];
9081      cmp_op1 = operands[2];
9082    }
9083
9084  if (is_sse)
9085    {
9086      if (GET_MODE (operands[0]) == SFmode)
9087	if (unordered_p)
9088	  return "ucomiss\t{%1, %0|%0, %1}";
9089	else
9090	  return "comiss\t{%1, %0|%0, %1}";
9091      else
9092	if (unordered_p)
9093	  return "ucomisd\t{%1, %0|%0, %1}";
9094	else
9095	  return "comisd\t{%1, %0|%0, %1}";
9096    }
9097
9098  gcc_assert (STACK_TOP_P (cmp_op0));
9099
9100  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9101
9102  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9103    {
9104      if (stack_top_dies)
9105	{
9106	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9107	  return output_387_ffreep (operands, 1);
9108	}
9109      else
9110	return "ftst\n\tfnstsw\t%0";
9111    }
9112
9113  if (STACK_REG_P (cmp_op1)
9114      && stack_top_dies
9115      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9116      && REGNO (cmp_op1) != FIRST_STACK_REG)
9117    {
9118      /* If both the top of the 387 stack and the other operand (also a
9119	 stack register) die, then this must be a `fcompp' float
9120	 compare.  */
9121
9122      if (eflags_p)
9123	{
9124	  /* There is no double popping fcomi variant.  Fortunately,
9125	     eflags is immune from the fstp's cc clobbering.  */
9126	  if (unordered_p)
9127	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9128	  else
9129	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9130	  return output_387_ffreep (operands, 0);
9131	}
9132      else
9133	{
9134	  if (unordered_p)
9135	    return "fucompp\n\tfnstsw\t%0";
9136	  else
9137	    return "fcompp\n\tfnstsw\t%0";
9138	}
9139    }
9140  else
9141    {
9142      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
9143
9144      static const char * const alt[16] =
9145      {
9146	"fcom%z2\t%y2\n\tfnstsw\t%0",
9147	"fcomp%z2\t%y2\n\tfnstsw\t%0",
9148	"fucom%z2\t%y2\n\tfnstsw\t%0",
9149	"fucomp%z2\t%y2\n\tfnstsw\t%0",
9150
9151	"ficom%z2\t%y2\n\tfnstsw\t%0",
9152	"ficomp%z2\t%y2\n\tfnstsw\t%0",
9153	NULL,
9154	NULL,
9155
9156	"fcomi\t{%y1, %0|%0, %y1}",
9157	"fcomip\t{%y1, %0|%0, %y1}",
9158	"fucomi\t{%y1, %0|%0, %y1}",
9159	"fucomip\t{%y1, %0|%0, %y1}",
9160
9161	NULL,
9162	NULL,
9163	NULL,
9164	NULL
9165      };
9166
9167      int mask;
9168      const char *ret;
9169
9170      mask  = eflags_p << 3;
9171      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9172      mask |= unordered_p << 1;
9173      mask |= stack_top_dies;
9174
9175      gcc_assert (mask < 16);
9176      ret = alt[mask];
9177      gcc_assert (ret);
9178
9179      return ret;
9180    }
9181}
9182
9183void
9184ix86_output_addr_vec_elt (FILE *file, int value)
9185{
9186  const char *directive = ASM_LONG;
9187
9188#ifdef ASM_QUAD
9189  if (TARGET_64BIT)
9190    directive = ASM_QUAD;
9191#else
9192  gcc_assert (!TARGET_64BIT);
9193#endif
9194
9195  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9196}
9197
9198void
9199ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9200{
9201  if (TARGET_64BIT)
9202    fprintf (file, "%s%s%d-%s%d\n",
9203	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
9204  else if (HAVE_AS_GOTOFF_IN_DATA)
9205    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9206#if TARGET_MACHO
9207  else if (TARGET_MACHO)
9208    {
9209      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9210      machopic_output_function_base_name (file);
9211      fprintf(file, "\n");
9212    }
9213#endif
9214  else
9215    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9216		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9217}
9218
9219/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9220   for the target.  */
9221
9222void
9223ix86_expand_clear (rtx dest)
9224{
9225  rtx tmp;
9226
9227  /* We play register width games, which are only valid after reload.  */
9228  gcc_assert (reload_completed);
9229
9230  /* Avoid HImode and its attendant prefix byte.  */
9231  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9232    dest = gen_rtx_REG (SImode, REGNO (dest));
9233
9234  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9235
9236  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
9237  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9238    {
9239      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9240      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9241    }
9242
9243  emit_insn (tmp);
9244}
9245
9246/* X is an unchanging MEM.  If it is a constant pool reference, return
9247   the constant pool rtx, else NULL.  */
9248
9249rtx
9250maybe_get_pool_constant (rtx x)
9251{
9252  x = ix86_delegitimize_address (XEXP (x, 0));
9253
9254  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9255    return get_pool_constant (x);
9256
9257  return NULL_RTX;
9258}
9259
9260void
9261ix86_expand_move (enum machine_mode mode, rtx operands[])
9262{
9263  int strict = (reload_in_progress || reload_completed);
9264  rtx op0, op1;
9265  enum tls_model model;
9266
9267  op0 = operands[0];
9268  op1 = operands[1];
9269
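  /* Legitimize TLS symbol references, including those hidden behind a
     CONST PLUS with an addend, before any other handling.  */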
9270  if (GET_CODE (op1) == SYMBOL_REF)
9271    {
9272      model = SYMBOL_REF_TLS_MODEL (op1);
9273      if (model)
9274	{
9275	  op1 = legitimize_tls_address (op1, model, true);
9276	  op1 = force_operand (op1, op0);
9277	  if (op1 == op0)
9278	    return;
9279	}
9280    }
9281  else if (GET_CODE (op1) == CONST
9282	   && GET_CODE (XEXP (op1, 0)) == PLUS
9283	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9284    {
9285      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9286      if (model)
9287	{
9288	  rtx addend = XEXP (XEXP (op1, 0), 1);
9289	  op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9290	  op1 = force_operand (op1, NULL);
9291	  op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9292				     op0, 1, OPTAB_DIRECT);
9293	  if (op1 == op0)
9294	    return;
9295	}
9296    }
9297
9298  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9299    {
9300      if (TARGET_MACHO && !TARGET_64BIT)
9301	{
9302#if TARGET_MACHO
9303	  if (MACHOPIC_PURE)
9304	    {
9305	      rtx temp = ((reload_in_progress
9306			   || ((op0 && GET_CODE (op0) == REG)
9307			       && mode == Pmode))
9308			  ? op0 : gen_reg_rtx (Pmode));
9309	      op1 = machopic_indirect_data_reference (op1, temp);
9310	      op1 = machopic_legitimize_pic_address (op1, mode,
9311						     temp == op1 ? 0 : temp);
9312	    }
9313	  else if (MACHOPIC_INDIRECT)
9314	    op1 = machopic_indirect_data_reference (op1, 0);
9315	  if (op0 == op1)
9316	    return;
9317#endif
9318	}
9319      else
9320	{
9321	  if (GET_CODE (op0) == MEM)
9322	    op1 = force_reg (Pmode, op1);
9323	  else
9324	    op1 = legitimize_address (op1, op1, Pmode);
9325	}
9326    }
9327  else
9328    {
9329      if (GET_CODE (op0) == MEM
9330	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9331	      || !push_operand (op0, mode))
9332	  && GET_CODE (op1) == MEM)
9333	op1 = force_reg (mode, op1);
9334
9335      if (push_operand (op0, mode)
9336	  && ! general_no_elim_operand (op1, mode))
9337	op1 = copy_to_mode_reg (mode, op1);
9338
9339      /* Force large constants in 64-bit compilation into a register
9340	 to get them CSEd.  */
9341      if (TARGET_64BIT && mode == DImode
9342	  && immediate_operand (op1, mode)
9343	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
9344	  && !register_operand (op0, mode)
9345	  && optimize && !reload_completed && !reload_in_progress)
9346	op1 = copy_to_mode_reg (mode, op1);
9347
9348      if (FLOAT_MODE_P (mode))
9349	{
9350	  /* If we are loading a floating point constant to a register,
9351	     force the value to memory now, since we'll get better code
9352	     out of the back end.  */
9353
9354	  if (strict)
9355	    ;
9356	  else if (GET_CODE (op1) == CONST_DOUBLE)
9357	    {
9358	      op1 = validize_mem (force_const_mem (mode, op1));
9359	      if (!register_operand (op0, mode))
9360		{
9361		  rtx temp = gen_reg_rtx (mode);
9362		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9363		  emit_move_insn (op0, temp);
9364		  return;
9365		}
9366	    }
9367	}
9368    }
9369
9370  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9371}
9372
9373void
9374ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9375{
9376  rtx op0 = operands[0], op1 = operands[1];
9377
9378  /* Force constants other than zero into memory.  We do not know how
9379     the instructions used to build constants modify the upper 64 bits
9380     of the register; once we have that information we may be able
9381     to handle some of them more efficiently.  */
9382  if ((reload_in_progress | reload_completed) == 0
9383      && register_operand (op0, mode)
9384      && CONSTANT_P (op1)
9385      && standard_sse_constant_p (op1) <= 0)
9386    op1 = validize_mem (force_const_mem (mode, op1));
9387
9388  /* Make operand1 a register if it isn't already.  */
9389  if (!no_new_pseudos
9390      && !register_operand (op0, mode)
9391      && !register_operand (op1, mode))
9392    {
9393      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9394      return;
9395    }
9396
9397  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9398}
9399
9400/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
9401   straight to ix86_expand_vector_move.  */
9402
9403void
9404ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9405{
9406  rtx op0, op1, m;
9407
9408  op0 = operands[0];
9409  op1 = operands[1];
9410
9411  if (MEM_P (op1))
9412    {
9413      /* If we're optimizing for size, movups is the smallest.  */
9414      if (optimize_size)
9415	{
9416	  op0 = gen_lowpart (V4SFmode, op0);
9417	  op1 = gen_lowpart (V4SFmode, op1);
9418	  emit_insn (gen_sse_movups (op0, op1));
9419	  return;
9420	}
9421
9422      /* ??? If we have typed data, then it would appear that using
9423	 movdqu is the only way to get unaligned data loaded with
9424	 integer type.  */
9425      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9426	{
9427	  op0 = gen_lowpart (V16QImode, op0);
9428	  op1 = gen_lowpart (V16QImode, op1);
9429	  emit_insn (gen_sse2_movdqu (op0, op1));
9430	  return;
9431	}
9432
9433      if (TARGET_SSE2 && mode == V2DFmode)
9434        {
9435          rtx zero;
9436
9437          if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9438            {
9439              op0 = gen_lowpart (V2DFmode, op0);
9440              op1 = gen_lowpart (V2DFmode, op1);
9441              emit_insn (gen_sse2_movupd (op0, op1));
9442              return;
9443            }
9444
9445	  /* When SSE registers are split into halves, we can avoid
9446	     writing to the top half twice.  */
9447	  if (TARGET_SSE_SPLIT_REGS)
9448	    {
9449	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9450	      zero = op0;
9451	    }
9452	  else
9453	    {
9454	      /* ??? Not sure about the best option for the Intel chips.
9455		 The following would seem to satisfy; the register is
9456		 entirely cleared, breaking the dependency chain.  We
9457		 then store to the upper half, with a dependency depth
9458		 of one.  A rumor has it that Intel recommends two movsd
9459		 followed by an unpacklpd, but this is unconfirmed.  And
9460		 given that the dependency depth of the unpacklpd would
9461		 still be one, I'm not sure why this would be better.  */
9462	      zero = CONST0_RTX (V2DFmode);
9463	    }
9464
9465	  m = adjust_address (op1, DFmode, 0);
9466	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
9467	  m = adjust_address (op1, DFmode, 8);
9468	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
9469	}
9470      else
9471        {
9472          if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9473            {
9474              op0 = gen_lowpart (V4SFmode, op0);
9475              op1 = gen_lowpart (V4SFmode, op1);
9476              emit_insn (gen_sse_movups (op0, op1));
9477              return;
9478            }
9479
9480	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
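	  /* Clear or clobber the destination to break the dependency on its
	     previous contents, then load the low and high halves separately.  */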
9481	    emit_move_insn (op0, CONST0_RTX (mode));
9482	  else
9483	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9484
9485	  if (mode != V4SFmode)
9486	    op0 = gen_lowpart (V4SFmode, op0);
9487	  m = adjust_address (op1, V2SFmode, 0);
9488	  emit_insn (gen_sse_loadlps (op0, op0, m));
9489	  m = adjust_address (op1, V2SFmode, 8);
9490	  emit_insn (gen_sse_loadhps (op0, op0, m));
9491	}
9492    }
9493  else if (MEM_P (op0))
9494    {
9495      /* If we're optimizing for size, movups is the smallest.  */
9496      if (optimize_size)
9497	{
9498	  op0 = gen_lowpart (V4SFmode, op0);
9499	  op1 = gen_lowpart (V4SFmode, op1);
9500	  emit_insn (gen_sse_movups (op0, op1));
9501	  return;
9502	}
9503
9504      /* ??? Similar to above, only less clear because of the
9505	 "typeless stores" issue.  */
9506      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9507	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9508        {
9509	  op0 = gen_lowpart (V16QImode, op0);
9510	  op1 = gen_lowpart (V16QImode, op1);
9511	  emit_insn (gen_sse2_movdqu (op0, op1));
9512	  return;
9513	}
9514
9515      if (TARGET_SSE2 && mode == V2DFmode)
9516	{
9517	  m = adjust_address (op0, DFmode, 0);
9518	  emit_insn (gen_sse2_storelpd (m, op1));
9519	  m = adjust_address (op0, DFmode, 8);
9520	  emit_insn (gen_sse2_storehpd (m, op1));
9521	}
9522      else
9523	{
9524	  if (mode != V4SFmode)
9525	    op1 = gen_lowpart (V4SFmode, op1);
9526	  m = adjust_address (op0, V2SFmode, 0);
9527	  emit_insn (gen_sse_storelps (m, op1));
9528	  m = adjust_address (op0, V2SFmode, 8);
9529	  emit_insn (gen_sse_storehps (m, op1));
9530	}
9531    }
9532  else
9533    gcc_unreachable ();
9534}
9535
9536/* Expand a push in MODE.  This is some mode for which we do not support
9537   proper push instructions, at least from the registers that we expect
9538   the value to live in.  */
9539
9540void
9541ix86_expand_push (enum machine_mode mode, rtx x)
9542{
9543  rtx tmp;
9544
9545  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
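  /* Decrement the stack pointer by the size of MODE by hand, then store X
     into the newly allocated slot.  */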
9546			     GEN_INT (-GET_MODE_SIZE (mode)),
9547			     stack_pointer_rtx, 1, OPTAB_DIRECT);
9548  if (tmp != stack_pointer_rtx)
9549    emit_move_insn (stack_pointer_rtx, tmp);
9550
9551  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9552  emit_move_insn (tmp, x);
9553}
9554
9555/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
9556   destination to use for the operation.  If different from the true
9557   destination in operands[0], a copy operation will be required.  */
9558
9559rtx
9560ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9561			    rtx operands[])
9562{
9563  int matching_memory;
9564  rtx src1, src2, dst;
9565
9566  dst = operands[0];
9567  src1 = operands[1];
9568  src2 = operands[2];
9569
9570  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9571  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9572      && (rtx_equal_p (dst, src2)
9573	  || immediate_operand (src1, mode)))
9574    {
9575      rtx temp = src1;
9576      src1 = src2;
9577      src2 = temp;
9578    }
9579
9580  /* If the destination is memory, and we do not have matching source
9581     operands, do things in registers.  */
9582  matching_memory = 0;
9583  if (GET_CODE (dst) == MEM)
9584    {
9585      if (rtx_equal_p (dst, src1))
9586	matching_memory = 1;
9587      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9588	       && rtx_equal_p (dst, src2))
9589	matching_memory = 2;
9590      else
9591	dst = gen_reg_rtx (mode);
9592    }
9593
9594  /* Both source operands cannot be in memory.  */
9595  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9596    {
9597      if (matching_memory != 2)
9598	src2 = force_reg (mode, src2);
9599      else
9600	src1 = force_reg (mode, src1);
9601    }
9602
9603  /* If the operation is not commutable, source 1 cannot be a constant
9604     or non-matching memory.  */
9605  if ((CONSTANT_P (src1)
9606       || (!matching_memory && GET_CODE (src1) == MEM))
9607      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9608    src1 = force_reg (mode, src1);
9609
9610  operands[1] = src1;
9611  operands[2] = src2;
9612  return dst;
9613}
9614
9615/* Similarly, but assume that the destination has already been
9616   set up properly.  */
9617
9618void
9619ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9620				    enum machine_mode mode, rtx operands[])
9621{
9622  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9623  gcc_assert (dst == operands[0]);
9624}
9625
9626/* Attempt to expand a binary operator.  Make the expansion closer to the
9627   actual machine than just general_operand, which would allow 3 separate
9628   memory references (one output, two input) in a single insn.  */
9629
9630void
9631ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9632			     rtx operands[])
9633{
9634  rtx src1, src2, dst, op, clob;
9635
9636  dst = ix86_fixup_binary_operands (code, mode, operands);
9637  src1 = operands[1];
9638  src2 = operands[2];
9639
9640 /* Emit the instruction.  */
9641
9642  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9643  if (reload_in_progress)
9644    {
9645      /* Reload doesn't know about the flags register, and doesn't know that
9646         it doesn't want to clobber it.  We can only do this with PLUS.  */
9647      gcc_assert (code == PLUS);
9648      emit_insn (op);
9649    }
9650  else
9651    {
9652      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9653      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9654    }
9655
9656  /* Fix up the destination if needed.  */
9657  if (dst != operands[0])
9658    emit_move_insn (operands[0], dst);
9659}
9660
9661/* Return TRUE or FALSE depending on whether the binary operator meets the
9662   appropriate constraints.  */
9663
9664int
9665ix86_binary_operator_ok (enum rtx_code code,
9666			 enum machine_mode mode ATTRIBUTE_UNUSED,
9667			 rtx operands[3])
9668{
9669  /* Both source operands cannot be in memory.  */
9670  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9671    return 0;
9672  /* If the operation is not commutable, source 1 cannot be a constant.  */
9673  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9674    return 0;
9675  /* If the destination is memory, we must have a matching source operand.  */
9676  if (GET_CODE (operands[0]) == MEM
9677      && ! (rtx_equal_p (operands[0], operands[1])
9678	    || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9679		&& rtx_equal_p (operands[0], operands[2]))))
9680    return 0;
9681  /* If the operation is not commutable and the source 1 is memory, we must
9682     have a matching destination.  */
9683  if (GET_CODE (operands[1]) == MEM
9684      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9685      && ! rtx_equal_p (operands[0], operands[1]))
9686    return 0;
9687  return 1;
9688}
9689
9690/* Attempt to expand a unary operator.  Make the expansion closer to the
9691   actual machine than just general_operand, which would allow 2 separate
9692   memory references (one output, one input) in a single insn.  */
9693
9694void
9695ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9696			    rtx operands[])
9697{
9698  int matching_memory;
9699  rtx src, dst, op, clob;
9700
9701  dst = operands[0];
9702  src = operands[1];
9703
9704  /* If the destination is memory, and we do not have matching source
9705     operands, do things in registers.  */
9706  matching_memory = 0;
9707  if (MEM_P (dst))
9708    {
9709      if (rtx_equal_p (dst, src))
9710	matching_memory = 1;
9711      else
9712	dst = gen_reg_rtx (mode);
9713    }
9714
9715  /* When source operand is memory, destination must match.  */
9716  if (MEM_P (src) && !matching_memory)
9717    src = force_reg (mode, src);
9718
9719  /* Emit the instruction.  */
9720
9721  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9722  if (reload_in_progress || code == NOT)
9723    {
9724      /* Reload doesn't know about the flags register, and doesn't know that
9725         it doesn't want to clobber it.  */
9726      gcc_assert (code == NOT);
9727      emit_insn (op);
9728    }
9729  else
9730    {
9731      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9732      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9733    }
9734
9735  /* Fix up the destination if needed.  */
9736  if (dst != operands[0])
9737    emit_move_insn (operands[0], dst);
9738}
9739
9740/* Return TRUE or FALSE depending on whether the unary operator meets the
9741   appropriate constraints.  */
9742
9743int
9744ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9745			enum machine_mode mode ATTRIBUTE_UNUSED,
9746			rtx operands[2] ATTRIBUTE_UNUSED)
9747{
9748  /* If one of the operands is memory, source and destination must match.  */
9749  if ((GET_CODE (operands[0]) == MEM
9750       || GET_CODE (operands[1]) == MEM)
9751      && ! rtx_equal_p (operands[0], operands[1]))
9752    return FALSE;
9753  return TRUE;
9754}
9755
9756/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9757   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
9758   true, then replicate the mask for all elements of the vector register.
9759   If INVERT is true, then create a mask excluding the sign bit.  */
9760
9761rtx
9762ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9763{
9764  enum machine_mode vec_mode;
9765  HOST_WIDE_INT hi, lo;
9766  int shift = 63;
9767  rtvec v;
9768  rtx mask;
9769
9770  /* Find the sign bit, sign extended to 2*HWI.  */
9771  if (mode == SFmode)
9772    lo = 0x80000000, hi = lo < 0;
9773  else if (HOST_BITS_PER_WIDE_INT >= 64)
9774    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9775  else
9776    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9777
9778  if (invert)
9779    lo = ~lo, hi = ~hi;
9780
9781  /* Force this value into the low part of a fp vector constant.  */
9782  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9783  mask = gen_lowpart (mode, mask);
9784
9785  if (mode == SFmode)
9786    {
9787      if (vect)
9788	v = gen_rtvec (4, mask, mask, mask, mask);
9789      else
9790	v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9791		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9792      vec_mode = V4SFmode;
9793    }
9794  else
9795    {
9796      if (vect)
9797	v = gen_rtvec (2, mask, mask);
9798      else
9799	v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9800      vec_mode = V2DFmode;
9801    }
9802
9803  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9804}
9805
9806/* Generate code for floating point ABS or NEG.  */
9807
9808void
9809ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9810				rtx operands[])
9811{
9812  rtx mask, set, use, clob, dst, src;
9813  bool matching_memory;
9814  bool use_sse = false;
9815  bool vector_mode = VECTOR_MODE_P (mode);
9816  enum machine_mode elt_mode = mode;
9817
9818  if (vector_mode)
9819    {
9820      elt_mode = GET_MODE_INNER (mode);
9821      use_sse = true;
9822    }
9823  else if (TARGET_SSE_MATH)
9824    use_sse = SSE_FLOAT_MODE_P (mode);
9825
9826  /* NEG and ABS performed with SSE use bitwise mask operations.
9827     Create the appropriate mask now.  */
9828  if (use_sse)
9829    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9830  else
9831    mask = NULL_RTX;
9832
9833  dst = operands[0];
9834  src = operands[1];
9835
9836  /* If the destination is memory, and we don't have matching source
9837     operands or we're using the x87, do things in registers.  */
9838  matching_memory = false;
9839  if (MEM_P (dst))
9840    {
9841      if (use_sse && rtx_equal_p (dst, src))
9842	matching_memory = true;
9843      else
9844	dst = gen_reg_rtx (mode);
9845    }
9846  if (MEM_P (src) && !matching_memory)
9847    src = force_reg (mode, src);
9848
9849  if (vector_mode)
9850    {
9851      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9852      set = gen_rtx_SET (VOIDmode, dst, set);
9853      emit_insn (set);
9854    }
9855  else
9856    {
9857      set = gen_rtx_fmt_e (code, mode, src);
9858      set = gen_rtx_SET (VOIDmode, dst, set);
9859      if (mask)
9860        {
9861          use = gen_rtx_USE (VOIDmode, mask);
9862          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9863          emit_insn (gen_rtx_PARALLEL (VOIDmode,
9864				       gen_rtvec (3, set, use, clob)));
9865        }
9866      else
9867	emit_insn (set);
9868    }
9869
9870  if (dst != operands[0])
9871    emit_move_insn (operands[0], dst);
9872}
9873
9874/* Expand a copysign operation.  Special case operand 0 being a constant.  */
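/* At the bit level this computes (op0 & ~signmask) | (op1 & signmask).
   A rough C-level sketch of the same computation (for illustration only;
   the real expansion below works on SSE registers and vector masks, and
   copysign_sketch is not an actual helper in this file):

     #include <stdint.h>
     #include <string.h>

     static double
     copysign_sketch (double mag, double sgn)
     {
       uint64_t m, s, signmask = UINT64_C (0x8000000000000000);
       memcpy (&m, &mag, sizeof m);
       memcpy (&s, &sgn, sizeof s);
       m = (m & ~signmask) | (s & signmask);
       memcpy (&mag, &m, sizeof m);
       return mag;
     }  */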
9875
9876void
9877ix86_expand_copysign (rtx operands[])
9878{
9879  enum machine_mode mode, vmode;
9880  rtx dest, op0, op1, mask, nmask;
9881
9882  dest = operands[0];
9883  op0 = operands[1];
9884  op1 = operands[2];
9885
9886  mode = GET_MODE (dest);
9887  vmode = mode == SFmode ? V4SFmode : V2DFmode;
9888
9889  if (GET_CODE (op0) == CONST_DOUBLE)
9890    {
9891      rtvec v;
9892
9893      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9894	op0 = simplify_unary_operation (ABS, mode, op0, mode);
9895
9896      if (op0 == CONST0_RTX (mode))
9897	op0 = CONST0_RTX (vmode);
9898      else
9899        {
9900	  if (mode == SFmode)
9901	    v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9902                           CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9903	  else
9904	    v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9905          op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9906	}
9907
9908      mask = ix86_build_signbit_mask (mode, 0, 0);
9909
9910      if (mode == SFmode)
9911	emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9912      else
9913	emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9914    }
9915  else
9916    {
9917      nmask = ix86_build_signbit_mask (mode, 0, 1);
9918      mask = ix86_build_signbit_mask (mode, 0, 0);
9919
9920      if (mode == SFmode)
9921	emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9922      else
9923	emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9924    }
9925}
9926
9927/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
9928   be a constant, and so has already been expanded into a vector constant.  */
9929
9930void
9931ix86_split_copysign_const (rtx operands[])
9932{
9933  enum machine_mode mode, vmode;
9934  rtx dest, op0, op1, mask, x;
9935
9936  dest = operands[0];
9937  op0 = operands[1];
9938  op1 = operands[2];
9939  mask = operands[3];
9940
9941  mode = GET_MODE (dest);
9942  vmode = GET_MODE (mask);
9943
9944  dest = simplify_gen_subreg (vmode, dest, mode, 0);
9945  x = gen_rtx_AND (vmode, dest, mask);
9946  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9947
9948  if (op0 != CONST0_RTX (vmode))
9949    {
9950      x = gen_rtx_IOR (vmode, dest, op0);
9951      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9952    }
9953}
9954
9955/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
9956   so we have to do two masks.  */
9957
9958void
9959ix86_split_copysign_var (rtx operands[])
9960{
9961  enum machine_mode mode, vmode;
9962  rtx dest, scratch, op0, op1, mask, nmask, x;
9963
9964  dest = operands[0];
9965  scratch = operands[1];
9966  op0 = operands[2];
9967  op1 = operands[3];
9968  nmask = operands[4];
9969  mask = operands[5];
9970
9971  mode = GET_MODE (dest);
9972  vmode = GET_MODE (mask);
9973
9974  if (rtx_equal_p (op0, op1))
9975    {
9976      /* Shouldn't happen often (it's useless, obviously), but when it does
9977	 we'd generate incorrect code if we continue below.  */
9978      emit_move_insn (dest, op0);
9979      return;
9980    }
9981
9982  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
9983    {
9984      gcc_assert (REGNO (op1) == REGNO (scratch));
9985
9986      x = gen_rtx_AND (vmode, scratch, mask);
9987      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9988
9989      dest = mask;
9990      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9991      x = gen_rtx_NOT (vmode, dest);
9992      x = gen_rtx_AND (vmode, x, op0);
9993      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9994    }
9995  else
9996    {
9997      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
9998	{
9999	  x = gen_rtx_AND (vmode, scratch, mask);
10000	}
10001      else						/* alternative 2,4 */
10002	{
10003          gcc_assert (REGNO (mask) == REGNO (scratch));
10004          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10005	  x = gen_rtx_AND (vmode, scratch, op1);
10006	}
10007      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10008
10009      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
10010	{
10011	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
10012	  x = gen_rtx_AND (vmode, dest, nmask);
10013	}
10014      else						/* alternative 3,4 */
10015	{
10016          gcc_assert (REGNO (nmask) == REGNO (dest));
10017	  dest = nmask;
10018	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10019	  x = gen_rtx_AND (vmode, dest, op0);
10020	}
10021      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10022    }
10023
10024  x = gen_rtx_IOR (vmode, dest, scratch);
10025  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10026}
10027
10028/* Return TRUE or FALSE depending on whether the first SET in INSN
10029   has source and destination with matching CC modes and whether the
10030   CC mode is at least as constrained as REQ_MODE.  */
10031
10032int
10033ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10034{
10035  rtx set;
10036  enum machine_mode set_mode;
10037
10038  set = PATTERN (insn);
10039  if (GET_CODE (set) == PARALLEL)
10040    set = XVECEXP (set, 0, 0);
10041  gcc_assert (GET_CODE (set) == SET);
10042  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10043
10044  set_mode = GET_MODE (SET_DEST (set));
10045  switch (set_mode)
10046    {
10047    case CCNOmode:
10048      if (req_mode != CCNOmode
10049	  && (req_mode != CCmode
10050	      || XEXP (SET_SRC (set), 1) != const0_rtx))
10051	return 0;
10052      break;
10053    case CCmode:
10054      if (req_mode == CCGCmode)
10055	return 0;
10056      /* FALLTHRU */
10057    case CCGCmode:
10058      if (req_mode == CCGOCmode || req_mode == CCNOmode)
10059	return 0;
10060      /* FALLTHRU */
10061    case CCGOCmode:
10062      if (req_mode == CCZmode)
10063	return 0;
10064      /* FALLTHRU */
10065    case CCZmode:
10066      break;
10067
10068    default:
10069      gcc_unreachable ();
10070    }
10071
10072  return (GET_MODE (SET_SRC (set)) == set_mode);
10073}
10074
10075/* Generate insn patterns to do an integer compare of OPERANDS.  */
10076
10077static rtx
10078ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10079{
10080  enum machine_mode cmpmode;
10081  rtx tmp, flags;
10082
10083  cmpmode = SELECT_CC_MODE (code, op0, op1);
10084  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10085
10086  /* This is very simple, but making the interface the same as in the
10087     FP case makes the rest of the code easier.  */
10088  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10089  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10090
10091  /* Return the test that should be put into the flags user, i.e.
10092     the bcc, scc, or cmov instruction.  */
10093  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10094}
10095
10096/* Figure out whether to use ordered or unordered fp comparisons.
10097   Return the appropriate mode to use.  */
10098
10099enum machine_mode
10100ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10101{
10102  /* ??? In order to make all comparisons reversible, we do all comparisons
10103     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
10104     all forms of trapping and nontrapping comparisons, we can make inequality
10105     comparisons trapping again, since that results in better code when using
10106     FCOM based compares.  */
10107  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10108}
10109
10110enum machine_mode
10111ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10112{
10113  if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10114    return ix86_fp_compare_mode (code);
10115  switch (code)
10116    {
10117      /* Only zero flag is needed.  */
10118    case EQ:			/* ZF=0 */
10119    case NE:			/* ZF!=0 */
10120      return CCZmode;
10121      /* Codes needing carry flag.  */
10122    case GEU:			/* CF=0 */
10123    case GTU:			/* CF=0 & ZF=0 */
10124    case LTU:			/* CF=1 */
10125    case LEU:			/* CF=1 | ZF=1 */
10126      return CCmode;
10127      /* Codes possibly doable only with sign flag when
10128         comparing against zero.  */
10129    case GE:			/* SF=OF   or   SF=0 */
10130    case LT:			/* SF<>OF  or   SF=1 */
10131      if (op1 == const0_rtx)
10132	return CCGOCmode;
10133      else
10134	/* For other cases the carry flag is not required.  */
10135	return CCGCmode;
10136      /* Codes doable only with the sign flag when comparing
10137         against zero, but we miss the jump instruction for it,
10138         so we need to use relational tests against overflow,
10139         which thus needs to be zero.  */
10140    case GT:			/* ZF=0 & SF=OF */
10141    case LE:			/* ZF=1 | SF<>OF */
10142      if (op1 == const0_rtx)
10143	return CCNOmode;
10144      else
10145	return CCGCmode;
10146      /* The strcmp pattern does (use flags), and combine may ask us for the
10147	 proper mode.  */
10148    case USE:
10149      return CCmode;
10150    default:
10151      gcc_unreachable ();
10152    }
10153}
10154
10155/* Return the fixed registers used for condition codes.  */
10156
10157static bool
10158ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10159{
10160  *p1 = FLAGS_REG;
10161  *p2 = FPSR_REG;
10162  return true;
10163}
10164
10165/* If two condition code modes are compatible, return a condition code
10166   mode which is compatible with both.  Otherwise, return
10167   VOIDmode.  */
10168
10169static enum machine_mode
10170ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10171{
10172  if (m1 == m2)
10173    return m1;
10174
10175  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10176    return VOIDmode;
10177
10178  if ((m1 == CCGCmode && m2 == CCGOCmode)
10179      || (m1 == CCGOCmode && m2 == CCGCmode))
10180    return CCGCmode;
10181
10182  switch (m1)
10183    {
10184    default:
10185      gcc_unreachable ();
10186
10187    case CCmode:
10188    case CCGCmode:
10189    case CCGOCmode:
10190    case CCNOmode:
10191    case CCZmode:
10192      switch (m2)
10193	{
10194	default:
10195	  return VOIDmode;
10196
10197	case CCmode:
10198	case CCGCmode:
10199	case CCGOCmode:
10200	case CCNOmode:
10201	case CCZmode:
10202	  return CCmode;
10203	}
10204
10205    case CCFPmode:
10206    case CCFPUmode:
10207      /* These are only compatible with themselves, which we already
10208	 checked above.  */
10209      return VOIDmode;
10210    }
10211}
10212
10213/* Return true if we should use an FCOMI instruction for this fp comparison.  */
10214
10215int
10216ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10217{
10218  enum rtx_code swapped_code = swap_condition (code);
10219  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10220	  || (ix86_fp_comparison_cost (swapped_code)
10221	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
10222}
10223
10224/* Swap, force into registers, or otherwise massage the two operands
10225   to a fp comparison.  The operands are updated in place; the new
10226   comparison code is returned.  */
10227
10228static enum rtx_code
10229ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10230{
10231  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10232  rtx op0 = *pop0, op1 = *pop1;
10233  enum machine_mode op_mode = GET_MODE (op0);
10234  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10235
10236  /* All of the unordered compare instructions only work on registers.
10237     The same is true of the fcomi compare instructions.  The XFmode
10238     compare instructions require registers except when comparing
10239     against zero or when converting operand 1 from fixed point to
10240     floating point.  */
10241
10242  if (!is_sse
10243      && (fpcmp_mode == CCFPUmode
10244	  || (op_mode == XFmode
10245	      && ! (standard_80387_constant_p (op0) == 1
10246		    || standard_80387_constant_p (op1) == 1)
10247	      && GET_CODE (op1) != FLOAT)
10248	  || ix86_use_fcomi_compare (code)))
10249    {
10250      op0 = force_reg (op_mode, op0);
10251      op1 = force_reg (op_mode, op1);
10252    }
10253  else
10254    {
10255      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
10256	 things around if they appear profitable, otherwise force op0
10257	 into a register.  */
10258
10259      if (standard_80387_constant_p (op0) == 0
10260	  || (GET_CODE (op0) == MEM
10261	      && ! (standard_80387_constant_p (op1) == 0
10262		    || GET_CODE (op1) == MEM)))
10263	{
10264	  rtx tmp;
10265	  tmp = op0, op0 = op1, op1 = tmp;
10266	  code = swap_condition (code);
10267	}
10268
10269      if (GET_CODE (op0) != REG)
10270	op0 = force_reg (op_mode, op0);
10271
10272      if (CONSTANT_P (op1))
10273	{
10274	  int tmp = standard_80387_constant_p (op1);
10275	  if (tmp == 0)
10276	    op1 = validize_mem (force_const_mem (op_mode, op1));
10277	  else if (tmp == 1)
10278	    {
10279	      if (TARGET_CMOVE)
10280		op1 = force_reg (op_mode, op1);
10281	    }
10282	  else
10283	    op1 = force_reg (op_mode, op1);
10284	}
10285    }
10286
10287  /* Try to rearrange the comparison to make it cheaper.  */
10288  if (ix86_fp_comparison_cost (code)
10289      > ix86_fp_comparison_cost (swap_condition (code))
10290      && (GET_CODE (op1) == REG || !no_new_pseudos))
10291    {
10292      rtx tmp;
10293      tmp = op0, op0 = op1, op1 = tmp;
10294      code = swap_condition (code);
10295      if (GET_CODE (op0) != REG)
10296	op0 = force_reg (op_mode, op0);
10297    }
10298
10299  *pop0 = op0;
10300  *pop1 = op1;
10301  return code;
10302}
10303
10304/* Convert the comparison codes we use to represent FP comparisons into the
10305   integer code that will result in a proper branch.  Return UNKNOWN if no
10306   such code is available.  */
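/* (The mapping is to unsigned codes because fcomi/fucomi and comiss/ucomiss
   set ZF, PF and CF the way an unsigned integer compare would, which is why
   GT maps to GTU, UNLT to LTU, and so on.)  */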
10307
10308enum rtx_code
10309ix86_fp_compare_code_to_integer (enum rtx_code code)
10310{
10311  switch (code)
10312    {
10313    case GT:
10314      return GTU;
10315    case GE:
10316      return GEU;
10317    case ORDERED:
10318    case UNORDERED:
10319      return code;
10320      break;
10321    case UNEQ:
10322      return EQ;
10323      break;
10324    case UNLT:
10325      return LTU;
10326      break;
10327    case UNLE:
10328      return LEU;
10329      break;
10330    case LTGT:
10331      return NE;
10332      break;
10333    default:
10334      return UNKNOWN;
10335    }
10336}
10337
10338/* Split comparison code CODE into comparisons we can do using branch
10339   instructions.  BYPASS_CODE is the comparison code for the branch that will
10340   branch around FIRST_CODE and SECOND_CODE.  If one of the branches is not
10341   required, the corresponding code is set to UNKNOWN.
10342   We never require more than two branches.  */
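/* For example, with TARGET_IEEE_FP an LT comparison becomes
   FIRST_CODE = UNLT with BYPASS_CODE = UNORDERED (branch around the LT
   branch when the operands compare unordered), while NE becomes
   FIRST_CODE = LTGT with SECOND_CODE = UNORDERED (also take the branch
   when the operands compare unordered).  */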
10343
10344void
10345ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10346			  enum rtx_code *first_code,
10347			  enum rtx_code *second_code)
10348{
10349  *first_code = code;
10350  *bypass_code = UNKNOWN;
10351  *second_code = UNKNOWN;
10352
10353  /* The fcomi comparison sets flags as follows:
10354
10355     cmp    ZF PF CF
10356     >      0  0  0
10357     <      0  0  1
10358     =      1  0  0
10359     un     1  1  1 */
10360
10361  switch (code)
10362    {
10363    case GT:			/* GTU - CF=0 & ZF=0 */
10364    case GE:			/* GEU - CF=0 */
10365    case ORDERED:		/* PF=0 */
10366    case UNORDERED:		/* PF=1 */
10367    case UNEQ:			/* EQ - ZF=1 */
10368    case UNLT:			/* LTU - CF=1 */
10369    case UNLE:			/* LEU - CF=1 | ZF=1 */
10370    case LTGT:			/* EQ - ZF=0 */
10371      break;
10372    case LT:			/* LTU - CF=1 - fails on unordered */
10373      *first_code = UNLT;
10374      *bypass_code = UNORDERED;
10375      break;
10376    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
10377      *first_code = UNLE;
10378      *bypass_code = UNORDERED;
10379      break;
10380    case EQ:			/* EQ - ZF=1 - fails on unordered */
10381      *first_code = UNEQ;
10382      *bypass_code = UNORDERED;
10383      break;
10384    case NE:			/* NE - ZF=0 - fails on unordered */
10385      *first_code = LTGT;
10386      *second_code = UNORDERED;
10387      break;
10388    case UNGE:			/* GEU - CF=0 - fails on unordered */
10389      *first_code = GE;
10390      *second_code = UNORDERED;
10391      break;
10392    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
10393      *first_code = GT;
10394      *second_code = UNORDERED;
10395      break;
10396    default:
10397      gcc_unreachable ();
10398    }
10399  if (!TARGET_IEEE_FP)
10400    {
10401      *second_code = UNKNOWN;
10402      *bypass_code = UNKNOWN;
10403    }
10404}
10405
10406/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10407   All of the following functions use the number of instructions as a cost metric.
10408   In the future this should be tweaked to compute bytes for optimize_size and
10409   take into account the performance of various instructions on various CPUs.  */
10410static int
10411ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10412{
10413  if (!TARGET_IEEE_FP)
10414    return 4;
10415  /* The cost of code output by ix86_expand_fp_compare.  */
10416  switch (code)
10417    {
10418    case UNLE:
10419    case UNLT:
10420    case LTGT:
10421    case GT:
10422    case GE:
10423    case UNORDERED:
10424    case ORDERED:
10425    case UNEQ:
10426      return 4;
10427      break;
10428    case LT:
10429    case NE:
10430    case EQ:
10431    case UNGE:
10432      return 5;
10433      break;
10434    case LE:
10435    case UNGT:
10436      return 6;
10437      break;
10438    default:
10439      gcc_unreachable ();
10440    }
10441}
10442
10443/* Return cost of comparison done using fcomi operation.
10444   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10445static int
10446ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10447{
10448  enum rtx_code bypass_code, first_code, second_code;
10449  /* Return an arbitrarily high cost when the instruction is not supported -
10450     this prevents gcc from using it.  */
10451  if (!TARGET_CMOVE)
10452    return 1024;
10453  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10454  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10455}
10456
10457/* Return cost of comparison done using sahf operation.
10458   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10459static int
10460ix86_fp_comparison_sahf_cost (enum rtx_code code)
10461{
10462  enum rtx_code bypass_code, first_code, second_code;
10463  /* Return an arbitrarily high cost when the instruction is not preferred -
10464     this keeps gcc from using it.  */
10465  if (!TARGET_USE_SAHF && !optimize_size)
10466    return 1024;
10467  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10468  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10469}
10470
10471/* Compute cost of the comparison done using any method.
10472   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10473static int
10474ix86_fp_comparison_cost (enum rtx_code code)
10475{
10476  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10477  int min;
10478
10479  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10480  sahf_cost = ix86_fp_comparison_sahf_cost (code);
10481
10482  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10483  if (min > sahf_cost)
10484    min = sahf_cost;
10485  if (min > fcomi_cost)
10486    min = fcomi_cost;
10487  return min;
10488}
10489
10490/* Generate insn patterns to do a floating point compare of OPERANDS.  */
10491
10492static rtx
10493ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10494			rtx *second_test, rtx *bypass_test)
10495{
10496  enum machine_mode fpcmp_mode, intcmp_mode;
10497  rtx tmp, tmp2;
10498  int cost = ix86_fp_comparison_cost (code);
10499  enum rtx_code bypass_code, first_code, second_code;
10500
10501  fpcmp_mode = ix86_fp_compare_mode (code);
10502  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10503
10504  if (second_test)
10505    *second_test = NULL_RTX;
10506  if (bypass_test)
10507    *bypass_test = NULL_RTX;
10508
10509  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10510
10511  /* Do fcomi/sahf based test when profitable.  */
10512  if ((bypass_code == UNKNOWN || bypass_test)
10513      && (second_code == UNKNOWN || second_test)
10514      && ix86_fp_comparison_arithmetics_cost (code) > cost)
10515    {
10516      if (TARGET_CMOVE)
10517	{
10518	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10519	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10520			     tmp);
10521	  emit_insn (tmp);
10522	}
10523      else
10524	{
10525	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10526	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10527	  if (!scratch)
10528	    scratch = gen_reg_rtx (HImode);
10529	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10530	  emit_insn (gen_x86_sahf_1 (scratch));
10531	}
10532
10533      /* The FP codes work out to act like unsigned.  */
10534      intcmp_mode = fpcmp_mode;
10535      code = first_code;
10536      if (bypass_code != UNKNOWN)
10537	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10538				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
10539				       const0_rtx);
10540      if (second_code != UNKNOWN)
10541	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10542				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
10543				       const0_rtx);
10544    }
10545  else
10546    {
10547      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
10548      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10549      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10550      if (!scratch)
10551	scratch = gen_reg_rtx (HImode);
10552      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10553
10554      /* In the unordered case, we have to check C2 for NaN's, which
10555	 doesn't happen to work out to anything nice combination-wise.
10556	 So do some bit twiddling on the value we've got in AH to come
10557	 up with an appropriate set of condition codes.  */
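      /* As a reminder about the magic constants used below: after fnstsw
	 the relevant x87 condition bits sit in the high byte of the status
	 word as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so a mask of 0x45
	 tests C3|C2|C0 and 0x44 tests C3|C2.  */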
10558
10559      intcmp_mode = CCNOmode;
10560      switch (code)
10561	{
10562	case GT:
10563	case UNGT:
10564	  if (code == GT || !TARGET_IEEE_FP)
10565	    {
10566	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10567	      code = EQ;
10568	    }
10569	  else
10570	    {
10571	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10572	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10573	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10574	      intcmp_mode = CCmode;
10575	      code = GEU;
10576	    }
10577	  break;
10578	case LT:
10579	case UNLT:
10580	  if (code == LT && TARGET_IEEE_FP)
10581	    {
10582	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10583	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10584	      intcmp_mode = CCmode;
10585	      code = EQ;
10586	    }
10587	  else
10588	    {
10589	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10590	      code = NE;
10591	    }
10592	  break;
10593	case GE:
10594	case UNGE:
10595	  if (code == GE || !TARGET_IEEE_FP)
10596	    {
10597	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10598	      code = EQ;
10599	    }
10600	  else
10601	    {
10602	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10603	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10604					     GEN_INT (0x01)));
10605	      code = NE;
10606	    }
10607	  break;
10608	case LE:
10609	case UNLE:
10610	  if (code == LE && TARGET_IEEE_FP)
10611	    {
10612	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10613	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10614	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10615	      intcmp_mode = CCmode;
10616	      code = LTU;
10617	    }
10618	  else
10619	    {
10620	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10621	      code = NE;
10622	    }
10623	  break;
10624	case EQ:
10625	case UNEQ:
10626	  if (code == EQ && TARGET_IEEE_FP)
10627	    {
10628	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10629	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10630	      intcmp_mode = CCmode;
10631	      code = EQ;
10632	    }
10633	  else
10634	    {
10635	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10636	      code = NE;
10637	      break;
10638	    }
10639	  break;
10640	case NE:
10641	case LTGT:
10642	  if (code == NE && TARGET_IEEE_FP)
10643	    {
10644	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10645	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10646					     GEN_INT (0x40)));
10647	      code = NE;
10648	    }
10649	  else
10650	    {
10651	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10652	      code = EQ;
10653	    }
10654	  break;
10655
10656	case UNORDERED:
10657	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10658	  code = NE;
10659	  break;
10660	case ORDERED:
10661	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10662	  code = EQ;
10663	  break;
10664
10665	default:
10666	  gcc_unreachable ();
10667	}
10668    }
10669
10670  /* Return the test that should be put into the flags user, i.e.
10671     the bcc, scc, or cmov instruction.  */
10672  return gen_rtx_fmt_ee (code, VOIDmode,
10673			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10674			 const0_rtx);
10675}
10676
10677rtx
10678ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10679{
10680  rtx op0, op1, ret;
10681  op0 = ix86_compare_op0;
10682  op1 = ix86_compare_op1;
10683
10684  if (second_test)
10685    *second_test = NULL_RTX;
10686  if (bypass_test)
10687    *bypass_test = NULL_RTX;
10688
10689  if (ix86_compare_emitted)
10690    {
10691      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10692      ix86_compare_emitted = NULL_RTX;
10693    }
10694  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10695    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10696				  second_test, bypass_test);
10697  else
10698    ret = ix86_expand_int_compare (code, op0, op1);
10699
10700  return ret;
10701}
10702
10703/* Return true if the CODE will result in nontrivial jump sequence.  */
10704bool
10705ix86_fp_jump_nontrivial_p (enum rtx_code code)
10706{
10707  enum rtx_code bypass_code, first_code, second_code;
10708  if (!TARGET_CMOVE)
10709    return true;
10710  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10711  return bypass_code != UNKNOWN || second_code != UNKNOWN;
10712}
10713
10714void
10715ix86_expand_branch (enum rtx_code code, rtx label)
10716{
10717  rtx tmp;
10718
10719  /* If we have emitted a compare insn, go straight to simple.
10720     ix86_expand_compare won't emit anything if ix86_compare_emitted
10721     is non-NULL.  */
10722  if (ix86_compare_emitted)
10723    goto simple;
10724
10725  switch (GET_MODE (ix86_compare_op0))
10726    {
10727    case QImode:
10728    case HImode:
10729    case SImode:
10730      simple:
10731      tmp = ix86_expand_compare (code, NULL, NULL);
10732      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10733				  gen_rtx_LABEL_REF (VOIDmode, label),
10734				  pc_rtx);
10735      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10736      return;
10737
10738    case SFmode:
10739    case DFmode:
10740    case XFmode:
10741      {
10742	rtvec vec;
10743	int use_fcomi;
10744	enum rtx_code bypass_code, first_code, second_code;
10745
10746	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10747					     &ix86_compare_op1);
10748
10749	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10750
10751	/* Check whether we will use the natural sequence with one jump.  If
10752	   so, we can expand the jump early.  Otherwise delay expansion by
10753	   creating a compound insn so as not to confuse the optimizers.  */
10754	if (bypass_code == UNKNOWN && second_code == UNKNOWN
10755	    && TARGET_CMOVE)
10756	  {
10757	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10758				  gen_rtx_LABEL_REF (VOIDmode, label),
10759				  pc_rtx, NULL_RTX, NULL_RTX);
10760	  }
10761	else
10762	  {
10763	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
10764				  ix86_compare_op0, ix86_compare_op1);
10765	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10766					gen_rtx_LABEL_REF (VOIDmode, label),
10767					pc_rtx);
10768	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10769
10770	    use_fcomi = ix86_use_fcomi_compare (code);
10771	    vec = rtvec_alloc (3 + !use_fcomi);
10772	    RTVEC_ELT (vec, 0) = tmp;
10773	    RTVEC_ELT (vec, 1)
10774	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10775	    RTVEC_ELT (vec, 2)
10776	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10777	    if (! use_fcomi)
10778	      RTVEC_ELT (vec, 3)
10779		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10780
10781	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10782	  }
10783	return;
10784      }
10785
10786    case DImode:
10787      if (TARGET_64BIT)
10788	goto simple;
10789    case TImode:
10790      /* Expand DImode branch into multiple compare+branch.  */
10791      {
10792	rtx lo[2], hi[2], label2;
10793	enum rtx_code code1, code2, code3;
10794	enum machine_mode submode;
10795
10796	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10797	  {
10798	    tmp = ix86_compare_op0;
10799	    ix86_compare_op0 = ix86_compare_op1;
10800	    ix86_compare_op1 = tmp;
10801	    code = swap_condition (code);
10802	  }
10803	if (GET_MODE (ix86_compare_op0) == DImode)
10804	  {
10805	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10806	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10807	    submode = SImode;
10808	  }
10809	else
10810	  {
10811	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10812	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10813	    submode = DImode;
10814	  }
10815
10816	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10817	   avoid two branches.  This costs one extra insn, so disable when
10818	   optimizing for size.  */
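	/* A rough C-level sketch of the trick (illustration only, with
	   uint32_t standing in for the word mode):

	     int wide_eq (uint32_t lo0, uint32_t hi0, uint32_t lo1, uint32_t hi1)
	     {
	       return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
	     }

	   A single compare of the IOR result against zero then replaces two
	   compare-and-branch pairs.  */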
10819
10820	if ((code == EQ || code == NE)
10821	    && (!optimize_size
10822	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
10823	  {
10824	    rtx xor0, xor1;
10825
10826	    xor1 = hi[0];
10827	    if (hi[1] != const0_rtx)
10828	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10829				   NULL_RTX, 0, OPTAB_WIDEN);
10830
10831	    xor0 = lo[0];
10832	    if (lo[1] != const0_rtx)
10833	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10834				   NULL_RTX, 0, OPTAB_WIDEN);
10835
10836	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
10837				NULL_RTX, 0, OPTAB_WIDEN);
10838
10839	    ix86_compare_op0 = tmp;
10840	    ix86_compare_op1 = const0_rtx;
10841	    ix86_expand_branch (code, label);
10842	    return;
10843	  }
10844
10845	/* Otherwise, if we are doing less-than or greater-or-equal-than and
10846	   op1 is a constant whose low word is zero, then we can just examine
10847	   the high word.  */
10848
10849	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10850	  switch (code)
10851	    {
10852	    case LT: case LTU: case GE: case GEU:
10853	      ix86_compare_op0 = hi[0];
10854	      ix86_compare_op1 = hi[1];
10855	      ix86_expand_branch (code, label);
10856	      return;
10857	    default:
10858	      break;
10859	    }
10860
10861	/* Otherwise, we need two or three jumps.  */
10862
10863	label2 = gen_label_rtx ();
10864
10865	code1 = code;
10866	code2 = swap_condition (code);
10867	code3 = unsigned_condition (code);
10868
10869	switch (code)
10870	  {
10871	  case LT: case GT: case LTU: case GTU:
10872	    break;
10873
10874	  case LE:   code1 = LT;  code2 = GT;  break;
10875	  case GE:   code1 = GT;  code2 = LT;  break;
10876	  case LEU:  code1 = LTU; code2 = GTU; break;
10877	  case GEU:  code1 = GTU; code2 = LTU; break;
10878
10879	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
10880	  case NE:   code2 = UNKNOWN; break;
10881
10882	  default:
10883	    gcc_unreachable ();
10884	  }
10885
10886	/*
10887	 * a < b =>
10888	 *    if (hi(a) < hi(b)) goto true;
10889	 *    if (hi(a) > hi(b)) goto false;
10890	 *    if (lo(a) < lo(b)) goto true;
10891	 *  false:
10892	 */
10893
10894	ix86_compare_op0 = hi[0];
10895	ix86_compare_op1 = hi[1];
10896
10897	if (code1 != UNKNOWN)
10898	  ix86_expand_branch (code1, label);
10899	if (code2 != UNKNOWN)
10900	  ix86_expand_branch (code2, label2);
10901
10902	ix86_compare_op0 = lo[0];
10903	ix86_compare_op1 = lo[1];
10904	ix86_expand_branch (code3, label);
10905
10906	if (code2 != UNKNOWN)
10907	  emit_label (label2);
10908	return;
10909      }
10910
10911    default:
10912      gcc_unreachable ();
10913    }
10914}
10915
10916/* Split branch based on floating point condition.  */
10917void
10918ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10919		      rtx target1, rtx target2, rtx tmp, rtx pushed)
10920{
10921  rtx second, bypass;
10922  rtx label = NULL_RTX;
10923  rtx condition;
10924  int bypass_probability = -1, second_probability = -1, probability = -1;
10925  rtx i;
10926
10927  if (target2 != pc_rtx)
10928    {
10929      rtx tmp = target2;
10930      code = reverse_condition_maybe_unordered (code);
10931      target2 = target1;
10932      target1 = tmp;
10933    }
10934
10935  condition = ix86_expand_fp_compare (code, op1, op2,
10936				      tmp, &second, &bypass);
10937
10938  /* Remove pushed operand from stack.  */
10939  if (pushed)
10940    ix86_free_from_memory (GET_MODE (pushed));
10941
10942  if (split_branch_probability >= 0)
10943    {
10944      /* Distribute the probabilities across the jumps.
10945	 Assume that BYPASS and SECOND always test
10946	 for UNORDERED.  */
10947      probability = split_branch_probability;
10948
10949      /* A value of 1 is low enough that there is no need for the probability
10950	 to be updated.  Later we may run some experiments and see
10951	 whether unordered values are more frequent in practice.  */
10952      if (bypass)
10953	bypass_probability = 1;
10954      if (second)
10955	second_probability = 1;
10956    }
10957  if (bypass != NULL_RTX)
10958    {
10959      label = gen_label_rtx ();
10960      i = emit_jump_insn (gen_rtx_SET
10961			  (VOIDmode, pc_rtx,
10962			   gen_rtx_IF_THEN_ELSE (VOIDmode,
10963						 bypass,
10964						 gen_rtx_LABEL_REF (VOIDmode,
10965								    label),
10966						 pc_rtx)));
10967      if (bypass_probability >= 0)
10968	REG_NOTES (i)
10969	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
10970			       GEN_INT (bypass_probability),
10971			       REG_NOTES (i));
10972    }
10973  i = emit_jump_insn (gen_rtx_SET
10974		      (VOIDmode, pc_rtx,
10975		       gen_rtx_IF_THEN_ELSE (VOIDmode,
10976					     condition, target1, target2)));
10977  if (probability >= 0)
10978    REG_NOTES (i)
10979      = gen_rtx_EXPR_LIST (REG_BR_PROB,
10980			   GEN_INT (probability),
10981			   REG_NOTES (i));
10982  if (second != NULL_RTX)
10983    {
10984      i = emit_jump_insn (gen_rtx_SET
10985			  (VOIDmode, pc_rtx,
10986			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10987						 target2)));
10988      if (second_probability >= 0)
10989	REG_NOTES (i)
10990	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
10991			       GEN_INT (second_probability),
10992			       REG_NOTES (i));
10993    }
10994  if (label != NULL_RTX)
10995    emit_label (label);
10996}
10997
10998int
10999ix86_expand_setcc (enum rtx_code code, rtx dest)
11000{
11001  rtx ret, tmp, tmpreg, equiv;
11002  rtx second_test, bypass_test;
11003
11004  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11005    return 0; /* FAIL */
11006
11007  gcc_assert (GET_MODE (dest) == QImode);
11008
11009  ret = ix86_expand_compare (code, &second_test, &bypass_test);
11010  PUT_MODE (ret, QImode);
11011
11012  tmp = dest;
11013  tmpreg = dest;
11014
11015  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11016  if (bypass_test || second_test)
11017    {
11018      rtx test = second_test;
11019      int bypass = 0;
11020      rtx tmp2 = gen_reg_rtx (QImode);
11021      if (bypass_test)
11022	{
11023	  gcc_assert (!second_test);
11024	  test = bypass_test;
11025	  bypass = 1;
11026	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11027	}
11028      PUT_MODE (test, QImode);
11029      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11030
11031      if (bypass)
11032	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11033      else
11034	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11035    }
11036
11037  /* Attach a REG_EQUAL note describing the comparison result.  */
11038  if (ix86_compare_op0 && ix86_compare_op1)
11039    {
11040      equiv = simplify_gen_relational (code, QImode,
11041				       GET_MODE (ix86_compare_op0),
11042				       ix86_compare_op0, ix86_compare_op1);
11043      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11044    }
11045
11046  return 1; /* DONE */
11047}
11048
11049/* Expand a comparison setting or clearing the carry flag.  Return true when
11050   successful and set *POP to the comparison operation.  */
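/* The point of canonicalizing everything to LTU/GEU below is that those are
   exactly the conditions carried in CF, which the sbb based sequences in
   ix86_expand_int_movcc can consume directly: a compare that sets CF
   followed by "sbb reg, reg" leaves reg equal to -CF, i.e. -1 or 0.  */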
11051static bool
11052ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11053{
11054  enum machine_mode mode =
11055    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11056
11057  /* Do not handle DImode compares that go through the special path.  Also we
11058     can't deal with FP compares yet.  This would be possible to add.  */
11059  if (mode == (TARGET_64BIT ? TImode : DImode))
11060    return false;
11061  if (FLOAT_MODE_P (mode))
11062    {
11063      rtx second_test = NULL, bypass_test = NULL;
11064      rtx compare_op, compare_seq;
11065
11066      /* Shortcut: the following common codes never translate into carry flag compares.  */
11067      if (code == EQ || code == NE || code == UNEQ || code == LTGT
11068	  || code == ORDERED || code == UNORDERED)
11069	return false;
11070
11071      /* These comparisons require the zero flag; swap operands so they won't.  */
11072      if ((code == GT || code == UNLE || code == LE || code == UNGT)
11073	  && !TARGET_IEEE_FP)
11074	{
11075	  rtx tmp = op0;
11076	  op0 = op1;
11077	  op1 = tmp;
11078	  code = swap_condition (code);
11079	}
11080
11081      /* Try to expand the comparison and verify that we end up with a carry flag
11082	 based comparison.  This fails to be true only when we decide to expand the
11083	 comparison using arithmetic, which is not a very common scenario.  */
11084      start_sequence ();
11085      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11086					   &second_test, &bypass_test);
11087      compare_seq = get_insns ();
11088      end_sequence ();
11089
11090      if (second_test || bypass_test)
11091	return false;
11092      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11093	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11094        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11095      else
11096	code = GET_CODE (compare_op);
11097      if (code != LTU && code != GEU)
11098	return false;
11099      emit_insn (compare_seq);
11100      *pop = compare_op;
11101      return true;
11102    }
11103  if (!INTEGRAL_MODE_P (mode))
11104    return false;
11105  switch (code)
11106    {
11107    case LTU:
11108    case GEU:
11109      break;
11110
11111    /* Convert a==0 into (unsigned)a<1.  */
11112    case EQ:
11113    case NE:
11114      if (op1 != const0_rtx)
11115	return false;
11116      op1 = const1_rtx;
11117      code = (code == EQ ? LTU : GEU);
11118      break;
11119
11120    /* Convert a>b into b<a or a>=b+1.  */
11121    case GTU:
11122    case LEU:
11123      if (GET_CODE (op1) == CONST_INT)
11124	{
11125	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11126	  /* Bail out on overflow.  We could still swap the operands, but that
11127	     would force loading the constant into a register.  */
11128	  if (op1 == const0_rtx
11129	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11130	    return false;
11131	  code = (code == GTU ? GEU : LTU);
11132	}
11133      else
11134	{
11135	  rtx tmp = op1;
11136	  op1 = op0;
11137	  op0 = tmp;
11138	  code = (code == GTU ? LTU : GEU);
11139	}
11140      break;
11141
11142    /* Convert a>=0 into (unsigned)a<0x80000000.  */
11143    case LT:
11144    case GE:
11145      if (mode == DImode || op1 != const0_rtx)
11146	return false;
11147      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11148      code = (code == LT ? GEU : LTU);
11149      break;
11150    case LE:
11151    case GT:
11152      if (mode == DImode || op1 != constm1_rtx)
11153	return false;
11154      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11155      code = (code == LE ? GEU : LTU);
11156      break;
11157
11158    default:
11159      return false;
11160    }
11161  /* Swapping operands may cause a constant to appear as the first operand.  */
11162  if (!nonimmediate_operand (op0, VOIDmode))
11163    {
11164      if (no_new_pseudos)
11165	return false;
11166      op0 = force_reg (mode, op0);
11167    }
11168  ix86_compare_op0 = op0;
11169  ix86_compare_op1 = op1;
11170  *pop = ix86_expand_compare (code, NULL, NULL);
11171  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11172  return true;
11173}
11174
11175int
11176ix86_expand_int_movcc (rtx operands[])
11177{
11178  enum rtx_code code = GET_CODE (operands[1]), compare_code;
11179  rtx compare_seq, compare_op;
11180  rtx second_test, bypass_test;
11181  enum machine_mode mode = GET_MODE (operands[0]);
11182  bool sign_bit_compare_p = false;
11183
11184  start_sequence ();
11185  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11186  compare_seq = get_insns ();
11187  end_sequence ();
11188
11189  compare_code = GET_CODE (compare_op);
11190
11191  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11192      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11193    sign_bit_compare_p = true;
11194
11195  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11196     HImode insns, we'd be swallowed in word prefix ops.  */
11197
11198  if ((mode != HImode || TARGET_FAST_PREFIX)
11199      && (mode != (TARGET_64BIT ? TImode : DImode))
11200      && GET_CODE (operands[2]) == CONST_INT
11201      && GET_CODE (operands[3]) == CONST_INT)
11202    {
11203      rtx out = operands[0];
11204      HOST_WIDE_INT ct = INTVAL (operands[2]);
11205      HOST_WIDE_INT cf = INTVAL (operands[3]);
11206      HOST_WIDE_INT diff;
11207
11208      diff = ct - cf;
11209      /* Sign bit compares are better done using shifts than by using
11210	  sbb.  */
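      /* For instance, (x < 0 ? -1 : 0) is just an arithmetic right shift of
	 x by 31 (or 63), which is essentially what the emit_store_flag call
	 below produces for these sign-bit cases.  */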
11211      if (sign_bit_compare_p
11212	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11213					     ix86_compare_op1, &compare_op))
11214	{
11215	  /* Detect overlap between destination and compare sources.  */
11216	  rtx tmp = out;
11217
11218          if (!sign_bit_compare_p)
11219	    {
11220	      bool fpcmp = false;
11221
11222	      compare_code = GET_CODE (compare_op);
11223
11224	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11225		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11226		{
11227		  fpcmp = true;
11228		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
11229		}
11230
11231	      /* To simplify the rest of the code, restrict to the GEU case.  */
11232	      if (compare_code == LTU)
11233		{
11234		  HOST_WIDE_INT tmp = ct;
11235		  ct = cf;
11236		  cf = tmp;
11237		  compare_code = reverse_condition (compare_code);
11238		  code = reverse_condition (code);
11239		}
11240	      else
11241		{
11242		  if (fpcmp)
11243		    PUT_CODE (compare_op,
11244			      reverse_condition_maybe_unordered
11245			        (GET_CODE (compare_op)));
11246		  else
11247		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11248		}
11249	      diff = ct - cf;
11250
11251	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11252		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
11253		tmp = gen_reg_rtx (mode);
11254
11255	      if (mode == DImode)
11256		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11257	      else
11258		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11259	    }
11260	  else
11261	    {
11262	      if (code == GT || code == GE)
11263		code = reverse_condition (code);
11264	      else
11265		{
11266		  HOST_WIDE_INT tmp = ct;
11267		  ct = cf;
11268		  cf = tmp;
11269		  diff = ct - cf;
11270		}
11271	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11272				     ix86_compare_op1, VOIDmode, 0, -1);
11273	    }
11274
11275	  if (diff == 1)
11276	    {
11277	      /*
11278	       * cmpl op0,op1
11279	       * sbbl dest,dest
11280	       * [addl dest, ct]
11281	       *
11282	       * Size 5 - 8.
11283	       */
11284	      if (ct)
11285		tmp = expand_simple_binop (mode, PLUS,
11286					   tmp, GEN_INT (ct),
11287					   copy_rtx (tmp), 1, OPTAB_DIRECT);
11288	    }
11289	  else if (cf == -1)
11290	    {
11291	      /*
11292	       * cmpl op0,op1
11293	       * sbbl dest,dest
11294	       * orl $ct, dest
11295	       *
11296	       * Size 8.
11297	       */
11298	      tmp = expand_simple_binop (mode, IOR,
11299					 tmp, GEN_INT (ct),
11300					 copy_rtx (tmp), 1, OPTAB_DIRECT);
11301	    }
11302	  else if (diff == -1 && ct)
11303	    {
11304	      /*
11305	       * cmpl op0,op1
11306	       * sbbl dest,dest
11307	       * notl dest
11308	       * [addl dest, cf]
11309	       *
11310	       * Size 8 - 11.
11311	       */
11312	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11313	      if (cf)
11314		tmp = expand_simple_binop (mode, PLUS,
11315					   copy_rtx (tmp), GEN_INT (cf),
11316					   copy_rtx (tmp), 1, OPTAB_DIRECT);
11317	    }
11318	  else
11319	    {
11320	      /*
11321	       * cmpl op0,op1
11322	       * sbbl dest,dest
11323	       * [notl dest]
11324	       * andl cf - ct, dest
11325	       * [addl dest, ct]
11326	       *
11327	       * Size 8 - 11.
11328	       */
11329
11330	      if (cf == 0)
11331		{
11332		  cf = ct;
11333		  ct = 0;
11334		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11335		}
11336
11337	      tmp = expand_simple_binop (mode, AND,
11338					 copy_rtx (tmp),
11339					 gen_int_mode (cf - ct, mode),
11340					 copy_rtx (tmp), 1, OPTAB_DIRECT);
11341	      if (ct)
11342		tmp = expand_simple_binop (mode, PLUS,
11343					   copy_rtx (tmp), GEN_INT (ct),
11344					   copy_rtx (tmp), 1, OPTAB_DIRECT);
11345	    }
11346
11347	  if (!rtx_equal_p (tmp, out))
11348	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11349
11350	  return 1; /* DONE */
11351	}
11352
11353      if (diff < 0)
11354	{
11355	  HOST_WIDE_INT tmp;
11356	  tmp = ct, ct = cf, cf = tmp;
11357	  diff = -diff;
11358	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11359	    {
11360	      /* We may be reversing an unordered compare to a normal compare, which
11361		 is not valid in general (we may convert a non-trapping condition
11362		 into a trapping one); however, on i386 we currently emit all
11363		 comparisons unordered.  */
11364	      compare_code = reverse_condition_maybe_unordered (compare_code);
11365	      code = reverse_condition_maybe_unordered (code);
11366	    }
11367	  else
11368	    {
11369	      compare_code = reverse_condition (compare_code);
11370	      code = reverse_condition (code);
11371	    }
11372	}
11373
11374      compare_code = UNKNOWN;
11375      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11376	  && GET_CODE (ix86_compare_op1) == CONST_INT)
11377	{
11378	  if (ix86_compare_op1 == const0_rtx
11379	      && (code == LT || code == GE))
11380	    compare_code = code;
11381	  else if (ix86_compare_op1 == constm1_rtx)
11382	    {
11383	      if (code == LE)
11384		compare_code = LT;
11385	      else if (code == GT)
11386		compare_code = GE;
11387	    }
11388	}
11389
11390      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
11391      if (compare_code != UNKNOWN
11392	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11393	  && (cf == -1 || ct == -1))
11394	{
11395	  /* If the lea code below could be used, only optimize
11396	     if it results in a 2-insn sequence.  */
11397
11398	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11399		 || diff == 3 || diff == 5 || diff == 9)
11400	      || (compare_code == LT && ct == -1)
11401	      || (compare_code == GE && cf == -1))
11402	    {
11403	      /*
11404	       * notl op1	(if necessary)
11405	       * sarl $31, op1
11406	       * orl cf, op1
11407	       */
11408	      if (ct != -1)
11409		{
11410		  cf = ct;
11411		  ct = -1;
11412		  code = reverse_condition (code);
11413		}
11414
11415	      out = emit_store_flag (out, code, ix86_compare_op0,
11416				     ix86_compare_op1, VOIDmode, 0, -1);
11417
11418	      out = expand_simple_binop (mode, IOR,
11419					 out, GEN_INT (cf),
11420					 out, 1, OPTAB_DIRECT);
11421	      if (out != operands[0])
11422		emit_move_insn (operands[0], out);
11423
11424	      return 1; /* DONE */
11425	    }
11426	}
11427
11428
11429      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11430	   || diff == 3 || diff == 5 || diff == 9)
11431	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11432	  && (mode != DImode
11433	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11434	{
11435	  /*
11436	   * xorl dest,dest
11437	   * cmpl op1,op2
11438	   * setcc dest
11439	   * lea cf(dest*(ct-cf)),dest
11440	   *
11441	   * Size 14.
11442	   *
11443	   * This also catches the degenerate setcc-only case.
11444	   */
11445
11446	  rtx tmp;
11447	  int nops;
11448
11449	  out = emit_store_flag (out, code, ix86_compare_op0,
11450				 ix86_compare_op1, VOIDmode, 0, 1);
11451
11452	  nops = 0;
11453	  /* On x86_64 the lea instruction operates on Pmode, so we need
11454	     to get the arithmetic done in the proper mode to match.  */
11455	  if (diff == 1)
11456	    tmp = copy_rtx (out);
11457	  else
11458	    {
11459	      rtx out1;
11460	      out1 = copy_rtx (out);
11461	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11462	      nops++;
11463	      if (diff & 1)
11464		{
11465		  tmp = gen_rtx_PLUS (mode, tmp, out1);
11466		  nops++;
11467		}
11468	    }
11469	  if (cf != 0)
11470	    {
11471	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11472	      nops++;
11473	    }
11474	  if (!rtx_equal_p (tmp, out))
11475	    {
11476	      if (nops == 1)
11477		out = force_operand (tmp, copy_rtx (out));
11478	      else
11479		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11480	    }
11481	  if (!rtx_equal_p (out, operands[0]))
11482	    emit_move_insn (operands[0], copy_rtx (out));
11483
11484	  return 1; /* DONE */
11485	}
11486
11487      /*
11488       * General case:			Jumpful:
11489       *   xorl dest,dest		cmpl op1, op2
11490       *   cmpl op1, op2		movl ct, dest
11491       *   setcc dest			jcc 1f
11492       *   decl dest			movl cf, dest
11493       *   andl (cf-ct),dest		1:
11494       *   addl ct,dest
11495       *
11496       * Size 20.			Size 14.
11497       *
11498       * This is reasonably steep, but branch mispredict costs are
11499       * high on modern cpus, so consider failing only if optimizing
11500       * for space.
11501       */
11502
11503      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11504	  && BRANCH_COST >= 2)
11505	{
11506	  if (cf == 0)
11507	    {
11508	      cf = ct;
11509	      ct = 0;
11510	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11511		/* We may be reversing an unordered compare to a normal compare,
11512		   which is not valid in general (we may convert a non-trapping
11513		   condition into a trapping one); however, on i386 we currently
11514		   emit all comparisons unordered.  */
11515		code = reverse_condition_maybe_unordered (code);
11516	      else
11517		{
11518		  code = reverse_condition (code);
11519		  if (compare_code != UNKNOWN)
11520		    compare_code = reverse_condition (compare_code);
11521		}
11522	    }
11523
11524	  if (compare_code != UNKNOWN)
11525	    {
11526	      /* notl op1	(if needed)
11527		 sarl $31, op1
11528		 andl (cf-ct), op1
11529		 addl ct, op1
11530
11531		 For x < 0 (resp. x <= -1) there will be no notl,
11532		 so if possible swap the constants to get rid of the
11533		 complement.
11534		 True/false will be -1/0 while code below (store flag
11535		 followed by decrement) is 0/-1, so the constants need
11536		 to be exchanged once more.  */
11537
11538	      if (compare_code == GE || !cf)
11539		{
11540		  code = reverse_condition (code);
11541		  compare_code = LT;
11542		}
11543	      else
11544		{
11545		  HOST_WIDE_INT tmp = cf;
11546		  cf = ct;
11547		  ct = tmp;
11548		}
11549
11550	      out = emit_store_flag (out, code, ix86_compare_op0,
11551				     ix86_compare_op1, VOIDmode, 0, -1);
11552	    }
11553	  else
11554	    {
11555	      out = emit_store_flag (out, code, ix86_compare_op0,
11556				     ix86_compare_op1, VOIDmode, 0, 1);
11557
11558	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11559					 copy_rtx (out), 1, OPTAB_DIRECT);
11560	    }
11561
11562	  out = expand_simple_binop (mode, AND, copy_rtx (out),
11563				     gen_int_mode (cf - ct, mode),
11564				     copy_rtx (out), 1, OPTAB_DIRECT);
11565	  if (ct)
11566	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11567				       copy_rtx (out), 1, OPTAB_DIRECT);
11568	  if (!rtx_equal_p (out, operands[0]))
11569	    emit_move_insn (operands[0], copy_rtx (out));
11570
11571	  return 1; /* DONE */
11572	}
11573    }
11574
11575  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11576    {
11577      /* Try a few things more with specific constants and a variable.  */
11578
11579      optab op;
11580      rtx var, orig_out, out, tmp;
11581
11582      if (BRANCH_COST <= 2)
11583	return 0; /* FAIL */
11584
11585      /* If one of the two operands is an interesting constant, load a
11586	 constant with the above and mask it in with a logical operation.  */
11587
11588      if (GET_CODE (operands[2]) == CONST_INT)
11589	{
11590	  var = operands[3];
11591	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11592	    operands[3] = constm1_rtx, op = and_optab;
11593	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11594	    operands[3] = const0_rtx, op = ior_optab;
11595	  else
11596	    return 0; /* FAIL */
11597	}
11598      else if (GET_CODE (operands[3]) == CONST_INT)
11599	{
11600	  var = operands[2];
11601	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11602	    operands[2] = constm1_rtx, op = and_optab;
11603	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11604	    operands[2] = const0_rtx, op = ior_optab;
11605	  else
11606	    return 0; /* FAIL */
11607	}
11608      else
11609        return 0; /* FAIL */
11610
11611      orig_out = operands[0];
11612      tmp = gen_reg_rtx (mode);
11613      operands[0] = tmp;
11614
11615      /* Recurse to get the constant loaded.  */
11616      if (ix86_expand_int_movcc (operands) == 0)
11617        return 0; /* FAIL */
11618
11619      /* Mask in the interesting variable.  */
11620      out = expand_binop (mode, op, var, tmp, orig_out, 0,
11621			  OPTAB_WIDEN);
11622      if (!rtx_equal_p (out, orig_out))
11623	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11624
11625      return 1; /* DONE */
11626    }
11627
11628  /*
11629   * For comparison with above,
11630   *
11631   * movl cf,dest
11632   * movl ct,tmp
11633   * cmpl op1,op2
11634   * cmovcc tmp,dest
11635   *
11636   * Size 15.
11637   */
11638
11639  if (! nonimmediate_operand (operands[2], mode))
11640    operands[2] = force_reg (mode, operands[2]);
11641  if (! nonimmediate_operand (operands[3], mode))
11642    operands[3] = force_reg (mode, operands[3]);
11643
11644  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11645    {
11646      rtx tmp = gen_reg_rtx (mode);
11647      emit_move_insn (tmp, operands[3]);
11648      operands[3] = tmp;
11649    }
11650  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11651    {
11652      rtx tmp = gen_reg_rtx (mode);
11653      emit_move_insn (tmp, operands[2]);
11654      operands[2] = tmp;
11655    }
11656
11657  if (! register_operand (operands[2], VOIDmode)
11658      && (mode == QImode
11659          || ! register_operand (operands[3], VOIDmode)))
11660    operands[2] = force_reg (mode, operands[2]);
11661
11662  if (mode == QImode
11663      && ! register_operand (operands[3], VOIDmode))
11664    operands[3] = force_reg (mode, operands[3]);
11665
11666  emit_insn (compare_seq);
11667  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11668			  gen_rtx_IF_THEN_ELSE (mode,
11669						compare_op, operands[2],
11670						operands[3])));
11671  if (bypass_test)
11672    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11673			    gen_rtx_IF_THEN_ELSE (mode,
11674				  bypass_test,
11675				  copy_rtx (operands[3]),
11676				  copy_rtx (operands[0]))));
11677  if (second_test)
11678    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11679			    gen_rtx_IF_THEN_ELSE (mode,
11680				  second_test,
11681				  copy_rtx (operands[2]),
11682				  copy_rtx (operands[0]))));
11683
11684  return 1; /* DONE */
11685}
11686
11687/* Swap, force into registers, or otherwise massage the two operands
11688   to an sse comparison with a mask result.  Thus we differ a bit from
11689   ix86_prepare_fp_compare_args which expects to produce a flags result.
11690
11691   The DEST operand exists to help determine whether to commute commutative
11692   operators.  The POP0/POP1 operands are updated in place.  The new
11693   comparison code is returned, or UNKNOWN if not implementable.  */
11694
11695static enum rtx_code
11696ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11697				  rtx *pop0, rtx *pop1)
11698{
11699  rtx tmp;
11700
11701  switch (code)
11702    {
11703    case LTGT:
11704    case UNEQ:
11705      /* We have no LTGT as an operator.  We could implement it with
11706	 NE & ORDERED, but this requires an extra temporary.  It's
11707	 not clear that it's worth it.  */
11708      return UNKNOWN;
11709
11710    case LT:
11711    case LE:
11712    case UNGT:
11713    case UNGE:
11714      /* These are supported directly.  */
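      /* (The SSE compare instructions provide lt, le, nlt and nle
	 predicates directly; nlt and nle are UNGE and UNGT, so none of
	 these orderings needs an operand swap.)  */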
11715      break;
11716
11717    case EQ:
11718    case NE:
11719    case UNORDERED:
11720    case ORDERED:
11721      /* For commutative operators, try to canonicalize the destination
11722	 operand to be first in the comparison - this helps reload to
11723	 avoid extra moves.  */
11724      if (!dest || !rtx_equal_p (dest, *pop1))
11725	break;
11726      /* FALLTHRU */
11727
11728    case GE:
11729    case GT:
11730    case UNLE:
11731    case UNLT:
11732      /* These are not supported directly.  Swap the comparison operands
11733	 to transform into something that is supported.  */
11734      tmp = *pop0;
11735      *pop0 = *pop1;
11736      *pop1 = tmp;
11737      code = swap_condition (code);
11738      break;
11739
11740    default:
11741      gcc_unreachable ();
11742    }
11743
11744  return code;
11745}
11746
11747/* Detect conditional moves that exactly match min/max operational
11748   semantics.  Note that this is IEEE safe, as long as we don't
11749   interchange the operands.
11750
11751   Returns FALSE if this conditional move doesn't match a MIN/MAX,
11752   and TRUE if the operation is successful and instructions are emitted.  */
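/* (The SSE min/max instructions are not symmetric in their operands:
   when the operands are unordered, or both zero, they return the second
   operand.  Preserving the original operand order is therefore what
   keeps the transformation IEEE safe.)  */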
11753
11754static bool
11755ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11756			   rtx cmp_op1, rtx if_true, rtx if_false)
11757{
11758  enum machine_mode mode;
11759  bool is_min;
11760  rtx tmp;
11761
11762  if (code == LT)
11763    ;
11764  else if (code == UNGE)
11765    {
11766      tmp = if_true;
11767      if_true = if_false;
11768      if_false = tmp;
11769    }
11770  else
11771    return false;
11772
11773  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11774    is_min = true;
11775  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11776    is_min = false;
11777  else
11778    return false;
11779
11780  mode = GET_MODE (dest);
11781
11782  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11783     but MODE may be a vector mode and thus not appropriate.  */
11784  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11785    {
11786      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11787      rtvec v;
11788
11789      if_true = force_reg (mode, if_true);
11790      v = gen_rtvec (2, if_true, if_false);
11791      tmp = gen_rtx_UNSPEC (mode, v, u);
11792    }
11793  else
11794    {
11795      code = is_min ? SMIN : SMAX;
11796      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11797    }
11798
11799  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11800  return true;
11801}
11802
11803/* Expand an sse vector comparison.  Return the register with the result.  */
11804
11805static rtx
11806ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11807		     rtx op_true, rtx op_false)
11808{
11809  enum machine_mode mode = GET_MODE (dest);
11810  rtx x;
11811
11812  cmp_op0 = force_reg (mode, cmp_op0);
11813  if (!nonimmediate_operand (cmp_op1, mode))
11814    cmp_op1 = force_reg (mode, cmp_op1);
11815
11816  if (optimize
11817      || reg_overlap_mentioned_p (dest, op_true)
11818      || reg_overlap_mentioned_p (dest, op_false))
11819    dest = gen_reg_rtx (mode);
11820
11821  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11822  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11823
11824  return dest;
11825}
11826
11827/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11828   operations.  This is used for both scalar and vector conditional moves.  */
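/* CMP is assumed to hold all-ones or all-zeros in each element, so the
   selection reduces to the bitwise form
   DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE), with one of the two terms
   dropped when the corresponding arm is the zero vector.  */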
11829
11830static void
11831ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11832{
11833  enum machine_mode mode = GET_MODE (dest);
11834  rtx t2, t3, x;
11835
11836  if (op_false == CONST0_RTX (mode))
11837    {
11838      op_true = force_reg (mode, op_true);
11839      x = gen_rtx_AND (mode, cmp, op_true);
11840      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11841    }
11842  else if (op_true == CONST0_RTX (mode))
11843    {
11844      op_false = force_reg (mode, op_false);
11845      x = gen_rtx_NOT (mode, cmp);
11846      x = gen_rtx_AND (mode, x, op_false);
11847      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11848    }
11849  else
11850    {
11851      op_true = force_reg (mode, op_true);
11852      op_false = force_reg (mode, op_false);
11853
11854      t2 = gen_reg_rtx (mode);
11855      if (optimize)
11856	t3 = gen_reg_rtx (mode);
11857      else
11858	t3 = dest;
11859
11860      x = gen_rtx_AND (mode, op_true, cmp);
11861      emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11862
11863      x = gen_rtx_NOT (mode, cmp);
11864      x = gen_rtx_AND (mode, x, op_false);
11865      emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11866
11867      x = gen_rtx_IOR (mode, t3, t2);
11868      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11869    }
11870}
11871
11872/* Expand a floating-point conditional move.  Return true if successful.  */
11873
11874int
11875ix86_expand_fp_movcc (rtx operands[])
11876{
11877  enum machine_mode mode = GET_MODE (operands[0]);
11878  enum rtx_code code = GET_CODE (operands[1]);
11879  rtx tmp, compare_op, second_test, bypass_test;
11880
11881  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11882    {
11883      enum machine_mode cmode;
11884
11885      /* Since we've no cmove for sse registers, don't force bad register
11886	 allocation just to gain access to it.  Deny movcc when the
11887	 comparison mode doesn't match the move mode.  */
11888      cmode = GET_MODE (ix86_compare_op0);
11889      if (cmode == VOIDmode)
11890	cmode = GET_MODE (ix86_compare_op1);
11891      if (cmode != mode)
11892	return 0;
11893
11894      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11895					       &ix86_compare_op0,
11896					       &ix86_compare_op1);
11897      if (code == UNKNOWN)
11898	return 0;
11899
11900      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11901				     ix86_compare_op1, operands[2],
11902				     operands[3]))
11903	return 1;
11904
11905      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11906				 ix86_compare_op1, operands[2], operands[3]);
11907      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11908      return 1;
11909    }
11910
11911  /* The floating point conditional move instructions don't directly
11912     support conditions resulting from a signed integer comparison.  */
11913
11914  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11915
11916  /* If fcmov cannot handle the comparison directly, compute it with
11917     setcc into a QImode register and test that against zero instead.  */
11918
11919  if (!fcmov_comparison_operator (compare_op, VOIDmode))
11920    {
11921      gcc_assert (!second_test && !bypass_test);
11922      tmp = gen_reg_rtx (QImode);
11923      ix86_expand_setcc (code, tmp);
11924      code = NE;
11925      ix86_compare_op0 = tmp;
11926      ix86_compare_op1 = const0_rtx;
11927      compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
11928    }
11929  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11930    {
11931      tmp = gen_reg_rtx (mode);
11932      emit_move_insn (tmp, operands[3]);
11933      operands[3] = tmp;
11934    }
11935  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11936    {
11937      tmp = gen_reg_rtx (mode);
11938      emit_move_insn (tmp, operands[2]);
11939      operands[2] = tmp;
11940    }
11941
11942  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11943			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
11944						operands[2], operands[3])));
11945  if (bypass_test)
11946    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11947			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11948						  operands[3], operands[0])));
11949  if (second_test)
11950    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11951			    gen_rtx_IF_THEN_ELSE (mode, second_test,
11952						  operands[2], operands[0])));
11953
11954  return 1;
11955}
11956
11957/* Expand a floating-point vector conditional move; a vcond operation
11958   rather than a movcc operation.  */
11959
11960bool
11961ix86_expand_fp_vcond (rtx operands[])
11962{
11963  enum rtx_code code = GET_CODE (operands[3]);
11964  rtx cmp;
11965
11966  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11967					   &operands[4], &operands[5]);
11968  if (code == UNKNOWN)
11969    return false;
11970
11971  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11972				 operands[5], operands[1], operands[2]))
11973    return true;
11974
11975  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11976			     operands[1], operands[2]);
11977  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11978  return true;
11979}
11980
11981/* Expand a signed integral vector conditional move.  */
11982
11983bool
11984ix86_expand_int_vcond (rtx operands[])
11985{
11986  enum machine_mode mode = GET_MODE (operands[0]);
11987  enum rtx_code code = GET_CODE (operands[3]);
11988  bool negate = false;
11989  rtx x, cop0, cop1;
11990
11991  cop0 = operands[4];
11992  cop1 = operands[5];
11993
11994  /* Canonicalize the comparison to EQ, GT, GTU.  */
11995  switch (code)
11996    {
11997    case EQ:
11998    case GT:
11999    case GTU:
12000      break;
12001
12002    case NE:
12003    case LE:
12004    case LEU:
12005      code = reverse_condition (code);
12006      negate = true;
12007      break;
12008
12009    case GE:
12010    case GEU:
12011      code = reverse_condition (code);
12012      negate = true;
12013      /* FALLTHRU */
12014
12015    case LT:
12016    case LTU:
12017      code = swap_condition (code);
12018      x = cop0, cop0 = cop1, cop1 = x;
12019      break;
12020
12021    default:
12022      gcc_unreachable ();
12023    }
12024
12025  /* Unsigned parallel compare is not supported by the hardware.  Play some
12026     tricks to turn this into a signed comparison against 0.  */
12027  if (code == GTU)
12028    {
12029      cop0 = force_reg (mode, cop0);
12030
12031      switch (mode)
12032	{
12033	case V4SImode:
12034	  {
12035	    rtx t1, t2, mask;
12036
12037	    /* Perform a parallel modulo subtraction.  */
12038	    t1 = gen_reg_rtx (mode);
12039	    emit_insn (gen_subv4si3 (t1, cop0, cop1));
12040
12041	    /* Extract the original sign bit of op0.  */
12042	    mask = GEN_INT (-0x80000000);
12043	    mask = gen_rtx_CONST_VECTOR (mode,
12044			gen_rtvec (4, mask, mask, mask, mask));
12045	    mask = force_reg (mode, mask);
12046	    t2 = gen_reg_rtx (mode);
12047	    emit_insn (gen_andv4si3 (t2, cop0, mask));
12048
12049	    /* XOR it back into the result of the subtraction.  This results
12050	       in the sign bit set iff we saw unsigned underflow.  */
12051	    x = gen_reg_rtx (mode);
12052	    emit_insn (gen_xorv4si3 (x, t1, t2));
12053
12054	    code = GT;
12055	  }
12056	  break;
12057
12058	case V16QImode:
12059	case V8HImode:
12060	  /* Perform a parallel unsigned saturating subtraction.  */
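	  /* The saturating difference is nonzero exactly when
	     cop0 >u cop1, so the GTU test becomes an EQ test against
	     zero with the arms of the conditional move swapped via
	     NEGATE.  */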
12061	  x = gen_reg_rtx (mode);
12062	  emit_insn (gen_rtx_SET (VOIDmode, x,
12063				  gen_rtx_US_MINUS (mode, cop0, cop1)));
12064
12065	  code = EQ;
12066	  negate = !negate;
12067	  break;
12068
12069	default:
12070	  gcc_unreachable ();
12071	}
12072
12073      cop0 = x;
12074      cop1 = CONST0_RTX (mode);
12075    }
12076
12077  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12078			   operands[1+negate], operands[2-negate]);
12079
12080  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12081			 operands[2-negate]);
12082  return true;
12083}
12084
12085/* Expand conditional increment or decrement using adc/sbb instructions.
12086   The default case using setcc followed by the conditional move can be
12087   done by generic code.  */
12088int
12089ix86_expand_int_addcc (rtx operands[])
12090{
12091  enum rtx_code code = GET_CODE (operands[1]);
12092  rtx compare_op;
12093  rtx val = const0_rtx;
12094  bool fpcmp = false;
12095  enum machine_mode mode = GET_MODE (operands[0]);
12096
12097  if (operands[3] != const1_rtx
12098      && operands[3] != constm1_rtx)
12099    return 0;
12100  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12101				       ix86_compare_op1, &compare_op))
12102     return 0;
12103  code = GET_CODE (compare_op);
12104
12105  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12106      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12107    {
12108      fpcmp = true;
12109      code = ix86_fp_compare_code_to_integer (code);
12110    }
12111
12112  if (code != LTU)
12113    {
12114      val = constm1_rtx;
12115      if (fpcmp)
12116	PUT_CODE (compare_op,
12117		  reverse_condition_maybe_unordered
12118		    (GET_CODE (compare_op)));
12119      else
12120	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12121    }
12122  PUT_MODE (compare_op, mode);
12123
12124  /* Construct either adc or sbb insn.  */
12125  if ((code == LTU) == (operands[3] == constm1_rtx))
12126    {
12127      switch (GET_MODE (operands[0]))
12128	{
12129	  case QImode:
12130            emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12131	    break;
12132	  case HImode:
12133            emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12134	    break;
12135	  case SImode:
12136            emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12137	    break;
12138	  case DImode:
12139            emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12140	    break;
12141	  default:
12142	    gcc_unreachable ();
12143	}
12144    }
12145  else
12146    {
12147      switch (GET_MODE (operands[0]))
12148	{
12149	  case QImode:
12150            emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12151	    break;
12152	  case HImode:
12153            emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12154	    break;
12155	  case SImode:
12156            emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12157	    break;
12158	  case DImode:
12159            emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12160	    break;
12161	  default:
12162	    gcc_unreachable ();
12163	}
12164    }
12165  return 1; /* DONE */
12166}
12167
12168
12169/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
12170   works for floating point parameters and non-offsettable memories.
12171   For pushes, it returns just stack offsets; the values will be saved
12172   in the right order.  At most three parts are generated.  */
12173
12174static int
12175ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12176{
12177  int size;
12178
12179  if (!TARGET_64BIT)
12180    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12181  else
12182    size = (GET_MODE_SIZE (mode) + 4) / 8;
12183
12184  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
12185  gcc_assert (size >= 2 && size <= 3);
12186
12187  /* Optimize constant pool references to immediates.  This is used by fp
12188     moves, which force all constants to memory to allow combining.  */
12189  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
12190    {
12191      rtx tmp = maybe_get_pool_constant (operand);
12192      if (tmp)
12193	operand = tmp;
12194    }
12195
12196  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
12197    {
12198      /* The only non-offsettable memories we handle are pushes.  */
12199      int ok = push_operand (operand, VOIDmode);
12200
12201      gcc_assert (ok);
12202
12203      operand = copy_rtx (operand);
12204      PUT_MODE (operand, Pmode);
12205      parts[0] = parts[1] = parts[2] = operand;
12206      return size;
12207    }
12208
12209  if (GET_CODE (operand) == CONST_VECTOR)
12210    {
12211      enum machine_mode imode = int_mode_for_mode (mode);
12212      /* Caution: if we looked through a constant pool memory above,
12213	 the operand may actually have a different mode now.  That's
12214	 ok, since we want to pun this all the way back to an integer.  */
12215      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12216      gcc_assert (operand != NULL);
12217      mode = imode;
12218    }
12219
12220  if (!TARGET_64BIT)
12221    {
12222      if (mode == DImode)
12223	split_di (&operand, 1, &parts[0], &parts[1]);
12224      else
12225	{
12226	  if (REG_P (operand))
12227	    {
12228	      gcc_assert (reload_completed);
12229	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12230	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12231	      if (size == 3)
12232		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12233	    }
12234	  else if (offsettable_memref_p (operand))
12235	    {
12236	      operand = adjust_address (operand, SImode, 0);
12237	      parts[0] = operand;
12238	      parts[1] = adjust_address (operand, SImode, 4);
12239	      if (size == 3)
12240		parts[2] = adjust_address (operand, SImode, 8);
12241	    }
12242	  else if (GET_CODE (operand) == CONST_DOUBLE)
12243	    {
12244	      REAL_VALUE_TYPE r;
12245	      long l[4];
12246
12247	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12248	      switch (mode)
12249		{
12250		case XFmode:
12251		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12252		  parts[2] = gen_int_mode (l[2], SImode);
12253		  break;
12254		case DFmode:
12255		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12256		  break;
12257		default:
12258		  gcc_unreachable ();
12259		}
12260	      parts[1] = gen_int_mode (l[1], SImode);
12261	      parts[0] = gen_int_mode (l[0], SImode);
12262	    }
12263	  else
12264	    gcc_unreachable ();
12265	}
12266    }
12267  else
12268    {
12269      if (mode == TImode)
12270	split_ti (&operand, 1, &parts[0], &parts[1]);
12271      if (mode == XFmode || mode == TFmode)
12272	{
12273	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12274	  if (REG_P (operand))
12275	    {
12276	      gcc_assert (reload_completed);
12277	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12278	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12279	    }
12280	  else if (offsettable_memref_p (operand))
12281	    {
12282	      operand = adjust_address (operand, DImode, 0);
12283	      parts[0] = operand;
12284	      parts[1] = adjust_address (operand, upper_mode, 8);
12285	    }
12286	  else if (GET_CODE (operand) == CONST_DOUBLE)
12287	    {
12288	      REAL_VALUE_TYPE r;
12289	      long l[4];
12290
12291	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12292	      real_to_target (l, &r, mode);
12293
12294	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
12295	      if (HOST_BITS_PER_WIDE_INT >= 64)
12296	        parts[0]
12297		  = gen_int_mode
12298		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12299		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12300		       DImode);
12301	      else
12302	        parts[0] = immed_double_const (l[0], l[1], DImode);
12303
12304	      if (upper_mode == SImode)
12305	        parts[1] = gen_int_mode (l[2], SImode);
12306	      else if (HOST_BITS_PER_WIDE_INT >= 64)
12307	        parts[1]
12308		  = gen_int_mode
12309		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12310		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12311		       DImode);
12312	      else
12313	        parts[1] = immed_double_const (l[2], l[3], DImode);
12314	    }
12315	  else
12316	    gcc_unreachable ();
12317	}
12318    }
12319
12320  return size;
12321}
12322
12323/* Emit insns to perform a move or push of DI, DF, and XF values.
12324   All required insns are emitted here; the caller does not need to
12325   emit any normal moves itself.  Operands 2-4 contain the input values
12326   in the correct order; operands 5-7 contain the output values.  */
12327
12328void
12329ix86_split_long_move (rtx operands[])
12330{
12331  rtx part[2][3];
12332  int nparts;
12333  int push = 0;
12334  int collisions = 0;
12335  enum machine_mode mode = GET_MODE (operands[0]);
12336
12337  /* The DFmode expanders may ask us to move a double.
12338     For a 64bit target this is a single move.  By hiding that fact
12339     here we simplify the i386.md splitters.  */
12340  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12341    {
12342      /* Optimize constant pool references to immediates.  This is used by
12343	 fp moves, which force all constants to memory to allow combining.  */
12344
12345      if (GET_CODE (operands[1]) == MEM
12346	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12347	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12348	operands[1] = get_pool_constant (XEXP (operands[1], 0));
12349      if (push_operand (operands[0], VOIDmode))
12350	{
12351	  operands[0] = copy_rtx (operands[0]);
12352	  PUT_MODE (operands[0], Pmode);
12353	}
12354      else
12355        operands[0] = gen_lowpart (DImode, operands[0]);
12356      operands[1] = gen_lowpart (DImode, operands[1]);
12357      emit_move_insn (operands[0], operands[1]);
12358      return;
12359    }
12360
12361  /* The only non-offsettable memory we handle is a push.  */
12362  if (push_operand (operands[0], VOIDmode))
12363    push = 1;
12364  else
12365    gcc_assert (GET_CODE (operands[0]) != MEM
12366		|| offsettable_memref_p (operands[0]));
12367
12368  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12369  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12370
12371  /* When emitting a push, watch out for source operands on the stack.  */
12372  if (push && GET_CODE (operands[1]) == MEM
12373      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12374    {
12375      if (nparts == 3)
12376	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12377				     XEXP (part[1][2], 0));
12378      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12379				   XEXP (part[1][1], 0));
12380    }
12381
12382  /* We need to do the copy in the right order in case an address register
12383     of the source overlaps the destination.  */
12384  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12385    {
12386      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12387	collisions++;
12388      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12389	collisions++;
12390      if (nparts == 3
12391	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12392	collisions++;
12393
12394      /* Collision in the middle part can be handled by reordering.  */
12395      if (collisions == 1 && nparts == 3
12396	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12397	{
12398	  rtx tmp;
12399	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12400	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12401	}
12402
12403      /* If there are more collisions, we can't handle it by reordering.
12404	 Do an lea to the last part and use only one colliding move.  */
12405      else if (collisions > 1)
12406	{
12407	  rtx base;
12408
12409	  collisions = 1;
12410
12411	  base = part[0][nparts - 1];
12412
12413	  /* Handle the case when the last part isn't valid for lea.
12414	     Happens in 64-bit mode storing the 12-byte XFmode.  */
12415	  if (GET_MODE (base) != Pmode)
12416	    base = gen_rtx_REG (Pmode, REGNO (base));
12417
12418	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12419	  part[1][0] = replace_equiv_address (part[1][0], base);
12420	  part[1][1] = replace_equiv_address (part[1][1],
12421				      plus_constant (base, UNITS_PER_WORD));
12422	  if (nparts == 3)
12423	    part[1][2] = replace_equiv_address (part[1][2],
12424				      plus_constant (base, 8));
12425	}
12426    }
12427
12428  if (push)
12429    {
12430      if (!TARGET_64BIT)
12431	{
12432	  if (nparts == 3)
12433	    {
12434	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12435                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12436	      emit_move_insn (part[0][2], part[1][2]);
12437	    }
12438	}
12439      else
12440	{
12441	  /* In 64bit mode we don't have a 32bit push available.  If this is a
12442	     register, it is OK - we will just use the larger counterpart.  We
12443	     also retype memory - this comes from an attempt to avoid the REX
12444	     prefix on moving the second half of a TFmode value.  */
12445	  if (GET_MODE (part[1][1]) == SImode)
12446	    {
12447	      switch (GET_CODE (part[1][1]))
12448		{
12449		case MEM:
12450		  part[1][1] = adjust_address (part[1][1], DImode, 0);
12451		  break;
12452
12453		case REG:
12454		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12455		  break;
12456
12457		default:
12458		  gcc_unreachable ();
12459		}
12460
12461	      if (GET_MODE (part[1][0]) == SImode)
12462		part[1][0] = part[1][1];
12463	    }
12464	}
12465      emit_move_insn (part[0][1], part[1][1]);
12466      emit_move_insn (part[0][0], part[1][0]);
12467      return;
12468    }
12469
12470  /* Choose correct order to not overwrite the source before it is copied.  */
12471  if ((REG_P (part[0][0])
12472       && REG_P (part[1][1])
12473       && (REGNO (part[0][0]) == REGNO (part[1][1])
12474	   || (nparts == 3
12475	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
12476      || (collisions > 0
12477	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12478    {
12479      if (nparts == 3)
12480	{
12481	  operands[2] = part[0][2];
12482	  operands[3] = part[0][1];
12483	  operands[4] = part[0][0];
12484	  operands[5] = part[1][2];
12485	  operands[6] = part[1][1];
12486	  operands[7] = part[1][0];
12487	}
12488      else
12489	{
12490	  operands[2] = part[0][1];
12491	  operands[3] = part[0][0];
12492	  operands[5] = part[1][1];
12493	  operands[6] = part[1][0];
12494	}
12495    }
12496  else
12497    {
12498      if (nparts == 3)
12499	{
12500	  operands[2] = part[0][0];
12501	  operands[3] = part[0][1];
12502	  operands[4] = part[0][2];
12503	  operands[5] = part[1][0];
12504	  operands[6] = part[1][1];
12505	  operands[7] = part[1][2];
12506	}
12507      else
12508	{
12509	  operands[2] = part[0][0];
12510	  operands[3] = part[0][1];
12511	  operands[5] = part[1][0];
12512	  operands[6] = part[1][1];
12513	}
12514    }
12515
12516  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
12517  if (optimize_size)
12518    {
12519      if (GET_CODE (operands[5]) == CONST_INT
12520	  && operands[5] != const0_rtx
12521	  && REG_P (operands[2]))
12522	{
12523	  if (GET_CODE (operands[6]) == CONST_INT
12524	      && INTVAL (operands[6]) == INTVAL (operands[5]))
12525	    operands[6] = operands[2];
12526
12527	  if (nparts == 3
12528	      && GET_CODE (operands[7]) == CONST_INT
12529	      && INTVAL (operands[7]) == INTVAL (operands[5]))
12530	    operands[7] = operands[2];
12531	}
12532
12533      if (nparts == 3
12534	  && GET_CODE (operands[6]) == CONST_INT
12535	  && operands[6] != const0_rtx
12536	  && REG_P (operands[3])
12537	  && GET_CODE (operands[7]) == CONST_INT
12538	  && INTVAL (operands[7]) == INTVAL (operands[6]))
12539	operands[7] = operands[3];
12540    }
12541
12542  emit_move_insn (operands[2], operands[5]);
12543  emit_move_insn (operands[3], operands[6]);
12544  if (nparts == 3)
12545    emit_move_insn (operands[4], operands[7]);
12546
12547  return;
12548}
12549
12550/* Helper function of ix86_split_ashl used to generate an SImode/DImode
12551   left shift by a constant, either using a single shift or
12552   a sequence of add instructions.  */
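/* (Adding a register to itself doubles it, i.e. shifts it left by one,
   so COUNT additions implement the shift; they are used when COUNT
   times the add cost does not exceed the constant-shift cost.)  */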
12553
12554static void
12555ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12556{
12557  if (count == 1)
12558    {
12559      emit_insn ((mode == DImode
12560		  ? gen_addsi3
12561		  : gen_adddi3) (operand, operand, operand));
12562    }
12563  else if (!optimize_size
12564	   && count * ix86_cost->add <= ix86_cost->shift_const)
12565    {
12566      int i;
12567      for (i=0; i<count; i++)
12568	{
12569	  emit_insn ((mode == DImode
12570		      ? gen_addsi3
12571		      : gen_adddi3) (operand, operand, operand));
12572	}
12573    }
12574  else
12575    emit_insn ((mode == DImode
12576		? gen_ashlsi3
12577		: gen_ashldi3) (operand, operand, GEN_INT (count)));
12578}
12579
12580void
12581ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12582{
12583  rtx low[2], high[2];
12584  int count;
12585  const int single_width = mode == DImode ? 32 : 64;
12586
12587  if (GET_CODE (operands[2]) == CONST_INT)
12588    {
12589      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12590      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12591
12592      if (count >= single_width)
12593	{
12594	  emit_move_insn (high[0], low[1]);
12595	  emit_move_insn (low[0], const0_rtx);
12596
12597	  if (count > single_width)
12598	    ix86_expand_ashl_const (high[0], count - single_width, mode);
12599	}
12600      else
12601	{
12602	  if (!rtx_equal_p (operands[0], operands[1]))
12603	    emit_move_insn (operands[0], operands[1]);
12604	  emit_insn ((mode == DImode
12605		     ? gen_x86_shld_1
12606		     : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12607	  ix86_expand_ashl_const (low[0], count, mode);
12608	}
12609      return;
12610    }
12611
12612  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12613
12614  if (operands[1] == const1_rtx)
12615    {
12616      /* Assuming we've chosen QImode-capable registers, 1 << N can be
12617	 done with two 32/64-bit shifts, no branches, no cmoves.  */
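      /* (The sequence clears both words, uses setcc on a test of bit 5
	 (or 6) of the shift count to put a 1 into exactly one of them,
	 and then shifts both words by the count; the hardware masks the
	 shift count, so the word holding the 1 receives 1 << (N mod 32)
	 (resp. mod 64) while the other stays 0.)  */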
12618      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12619	{
12620	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12621
12622	  ix86_expand_clear (low[0]);
12623	  ix86_expand_clear (high[0]);
12624	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12625
12626	  d = gen_lowpart (QImode, low[0]);
12627	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12628	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
12629	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
12630
12631	  d = gen_lowpart (QImode, high[0]);
12632	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12633	  s = gen_rtx_NE (QImode, flags, const0_rtx);
12634	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
12635	}
12636
12637      /* Otherwise, we can get the same results by manually performing
12638	 a bit extract operation on bit 5/6, and then performing the two
12639	 shifts.  The two methods of getting 0/1 into low/high are exactly
12640	 the same size.  Avoiding the shift in the bit extract case helps
12641	 pentium4 a bit; no one else seems to care much either way.  */
12642      else
12643	{
12644	  rtx x;
12645
12646	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12647	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12648	  else
12649	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12650	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12651
12652	  emit_insn ((mode == DImode
12653		      ? gen_lshrsi3
12654		      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12655	  emit_insn ((mode == DImode
12656		      ? gen_andsi3
12657		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12658	  emit_move_insn (low[0], high[0]);
12659	  emit_insn ((mode == DImode
12660		      ? gen_xorsi3
12661		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12662	}
12663
12664      emit_insn ((mode == DImode
12665		    ? gen_ashlsi3
12666		    : gen_ashldi3) (low[0], low[0], operands[2]));
12667      emit_insn ((mode == DImode
12668		    ? gen_ashlsi3
12669		    : gen_ashldi3) (high[0], high[0], operands[2]));
12670      return;
12671    }
12672
12673  if (operands[1] == constm1_rtx)
12674    {
12675      /* For -1 << N, we can avoid the shld instruction, because we
12676	 know that we're shifting 0...31/63 ones into a -1.  */
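      /* (Shifting ones into a word that is already all ones leaves it
	 unchanged, so only the low word needs a real shift; the
	 shift-adjustment code below still handles counts of 32/64 and
	 above.)  */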
12677      emit_move_insn (low[0], constm1_rtx);
12678      if (optimize_size)
12679	emit_move_insn (high[0], low[0]);
12680      else
12681	emit_move_insn (high[0], constm1_rtx);
12682    }
12683  else
12684    {
12685      if (!rtx_equal_p (operands[0], operands[1]))
12686	emit_move_insn (operands[0], operands[1]);
12687
12688      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12689      emit_insn ((mode == DImode
12690		  ? gen_x86_shld_1
12691		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
12692    }
12693
12694  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12695
12696  if (TARGET_CMOVE && scratch)
12697    {
12698      ix86_expand_clear (scratch);
12699      emit_insn ((mode == DImode
12700		  ? gen_x86_shift_adj_1
12701		  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12702    }
12703  else
12704    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12705}
12706
12707void
12708ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12709{
12710  rtx low[2], high[2];
12711  int count;
12712  const int single_width = mode == DImode ? 32 : 64;
12713
12714  if (GET_CODE (operands[2]) == CONST_INT)
12715    {
12716      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12717      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12718
12719      if (count == single_width * 2 - 1)
12720	{
12721	  emit_move_insn (high[0], high[1]);
12722	  emit_insn ((mode == DImode
12723		      ? gen_ashrsi3
12724		      : gen_ashrdi3) (high[0], high[0],
12725				      GEN_INT (single_width - 1)));
12726	  emit_move_insn (low[0], high[0]);
12727
12728	}
12729      else if (count >= single_width)
12730	{
12731	  emit_move_insn (low[0], high[1]);
12732	  emit_move_insn (high[0], low[0]);
12733	  emit_insn ((mode == DImode
12734		      ? gen_ashrsi3
12735		      : gen_ashrdi3) (high[0], high[0],
12736				      GEN_INT (single_width - 1)));
12737	  if (count > single_width)
12738	    emit_insn ((mode == DImode
12739			? gen_ashrsi3
12740			: gen_ashrdi3) (low[0], low[0],
12741					GEN_INT (count - single_width)));
12742	}
12743      else
12744	{
12745	  if (!rtx_equal_p (operands[0], operands[1]))
12746	    emit_move_insn (operands[0], operands[1]);
12747	  emit_insn ((mode == DImode
12748		      ? gen_x86_shrd_1
12749		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12750	  emit_insn ((mode == DImode
12751		      ? gen_ashrsi3
12752		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12753	}
12754    }
12755  else
12756    {
12757      if (!rtx_equal_p (operands[0], operands[1]))
12758	emit_move_insn (operands[0], operands[1]);
12759
12760      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12761
12762      emit_insn ((mode == DImode
12763		  ? gen_x86_shrd_1
12764		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12765      emit_insn ((mode == DImode
12766		  ? gen_ashrsi3
12767		  : gen_ashrdi3)  (high[0], high[0], operands[2]));
12768
12769      if (TARGET_CMOVE && scratch)
12770	{
12771	  emit_move_insn (scratch, high[0]);
12772	  emit_insn ((mode == DImode
12773		      ? gen_ashrsi3
12774		      : gen_ashrdi3) (scratch, scratch,
12775				      GEN_INT (single_width - 1)));
12776	  emit_insn ((mode == DImode
12777		      ? gen_x86_shift_adj_1
12778		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12779					 scratch));
12780	}
12781      else
12782	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12783    }
12784}
12785
12786void
12787ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12788{
12789  rtx low[2], high[2];
12790  int count;
12791  const int single_width = mode == DImode ? 32 : 64;
12792
12793  if (GET_CODE (operands[2]) == CONST_INT)
12794    {
12795      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12796      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12797
12798      if (count >= single_width)
12799	{
12800	  emit_move_insn (low[0], high[1]);
12801	  ix86_expand_clear (high[0]);
12802
12803	  if (count > single_width)
12804	    emit_insn ((mode == DImode
12805			? gen_lshrsi3
12806			: gen_lshrdi3) (low[0], low[0],
12807					GEN_INT (count - single_width)));
12808	}
12809      else
12810	{
12811	  if (!rtx_equal_p (operands[0], operands[1]))
12812	    emit_move_insn (operands[0], operands[1]);
12813	  emit_insn ((mode == DImode
12814		      ? gen_x86_shrd_1
12815		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12816	  emit_insn ((mode == DImode
12817		      ? gen_lshrsi3
12818		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12819	}
12820    }
12821  else
12822    {
12823      if (!rtx_equal_p (operands[0], operands[1]))
12824	emit_move_insn (operands[0], operands[1]);
12825
12826      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12827
12828      emit_insn ((mode == DImode
12829		  ? gen_x86_shrd_1
12830		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12831      emit_insn ((mode == DImode
12832		  ? gen_lshrsi3
12833		  : gen_lshrdi3) (high[0], high[0], operands[2]));
12834
12835      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
12836      if (TARGET_CMOVE && scratch)
12837	{
12838	  ix86_expand_clear (scratch);
12839	  emit_insn ((mode == DImode
12840		      ? gen_x86_shift_adj_1
12841		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12842					       scratch));
12843	}
12844      else
12845	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12846    }
12847}
12848
12849/* Helper function for the string operations below.  Test whether VARIABLE
12850   is aligned to VALUE bytes.  If so, jump to the returned label.  */
12851static rtx
12852ix86_expand_aligntest (rtx variable, int value)
12853{
12854  rtx label = gen_label_rtx ();
12855  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12856  if (GET_MODE (variable) == DImode)
12857    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12858  else
12859    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12860  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12861			   1, label);
12862  return label;
12863}
12864
12865/* Subtract VALUE from COUNTREG.  */
12866static void
12867ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12868{
12869  if (GET_MODE (countreg) == DImode)
12870    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12871  else
12872    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12873}
12874
12875/* Zero-extend EXP, which may be in SImode, to a Pmode register.  */
12876rtx
12877ix86_zero_extend_to_Pmode (rtx exp)
12878{
12879  rtx r;
12880  if (GET_MODE (exp) == VOIDmode)
12881    return force_reg (Pmode, exp);
12882  if (GET_MODE (exp) == Pmode)
12883    return copy_to_mode_reg (Pmode, exp);
12884  r = gen_reg_rtx (Pmode);
12885  emit_insn (gen_zero_extendsidi2 (r, exp));
12886  return r;
12887}
12888
12889/* Expand string move (memcpy) operation.  Use i386 string operations when
12890   profitable.  ix86_expand_clrmem contains similar code.  */
12891int
12892ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12893{
12894  rtx srcreg, destreg, countreg, srcexp, destexp;
12895  enum machine_mode counter_mode;
12896  HOST_WIDE_INT align = 0;
12897  unsigned HOST_WIDE_INT count = 0;
12898
12899  if (GET_CODE (align_exp) == CONST_INT)
12900    align = INTVAL (align_exp);
12901
12902  /* Can't use any of this if the user has appropriated esi or edi.  */
12903  if (global_regs[4] || global_regs[5])
12904    return 0;
12905
12906  /* This simple hack avoids all inlining code and simplifies code below.  */
12907  if (!TARGET_ALIGN_STRINGOPS)
12908    align = 64;
12909
12910  if (GET_CODE (count_exp) == CONST_INT)
12911    {
12912      count = INTVAL (count_exp);
12913      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12914	return 0;
12915    }
12916
12917  /* Figure out proper mode for counter.  For 32bits it is always SImode,
12918     for 64bits use SImode when possible, otherwise DImode.
12919     Set count to number of bytes copied when known at compile time.  */
12920  if (!TARGET_64BIT
12921      || GET_MODE (count_exp) == SImode
12922      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12923    counter_mode = SImode;
12924  else
12925    counter_mode = DImode;
12926
12927  gcc_assert (counter_mode == SImode || counter_mode == DImode);
12928
12929  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12930  if (destreg != XEXP (dst, 0))
12931    dst = replace_equiv_address_nv (dst, destreg);
12932  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12933  if (srcreg != XEXP (src, 0))
12934    src = replace_equiv_address_nv (src, srcreg);
12935
12936  /* When optimizing for size emit simple rep ; movsb instruction for
12937     counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12938     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12939     Size of (movsl;)*(movsw;)?(movsb;)? sequence is
12940     count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12941     but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12942     known to be zero or not.  The rep; movsb sequence causes higher
12943     register pressure though, so take that into account.  */
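  /* For example, count == 11 gives movsl; movsl; movsw; movsb, which is
     2 + 2 + 1 = 5 bytes, versus 4 bytes for movb $11, %cl; rep; movsb
     when the upper bits of %ecx are known to be zero, and 7 bytes for
     movl $11, %ecx; rep; movsb otherwise.  */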
12944
12945  if ((!optimize || optimize_size)
12946      && (count == 0
12947	  || ((count & 0x03)
12948	      && (!optimize_size
12949		  || count > 5 * 4
12950		  || (count & 3) + count / 4 > 6))))
12951    {
12952      emit_insn (gen_cld ());
12953      countreg = ix86_zero_extend_to_Pmode (count_exp);
12954      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12955      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12956      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12957			      destexp, srcexp));
12958    }
12959
12960  /* For constant aligned (or small unaligned) copies use rep movsl
12961     followed by code copying the rest.  For PentiumPro ensure 8 byte
12962     alignment to allow rep movsl acceleration.  */
12963
12964  else if (count != 0
12965	   && (align >= 8
12966	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12967	       || optimize_size || count < (unsigned int) 64))
12968    {
12969      unsigned HOST_WIDE_INT offset = 0;
12970      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12971      rtx srcmem, dstmem;
12972
12973      emit_insn (gen_cld ());
12974      if (count & ~(size - 1))
12975	{
12976	  if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12977	    {
12978	      enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12979
12980	      while (offset < (count & ~(size - 1)))
12981		{
12982		  srcmem = adjust_automodify_address_nv (src, movs_mode,
12983							 srcreg, offset);
12984		  dstmem = adjust_automodify_address_nv (dst, movs_mode,
12985							 destreg, offset);
12986		  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12987		  offset += size;
12988		}
12989	    }
12990	  else
12991	    {
12992	      countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12993				  & (TARGET_64BIT ? -1 : 0x3fffffff));
12994	      countreg = copy_to_mode_reg (counter_mode, countreg);
12995	      countreg = ix86_zero_extend_to_Pmode (countreg);
12996
12997	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
12998					GEN_INT (size == 4 ? 2 : 3));
12999	      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13000	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13001
13002	      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13003				      countreg, destexp, srcexp));
13004	      offset = count & ~(size - 1);
13005	    }
13006	}
13007      if (size == 8 && (count & 0x04))
13008	{
13009	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
13010						 offset);
13011	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
13012						 offset);
13013	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13014	  offset += 4;
13015	}
13016      if (count & 0x02)
13017	{
13018	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
13019						 offset);
13020	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
13021						 offset);
13022	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13023	  offset += 2;
13024	}
13025      if (count & 0x01)
13026	{
13027	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
13028						 offset);
13029	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
13030						 offset);
13031	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13032	}
13033    }
13034  /* The generic code based on the glibc implementation:
13035     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
13036     allowing accelerated copying there)
13037     - copy the data using rep movsl
13038     - copy the rest.  */
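  /* (The alignment prologue below tests the low bits of the destination
     address one at a time, copies a single byte, word or dword for each
     set bit and decrements the count accordingly; the epilogue after the
     rep; movsl handles the remaining low bits of the count the same
     way.)  */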
13039  else
13040    {
13041      rtx countreg2;
13042      rtx label = NULL;
13043      rtx srcmem, dstmem;
13044      int desired_alignment = (TARGET_PENTIUMPRO
13045			       && (count == 0 || count >= (unsigned int) 260)
13046			       ? 8 : UNITS_PER_WORD);
13047      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
13048      dst = change_address (dst, BLKmode, destreg);
13049      src = change_address (src, BLKmode, srcreg);
13050
13051      /* In case we don't know anything about the alignment, default to
13052         the library version, since it is usually equally fast and results
13053         in shorter code.
13054
13055	 Also emit a call when we know that the count is large and call overhead
13056	 will not be important.  */
13057      if (!TARGET_INLINE_ALL_STRINGOPS
13058	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13059	return 0;
13060
13061      if (TARGET_SINGLE_STRINGOP)
13062	emit_insn (gen_cld ());
13063
13064      countreg2 = gen_reg_rtx (Pmode);
13065      countreg = copy_to_mode_reg (counter_mode, count_exp);
13066
13067      /* We don't use loops to align destination and to copy parts smaller
13068         than 4 bytes, because gcc is able to optimize such code better (in
13069         the case the destination or the count really is aligned, gcc is often
13070         able to predict the branches) and also it is friendlier to the
13071         hardware branch prediction.
13072
13073         Using loops is beneficial for the generic case, because we can
13074         handle small counts using the loops.  Many CPUs (such as Athlon)
13075         have large REP prefix setup costs.
13076
13077         This is quite costly.  Maybe we can revisit this decision later or
13078         add some customizability to this code.  */
13079
13080      if (count == 0 && align < desired_alignment)
13081	{
13082	  label = gen_label_rtx ();
13083	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13084				   LEU, 0, counter_mode, 1, label);
13085	}
13086      if (align <= 1)
13087	{
13088	  rtx label = ix86_expand_aligntest (destreg, 1);
13089	  srcmem = change_address (src, QImode, srcreg);
13090	  dstmem = change_address (dst, QImode, destreg);
13091	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13092	  ix86_adjust_counter (countreg, 1);
13093	  emit_label (label);
13094	  LABEL_NUSES (label) = 1;
13095	}
13096      if (align <= 2)
13097	{
13098	  rtx label = ix86_expand_aligntest (destreg, 2);
13099	  srcmem = change_address (src, HImode, srcreg);
13100	  dstmem = change_address (dst, HImode, destreg);
13101	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13102	  ix86_adjust_counter (countreg, 2);
13103	  emit_label (label);
13104	  LABEL_NUSES (label) = 1;
13105	}
13106      if (align <= 4 && desired_alignment > 4)
13107	{
13108	  rtx label = ix86_expand_aligntest (destreg, 4);
13109	  srcmem = change_address (src, SImode, srcreg);
13110	  dstmem = change_address (dst, SImode, destreg);
13111	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13112	  ix86_adjust_counter (countreg, 4);
13113	  emit_label (label);
13114	  LABEL_NUSES (label) = 1;
13115	}
13116
13117      if (label && desired_alignment > 4 && !TARGET_64BIT)
13118	{
13119	  emit_label (label);
13120	  LABEL_NUSES (label) = 1;
13121	  label = NULL_RTX;
13122	}
13123      if (!TARGET_SINGLE_STRINGOP)
13124	emit_insn (gen_cld ());
13125      if (TARGET_64BIT)
13126	{
13127	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13128				  GEN_INT (3)));
13129	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13130	}
13131      else
13132	{
13133	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13134	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13135	}
13136      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13137      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13138      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13139			      countreg2, destexp, srcexp));
13140
13141      if (label)
13142	{
13143	  emit_label (label);
13144	  LABEL_NUSES (label) = 1;
13145	}
13146      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13147	{
13148	  srcmem = change_address (src, SImode, srcreg);
13149	  dstmem = change_address (dst, SImode, destreg);
13150	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13151	}
13152      if ((align <= 4 || count == 0) && TARGET_64BIT)
13153	{
13154	  rtx label = ix86_expand_aligntest (countreg, 4);
13155	  srcmem = change_address (src, SImode, srcreg);
13156	  dstmem = change_address (dst, SImode, destreg);
13157	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13158	  emit_label (label);
13159	  LABEL_NUSES (label) = 1;
13160	}
13161      if (align > 2 && count != 0 && (count & 2))
13162	{
13163	  srcmem = change_address (src, HImode, srcreg);
13164	  dstmem = change_address (dst, HImode, destreg);
13165	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13166	}
13167      if (align <= 2 || count == 0)
13168	{
13169	  rtx label = ix86_expand_aligntest (countreg, 2);
13170	  srcmem = change_address (src, HImode, srcreg);
13171	  dstmem = change_address (dst, HImode, destreg);
13172	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13173	  emit_label (label);
13174	  LABEL_NUSES (label) = 1;
13175	}
13176      if (align > 1 && count != 0 && (count & 1))
13177	{
13178	  srcmem = change_address (src, QImode, srcreg);
13179	  dstmem = change_address (dst, QImode, destreg);
13180	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13181	}
13182      if (align <= 1 || count == 0)
13183	{
13184	  rtx label = ix86_expand_aligntest (countreg, 1);
13185	  srcmem = change_address (src, QImode, srcreg);
13186	  dstmem = change_address (dst, QImode, destreg);
13187	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13188	  emit_label (label);
13189	  LABEL_NUSES (label) = 1;
13190	}
13191    }
13192
13193  return 1;
13194}
13195
13196/* Expand string clear operation (bzero).  Use i386 string operations when
13197   profitable.  ix86_expand_movmem contains similar code.  */
13198int
13199ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
13200{
13201  rtx destreg, zeroreg, countreg, destexp;
13202  enum machine_mode counter_mode;
13203  HOST_WIDE_INT align = 0;
13204  unsigned HOST_WIDE_INT count = 0;
13205
13206  if (GET_CODE (align_exp) == CONST_INT)
13207    align = INTVAL (align_exp);
13208
13209  /* Can't use any of this if the user has appropriated esi.  */
13210  if (global_regs[4])
13211    return 0;
13212
13213  /* This simple hack avoids all inlining code and simplifies code below.  */
13214  if (!TARGET_ALIGN_STRINGOPS)
13215    align = 32;
13216
13217  if (GET_CODE (count_exp) == CONST_INT)
13218    {
13219      count = INTVAL (count_exp);
13220      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13221	return 0;
13222    }
13223  /* Figure out proper mode for counter.  For 32bits it is always SImode,
13224     for 64bits use SImode when possible, otherwise DImode.
13225     Set count to number of bytes copied when known at compile time.  */
13226  if (!TARGET_64BIT
13227      || GET_MODE (count_exp) == SImode
13228      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13229    counter_mode = SImode;
13230  else
13231    counter_mode = DImode;
13232
13233  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13234  if (destreg != XEXP (dst, 0))
13235    dst = replace_equiv_address_nv (dst, destreg);
13236
13237
13238  /* When optimizing for size emit simple rep ; stosb instruction for
13239     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
13240     sequence is 7 bytes long, so if optimizing for size and count is
13241     small enough that some stosl, stosw and stosb instructions without
13242     rep are shorter, fall back into the next if.  */
13243
13244  if ((!optimize || optimize_size)
13245      && (count == 0
13246	  || ((count & 0x03)
13247	      && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13248    {
13249      emit_insn (gen_cld ());
13250
13251      countreg = ix86_zero_extend_to_Pmode (count_exp);
13252      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13253      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13254      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13255    }
13256  else if (count != 0
13257	   && (align >= 8
13258	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13259	       || optimize_size || count < (unsigned int) 64))
13260    {
13261      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13262      unsigned HOST_WIDE_INT offset = 0;
13263
13264      emit_insn (gen_cld ());
13265
13266      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13267      if (count & ~(size - 1))
13268	{
13269	  unsigned HOST_WIDE_INT repcount;
13270	  unsigned int max_nonrep;
13271
13272	  repcount = count >> (size == 4 ? 2 : 3);
13273	  if (!TARGET_64BIT)
13274	    repcount &= 0x3fffffff;
13275
13276	  /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13277	     movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13278	     bytes.  In both cases the latter seems to be faster for small
13279	     values of N.  */
13280	  max_nonrep = size == 4 ? 7 : 4;
13281	  if (!optimize_size)
13282	    switch (ix86_tune)
13283	      {
13284	      case PROCESSOR_PENTIUM4:
13285	      case PROCESSOR_NOCONA:
13286	        max_nonrep = 3;
13287	        break;
13288	      default:
13289	        break;
13290	      }
13291
13292	  if (repcount <= max_nonrep)
13293	    while (repcount-- > 0)
13294	      {
13295		rtx mem = adjust_automodify_address_nv (dst,
13296							GET_MODE (zeroreg),
13297							destreg, offset);
13298		emit_insn (gen_strset (destreg, mem, zeroreg));
13299		offset += size;
13300	      }
13301	  else
13302	    {
13303	      countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13304	      countreg = ix86_zero_extend_to_Pmode (countreg);
13305	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
13306					GEN_INT (size == 4 ? 2 : 3));
13307	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13308	      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13309				       destexp));
13310	      offset = count & ~(size - 1);
13311	    }
13312	}
13313      if (size == 8 && (count & 0x04))
13314	{
13315	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13316						  offset);
13317	  emit_insn (gen_strset (destreg, mem,
13318				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13319	  offset += 4;
13320	}
13321      if (count & 0x02)
13322	{
13323	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13324						  offset);
13325	  emit_insn (gen_strset (destreg, mem,
13326				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13327	  offset += 2;
13328	}
13329      if (count & 0x01)
13330	{
13331	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13332						  offset);
13333	  emit_insn (gen_strset (destreg, mem,
13334				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13335	}
13336    }
13337  else
13338    {
13339      rtx countreg2;
13340      rtx label = NULL;
13341      /* Compute desired alignment of the string operation.  */
13342      int desired_alignment = (TARGET_PENTIUMPRO
13343			       && (count == 0 || count >= (unsigned int) 260)
13344			       ? 8 : UNITS_PER_WORD);
13345
13346      /* In case we don't know anything about the alignment, default to the
13347         library version, since it is usually equally fast and results in
13348         shorter code.
13349
13350	 Also emit a call when we know that the count is large and the call
13351	 overhead will not be important.  */
13352      if (!TARGET_INLINE_ALL_STRINGOPS
13353	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13354	return 0;
13355
13356      if (TARGET_SINGLE_STRINGOP)
13357	emit_insn (gen_cld ());
13358
13359      countreg2 = gen_reg_rtx (Pmode);
13360      countreg = copy_to_mode_reg (counter_mode, count_exp);
13361      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13362      /* Get rid of MEM_OFFSET, it won't be accurate.  */
13363      dst = change_address (dst, BLKmode, destreg);
13364
13365      if (count == 0 && align < desired_alignment)
13366	{
13367	  label = gen_label_rtx ();
13368	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13369				   LEU, 0, counter_mode, 1, label);
13370	}
13371      if (align <= 1)
13372	{
13373	  rtx label = ix86_expand_aligntest (destreg, 1);
13374	  emit_insn (gen_strset (destreg, dst,
13375				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13376	  ix86_adjust_counter (countreg, 1);
13377	  emit_label (label);
13378	  LABEL_NUSES (label) = 1;
13379	}
13380      if (align <= 2)
13381	{
13382	  rtx label = ix86_expand_aligntest (destreg, 2);
13383	  emit_insn (gen_strset (destreg, dst,
13384				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13385	  ix86_adjust_counter (countreg, 2);
13386	  emit_label (label);
13387	  LABEL_NUSES (label) = 1;
13388	}
13389      if (align <= 4 && desired_alignment > 4)
13390	{
13391	  rtx label = ix86_expand_aligntest (destreg, 4);
13392	  emit_insn (gen_strset (destreg, dst,
13393				 (TARGET_64BIT
13394				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13395				  : zeroreg)));
13396	  ix86_adjust_counter (countreg, 4);
13397	  emit_label (label);
13398	  LABEL_NUSES (label) = 1;
13399	}
13400
13401      if (label && desired_alignment > 4 && !TARGET_64BIT)
13402	{
13403	  emit_label (label);
13404	  LABEL_NUSES (label) = 1;
13405	  label = NULL_RTX;
13406	}
13407
13408      if (!TARGET_SINGLE_STRINGOP)
13409	emit_insn (gen_cld ());
13410      if (TARGET_64BIT)
13411	{
13412	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13413				  GEN_INT (3)));
13414	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13415	}
13416      else
13417	{
13418	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13419	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13420	}
13421      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13422      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13423
13424      if (label)
13425	{
13426	  emit_label (label);
13427	  LABEL_NUSES (label) = 1;
13428	}
13429
13430      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13431	emit_insn (gen_strset (destreg, dst,
13432			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
13433      if (TARGET_64BIT && (align <= 4 || count == 0))
13434	{
13435	  rtx label = ix86_expand_aligntest (countreg, 4);
13436	  emit_insn (gen_strset (destreg, dst,
13437				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13438	  emit_label (label);
13439	  LABEL_NUSES (label) = 1;
13440	}
13441      if (align > 2 && count != 0 && (count & 2))
13442	emit_insn (gen_strset (destreg, dst,
13443			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
13444      if (align <= 2 || count == 0)
13445	{
13446	  rtx label = ix86_expand_aligntest (countreg, 2);
13447	  emit_insn (gen_strset (destreg, dst,
13448				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13449	  emit_label (label);
13450	  LABEL_NUSES (label) = 1;
13451	}
13452      if (align > 1 && count != 0 && (count & 1))
13453	emit_insn (gen_strset (destreg, dst,
13454			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
13455      if (align <= 1 || count == 0)
13456	{
13457	  rtx label = ix86_expand_aligntest (countreg, 1);
13458	  emit_insn (gen_strset (destreg, dst,
13459				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13460	  emit_label (label);
13461	  LABEL_NUSES (label) = 1;
13462	}
13463    }
13464  return 1;
13465}
13466
13467/* Expand strlen.  */
13468int
13469ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13470{
13471  rtx addr, scratch1, scratch2, scratch3, scratch4;
13472
13473  /* The generic case of the strlen expander is long.  Avoid expanding it
13474     unless TARGET_INLINE_ALL_STRINGOPS.  */
13475
13476  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13477      && !TARGET_INLINE_ALL_STRINGOPS
13478      && !optimize_size
13479      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13480    return 0;
13481
13482  addr = force_reg (Pmode, XEXP (src, 0));
13483  scratch1 = gen_reg_rtx (Pmode);
13484
13485  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13486      && !optimize_size)
13487    {
13488      /* Well, it seems that some optimizer does not combine a call like
13489         foo(strlen(bar), strlen(bar));
13490         when the move and the subtraction are done here.  It does calculate
13491         the length just once when these instructions are done inside
13492         output_strlen_unroll().  But since &bar[strlen(bar)] is often used,
13493         and this uses one fewer register for the lifetime of
13494         output_strlen_unroll(), this is better.  */
13495
13496      emit_move_insn (out, addr);
13497
13498      ix86_expand_strlensi_unroll_1 (out, src, align);
13499
13500      /* strlensi_unroll_1 returns the address of the zero at the end of
13501         the string, like memchr(), so compute the length by subtracting
13502         the start address.  */
13503      if (TARGET_64BIT)
13504	emit_insn (gen_subdi3 (out, out, addr));
13505      else
13506	emit_insn (gen_subsi3 (out, out, addr));
13507    }
13508  else
13509    {
13510      rtx unspec;
13511      scratch2 = gen_reg_rtx (Pmode);
13512      scratch3 = gen_reg_rtx (Pmode);
13513      scratch4 = force_reg (Pmode, constm1_rtx);
13514
13515      emit_move_insn (scratch3, addr);
13516      eoschar = force_reg (QImode, eoschar);
13517
13518      emit_insn (gen_cld ());
13519      src = replace_equiv_address_nv (src, scratch3);
13520
13521      /* If .md starts supporting :P, this can be done in .md.  */
13522      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13523						 scratch4), UNSPEC_SCAS);
13524      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
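      /* Assuming the UNSPEC_SCAS pattern expands to repnz ; scasb with the
	 count register starting at -1 (scratch4 above), the counter is
	 decremented once per byte scanned, including the terminating zero,
	 so scratch1 ends up holding -(len + 2).  Hence len == ~scratch1 - 1,
	 which is exactly what the one_cmpl/add pair below computes.  */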
13525      if (TARGET_64BIT)
13526	{
13527	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13528	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13529	}
13530      else
13531	{
13532	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13533	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13534	}
13535    }
13536  return 1;
13537}
13538
13539/* Expand the appropriate insns for doing strlen if not just doing
13540   repnz; scasb
13541
13542   out = result, initialized with the start address
13543   align_rtx = alignment of the address.
13544   scratch = scratch register, initialized with the start address when
13545	not aligned, otherwise undefined
13546
13547   This is just the body. It needs the initializations mentioned above and
13548   some address computing at the end.  These things are done in i386.md.  */
13549
13550static void
13551ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13552{
13553  int align;
13554  rtx tmp;
13555  rtx align_2_label = NULL_RTX;
13556  rtx align_3_label = NULL_RTX;
13557  rtx align_4_label = gen_label_rtx ();
13558  rtx end_0_label = gen_label_rtx ();
13559  rtx mem;
13560  rtx tmpreg = gen_reg_rtx (SImode);
13561  rtx scratch = gen_reg_rtx (SImode);
13562  rtx cmp;
13563
13564  align = 0;
13565  if (GET_CODE (align_rtx) == CONST_INT)
13566    align = INTVAL (align_rtx);
13567
13568  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
13569
13570  /* Is there a known alignment and is it less than 4?  */
13571  if (align < 4)
13572    {
13573      rtx scratch1 = gen_reg_rtx (Pmode);
13574      emit_move_insn (scratch1, out);
13575      /* Is there a known alignment and is it not 2? */
13576      if (align != 2)
13577	{
13578	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13579	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13580
13581	  /* Leave just the 3 lower bits.  */
13582	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13583				    NULL_RTX, 0, OPTAB_WIDEN);
13584
13585	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13586				   Pmode, 1, align_4_label);
13587	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13588				   Pmode, 1, align_2_label);
13589	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13590				   Pmode, 1, align_3_label);
13591	}
13592      else
13593        {
13594	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
13595	     check whether it is aligned to a 4-byte boundary.  */
13596
13597	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13598				    NULL_RTX, 0, OPTAB_WIDEN);
13599
13600	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13601				   Pmode, 1, align_4_label);
13602        }
13603
13604      mem = change_address (src, QImode, out);
13605
13606      /* Now compare the bytes.  */
13607
13608      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
13609      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13610			       QImode, 1, end_0_label);
13611
13612      /* Increment the address.  */
13613      if (TARGET_64BIT)
13614	emit_insn (gen_adddi3 (out, out, const1_rtx));
13615      else
13616	emit_insn (gen_addsi3 (out, out, const1_rtx));
13617
13618      /* Not needed with an alignment of 2 */
13619      if (align != 2)
13620	{
13621	  emit_label (align_2_label);
13622
13623	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13624				   end_0_label);
13625
13626	  if (TARGET_64BIT)
13627	    emit_insn (gen_adddi3 (out, out, const1_rtx));
13628	  else
13629	    emit_insn (gen_addsi3 (out, out, const1_rtx));
13630
13631	  emit_label (align_3_label);
13632	}
13633
13634      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13635			       end_0_label);
13636
13637      if (TARGET_64BIT)
13638	emit_insn (gen_adddi3 (out, out, const1_rtx));
13639      else
13640	emit_insn (gen_addsi3 (out, out, const1_rtx));
13641    }
13642
13643  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
13644     align this loop; doing so only bloats the code and does not help to
13645     speed it up.  */
13646  emit_label (align_4_label);
13647
13648  mem = change_address (src, SImode, out);
13649  emit_move_insn (scratch, mem);
13650  if (TARGET_64BIT)
13651    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13652  else
13653    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13654
13655  /* This formula yields a nonzero result iff one of the bytes is zero.
13656     This saves three branches inside the loop and many cycles.  */
13657
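  /* This is the classic "word has a zero byte" bit trick:
       (x - 0x01010101) & ~x & 0x80808080
     is nonzero exactly when some byte of x is zero.  For example,
     x == 0x40004141 gives 0x3eff4040 & 0xbfffbebe & 0x80808080
     == 0x00800000, flagging the zero byte, while a word with no zero
     byte leaves all of the 0x80 marker bits clear.  */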
13658  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13659  emit_insn (gen_one_cmplsi2 (scratch, scratch));
13660  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13661  emit_insn (gen_andsi3 (tmpreg, tmpreg,
13662			 gen_int_mode (0x80808080, SImode)));
13663  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13664			   align_4_label);
13665
13666  if (TARGET_CMOVE)
13667    {
13668       rtx reg = gen_reg_rtx (SImode);
13669       rtx reg2 = gen_reg_rtx (Pmode);
13670       emit_move_insn (reg, tmpreg);
13671       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13672
13673       /* If zero is not in the first two bytes, move two bytes forward.  */
13674       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13675       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13676       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13677       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13678			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
13679						     reg,
13680						     tmpreg)));
13681       /* Emit lea manually to avoid clobbering of flags.  */
13682       emit_insn (gen_rtx_SET (SImode, reg2,
13683			       gen_rtx_PLUS (Pmode, out, const2_rtx)));
13684
13685       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13686       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13687       emit_insn (gen_rtx_SET (VOIDmode, out,
13688			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13689						     reg2,
13690						     out)));
13691
13692    }
13693  else
13694    {
13695       rtx end_2_label = gen_label_rtx ();
13696       /* Is zero in the first two bytes? */
13697
13698       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13699       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13700       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13701       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13702                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13703                            pc_rtx);
13704       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13705       JUMP_LABEL (tmp) = end_2_label;
13706
13707       /* Not in the first two.  Move two bytes forward.  */
13708       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13709       if (TARGET_64BIT)
13710	 emit_insn (gen_adddi3 (out, out, const2_rtx));
13711       else
13712	 emit_insn (gen_addsi3 (out, out, const2_rtx));
13713
13714       emit_label (end_2_label);
13715
13716    }
13717
13718  /* Avoid branch in fixing the byte.  */
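  /* At this point OUT is 4 bytes past the start of the two-byte group known
     to contain the terminating zero, and bit 7 of the low byte of TMPREG is
     set iff the first byte of that group is the zero.  Adding that byte to
     itself copies the bit into the carry flag, so the subtract-with-borrow
     below computes OUT - 3 - CF: OUT - 4 when the zero is the first byte of
     the group and OUT - 3 when it is the second, leaving OUT pointing at the
     terminating zero without a branch.  */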
13719  tmpreg = gen_lowpart (QImode, tmpreg);
13720  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13721  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13722  if (TARGET_64BIT)
13723    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13724  else
13725    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13726
13727  emit_label (end_0_label);
13728}
13729
13730void
13731ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13732		  rtx callarg2 ATTRIBUTE_UNUSED,
13733		  rtx pop, int sibcall)
13734{
13735  rtx use = NULL, call;
13736
13737  if (pop == const0_rtx)
13738    pop = NULL;
13739  gcc_assert (!TARGET_64BIT || !pop);
13740
13741  if (TARGET_MACHO && !TARGET_64BIT)
13742    {
13743#if TARGET_MACHO
13744      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13745	fnaddr = machopic_indirect_call_target (fnaddr);
13746#endif
13747    }
13748  else
13749    {
13750      /* Static functions and indirect calls don't need the pic register.  */
13751      if (! TARGET_64BIT && flag_pic
13752	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13753	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13754	use_reg (&use, pic_offset_table_rtx);
13755    }
13756
13757  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13758    {
13759      rtx al = gen_rtx_REG (QImode, 0);
13760      emit_move_insn (al, callarg2);
13761      use_reg (&use, al);
13762    }
13763
13764  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13765    {
13766      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13767      fnaddr = gen_rtx_MEM (QImode, fnaddr);
13768    }
13769  if (sibcall && TARGET_64BIT
13770      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13771    {
13772      rtx addr;
13773      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13774      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13775      emit_move_insn (fnaddr, addr);
13776      fnaddr = gen_rtx_MEM (QImode, fnaddr);
13777    }
13778
13779  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13780  if (retval)
13781    call = gen_rtx_SET (VOIDmode, retval, call);
13782  if (pop)
13783    {
13784      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13785      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13786      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13787    }
13788
13789  call = emit_call_insn (call);
13790  if (use)
13791    CALL_INSN_FUNCTION_USAGE (call) = use;
13792}
13793
13794
13795/* Clear stack slot assignments remembered from previous functions.
13796   This is called from INIT_EXPANDERS once before RTL is emitted for each
13797   function.  */
13798
13799static struct machine_function *
13800ix86_init_machine_status (void)
13801{
13802  struct machine_function *f;
13803
13804  f = ggc_alloc_cleared (sizeof (struct machine_function));
13805  f->use_fast_prologue_epilogue_nregs = -1;
13806  f->tls_descriptor_call_expanded_p = 0;
13807
13808  return f;
13809}
13810
13811/* Return a MEM corresponding to a stack slot with mode MODE.
13812   Allocate a new slot if necessary.
13813
13814   The RTL for a function can have several slots available: N is
13815   which slot to use.  */
13816
13817rtx
13818assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13819{
13820  struct stack_local_entry *s;
13821
13822  gcc_assert (n < MAX_386_STACK_LOCALS);
13823
13824  /* Virtual slot is valid only before vregs are instantiated.  */
13825  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13826
13827  for (s = ix86_stack_locals; s; s = s->next)
13828    if (s->mode == mode && s->n == n)
13829      return s->rtl;
13830
13831  s = (struct stack_local_entry *)
13832    ggc_alloc (sizeof (struct stack_local_entry));
13833  s->n = n;
13834  s->mode = mode;
13835  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13836
13837  s->next = ix86_stack_locals;
13838  ix86_stack_locals = s;
13839  return s->rtl;
13840}
13841
13842/* Construct the SYMBOL_REF for the tls_get_addr function.  */
13843
13844static GTY(()) rtx ix86_tls_symbol;
13845rtx
13846ix86_tls_get_addr (void)
13847{
13848
13849  if (!ix86_tls_symbol)
13850    {
13851      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13852					    (TARGET_ANY_GNU_TLS
13853					     && !TARGET_64BIT)
13854					    ? "___tls_get_addr"
13855					    : "__tls_get_addr");
13856    }
13857
13858  return ix86_tls_symbol;
13859}
13860
13861/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
13862
13863static GTY(()) rtx ix86_tls_module_base_symbol;
13864rtx
13865ix86_tls_module_base (void)
13866{
13867
13868  if (!ix86_tls_module_base_symbol)
13869    {
13870      ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13871							"_TLS_MODULE_BASE_");
13872      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13873	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13874    }
13875
13876  return ix86_tls_module_base_symbol;
13877}
13878
13879/* Calculate the length of the memory address in the instruction
13880   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
13881
13882int
13883memory_address_length (rtx addr)
13884{
13885  struct ix86_address parts;
13886  rtx base, index, disp;
13887  int len;
13888  int ok;
13889
13890  if (GET_CODE (addr) == PRE_DEC
13891      || GET_CODE (addr) == POST_INC
13892      || GET_CODE (addr) == PRE_MODIFY
13893      || GET_CODE (addr) == POST_MODIFY)
13894    return 0;
13895
13896  ok = ix86_decompose_address (addr, &parts);
13897  gcc_assert (ok);
13898
13899  if (parts.base && GET_CODE (parts.base) == SUBREG)
13900    parts.base = SUBREG_REG (parts.base);
13901  if (parts.index && GET_CODE (parts.index) == SUBREG)
13902    parts.index = SUBREG_REG (parts.index);
13903
13904  base = parts.base;
13905  index = parts.index;
13906  disp = parts.disp;
13907  len = 0;
13908
13909  /* Rule of thumb:
13910       - esp as the base always wants an index,
13911       - ebp as the base always wants a displacement.  */
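  /* For example, movl %eax, (%ebx) encodes the base directly in the modrm
     byte, but movl %eax, (%esp) needs an extra SIB byte (the esp encoding
     in modrm selects SIB addressing), and movl %eax, (%ebp) needs an
     explicit zero 8-bit displacement (the ebp encoding with mod 00 means
     disp32 with no base instead).  */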
13912
13913  /* Register Indirect.  */
13914  if (base && !index && !disp)
13915    {
13916      /* esp (for its index) and ebp (for its displacement) need
13917	 the two-byte modrm form.  */
13918      if (addr == stack_pointer_rtx
13919	  || addr == arg_pointer_rtx
13920	  || addr == frame_pointer_rtx
13921	  || addr == hard_frame_pointer_rtx)
13922	len = 1;
13923    }
13924
13925  /* Direct Addressing.  */
13926  else if (disp && !base && !index)
13927    len = 4;
13928
13929  else
13930    {
13931      /* Find the length of the displacement constant.  */
13932      if (disp)
13933	{
13934	  if (base && satisfies_constraint_K (disp))
13935	    len = 1;
13936	  else
13937	    len = 4;
13938	}
13939      /* ebp always wants a displacement.  */
13940      else if (base == hard_frame_pointer_rtx)
13941        len = 1;
13942
13943      /* An index requires the two-byte modrm form....  */
13944      if (index
13945	  /* ...like esp, which always wants an index.  */
13946	  || base == stack_pointer_rtx
13947	  || base == arg_pointer_rtx
13948	  || base == frame_pointer_rtx)
13949	len += 1;
13950    }
13951
13952  return len;
13953}
13954
13955/* Compute the default value for the "length_immediate" attribute.  When
13956   SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
13957int
13958ix86_attr_length_immediate_default (rtx insn, int shortform)
13959{
13960  int len = 0;
13961  int i;
13962  extract_insn_cached (insn);
13963  for (i = recog_data.n_operands - 1; i >= 0; --i)
13964    if (CONSTANT_P (recog_data.operand[i]))
13965      {
13966	gcc_assert (!len);
13967	if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13968	  len = 1;
13969	else
13970	  {
13971	    switch (get_attr_mode (insn))
13972	      {
13973		case MODE_QI:
13974		  len+=1;
13975		  break;
13976		case MODE_HI:
13977		  len+=2;
13978		  break;
13979		case MODE_SI:
13980		  len+=4;
13981		  break;
13982		/* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
13983		case MODE_DI:
13984		  len+=4;
13985		  break;
13986		default:
13987		  fatal_insn ("unknown insn mode", insn);
13988	      }
13989	  }
13990      }
13991  return len;
13992}
13993/* Compute default value for "length_address" attribute.  */
13994int
13995ix86_attr_length_address_default (rtx insn)
13996{
13997  int i;
13998
13999  if (get_attr_type (insn) == TYPE_LEA)
14000    {
14001      rtx set = PATTERN (insn);
14002
14003      if (GET_CODE (set) == PARALLEL)
14004	set = XVECEXP (set, 0, 0);
14005
14006      gcc_assert (GET_CODE (set) == SET);
14007
14008      return memory_address_length (SET_SRC (set));
14009    }
14010
14011  extract_insn_cached (insn);
14012  for (i = recog_data.n_operands - 1; i >= 0; --i)
14013    if (GET_CODE (recog_data.operand[i]) == MEM)
14014      {
14015	return memory_address_length (XEXP (recog_data.operand[i], 0));
14017      }
14018  return 0;
14019}
14020
14021/* Return the maximum number of instructions a cpu can issue.  */
14022
14023static int
14024ix86_issue_rate (void)
14025{
14026  switch (ix86_tune)
14027    {
14028    case PROCESSOR_PENTIUM:
14029    case PROCESSOR_K6:
14030      return 2;
14031
14032    case PROCESSOR_PENTIUMPRO:
14033    case PROCESSOR_PENTIUM4:
14034    case PROCESSOR_ATHLON:
14035    case PROCESSOR_K8:
14036    case PROCESSOR_AMDFAM10:
14037    case PROCESSOR_NOCONA:
14038    case PROCESSOR_GENERIC32:
14039    case PROCESSOR_GENERIC64:
14040      return 3;
14041
14042    case PROCESSOR_CORE2:
14043      return 4;
14044
14045    default:
14046      return 1;
14047    }
14048}
14049
14050/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
14051   by DEP_INSN and nothing else set by DEP_INSN.  */
14052
14053static int
14054ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14055{
14056  rtx set, set2;
14057
14058  /* Simplify the test for uninteresting insns.  */
14059  if (insn_type != TYPE_SETCC
14060      && insn_type != TYPE_ICMOV
14061      && insn_type != TYPE_FCMOV
14062      && insn_type != TYPE_IBR)
14063    return 0;
14064
14065  if ((set = single_set (dep_insn)) != 0)
14066    {
14067      set = SET_DEST (set);
14068      set2 = NULL_RTX;
14069    }
14070  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
14071	   && XVECLEN (PATTERN (dep_insn), 0) == 2
14072	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
14073	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
14074    {
14075      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
14076      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
14077    }
14078  else
14079    return 0;
14080
14081  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
14082    return 0;
14083
14084  /* This test is true if the dependent insn reads the flags but
14085     not any other potentially set register.  */
14086  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
14087    return 0;
14088
14089  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
14090    return 0;
14091
14092  return 1;
14093}
14094
14095/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14096   address with operands set by DEP_INSN.  */
14097
14098static int
14099ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14100{
14101  rtx addr;
14102
14103  if (insn_type == TYPE_LEA
14104      && TARGET_PENTIUM)
14105    {
14106      addr = PATTERN (insn);
14107
14108      if (GET_CODE (addr) == PARALLEL)
14109	addr = XVECEXP (addr, 0, 0);
14110
14111      gcc_assert (GET_CODE (addr) == SET);
14112
14113      addr = SET_SRC (addr);
14114    }
14115  else
14116    {
14117      int i;
14118      extract_insn_cached (insn);
14119      for (i = recog_data.n_operands - 1; i >= 0; --i)
14120	if (GET_CODE (recog_data.operand[i]) == MEM)
14121	  {
14122	    addr = XEXP (recog_data.operand[i], 0);
14123	    goto found;
14124	  }
14125      return 0;
14126    found:;
14127    }
14128
14129  return modified_in_p (addr, dep_insn);
14130}
14131
14132static int
14133ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
14134{
14135  enum attr_type insn_type, dep_insn_type;
14136  enum attr_memory memory;
14137  rtx set, set2;
14138  int dep_insn_code_number;
14139
14140  /* Anti and output dependencies have zero cost on all CPUs.  */
14141  if (REG_NOTE_KIND (link) != 0)
14142    return 0;
14143
14144  dep_insn_code_number = recog_memoized (dep_insn);
14145
14146  /* If we can't recognize the insns, we can't really do anything.  */
14147  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
14148    return cost;
14149
14150  insn_type = get_attr_type (insn);
14151  dep_insn_type = get_attr_type (dep_insn);
14152
14153  switch (ix86_tune)
14154    {
14155    case PROCESSOR_PENTIUM:
14156      /* Address Generation Interlock adds a cycle of latency.  */
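      /* E.g. a load such as movl (%ebx), %eax issued in the cycle right
	 after an instruction that writes %ebx (say addl $4, %ebx) pays
	 this extra cycle on the original Pentium.  */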
14157      if (ix86_agi_dependent (insn, dep_insn, insn_type))
14158	cost += 1;
14159
14160      /* ??? Compares pair with jump/setcc.  */
14161      if (ix86_flags_dependent (insn, dep_insn, insn_type))
14162	cost = 0;
14163
14164      /* Floating point stores require value to be ready one cycle earlier.  */
14165      if (insn_type == TYPE_FMOV
14166	  && get_attr_memory (insn) == MEMORY_STORE
14167	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
14168	cost += 1;
14169      break;
14170
14171    case PROCESSOR_PENTIUMPRO:
14172      memory = get_attr_memory (insn);
14173
14174      /* INT->FP conversion is expensive.  */
14175      if (get_attr_fp_int_src (dep_insn))
14176	cost += 5;
14177
14178      /* There is one cycle extra latency between an FP op and a store.  */
14179      if (insn_type == TYPE_FMOV
14180	  && (set = single_set (dep_insn)) != NULL_RTX
14181	  && (set2 = single_set (insn)) != NULL_RTX
14182	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
14183	  && GET_CODE (SET_DEST (set2)) == MEM)
14184	cost += 1;
14185
14186      /* Show the ability of the reorder buffer to hide the latency of a load
14187	 by executing it in parallel with the previous instruction when the
14188	 previous instruction is not needed to compute the address.  */
14189      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14190	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
14191	{
14192	  /* Claim moves to take one cycle, as the core can issue one load
14193	     at a time and the next load can start a cycle later.  */
14194	  if (dep_insn_type == TYPE_IMOV
14195	      || dep_insn_type == TYPE_FMOV)
14196	    cost = 1;
14197	  else if (cost > 1)
14198	    cost--;
14199	}
14200      break;
14201
14202    case PROCESSOR_K6:
14203      memory = get_attr_memory (insn);
14204
14205      /* The esp dependency is resolved before the instruction is really
14206         finished.  */
14207      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14208	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14209	return 1;
14210
14211      /* INT->FP conversion is expensive.  */
14212      if (get_attr_fp_int_src (dep_insn))
14213	cost += 5;
14214
14215      /* Show the ability of the reorder buffer to hide the latency of a load
14216	 by executing it in parallel with the previous instruction when the
14217	 previous instruction is not needed to compute the address.  */
14218      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14219	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
14220	{
14221	  /* Claim moves to take one cycle, as the core can issue one load
14222	     at a time and the next load can start a cycle later.  */
14223	  if (dep_insn_type == TYPE_IMOV
14224	      || dep_insn_type == TYPE_FMOV)
14225	    cost = 1;
14226	  else if (cost > 2)
14227	    cost -= 2;
14228	  else
14229	    cost = 1;
14230	}
14231      break;
14232
14233    case PROCESSOR_ATHLON:
14234    case PROCESSOR_K8:
14235    case PROCESSOR_AMDFAM10:
14236    case PROCESSOR_GENERIC32:
14237    case PROCESSOR_GENERIC64:
14238      memory = get_attr_memory (insn);
14239
14240      /* Show the ability of the reorder buffer to hide the latency of a load
14241	 by executing it in parallel with the previous instruction when the
14242	 previous instruction is not needed to compute the address.  */
14243      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14244	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
14245	{
14246	  enum attr_unit unit = get_attr_unit (insn);
14247	  int loadcost = 3;
14248
14249	  /* Because of the difference between the length of integer and
14250	     floating unit pipeline preparation stages, the memory operands
14251	     for floating point are cheaper.
14252
14253	     ??? For Athlon the difference is most probably 2.  */
14254	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14255	    loadcost = 3;
14256	  else
14257	    loadcost = TARGET_ATHLON ? 2 : 0;
14258
14259	  if (cost >= loadcost)
14260	    cost -= loadcost;
14261	  else
14262	    cost = 0;
14263	}
14264
14265    default:
14266      break;
14267    }
14268
14269  return cost;
14270}
14271
14272/* How many alternative schedules to try.  This should be as wide as the
14273   scheduling freedom in the DFA, but no wider.  Making this value too
14274   large results in extra work for the scheduler.  */
14275
14276static int
14277ia32_multipass_dfa_lookahead (void)
14278{
14279  if (ix86_tune == PROCESSOR_PENTIUM)
14280    return 2;
14281
14282  if (ix86_tune == PROCESSOR_PENTIUMPRO
14283      || ix86_tune == PROCESSOR_K6)
14284    return 1;
14285
14286  else
14287    return 0;
14288}
14289
14290
14291/* Compute the alignment given to a constant that is being placed in memory.
14292   EXP is the constant and ALIGN is the alignment that the object would
14293   ordinarily have.
14294   The value of this function is used instead of that alignment to align
14295   the object.  */
14296
14297int
14298ix86_constant_alignment (tree exp, int align)
14299{
14300  if (TREE_CODE (exp) == REAL_CST)
14301    {
14302      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14303	return 64;
14304      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14305	return 128;
14306    }
14307  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14308      	   && !TARGET_NO_ALIGN_LONG_STRINGS
14309	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14310    return BITS_PER_WORD;
14311
14312  return align;
14313}
14314
14315/* Compute the alignment for a static variable.
14316   TYPE is the data type, and ALIGN is the alignment that
14317   the object would ordinarily have.  The value of this function is used
14318   instead of that alignment to align the object.  */
14319
14320int
14321ix86_data_alignment (tree type, int align)
14322{
14323  int max_align = optimize_size ? BITS_PER_WORD : 256;
14324
14325  if (AGGREGATE_TYPE_P (type)
14326      && TYPE_SIZE (type)
14327      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14328      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14329	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14330      && align < max_align)
14331    align = max_align;
14332
14333  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
14334     to a 16-byte boundary.  */
14335  if (TARGET_64BIT)
14336    {
14337      if (AGGREGATE_TYPE_P (type)
14338	   && TYPE_SIZE (type)
14339	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14340	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14341	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14342	return 128;
14343    }
14344
14345  if (TREE_CODE (type) == ARRAY_TYPE)
14346    {
14347      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14348	return 64;
14349      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14350	return 128;
14351    }
14352  else if (TREE_CODE (type) == COMPLEX_TYPE)
14353    {
14354
14355      if (TYPE_MODE (type) == DCmode && align < 64)
14356	return 64;
14357      if (TYPE_MODE (type) == XCmode && align < 128)
14358	return 128;
14359    }
14360  else if ((TREE_CODE (type) == RECORD_TYPE
14361	    || TREE_CODE (type) == UNION_TYPE
14362	    || TREE_CODE (type) == QUAL_UNION_TYPE)
14363	   && TYPE_FIELDS (type))
14364    {
14365      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14366	return 64;
14367      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14368	return 128;
14369    }
14370  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14371	   || TREE_CODE (type) == INTEGER_TYPE)
14372    {
14373      if (TYPE_MODE (type) == DFmode && align < 64)
14374	return 64;
14375      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14376	return 128;
14377    }
14378
14379  return align;
14380}
14381
14382/* Compute the alignment for a local variable.
14383   TYPE is the data type, and ALIGN is the alignment that
14384   the object would ordinarily have.  The value of this macro is used
14385   instead of that alignment to align the object.  */
14386
14387int
14388ix86_local_alignment (tree type, int align)
14389{
14390  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
14391     to a 16-byte boundary.  */
14392  if (TARGET_64BIT)
14393    {
14394      if (AGGREGATE_TYPE_P (type)
14395	   && TYPE_SIZE (type)
14396	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14397	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14398	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14399	return 128;
14400    }
14401  if (TREE_CODE (type) == ARRAY_TYPE)
14402    {
14403      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14404	return 64;
14405      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14406	return 128;
14407    }
14408  else if (TREE_CODE (type) == COMPLEX_TYPE)
14409    {
14410      if (TYPE_MODE (type) == DCmode && align < 64)
14411	return 64;
14412      if (TYPE_MODE (type) == XCmode && align < 128)
14413	return 128;
14414    }
14415  else if ((TREE_CODE (type) == RECORD_TYPE
14416	    || TREE_CODE (type) == UNION_TYPE
14417	    || TREE_CODE (type) == QUAL_UNION_TYPE)
14418	   && TYPE_FIELDS (type))
14419    {
14420      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14421	return 64;
14422      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14423	return 128;
14424    }
14425  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14426	   || TREE_CODE (type) == INTEGER_TYPE)
14427    {
14428
14429      if (TYPE_MODE (type) == DFmode && align < 64)
14430	return 64;
14431      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14432	return 128;
14433    }
14434  return align;
14435}
14436
14437/* Emit RTL insns to initialize the variable parts of a trampoline.
14438   FNADDR is an RTX for the address of the function's pure code.
14439   CXT is an RTX for the static chain value for the function.  */
14440void
14441x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14442{
14443  if (!TARGET_64BIT)
14444    {
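      /* The generated 32-bit trampoline is 10 bytes:
	     b9 <cxt>		movl $cxt, %ecx
	     e9 <disp>		jmp  fnaddr
	 where <disp> is relative to the end of the jmp, i.e. tramp + 10,
	 which is why DISP below is computed relative to tramp + 10.  */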
14445      /* Compute offset from the end of the jmp to the target function.  */
14446      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14447			       plus_constant (tramp, 10),
14448			       NULL_RTX, 1, OPTAB_DIRECT);
14449      emit_move_insn (gen_rtx_MEM (QImode, tramp),
14450		      gen_int_mode (0xb9, QImode));
14451      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14452      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14453		      gen_int_mode (0xe9, QImode));
14454      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14455    }
14456  else
14457    {
14458      int offset = 0;
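      /* The generated 64-bit trampoline is either
	     41 bb <fnaddr32>	movl   $fnaddr, %r11d
	 or
	     49 bb <fnaddr64>	movabs $fnaddr, %r11
	 followed by
	     49 ba <cxt64>	movabs $cxt, %r10
	     49 ff e3		jmpq   *%r11  */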
14459      /* Try to load the address using the shorter movl instead of movabs.
14460         We may want to support movq for kernel mode, but the kernel does not
14461         use trampolines at the moment.  */
14462      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14463	{
14464	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
14465	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14466			  gen_int_mode (0xbb41, HImode));
14467	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14468			  gen_lowpart (SImode, fnaddr));
14469	  offset += 6;
14470	}
14471      else
14472	{
14473	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14474			  gen_int_mode (0xbb49, HImode));
14475	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14476			  fnaddr);
14477	  offset += 10;
14478	}
14479      /* Load static chain using movabs to r10.  */
14480      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14481		      gen_int_mode (0xba49, HImode));
14482      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14483		      cxt);
14484      offset += 10;
14485      /* Jump to the r11 */
14486      /* Jump to r11.  */
14487		      gen_int_mode (0xff49, HImode));
14488      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14489		      gen_int_mode (0xe3, QImode));
14490      offset += 3;
14491      gcc_assert (offset <= TRAMPOLINE_SIZE);
14492    }
14493
14494#ifdef ENABLE_EXECUTE_STACK
14495  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14496		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14497#endif
14498}
14499
14500/* Codes for all the SSE/MMX builtins.  */
14501enum ix86_builtins
14502{
14503  IX86_BUILTIN_ADDPS,
14504  IX86_BUILTIN_ADDSS,
14505  IX86_BUILTIN_DIVPS,
14506  IX86_BUILTIN_DIVSS,
14507  IX86_BUILTIN_MULPS,
14508  IX86_BUILTIN_MULSS,
14509  IX86_BUILTIN_SUBPS,
14510  IX86_BUILTIN_SUBSS,
14511
14512  IX86_BUILTIN_CMPEQPS,
14513  IX86_BUILTIN_CMPLTPS,
14514  IX86_BUILTIN_CMPLEPS,
14515  IX86_BUILTIN_CMPGTPS,
14516  IX86_BUILTIN_CMPGEPS,
14517  IX86_BUILTIN_CMPNEQPS,
14518  IX86_BUILTIN_CMPNLTPS,
14519  IX86_BUILTIN_CMPNLEPS,
14520  IX86_BUILTIN_CMPNGTPS,
14521  IX86_BUILTIN_CMPNGEPS,
14522  IX86_BUILTIN_CMPORDPS,
14523  IX86_BUILTIN_CMPUNORDPS,
14524  IX86_BUILTIN_CMPEQSS,
14525  IX86_BUILTIN_CMPLTSS,
14526  IX86_BUILTIN_CMPLESS,
14527  IX86_BUILTIN_CMPNEQSS,
14528  IX86_BUILTIN_CMPNLTSS,
14529  IX86_BUILTIN_CMPNLESS,
14530  IX86_BUILTIN_CMPNGTSS,
14531  IX86_BUILTIN_CMPNGESS,
14532  IX86_BUILTIN_CMPORDSS,
14533  IX86_BUILTIN_CMPUNORDSS,
14534
14535  IX86_BUILTIN_COMIEQSS,
14536  IX86_BUILTIN_COMILTSS,
14537  IX86_BUILTIN_COMILESS,
14538  IX86_BUILTIN_COMIGTSS,
14539  IX86_BUILTIN_COMIGESS,
14540  IX86_BUILTIN_COMINEQSS,
14541  IX86_BUILTIN_UCOMIEQSS,
14542  IX86_BUILTIN_UCOMILTSS,
14543  IX86_BUILTIN_UCOMILESS,
14544  IX86_BUILTIN_UCOMIGTSS,
14545  IX86_BUILTIN_UCOMIGESS,
14546  IX86_BUILTIN_UCOMINEQSS,
14547
14548  IX86_BUILTIN_CVTPI2PS,
14549  IX86_BUILTIN_CVTPS2PI,
14550  IX86_BUILTIN_CVTSI2SS,
14551  IX86_BUILTIN_CVTSI642SS,
14552  IX86_BUILTIN_CVTSS2SI,
14553  IX86_BUILTIN_CVTSS2SI64,
14554  IX86_BUILTIN_CVTTPS2PI,
14555  IX86_BUILTIN_CVTTSS2SI,
14556  IX86_BUILTIN_CVTTSS2SI64,
14557
14558  IX86_BUILTIN_MAXPS,
14559  IX86_BUILTIN_MAXSS,
14560  IX86_BUILTIN_MINPS,
14561  IX86_BUILTIN_MINSS,
14562
14563  IX86_BUILTIN_LOADUPS,
14564  IX86_BUILTIN_STOREUPS,
14565  IX86_BUILTIN_MOVSS,
14566
14567  IX86_BUILTIN_MOVHLPS,
14568  IX86_BUILTIN_MOVLHPS,
14569  IX86_BUILTIN_LOADHPS,
14570  IX86_BUILTIN_LOADLPS,
14571  IX86_BUILTIN_STOREHPS,
14572  IX86_BUILTIN_STORELPS,
14573
14574  IX86_BUILTIN_MASKMOVQ,
14575  IX86_BUILTIN_MOVMSKPS,
14576  IX86_BUILTIN_PMOVMSKB,
14577
14578  IX86_BUILTIN_MOVNTPS,
14579  IX86_BUILTIN_MOVNTQ,
14580
14581  IX86_BUILTIN_LOADDQU,
14582  IX86_BUILTIN_STOREDQU,
14583
14584  IX86_BUILTIN_PACKSSWB,
14585  IX86_BUILTIN_PACKSSDW,
14586  IX86_BUILTIN_PACKUSWB,
14587
14588  IX86_BUILTIN_PADDB,
14589  IX86_BUILTIN_PADDW,
14590  IX86_BUILTIN_PADDD,
14591  IX86_BUILTIN_PADDQ,
14592  IX86_BUILTIN_PADDSB,
14593  IX86_BUILTIN_PADDSW,
14594  IX86_BUILTIN_PADDUSB,
14595  IX86_BUILTIN_PADDUSW,
14596  IX86_BUILTIN_PSUBB,
14597  IX86_BUILTIN_PSUBW,
14598  IX86_BUILTIN_PSUBD,
14599  IX86_BUILTIN_PSUBQ,
14600  IX86_BUILTIN_PSUBSB,
14601  IX86_BUILTIN_PSUBSW,
14602  IX86_BUILTIN_PSUBUSB,
14603  IX86_BUILTIN_PSUBUSW,
14604
14605  IX86_BUILTIN_PAND,
14606  IX86_BUILTIN_PANDN,
14607  IX86_BUILTIN_POR,
14608  IX86_BUILTIN_PXOR,
14609
14610  IX86_BUILTIN_PAVGB,
14611  IX86_BUILTIN_PAVGW,
14612
14613  IX86_BUILTIN_PCMPEQB,
14614  IX86_BUILTIN_PCMPEQW,
14615  IX86_BUILTIN_PCMPEQD,
14616  IX86_BUILTIN_PCMPGTB,
14617  IX86_BUILTIN_PCMPGTW,
14618  IX86_BUILTIN_PCMPGTD,
14619
14620  IX86_BUILTIN_PMADDWD,
14621
14622  IX86_BUILTIN_PMAXSW,
14623  IX86_BUILTIN_PMAXUB,
14624  IX86_BUILTIN_PMINSW,
14625  IX86_BUILTIN_PMINUB,
14626
14627  IX86_BUILTIN_PMULHUW,
14628  IX86_BUILTIN_PMULHW,
14629  IX86_BUILTIN_PMULLW,
14630
14631  IX86_BUILTIN_PSADBW,
14632  IX86_BUILTIN_PSHUFW,
14633
14634  IX86_BUILTIN_PSLLW,
14635  IX86_BUILTIN_PSLLD,
14636  IX86_BUILTIN_PSLLQ,
14637  IX86_BUILTIN_PSRAW,
14638  IX86_BUILTIN_PSRAD,
14639  IX86_BUILTIN_PSRLW,
14640  IX86_BUILTIN_PSRLD,
14641  IX86_BUILTIN_PSRLQ,
14642  IX86_BUILTIN_PSLLWI,
14643  IX86_BUILTIN_PSLLDI,
14644  IX86_BUILTIN_PSLLQI,
14645  IX86_BUILTIN_PSRAWI,
14646  IX86_BUILTIN_PSRADI,
14647  IX86_BUILTIN_PSRLWI,
14648  IX86_BUILTIN_PSRLDI,
14649  IX86_BUILTIN_PSRLQI,
14650
14651  IX86_BUILTIN_PUNPCKHBW,
14652  IX86_BUILTIN_PUNPCKHWD,
14653  IX86_BUILTIN_PUNPCKHDQ,
14654  IX86_BUILTIN_PUNPCKLBW,
14655  IX86_BUILTIN_PUNPCKLWD,
14656  IX86_BUILTIN_PUNPCKLDQ,
14657
14658  IX86_BUILTIN_SHUFPS,
14659
14660  IX86_BUILTIN_RCPPS,
14661  IX86_BUILTIN_RCPSS,
14662  IX86_BUILTIN_RSQRTPS,
14663  IX86_BUILTIN_RSQRTSS,
14664  IX86_BUILTIN_SQRTPS,
14665  IX86_BUILTIN_SQRTSS,
14666
14667  IX86_BUILTIN_UNPCKHPS,
14668  IX86_BUILTIN_UNPCKLPS,
14669
14670  IX86_BUILTIN_ANDPS,
14671  IX86_BUILTIN_ANDNPS,
14672  IX86_BUILTIN_ORPS,
14673  IX86_BUILTIN_XORPS,
14674
14675  IX86_BUILTIN_EMMS,
14676  IX86_BUILTIN_LDMXCSR,
14677  IX86_BUILTIN_STMXCSR,
14678  IX86_BUILTIN_SFENCE,
14679
14680  /* 3DNow! Original */
14681  IX86_BUILTIN_FEMMS,
14682  IX86_BUILTIN_PAVGUSB,
14683  IX86_BUILTIN_PF2ID,
14684  IX86_BUILTIN_PFACC,
14685  IX86_BUILTIN_PFADD,
14686  IX86_BUILTIN_PFCMPEQ,
14687  IX86_BUILTIN_PFCMPGE,
14688  IX86_BUILTIN_PFCMPGT,
14689  IX86_BUILTIN_PFMAX,
14690  IX86_BUILTIN_PFMIN,
14691  IX86_BUILTIN_PFMUL,
14692  IX86_BUILTIN_PFRCP,
14693  IX86_BUILTIN_PFRCPIT1,
14694  IX86_BUILTIN_PFRCPIT2,
14695  IX86_BUILTIN_PFRSQIT1,
14696  IX86_BUILTIN_PFRSQRT,
14697  IX86_BUILTIN_PFSUB,
14698  IX86_BUILTIN_PFSUBR,
14699  IX86_BUILTIN_PI2FD,
14700  IX86_BUILTIN_PMULHRW,
14701
14702  /* 3DNow! Athlon Extensions */
14703  IX86_BUILTIN_PF2IW,
14704  IX86_BUILTIN_PFNACC,
14705  IX86_BUILTIN_PFPNACC,
14706  IX86_BUILTIN_PI2FW,
14707  IX86_BUILTIN_PSWAPDSI,
14708  IX86_BUILTIN_PSWAPDSF,
14709
14710  /* SSE2 */
14711  IX86_BUILTIN_ADDPD,
14712  IX86_BUILTIN_ADDSD,
14713  IX86_BUILTIN_DIVPD,
14714  IX86_BUILTIN_DIVSD,
14715  IX86_BUILTIN_MULPD,
14716  IX86_BUILTIN_MULSD,
14717  IX86_BUILTIN_SUBPD,
14718  IX86_BUILTIN_SUBSD,
14719
14720  IX86_BUILTIN_CMPEQPD,
14721  IX86_BUILTIN_CMPLTPD,
14722  IX86_BUILTIN_CMPLEPD,
14723  IX86_BUILTIN_CMPGTPD,
14724  IX86_BUILTIN_CMPGEPD,
14725  IX86_BUILTIN_CMPNEQPD,
14726  IX86_BUILTIN_CMPNLTPD,
14727  IX86_BUILTIN_CMPNLEPD,
14728  IX86_BUILTIN_CMPNGTPD,
14729  IX86_BUILTIN_CMPNGEPD,
14730  IX86_BUILTIN_CMPORDPD,
14731  IX86_BUILTIN_CMPUNORDPD,
14732  IX86_BUILTIN_CMPNEPD,
14733  IX86_BUILTIN_CMPEQSD,
14734  IX86_BUILTIN_CMPLTSD,
14735  IX86_BUILTIN_CMPLESD,
14736  IX86_BUILTIN_CMPNEQSD,
14737  IX86_BUILTIN_CMPNLTSD,
14738  IX86_BUILTIN_CMPNLESD,
14739  IX86_BUILTIN_CMPORDSD,
14740  IX86_BUILTIN_CMPUNORDSD,
14741  IX86_BUILTIN_CMPNESD,
14742
14743  IX86_BUILTIN_COMIEQSD,
14744  IX86_BUILTIN_COMILTSD,
14745  IX86_BUILTIN_COMILESD,
14746  IX86_BUILTIN_COMIGTSD,
14747  IX86_BUILTIN_COMIGESD,
14748  IX86_BUILTIN_COMINEQSD,
14749  IX86_BUILTIN_UCOMIEQSD,
14750  IX86_BUILTIN_UCOMILTSD,
14751  IX86_BUILTIN_UCOMILESD,
14752  IX86_BUILTIN_UCOMIGTSD,
14753  IX86_BUILTIN_UCOMIGESD,
14754  IX86_BUILTIN_UCOMINEQSD,
14755
14756  IX86_BUILTIN_MAXPD,
14757  IX86_BUILTIN_MAXSD,
14758  IX86_BUILTIN_MINPD,
14759  IX86_BUILTIN_MINSD,
14760
14761  IX86_BUILTIN_ANDPD,
14762  IX86_BUILTIN_ANDNPD,
14763  IX86_BUILTIN_ORPD,
14764  IX86_BUILTIN_XORPD,
14765
14766  IX86_BUILTIN_SQRTPD,
14767  IX86_BUILTIN_SQRTSD,
14768
14769  IX86_BUILTIN_UNPCKHPD,
14770  IX86_BUILTIN_UNPCKLPD,
14771
14772  IX86_BUILTIN_SHUFPD,
14773
14774  IX86_BUILTIN_LOADUPD,
14775  IX86_BUILTIN_STOREUPD,
14776  IX86_BUILTIN_MOVSD,
14777
14778  IX86_BUILTIN_LOADHPD,
14779  IX86_BUILTIN_LOADLPD,
14780
14781  IX86_BUILTIN_CVTDQ2PD,
14782  IX86_BUILTIN_CVTDQ2PS,
14783
14784  IX86_BUILTIN_CVTPD2DQ,
14785  IX86_BUILTIN_CVTPD2PI,
14786  IX86_BUILTIN_CVTPD2PS,
14787  IX86_BUILTIN_CVTTPD2DQ,
14788  IX86_BUILTIN_CVTTPD2PI,
14789
14790  IX86_BUILTIN_CVTPI2PD,
14791  IX86_BUILTIN_CVTSI2SD,
14792  IX86_BUILTIN_CVTSI642SD,
14793
14794  IX86_BUILTIN_CVTSD2SI,
14795  IX86_BUILTIN_CVTSD2SI64,
14796  IX86_BUILTIN_CVTSD2SS,
14797  IX86_BUILTIN_CVTSS2SD,
14798  IX86_BUILTIN_CVTTSD2SI,
14799  IX86_BUILTIN_CVTTSD2SI64,
14800
14801  IX86_BUILTIN_CVTPS2DQ,
14802  IX86_BUILTIN_CVTPS2PD,
14803  IX86_BUILTIN_CVTTPS2DQ,
14804
14805  IX86_BUILTIN_MOVNTI,
14806  IX86_BUILTIN_MOVNTPD,
14807  IX86_BUILTIN_MOVNTDQ,
14808
14809  /* SSE2 MMX */
14810  IX86_BUILTIN_MASKMOVDQU,
14811  IX86_BUILTIN_MOVMSKPD,
14812  IX86_BUILTIN_PMOVMSKB128,
14813
14814  IX86_BUILTIN_PACKSSWB128,
14815  IX86_BUILTIN_PACKSSDW128,
14816  IX86_BUILTIN_PACKUSWB128,
14817
14818  IX86_BUILTIN_PADDB128,
14819  IX86_BUILTIN_PADDW128,
14820  IX86_BUILTIN_PADDD128,
14821  IX86_BUILTIN_PADDQ128,
14822  IX86_BUILTIN_PADDSB128,
14823  IX86_BUILTIN_PADDSW128,
14824  IX86_BUILTIN_PADDUSB128,
14825  IX86_BUILTIN_PADDUSW128,
14826  IX86_BUILTIN_PSUBB128,
14827  IX86_BUILTIN_PSUBW128,
14828  IX86_BUILTIN_PSUBD128,
14829  IX86_BUILTIN_PSUBQ128,
14830  IX86_BUILTIN_PSUBSB128,
14831  IX86_BUILTIN_PSUBSW128,
14832  IX86_BUILTIN_PSUBUSB128,
14833  IX86_BUILTIN_PSUBUSW128,
14834
14835  IX86_BUILTIN_PAND128,
14836  IX86_BUILTIN_PANDN128,
14837  IX86_BUILTIN_POR128,
14838  IX86_BUILTIN_PXOR128,
14839
14840  IX86_BUILTIN_PAVGB128,
14841  IX86_BUILTIN_PAVGW128,
14842
14843  IX86_BUILTIN_PCMPEQB128,
14844  IX86_BUILTIN_PCMPEQW128,
14845  IX86_BUILTIN_PCMPEQD128,
14846  IX86_BUILTIN_PCMPGTB128,
14847  IX86_BUILTIN_PCMPGTW128,
14848  IX86_BUILTIN_PCMPGTD128,
14849
14850  IX86_BUILTIN_PMADDWD128,
14851
14852  IX86_BUILTIN_PMAXSW128,
14853  IX86_BUILTIN_PMAXUB128,
14854  IX86_BUILTIN_PMINSW128,
14855  IX86_BUILTIN_PMINUB128,
14856
14857  IX86_BUILTIN_PMULUDQ,
14858  IX86_BUILTIN_PMULUDQ128,
14859  IX86_BUILTIN_PMULHUW128,
14860  IX86_BUILTIN_PMULHW128,
14861  IX86_BUILTIN_PMULLW128,
14862
14863  IX86_BUILTIN_PSADBW128,
14864  IX86_BUILTIN_PSHUFHW,
14865  IX86_BUILTIN_PSHUFLW,
14866  IX86_BUILTIN_PSHUFD,
14867
14868  IX86_BUILTIN_PSLLW128,
14869  IX86_BUILTIN_PSLLD128,
14870  IX86_BUILTIN_PSLLQ128,
14871  IX86_BUILTIN_PSRAW128,
14872  IX86_BUILTIN_PSRAD128,
14873  IX86_BUILTIN_PSRLW128,
14874  IX86_BUILTIN_PSRLD128,
14875  IX86_BUILTIN_PSRLQ128,
14876  IX86_BUILTIN_PSLLDQI128,
14877  IX86_BUILTIN_PSLLWI128,
14878  IX86_BUILTIN_PSLLDI128,
14879  IX86_BUILTIN_PSLLQI128,
14880  IX86_BUILTIN_PSRAWI128,
14881  IX86_BUILTIN_PSRADI128,
14882  IX86_BUILTIN_PSRLDQI128,
14883  IX86_BUILTIN_PSRLWI128,
14884  IX86_BUILTIN_PSRLDI128,
14885  IX86_BUILTIN_PSRLQI128,
14886
14887  IX86_BUILTIN_PUNPCKHBW128,
14888  IX86_BUILTIN_PUNPCKHWD128,
14889  IX86_BUILTIN_PUNPCKHDQ128,
14890  IX86_BUILTIN_PUNPCKHQDQ128,
14891  IX86_BUILTIN_PUNPCKLBW128,
14892  IX86_BUILTIN_PUNPCKLWD128,
14893  IX86_BUILTIN_PUNPCKLDQ128,
14894  IX86_BUILTIN_PUNPCKLQDQ128,
14895
14896  IX86_BUILTIN_CLFLUSH,
14897  IX86_BUILTIN_MFENCE,
14898  IX86_BUILTIN_LFENCE,
14899
14900  /* Prescott New Instructions.  */
14901  IX86_BUILTIN_ADDSUBPS,
14902  IX86_BUILTIN_HADDPS,
14903  IX86_BUILTIN_HSUBPS,
14904  IX86_BUILTIN_MOVSHDUP,
14905  IX86_BUILTIN_MOVSLDUP,
14906  IX86_BUILTIN_ADDSUBPD,
14907  IX86_BUILTIN_HADDPD,
14908  IX86_BUILTIN_HSUBPD,
14909  IX86_BUILTIN_LDDQU,
14910
14911  IX86_BUILTIN_MONITOR,
14912  IX86_BUILTIN_MWAIT,
14913
14914  /* SSSE3.  */
14915  IX86_BUILTIN_PHADDW,
14916  IX86_BUILTIN_PHADDD,
14917  IX86_BUILTIN_PHADDSW,
14918  IX86_BUILTIN_PHSUBW,
14919  IX86_BUILTIN_PHSUBD,
14920  IX86_BUILTIN_PHSUBSW,
14921  IX86_BUILTIN_PMADDUBSW,
14922  IX86_BUILTIN_PMULHRSW,
14923  IX86_BUILTIN_PSHUFB,
14924  IX86_BUILTIN_PSIGNB,
14925  IX86_BUILTIN_PSIGNW,
14926  IX86_BUILTIN_PSIGND,
14927  IX86_BUILTIN_PALIGNR,
14928  IX86_BUILTIN_PABSB,
14929  IX86_BUILTIN_PABSW,
14930  IX86_BUILTIN_PABSD,
14931
14932  IX86_BUILTIN_PHADDW128,
14933  IX86_BUILTIN_PHADDD128,
14934  IX86_BUILTIN_PHADDSW128,
14935  IX86_BUILTIN_PHSUBW128,
14936  IX86_BUILTIN_PHSUBD128,
14937  IX86_BUILTIN_PHSUBSW128,
14938  IX86_BUILTIN_PMADDUBSW128,
14939  IX86_BUILTIN_PMULHRSW128,
14940  IX86_BUILTIN_PSHUFB128,
14941  IX86_BUILTIN_PSIGNB128,
14942  IX86_BUILTIN_PSIGNW128,
14943  IX86_BUILTIN_PSIGND128,
14944  IX86_BUILTIN_PALIGNR128,
14945  IX86_BUILTIN_PABSB128,
14946  IX86_BUILTIN_PABSW128,
14947  IX86_BUILTIN_PABSD128,
14948
14949  /* AMDFAM10 - SSE4A New Instructions.  */
14950  IX86_BUILTIN_MOVNTSD,
14951  IX86_BUILTIN_MOVNTSS,
14952  IX86_BUILTIN_EXTRQI,
14953  IX86_BUILTIN_EXTRQ,
14954  IX86_BUILTIN_INSERTQI,
14955  IX86_BUILTIN_INSERTQ,
14956
14957  IX86_BUILTIN_VEC_INIT_V2SI,
14958  IX86_BUILTIN_VEC_INIT_V4HI,
14959  IX86_BUILTIN_VEC_INIT_V8QI,
14960  IX86_BUILTIN_VEC_EXT_V2DF,
14961  IX86_BUILTIN_VEC_EXT_V2DI,
14962  IX86_BUILTIN_VEC_EXT_V4SF,
14963  IX86_BUILTIN_VEC_EXT_V4SI,
14964  IX86_BUILTIN_VEC_EXT_V8HI,
14965  IX86_BUILTIN_VEC_EXT_V16QI,
14966  IX86_BUILTIN_VEC_EXT_V2SI,
14967  IX86_BUILTIN_VEC_EXT_V4HI,
14968  IX86_BUILTIN_VEC_SET_V8HI,
14969  IX86_BUILTIN_VEC_SET_V4HI,
14970
14971  IX86_BUILTIN_MAX
14972};
14973
14974#define def_builtin(MASK, NAME, TYPE, CODE)				\
14975do {									\
14976  if ((MASK) & target_flags						\
14977      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
14978    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
14979				 NULL, NULL_TREE);			\
14980} while (0)
14981
14982/* Bits for builtin_description.flag.  */
14983
14984/* Set when we don't support the comparison natively, and should swap the
14985   comparison operands in order to support it.  */
14986#define BUILTIN_DESC_SWAP_OPERANDS	1
14987
14988struct builtin_description
14989{
14990  const unsigned int mask;
14991  const enum insn_code icode;
14992  const char *const name;
14993  const enum ix86_builtins code;
14994  const enum rtx_code comparison;
14995  const unsigned int flag;
14996};
14997
14998static const struct builtin_description bdesc_comi[] =
14999{
15000  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
15001  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
15002  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
15003  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
15004  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
15005  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
15006  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
15007  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
15008  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
15009  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
15010  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
15011  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
15012  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
15013  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
15014  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
15015  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
15016  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
15017  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
15018  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
15019  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
15020  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
15021  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
15022  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
15023  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
15024};
15025
15026static const struct builtin_description bdesc_2arg[] =
15027{
15028  /* SSE */
15029  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
15030  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
15031  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
15032  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
15033  { MASK_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
15034  { MASK_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
15035  { MASK_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
15036  { MASK_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
15037
15038  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
15039  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
15040  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
15041  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
15042    BUILTIN_DESC_SWAP_OPERANDS },
15043  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
15044    BUILTIN_DESC_SWAP_OPERANDS },
15045  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
15046  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
15047  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
15048  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
15049  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
15050    BUILTIN_DESC_SWAP_OPERANDS },
15051  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
15052    BUILTIN_DESC_SWAP_OPERANDS },
15053  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
15054  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
15055  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
15056  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
15057  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
15058  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
15059  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
15060  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
15061  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
15062    BUILTIN_DESC_SWAP_OPERANDS },
15063  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
15064    BUILTIN_DESC_SWAP_OPERANDS },
15065  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
15066
15067  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
15068  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
15069  { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
15070  { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
15071
15072  { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
15073  { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
15074  { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
15075  { MASK_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
15076
15077  { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
15078  { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
15079  { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
15080  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
15081  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
15082
15083  /* MMX */
15084  { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
15085  { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
15086  { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
15087  { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
15088  { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
15089  { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
15090  { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
15091  { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
15092
15093  { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
15094  { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
15095  { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
15096  { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
15097  { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
15098  { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
15099  { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
15100  { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
15101
15102  { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
15103  { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
15104  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
15105
15106  { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
15107  { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
15108  { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
15109  { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
15110
15111  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
15112  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
15113
15114  { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
15115  { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
15116  { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
15117  { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
15118  { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
15119  { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
15120
15121  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
15122  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
15123  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
15124  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
15125
15126  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
15127  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
15128  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
15129  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
15130  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
15131  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
15132
15133  /* Special.  */
15134  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
15135  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
15136  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
15137
15138  { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
15139  { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
15140  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
15141
15142  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
15143  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
15144  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
15145  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
15146  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
15147  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
15148
15149  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
15150  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
15151  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
15152  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
15153  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
15154  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
15155
15156  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
15157  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
15158  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
15159  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
15160
15161  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
15162  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
15163
15164  /* SSE2 */
15165  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
15166  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
15167  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
15168  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
15169  { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
15170  { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
15171  { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
15172  { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
15173
15174  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
15175  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
15176  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
15177  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
15178    BUILTIN_DESC_SWAP_OPERANDS },
15179  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
15180    BUILTIN_DESC_SWAP_OPERANDS },
15181  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
15182  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
15183  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
15184  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
15185  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
15186    BUILTIN_DESC_SWAP_OPERANDS },
15187  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
15188    BUILTIN_DESC_SWAP_OPERANDS },
15189  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
15190  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
15191  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
15192  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
15193  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
15194  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
15195  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
15196  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
15197  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
15198
15199  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
15200  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
15201  { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
15202  { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
15203
15204  { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
15205  { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
15206  { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
15207  { MASK_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
15208
15209  { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15210  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15211  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15212
15213  /* SSE2 MMX */
15214  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15215  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15216  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15217  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15218  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15219  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15220  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15221  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15222
15223  { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15224  { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15225  { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15226  { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15227  { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15228  { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15229  { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15230  { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15231
15232  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15233  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15234
15235  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15236  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15237  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15238  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15239
15240  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15241  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15242
15243  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15244  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15245  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15246  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15247  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15248  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15249
15250  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15251  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15252  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15253  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15254
15255  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15256  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15257  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15258  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15259  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15260  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15261  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15262  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15263
15264  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15265  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15266  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15267
15268  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15269  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15270
15271  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15272  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15273
15274  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15275  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15276  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15277
15278  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15279  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15280  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15281
15282  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15283  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15284
15285  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15286
15287  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15288  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15289  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15290  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15291
15292  /* SSE3 */
15293  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15294  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15295  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15296  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15297  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15298  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15299
15300  /* SSSE3 */
15301  { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15302  { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15303  { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15304  { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15305  { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15306  { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15307  { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15308  { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15309  { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15310  { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15311  { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15312  { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15313  { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15314  { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15315  { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15316  { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15317  { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15318  { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15319  { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15320  { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15321  { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15322  { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15323  { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15324  { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15325};
15326
15327static const struct builtin_description bdesc_1arg[] =
15328{
15329  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15330  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15331
15332  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15333  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15334  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15335
15336  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15337  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15338  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15339  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15340  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15341  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15342
15343  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15344  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15345
15346  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15347
15348  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15349  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15350
15351  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15352  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15353  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15354  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15355  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15356
15357  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15358
15359  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15360  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15361  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15362  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15363
15364  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15365  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15366  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15367
15368  /* SSE3 */
15369  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15370  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15371
15372  /* SSSE3 */
15373  { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15374  { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15375  { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15376  { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15377  { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15378  { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15379};
15380
15381static void
15382ix86_init_builtins (void)
15383{
15384  if (TARGET_MMX)
15385    ix86_init_mmx_sse_builtins ();
15386}
15387
15388/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
15389   is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
15390   builtins are defined.  */
15391static void
15392ix86_init_mmx_sse_builtins (void)
15393{
15394  const struct builtin_description * d;
15395  size_t i;
15396
15397  tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
15398  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15399  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15400  tree V2DI_type_node
15401    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15402  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15403  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15404  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15405  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15406  tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
15407  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15408
15409  tree pchar_type_node = build_pointer_type (char_type_node);
15410  tree pcchar_type_node = build_pointer_type (
15411			     build_type_variant (char_type_node, 1, 0));
15412  tree pfloat_type_node = build_pointer_type (float_type_node);
15413  tree pcfloat_type_node = build_pointer_type (
15414			     build_type_variant (float_type_node, 1, 0));
15415  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15416  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15417  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15418
15419  /* Comparisons.  */
15420  tree int_ftype_v4sf_v4sf
15421    = build_function_type_list (integer_type_node,
15422				V4SF_type_node, V4SF_type_node, NULL_TREE);
15423  tree v4si_ftype_v4sf_v4sf
15424    = build_function_type_list (V4SI_type_node,
15425				V4SF_type_node, V4SF_type_node, NULL_TREE);
15426  /* MMX/SSE/integer conversions.  */
15427  tree int_ftype_v4sf
15428    = build_function_type_list (integer_type_node,
15429				V4SF_type_node, NULL_TREE);
15430  tree int64_ftype_v4sf
15431    = build_function_type_list (long_long_integer_type_node,
15432				V4SF_type_node, NULL_TREE);
15433  tree int_ftype_v8qi
15434    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15435  tree v4sf_ftype_v4sf_int
15436    = build_function_type_list (V4SF_type_node,
15437				V4SF_type_node, integer_type_node, NULL_TREE);
15438  tree v4sf_ftype_v4sf_int64
15439    = build_function_type_list (V4SF_type_node,
15440				V4SF_type_node, long_long_integer_type_node,
15441				NULL_TREE);
15442  tree v4sf_ftype_v4sf_v2si
15443    = build_function_type_list (V4SF_type_node,
15444				V4SF_type_node, V2SI_type_node, NULL_TREE);
15445
15446  /* Miscellaneous.  */
15447  tree v8qi_ftype_v4hi_v4hi
15448    = build_function_type_list (V8QI_type_node,
15449				V4HI_type_node, V4HI_type_node, NULL_TREE);
15450  tree v4hi_ftype_v2si_v2si
15451    = build_function_type_list (V4HI_type_node,
15452				V2SI_type_node, V2SI_type_node, NULL_TREE);
15453  tree v4sf_ftype_v4sf_v4sf_int
15454    = build_function_type_list (V4SF_type_node,
15455				V4SF_type_node, V4SF_type_node,
15456				integer_type_node, NULL_TREE);
15457  tree v2si_ftype_v4hi_v4hi
15458    = build_function_type_list (V2SI_type_node,
15459				V4HI_type_node, V4HI_type_node, NULL_TREE);
15460  tree v4hi_ftype_v4hi_int
15461    = build_function_type_list (V4HI_type_node,
15462				V4HI_type_node, integer_type_node, NULL_TREE);
15463  tree v4hi_ftype_v4hi_di
15464    = build_function_type_list (V4HI_type_node,
15465				V4HI_type_node, long_long_unsigned_type_node,
15466				NULL_TREE);
15467  tree v2si_ftype_v2si_di
15468    = build_function_type_list (V2SI_type_node,
15469				V2SI_type_node, long_long_unsigned_type_node,
15470				NULL_TREE);
15471  tree void_ftype_void
15472    = build_function_type (void_type_node, void_list_node);
15473  tree void_ftype_unsigned
15474    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15475  tree void_ftype_unsigned_unsigned
15476    = build_function_type_list (void_type_node, unsigned_type_node,
15477				unsigned_type_node, NULL_TREE);
15478  tree void_ftype_pcvoid_unsigned_unsigned
15479    = build_function_type_list (void_type_node, const_ptr_type_node,
15480				unsigned_type_node, unsigned_type_node,
15481				NULL_TREE);
15482  tree unsigned_ftype_void
15483    = build_function_type (unsigned_type_node, void_list_node);
15484  tree v2si_ftype_v4sf
15485    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15486  /* Loads/stores.  */
15487  tree void_ftype_v8qi_v8qi_pchar
15488    = build_function_type_list (void_type_node,
15489				V8QI_type_node, V8QI_type_node,
15490				pchar_type_node, NULL_TREE);
15491  tree v4sf_ftype_pcfloat
15492    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15493  /* @@@ the type is bogus */
15494  tree v4sf_ftype_v4sf_pv2si
15495    = build_function_type_list (V4SF_type_node,
15496				V4SF_type_node, pv2si_type_node, NULL_TREE);
15497  tree void_ftype_pv2si_v4sf
15498    = build_function_type_list (void_type_node,
15499				pv2si_type_node, V4SF_type_node, NULL_TREE);
15500  tree void_ftype_pfloat_v4sf
15501    = build_function_type_list (void_type_node,
15502				pfloat_type_node, V4SF_type_node, NULL_TREE);
15503  tree void_ftype_pdi_di
15504    = build_function_type_list (void_type_node,
15505				pdi_type_node, long_long_unsigned_type_node,
15506				NULL_TREE);
15507  tree void_ftype_pv2di_v2di
15508    = build_function_type_list (void_type_node,
15509				pv2di_type_node, V2DI_type_node, NULL_TREE);
15510  /* Normal vector unops.  */
15511  tree v4sf_ftype_v4sf
15512    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15513  tree v16qi_ftype_v16qi
15514    = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15515  tree v8hi_ftype_v8hi
15516    = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15517  tree v4si_ftype_v4si
15518    = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15519  tree v8qi_ftype_v8qi
15520    = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15521  tree v4hi_ftype_v4hi
15522    = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15523
15524  /* Normal vector binops.  */
15525  tree v4sf_ftype_v4sf_v4sf
15526    = build_function_type_list (V4SF_type_node,
15527				V4SF_type_node, V4SF_type_node, NULL_TREE);
15528  tree v8qi_ftype_v8qi_v8qi
15529    = build_function_type_list (V8QI_type_node,
15530				V8QI_type_node, V8QI_type_node, NULL_TREE);
15531  tree v4hi_ftype_v4hi_v4hi
15532    = build_function_type_list (V4HI_type_node,
15533				V4HI_type_node, V4HI_type_node, NULL_TREE);
15534  tree v2si_ftype_v2si_v2si
15535    = build_function_type_list (V2SI_type_node,
15536				V2SI_type_node, V2SI_type_node, NULL_TREE);
15537  tree di_ftype_di_di
15538    = build_function_type_list (long_long_unsigned_type_node,
15539				long_long_unsigned_type_node,
15540				long_long_unsigned_type_node, NULL_TREE);
15541
15542  tree di_ftype_di_di_int
15543    = build_function_type_list (long_long_unsigned_type_node,
15544				long_long_unsigned_type_node,
15545				long_long_unsigned_type_node,
15546				integer_type_node, NULL_TREE);
15547
15548  tree v2si_ftype_v2sf
15549    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15550  tree v2sf_ftype_v2si
15551    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15552  tree v2si_ftype_v2si
15553    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15554  tree v2sf_ftype_v2sf
15555    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15556  tree v2sf_ftype_v2sf_v2sf
15557    = build_function_type_list (V2SF_type_node,
15558				V2SF_type_node, V2SF_type_node, NULL_TREE);
15559  tree v2si_ftype_v2sf_v2sf
15560    = build_function_type_list (V2SI_type_node,
15561				V2SF_type_node, V2SF_type_node, NULL_TREE);
15562  tree pint_type_node    = build_pointer_type (integer_type_node);
15563  tree pdouble_type_node = build_pointer_type (double_type_node);
15564  tree pcdouble_type_node = build_pointer_type (
15565				build_type_variant (double_type_node, 1, 0));
15566  tree int_ftype_v2df_v2df
15567    = build_function_type_list (integer_type_node,
15568				V2DF_type_node, V2DF_type_node, NULL_TREE);
15569
15570  tree void_ftype_pcvoid
15571    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15572  tree v4sf_ftype_v4si
15573    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15574  tree v4si_ftype_v4sf
15575    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15576  tree v2df_ftype_v4si
15577    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15578  tree v4si_ftype_v2df
15579    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15580  tree v2si_ftype_v2df
15581    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15582  tree v4sf_ftype_v2df
15583    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15584  tree v2df_ftype_v2si
15585    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15586  tree v2df_ftype_v4sf
15587    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15588  tree int_ftype_v2df
15589    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15590  tree int64_ftype_v2df
15591    = build_function_type_list (long_long_integer_type_node,
15592				V2DF_type_node, NULL_TREE);
15593  tree v2df_ftype_v2df_int
15594    = build_function_type_list (V2DF_type_node,
15595				V2DF_type_node, integer_type_node, NULL_TREE);
15596  tree v2df_ftype_v2df_int64
15597    = build_function_type_list (V2DF_type_node,
15598				V2DF_type_node, long_long_integer_type_node,
15599				NULL_TREE);
15600  tree v4sf_ftype_v4sf_v2df
15601    = build_function_type_list (V4SF_type_node,
15602				V4SF_type_node, V2DF_type_node, NULL_TREE);
15603  tree v2df_ftype_v2df_v4sf
15604    = build_function_type_list (V2DF_type_node,
15605				V2DF_type_node, V4SF_type_node, NULL_TREE);
15606  tree v2df_ftype_v2df_v2df_int
15607    = build_function_type_list (V2DF_type_node,
15608				V2DF_type_node, V2DF_type_node,
15609				integer_type_node,
15610				NULL_TREE);
15611  tree v2df_ftype_v2df_pcdouble
15612    = build_function_type_list (V2DF_type_node,
15613				V2DF_type_node, pcdouble_type_node, NULL_TREE);
15614  tree void_ftype_pdouble_v2df
15615    = build_function_type_list (void_type_node,
15616				pdouble_type_node, V2DF_type_node, NULL_TREE);
15617  tree void_ftype_pint_int
15618    = build_function_type_list (void_type_node,
15619				pint_type_node, integer_type_node, NULL_TREE);
15620  tree void_ftype_v16qi_v16qi_pchar
15621    = build_function_type_list (void_type_node,
15622				V16QI_type_node, V16QI_type_node,
15623				pchar_type_node, NULL_TREE);
15624  tree v2df_ftype_pcdouble
15625    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15626  tree v2df_ftype_v2df_v2df
15627    = build_function_type_list (V2DF_type_node,
15628				V2DF_type_node, V2DF_type_node, NULL_TREE);
15629  tree v16qi_ftype_v16qi_v16qi
15630    = build_function_type_list (V16QI_type_node,
15631				V16QI_type_node, V16QI_type_node, NULL_TREE);
15632  tree v8hi_ftype_v8hi_v8hi
15633    = build_function_type_list (V8HI_type_node,
15634				V8HI_type_node, V8HI_type_node, NULL_TREE);
15635  tree v4si_ftype_v4si_v4si
15636    = build_function_type_list (V4SI_type_node,
15637				V4SI_type_node, V4SI_type_node, NULL_TREE);
15638  tree v2di_ftype_v2di_v2di
15639    = build_function_type_list (V2DI_type_node,
15640				V2DI_type_node, V2DI_type_node, NULL_TREE);
15641  tree v2di_ftype_v2df_v2df
15642    = build_function_type_list (V2DI_type_node,
15643				V2DF_type_node, V2DF_type_node, NULL_TREE);
15644  tree v2df_ftype_v2df
15645    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15646  tree v2di_ftype_v2di_int
15647    = build_function_type_list (V2DI_type_node,
15648				V2DI_type_node, integer_type_node, NULL_TREE);
15649  tree v2di_ftype_v2di_v2di_int
15650    = build_function_type_list (V2DI_type_node, V2DI_type_node,
15651				V2DI_type_node, integer_type_node, NULL_TREE);
15652  tree v4si_ftype_v4si_int
15653    = build_function_type_list (V4SI_type_node,
15654				V4SI_type_node, integer_type_node, NULL_TREE);
15655  tree v8hi_ftype_v8hi_int
15656    = build_function_type_list (V8HI_type_node,
15657				V8HI_type_node, integer_type_node, NULL_TREE);
15658  tree v4si_ftype_v8hi_v8hi
15659    = build_function_type_list (V4SI_type_node,
15660				V8HI_type_node, V8HI_type_node, NULL_TREE);
15661  tree di_ftype_v8qi_v8qi
15662    = build_function_type_list (long_long_unsigned_type_node,
15663				V8QI_type_node, V8QI_type_node, NULL_TREE);
15664  tree di_ftype_v2si_v2si
15665    = build_function_type_list (long_long_unsigned_type_node,
15666				V2SI_type_node, V2SI_type_node, NULL_TREE);
15667  tree v2di_ftype_v16qi_v16qi
15668    = build_function_type_list (V2DI_type_node,
15669				V16QI_type_node, V16QI_type_node, NULL_TREE);
15670  tree v2di_ftype_v4si_v4si
15671    = build_function_type_list (V2DI_type_node,
15672				V4SI_type_node, V4SI_type_node, NULL_TREE);
15673  tree int_ftype_v16qi
15674    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15675  tree v16qi_ftype_pcchar
15676    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15677  tree void_ftype_pchar_v16qi
15678    = build_function_type_list (void_type_node,
15679			        pchar_type_node, V16QI_type_node, NULL_TREE);
15680
15681  tree v2di_ftype_v2di_unsigned_unsigned
15682    = build_function_type_list (V2DI_type_node, V2DI_type_node,
15683                                unsigned_type_node, unsigned_type_node,
15684                                NULL_TREE);
15685  tree v2di_ftype_v2di_v2di_unsigned_unsigned
15686    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
15687                                unsigned_type_node, unsigned_type_node,
15688                                NULL_TREE);
15689  tree v2di_ftype_v2di_v16qi
15690    = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
15691                                NULL_TREE);
15692
15693  tree float80_type;
15694  tree float128_type;
15695  tree ftype;
15696
15697  /* The __float80 type.  */
15698  if (TYPE_MODE (long_double_type_node) == XFmode)
15699    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15700					       "__float80");
15701  else
15702    {
15703      /* The __float80 type.  */
15704      float80_type = make_node (REAL_TYPE);
15705      TYPE_PRECISION (float80_type) = 80;
15706      layout_type (float80_type);
15707      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15708    }
15709
15710  if (TARGET_64BIT)
15711    {
15712      float128_type = make_node (REAL_TYPE);
15713      TYPE_PRECISION (float128_type) = 128;
15714      layout_type (float128_type);
15715      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15716    }
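
/* Once registered, the type names above are meant to be usable directly
   in user code on the matching targets, e.g. (illustrative only)

     __float80 x = 1.0;

   with __float128 available only when compiling for 64-bit.  */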
15717
15718  /* Add all builtins that are more or less simple operations on two
15719     operands.  */
15720  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15721    {
15722      /* Take the type from an input operand rather than from the result;
15723	 a mask-generating compare can produce its result in a different mode.  */
15724      enum machine_mode mode;
15725      tree type;
15726
15727      if (d->name == 0)
15728	continue;
15729      mode = insn_data[d->icode].operand[1].mode;
15730
15731      switch (mode)
15732	{
15733	case V16QImode:
15734	  type = v16qi_ftype_v16qi_v16qi;
15735	  break;
15736	case V8HImode:
15737	  type = v8hi_ftype_v8hi_v8hi;
15738	  break;
15739	case V4SImode:
15740	  type = v4si_ftype_v4si_v4si;
15741	  break;
15742	case V2DImode:
15743	  type = v2di_ftype_v2di_v2di;
15744	  break;
15745	case V2DFmode:
15746	  type = v2df_ftype_v2df_v2df;
15747	  break;
15748	case V4SFmode:
15749	  type = v4sf_ftype_v4sf_v4sf;
15750	  break;
15751	case V8QImode:
15752	  type = v8qi_ftype_v8qi_v8qi;
15753	  break;
15754	case V4HImode:
15755	  type = v4hi_ftype_v4hi_v4hi;
15756	  break;
15757	case V2SImode:
15758	  type = v2si_ftype_v2si_v2si;
15759	  break;
15760	case DImode:
15761	  type = di_ftype_di_di;
15762	  break;
15763
15764	default:
15765	  gcc_unreachable ();
15766	}
15767
15768      /* Override for comparisons.  */
15769      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15770	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15771	type = v4si_ftype_v4sf_v4sf;
15772
15773      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15774	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15775	type = v2di_ftype_v2df_v2df;
15776
15777      def_builtin (d->mask, d->name, type, d->code);
15778    }
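
/* Entries whose name field is null (e.g. the shift-count forms above) are
   skipped by this loop; they are registered explicitly with def_builtin
   further down, where their less regular argument types are spelled out.  */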
15779
15780  /* Add all builtins that are more or less simple operations on one operand.  */
15781  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15782    {
15783      enum machine_mode mode;
15784      tree type;
15785
15786      if (d->name == 0)
15787	continue;
15788      mode = insn_data[d->icode].operand[1].mode;
15789
15790      switch (mode)
15791	{
15792	case V16QImode:
15793	  type = v16qi_ftype_v16qi;
15794	  break;
15795	case V8HImode:
15796	  type = v8hi_ftype_v8hi;
15797	  break;
15798	case V4SImode:
15799	  type = v4si_ftype_v4si;
15800	  break;
15801	case V2DFmode:
15802	  type = v2df_ftype_v2df;
15803	  break;
15804	case V4SFmode:
15805	  type = v4sf_ftype_v4sf;
15806	  break;
15807	case V8QImode:
15808	  type = v8qi_ftype_v8qi;
15809	  break;
15810	case V4HImode:
15811	  type = v4hi_ftype_v4hi;
15812	  break;
15813	case V2SImode:
15814	  type = v2si_ftype_v2si;
15815	  break;
15816
15817	default:
15818	  abort ();
15819	  gcc_unreachable ();
15820
15821      def_builtin (d->mask, d->name, type, d->code);
15822    }
15823
15824  /* Add the remaining MMX insns with somewhat more complicated types.  */
15825  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15826  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15827  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15828  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15829
15830  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15831  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15832  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15833
15834  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15835  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15836
15837  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15838  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15839
15840  /* comi/ucomi insns.  */
15841  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15842    if (d->mask == MASK_SSE2)
15843      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15844    else
15845      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
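
/* The test on d->mask works because each bdesc_comi entry uses exactly one
   of MASK_SSE or MASK_SSE2: the SSE2 comparisons take V2DF operands, the
   SSE ones V4SF.  */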
15846
15847  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15848  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15849  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15850
15851  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15852  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15853  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15854  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15855  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15856  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15857  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15858  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15859  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15860  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15861  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15862
15863  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15864
15865  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15866  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15867
15868  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15869  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15870  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15871  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15872
15873  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15874  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15875  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15876  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15877
15878  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15879
15880  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15881
15882  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15883  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15884  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15885  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15886  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15887  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15888
15889  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15890
15891  /* Original 3DNow!  */
15892  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15893  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15894  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15895  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15896  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15897  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15898  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15899  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15900  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15901  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15902  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15903  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15904  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15905  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15906  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15907  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15908  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15909  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15910  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15911  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15912
15913  /* 3DNow! extension as used in the Athlon CPU.  */
15914  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15915  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15916  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15917  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15918  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15919  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15920
15921  /* SSE2 */
15922  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15923
15924  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15925  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15926
15927  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15928  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15929
15930  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15931  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15932  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15933  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15934  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15935
15936  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15937  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15938  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15939  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15940
15941  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15942  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15943
15944  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15945
15946  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15947  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15948
15949  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15950  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15951  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15952  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15953  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15954
15955  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15956
15957  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15958  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15959  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15960  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15961
15962  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15963  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15964  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15965
15966  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15967  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15968  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15969  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15970
15971  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15972  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15973  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15974
15975  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15976  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15977
15978  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15979  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15980
15981  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
15982  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
15983  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15984
15985  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
15986  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
15987  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15988
15989  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
15990  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
15991
15992  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15993  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15994  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15995  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15996
15997  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15998  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15999  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
16000  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
16001
16002  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
16003  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
16004
16005  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
16006
16007  /* Prescott New Instructions.  */
16008  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
16009	       void_ftype_pcvoid_unsigned_unsigned,
16010	       IX86_BUILTIN_MONITOR);
16011  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
16012	       void_ftype_unsigned_unsigned,
16013	       IX86_BUILTIN_MWAIT);
16014  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
16015	       v4sf_ftype_v4sf,
16016	       IX86_BUILTIN_MOVSHDUP);
16017  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
16018	       v4sf_ftype_v4sf,
16019	       IX86_BUILTIN_MOVSLDUP);
16020  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
16021	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
16022
16023  /* SSSE3.  */
16024  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
16025	       v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
16026  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
16027	       IX86_BUILTIN_PALIGNR);
16028
16029  /* AMDFAM10 SSE4A new built-ins.  */
16030  def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
16031               void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
16032  def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
16033               void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
16034  def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
16035               v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
16036  def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
16037               v2di_ftype_v2di_v16qi,  IX86_BUILTIN_EXTRQ);
16038  def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
16039               v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
16040  def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
16041               v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
16042
16043  /* Access to the vec_init patterns.  */
16044  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
16045				    integer_type_node, NULL_TREE);
16046  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
16047	       ftype, IX86_BUILTIN_VEC_INIT_V2SI);
16048
16049  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
16050				    short_integer_type_node,
16051				    short_integer_type_node,
16052				    short_integer_type_node, NULL_TREE);
16053  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
16054	       ftype, IX86_BUILTIN_VEC_INIT_V4HI);
16055
16056  ftype = build_function_type_list (V8QI_type_node, char_type_node,
16057				    char_type_node, char_type_node,
16058				    char_type_node, char_type_node,
16059				    char_type_node, char_type_node,
16060				    char_type_node, NULL_TREE);
16061  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
16062	       ftype, IX86_BUILTIN_VEC_INIT_V8QI);
16063
16064  /* Access to the vec_extract patterns.  */
16065  ftype = build_function_type_list (double_type_node, V2DF_type_node,
16066				    integer_type_node, NULL_TREE);
16067  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
16068	       ftype, IX86_BUILTIN_VEC_EXT_V2DF);
16069
16070  ftype = build_function_type_list (long_long_integer_type_node,
16071				    V2DI_type_node, integer_type_node,
16072				    NULL_TREE);
16073  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
16074	       ftype, IX86_BUILTIN_VEC_EXT_V2DI);
16075
16076  ftype = build_function_type_list (float_type_node, V4SF_type_node,
16077				    integer_type_node, NULL_TREE);
16078  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
16079	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);
16080
16081  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16082				    integer_type_node, NULL_TREE);
16083  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
16084	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);
16085
16086  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16087				    integer_type_node, NULL_TREE);
16088  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
16089	       ftype, IX86_BUILTIN_VEC_EXT_V8HI);
16090
16091  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
16092				    integer_type_node, NULL_TREE);
16093  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
16094	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);
16095
16096  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
16097				    integer_type_node, NULL_TREE);
16098  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
16099	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);
16100
16101  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
16102				    integer_type_node, NULL_TREE);
16103  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
16104
16105  /* Access to the vec_set patterns.  */
16106  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16107				    intHI_type_node,
16108				    integer_type_node, NULL_TREE);
16109  def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
16110	       ftype, IX86_BUILTIN_VEC_SET_V8HI);
16111
16112  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
16113				    intHI_type_node,
16114				    integer_type_node, NULL_TREE);
16115  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
16116	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
16117}
16118
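/* For illustration: each def_builtin call above merely records an ISA
   mask, a public name, a function type and an IX86_BUILTIN_* code; the
   intrinsic headers forward to those names.  A hypothetical wrapper of
   the following shape (the function name is made up; the builtin is the
   one registered above for IX86_BUILTIN_PSHUFD) would end up in
   ix86_expand_builtin and from there in the table-driven expanders:

     static __inline __m128i
     my_shuffle_epi32 (__m128i __A, const int __N)
     {
       return (__m128i) __builtin_ia32_pshufd ((__v4si) __A, __N);
     }
*/
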
16119/* Errors in the source file can cause expand_expr to return const0_rtx
16120   where we expect a vector.  To avoid crashing, use one of the vector
16121   clear instructions.  */
16122static rtx
16123safe_vector_operand (rtx x, enum machine_mode mode)
16124{
16125  if (x == const0_rtx)
16126    x = CONST0_RTX (mode);
16127  return x;
16128}
16129
16130/* Subroutine of ix86_expand_builtin to take care of binop insns.  */
16131
16132static rtx
16133ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
16134{
16135  rtx pat, xops[3];
16136  tree arg0 = TREE_VALUE (arglist);
16137  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16138  rtx op0 = expand_normal (arg0);
16139  rtx op1 = expand_normal (arg1);
16140  enum machine_mode tmode = insn_data[icode].operand[0].mode;
16141  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16142  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16143
16144  if (VECTOR_MODE_P (mode0))
16145    op0 = safe_vector_operand (op0, mode0);
16146  if (VECTOR_MODE_P (mode1))
16147    op1 = safe_vector_operand (op1, mode1);
16148
16149  if (optimize || !target
16150      || GET_MODE (target) != tmode
16151      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16152    target = gen_reg_rtx (tmode);
16153
16154  if (GET_MODE (op1) == SImode && mode1 == TImode)
16155    {
16156      rtx x = gen_reg_rtx (V4SImode);
16157      emit_insn (gen_sse2_loadd (x, op1));
16158      op1 = gen_lowpart (TImode, x);
16159    }
16160
16161  /* The insn must want input operands in the same modes as the
16162     result.  */
16163  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
16164	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
16165
16166  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16167    op0 = copy_to_mode_reg (mode0, op0);
16168  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16169    op1 = copy_to_mode_reg (mode1, op1);
16170
16171  /* ??? Using ix86_fixup_binary_operands is problematic when
16172     we've got mismatched modes.  Fake it.  */
16173
16174  xops[0] = target;
16175  xops[1] = op0;
16176  xops[2] = op1;
16177
16178  if (tmode == mode0 && tmode == mode1)
16179    {
16180      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
16181      op0 = xops[1];
16182      op1 = xops[2];
16183    }
16184  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
16185    {
16186      op0 = force_reg (mode0, op0);
16187      op1 = force_reg (mode1, op1);
16188      target = gen_reg_rtx (tmode);
16189    }
16190
16191  pat = GEN_FCN (icode) (target, op0, op1);
16192  if (! pat)
16193    return 0;
16194  emit_insn (pat);
16195  return target;
16196}
16197
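/* For illustration: the table-driven loop near the end of
   ix86_expand_builtin calls the routine above roughly as

     ix86_expand_binop_builtin (d->icode, arglist, target);

   where, for a two-operand builtin such as __builtin_ia32_addps, d->icode
   would name the vector add pattern (CODE_FOR_addv4sf3 here, assuming the
   bdesc_2arg table defined earlier in this file is unchanged).  */
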
16198/* Subroutine of ix86_expand_builtin to take care of stores.  */
16199
16200static rtx
16201ix86_expand_store_builtin (enum insn_code icode, tree arglist)
16202{
16203  rtx pat;
16204  tree arg0 = TREE_VALUE (arglist);
16205  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16206  rtx op0 = expand_normal (arg0);
16207  rtx op1 = expand_normal (arg1);
16208  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
16209  enum machine_mode mode1 = insn_data[icode].operand[1].mode;
16210
16211  if (VECTOR_MODE_P (mode1))
16212    op1 = safe_vector_operand (op1, mode1);
16213
16214  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16215  op1 = copy_to_mode_reg (mode1, op1);
16216
16217  pat = GEN_FCN (icode) (op0, op1);
16218  if (pat)
16219    emit_insn (pat);
16220  return 0;
16221}
16222
16223/* Subroutine of ix86_expand_builtin to take care of unop insns.  */
16224
16225static rtx
16226ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16227			  rtx target, int do_load)
16228{
16229  rtx pat;
16230  tree arg0 = TREE_VALUE (arglist);
16231  rtx op0 = expand_normal (arg0);
16232  enum machine_mode tmode = insn_data[icode].operand[0].mode;
16233  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16234
16235  if (optimize || !target
16236      || GET_MODE (target) != tmode
16237      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16238    target = gen_reg_rtx (tmode);
16239  if (do_load)
16240    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16241  else
16242    {
16243      if (VECTOR_MODE_P (mode0))
16244	op0 = safe_vector_operand (op0, mode0);
16245
16246      if ((optimize && !register_operand (op0, mode0))
16247	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16248	op0 = copy_to_mode_reg (mode0, op0);
16249    }
16250
16251  pat = GEN_FCN (icode) (target, op0);
16252  if (! pat)
16253    return 0;
16254  emit_insn (pat);
16255  return target;
16256}
16257
16258/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16259   sqrtss, rsqrtss, rcpss.  */
16260
16261static rtx
16262ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16263{
16264  rtx pat;
16265  tree arg0 = TREE_VALUE (arglist);
16266  rtx op1, op0 = expand_normal (arg0);
16267  enum machine_mode tmode = insn_data[icode].operand[0].mode;
16268  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16269
16270  if (optimize || !target
16271      || GET_MODE (target) != tmode
16272      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16273    target = gen_reg_rtx (tmode);
16274
16275  if (VECTOR_MODE_P (mode0))
16276    op0 = safe_vector_operand (op0, mode0);
16277
16278  if ((optimize && !register_operand (op0, mode0))
16279      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16280    op0 = copy_to_mode_reg (mode0, op0);
16281
16282  op1 = op0;
16283  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16284    op1 = copy_to_mode_reg (mode0, op1);
16285
16286  pat = GEN_FCN (icode) (target, op0, op1);
16287  if (! pat)
16288    return 0;
16289  emit_insn (pat);
16290  return target;
16291}
16292
16293/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
16294
16295static rtx
16296ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16297			 rtx target)
16298{
16299  rtx pat;
16300  tree arg0 = TREE_VALUE (arglist);
16301  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16302  rtx op0 = expand_normal (arg0);
16303  rtx op1 = expand_normal (arg1);
16304  rtx op2;
16305  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16306  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16307  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16308  enum rtx_code comparison = d->comparison;
16309
16310  if (VECTOR_MODE_P (mode0))
16311    op0 = safe_vector_operand (op0, mode0);
16312  if (VECTOR_MODE_P (mode1))
16313    op1 = safe_vector_operand (op1, mode1);
16314
16315  /* Swap operands if we have a comparison that isn't available in
16316     hardware.  */
16317  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16318    {
16319      rtx tmp = gen_reg_rtx (mode1);
16320      emit_move_insn (tmp, op1);
16321      op1 = op0;
16322      op0 = tmp;
16323    }
16324
16325  if (optimize || !target
16326      || GET_MODE (target) != tmode
16327      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16328    target = gen_reg_rtx (tmode);
16329
16330  if ((optimize && !register_operand (op0, mode0))
16331      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16332    op0 = copy_to_mode_reg (mode0, op0);
16333  if ((optimize && !register_operand (op1, mode1))
16334      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16335    op1 = copy_to_mode_reg (mode1, op1);
16336
16337  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16338  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16339  if (! pat)
16340    return 0;
16341  emit_insn (pat);
16342  return target;
16343}
16344
16345/* Subroutine of ix86_expand_builtin to take care of comi insns.  */
16346
16347static rtx
16348ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16349		      rtx target)
16350{
16351  rtx pat;
16352  tree arg0 = TREE_VALUE (arglist);
16353  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16354  rtx op0 = expand_normal (arg0);
16355  rtx op1 = expand_normal (arg1);
16356  rtx op2;
16357  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16358  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16359  enum rtx_code comparison = d->comparison;
16360
16361  if (VECTOR_MODE_P (mode0))
16362    op0 = safe_vector_operand (op0, mode0);
16363  if (VECTOR_MODE_P (mode1))
16364    op1 = safe_vector_operand (op1, mode1);
16365
16366  /* Swap operands if we have a comparison that isn't available in
16367     hardware.  */
16368  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16369    {
16370      rtx tmp = op1;
16371      op1 = op0;
16372      op0 = tmp;
16373    }
16374
16375  target = gen_reg_rtx (SImode);
16376  emit_move_insn (target, const0_rtx);
16377  target = gen_rtx_SUBREG (QImode, target, 0);
16378
16379  if ((optimize && !register_operand (op0, mode0))
16380      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16381    op0 = copy_to_mode_reg (mode0, op0);
16382  if ((optimize && !register_operand (op1, mode1))
16383      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16384    op1 = copy_to_mode_reg (mode1, op1);
16385
16386  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16387  pat = GEN_FCN (d->icode) (op0, op1);
16388  if (! pat)
16389    return 0;
16390  emit_insn (pat);
16391  emit_insn (gen_rtx_SET (VOIDmode,
16392			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16393			  gen_rtx_fmt_ee (comparison, QImode,
16394					  SET_DEST (pat),
16395					  const0_rtx)));
16396
16397  return SUBREG_REG (target);
16398}
16399
16400/* Return the integer constant in ARG.  Constrain it to be in the range
16401   of the subparts of VEC_TYPE; issue an error if not.  */
16402
16403static int
16404get_element_number (tree vec_type, tree arg)
16405{
16406  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16407
16408  if (!host_integerp (arg, 1)
16409      || (elt = tree_low_cst (arg, 1), elt > max))
16410    {
16411      error ("selector must be an integer constant in the range 0..%wi", max);
16412      return 0;
16413    }
16414
16415  return elt;
16416}
16417
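/* For example, with a V4SI argument TYPE_VECTOR_SUBPARTS is 4, so MAX is
   3: a call such as __builtin_ia32_vec_ext_v4si (x, 5) is rejected with
   the "selector must be an integer constant in the range 0..3" error,
   while selectors 0 through 3 are accepted.  */
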
16418/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
16419   ix86_expand_vector_init.  We DO have language-level syntax for this, in
16420   the form of (type){ init-list }.  However, since we can't place emms
16421   instructions from inside the compiler, we can't allow the use of MMX
16422   registers unless the user explicitly asks for it.  So we do *not* define
16423   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
16424   we have builtins invoked by mmintrin.h that give us license to emit
16425   these sorts of instructions.  */
16426
16427static rtx
16428ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16429{
16430  enum machine_mode tmode = TYPE_MODE (type);
16431  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16432  int i, n_elt = GET_MODE_NUNITS (tmode);
16433  rtvec v = rtvec_alloc (n_elt);
16434
16435  gcc_assert (VECTOR_MODE_P (tmode));
16436
16437  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16438    {
16439      rtx x = expand_normal (TREE_VALUE (arglist));
16440      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16441    }
16442
16443  gcc_assert (arglist == NULL);
16444
16445  if (!target || !register_operand (target, tmode))
16446    target = gen_reg_rtx (tmode);
16447
16448  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16449  return target;
16450}
16451
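/* For illustration: mmintrin.h reaches the routine above through wrappers
   of roughly the following shape (a sketch only, with a made-up name; the
   exact header text may differ between releases):

     static __inline __m64
     my_set_pi16 (short __w3, short __w2, short __w1, short __w0)
     {
       return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
     }

   i.e. the element list of the intrinsic becomes the arglist consumed by
   the loop above.  */
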
16452/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
16453   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
16454   had a language-level syntax for referencing vector elements.  */
16455
16456static rtx
16457ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16458{
16459  enum machine_mode tmode, mode0;
16460  tree arg0, arg1;
16461  int elt;
16462  rtx op0;
16463
16464  arg0 = TREE_VALUE (arglist);
16465  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16466
16467  op0 = expand_normal (arg0);
16468  elt = get_element_number (TREE_TYPE (arg0), arg1);
16469
16470  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16471  mode0 = TYPE_MODE (TREE_TYPE (arg0));
16472  gcc_assert (VECTOR_MODE_P (mode0));
16473
16474  op0 = force_reg (mode0, op0);
16475
16476  if (optimize || !target || !register_operand (target, tmode))
16477    target = gen_reg_rtx (tmode);
16478
16479  ix86_expand_vector_extract (true, target, op0, elt);
16480
16481  return target;
16482}
16483
16484/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
16485   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
16486   a language-level syntax for referencing vector elements.  */
16487
16488static rtx
16489ix86_expand_vec_set_builtin (tree arglist)
16490{
16491  enum machine_mode tmode, mode1;
16492  tree arg0, arg1, arg2;
16493  int elt;
16494  rtx op0, op1, target;
16495
16496  arg0 = TREE_VALUE (arglist);
16497  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16498  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16499
16500  tmode = TYPE_MODE (TREE_TYPE (arg0));
16501  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16502  gcc_assert (VECTOR_MODE_P (tmode));
16503
16504  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16505  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16506  elt = get_element_number (TREE_TYPE (arg0), arg2);
16507
16508  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16509    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16510
16511  op0 = force_reg (tmode, op0);
16512  op1 = force_reg (mode1, op1);
16513
16514  /* OP0 is the source of these builtin functions and shouldn't be
16515     modified.  Create a copy, use it and return it as target.  */
16516  target = gen_reg_rtx (tmode);
16517  emit_move_insn (target, op0);
16518  ix86_expand_vector_set (true, target, op1, elt);
16519
16520  return target;
16521}
16522
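/* For illustration: the extract/set counterparts are reached the same
   way, e.g. a hypothetical wrapper

     static __inline int
     my_extract_pi16 (__m64 const __A, int const __N)
     {
       return __builtin_ia32_vec_ext_v4hi ((__v4hi) __A, __N);
     }

   lands in ix86_expand_vec_ext_builtin, and the corresponding
   __builtin_ia32_vec_set_v4hi call lands in ix86_expand_vec_set_builtin
   with the replacement element as its second argument.  */
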
16523/* Expand an expression EXP that calls a built-in function,
16524   with result going to TARGET if that's convenient
16525   (and in mode MODE if that's convenient).
16526   SUBTARGET may be used as the target for computing one of EXP's operands.
16527   IGNORE is nonzero if the value is to be ignored.  */
16528
16529static rtx
16530ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16531		     enum machine_mode mode ATTRIBUTE_UNUSED,
16532		     int ignore ATTRIBUTE_UNUSED)
16533{
16534  const struct builtin_description *d;
16535  size_t i;
16536  enum insn_code icode;
16537  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16538  tree arglist = TREE_OPERAND (exp, 1);
16539  tree arg0, arg1, arg2, arg3;
16540  rtx op0, op1, op2, op3, pat;
16541  enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
16542  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16543
16544  switch (fcode)
16545    {
16546    case IX86_BUILTIN_EMMS:
16547      emit_insn (gen_mmx_emms ());
16548      return 0;
16549
16550    case IX86_BUILTIN_SFENCE:
16551      emit_insn (gen_sse_sfence ());
16552      return 0;
16553
16554    case IX86_BUILTIN_MASKMOVQ:
16555    case IX86_BUILTIN_MASKMOVDQU:
16556      icode = (fcode == IX86_BUILTIN_MASKMOVQ
16557	       ? CODE_FOR_mmx_maskmovq
16558	       : CODE_FOR_sse2_maskmovdqu);
16559      /* Note the arg order is different from the operand order.  */
16560      arg1 = TREE_VALUE (arglist);
16561      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16562      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16563      op0 = expand_normal (arg0);
16564      op1 = expand_normal (arg1);
16565      op2 = expand_normal (arg2);
16566      mode0 = insn_data[icode].operand[0].mode;
16567      mode1 = insn_data[icode].operand[1].mode;
16568      mode2 = insn_data[icode].operand[2].mode;
16569
16570      op0 = force_reg (Pmode, op0);
16571      op0 = gen_rtx_MEM (mode1, op0);
16572
16573      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16574	op0 = copy_to_mode_reg (mode0, op0);
16575      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16576	op1 = copy_to_mode_reg (mode1, op1);
16577      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16578	op2 = copy_to_mode_reg (mode2, op2);
16579      pat = GEN_FCN (icode) (op0, op1, op2);
16580      if (! pat)
16581	return 0;
16582      emit_insn (pat);
16583      return 0;
16584
16585    case IX86_BUILTIN_SQRTSS:
16586      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16587    case IX86_BUILTIN_RSQRTSS:
16588      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16589    case IX86_BUILTIN_RCPSS:
16590      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16591
16592    case IX86_BUILTIN_LOADUPS:
16593      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16594
16595    case IX86_BUILTIN_STOREUPS:
16596      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16597
16598    case IX86_BUILTIN_LOADHPS:
16599    case IX86_BUILTIN_LOADLPS:
16600    case IX86_BUILTIN_LOADHPD:
16601    case IX86_BUILTIN_LOADLPD:
16602      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16603	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16604	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16605	       : CODE_FOR_sse2_loadlpd);
16606      arg0 = TREE_VALUE (arglist);
16607      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16608      op0 = expand_normal (arg0);
16609      op1 = expand_normal (arg1);
16610      tmode = insn_data[icode].operand[0].mode;
16611      mode0 = insn_data[icode].operand[1].mode;
16612      mode1 = insn_data[icode].operand[2].mode;
16613
16614      op0 = force_reg (mode0, op0);
16615      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16616      if (optimize || target == 0
16617	  || GET_MODE (target) != tmode
16618	  || !register_operand (target, tmode))
16619	target = gen_reg_rtx (tmode);
16620      pat = GEN_FCN (icode) (target, op0, op1);
16621      if (! pat)
16622	return 0;
16623      emit_insn (pat);
16624      return target;
16625
16626    case IX86_BUILTIN_STOREHPS:
16627    case IX86_BUILTIN_STORELPS:
16628      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16629	       : CODE_FOR_sse_storelps);
16630      arg0 = TREE_VALUE (arglist);
16631      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16632      op0 = expand_normal (arg0);
16633      op1 = expand_normal (arg1);
16634      mode0 = insn_data[icode].operand[0].mode;
16635      mode1 = insn_data[icode].operand[1].mode;
16636
16637      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16638      op1 = force_reg (mode1, op1);
16639
16640      pat = GEN_FCN (icode) (op0, op1);
16641      if (! pat)
16642	return 0;
16643      emit_insn (pat);
16644      return const0_rtx;
16645
16646    case IX86_BUILTIN_MOVNTPS:
16647      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16648    case IX86_BUILTIN_MOVNTQ:
16649      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16650
16651    case IX86_BUILTIN_LDMXCSR:
16652      op0 = expand_normal (TREE_VALUE (arglist));
16653      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16654      emit_move_insn (target, op0);
16655      emit_insn (gen_sse_ldmxcsr (target));
16656      return 0;
16657
16658    case IX86_BUILTIN_STMXCSR:
16659      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16660      emit_insn (gen_sse_stmxcsr (target));
16661      return copy_to_mode_reg (SImode, target);
16662
16663    case IX86_BUILTIN_SHUFPS:
16664    case IX86_BUILTIN_SHUFPD:
16665      icode = (fcode == IX86_BUILTIN_SHUFPS
16666	       ? CODE_FOR_sse_shufps
16667	       : CODE_FOR_sse2_shufpd);
16668      arg0 = TREE_VALUE (arglist);
16669      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16670      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16671      op0 = expand_normal (arg0);
16672      op1 = expand_normal (arg1);
16673      op2 = expand_normal (arg2);
16674      tmode = insn_data[icode].operand[0].mode;
16675      mode0 = insn_data[icode].operand[1].mode;
16676      mode1 = insn_data[icode].operand[2].mode;
16677      mode2 = insn_data[icode].operand[3].mode;
16678
16679      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16680	op0 = copy_to_mode_reg (mode0, op0);
16681      if ((optimize && !register_operand (op1, mode1))
16682	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16683	op1 = copy_to_mode_reg (mode1, op1);
16684      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16685	{
16686	  /* @@@ better error message */
16687	  error ("mask must be an immediate");
16688	  return gen_reg_rtx (tmode);
16689	}
16690      if (optimize || target == 0
16691	  || GET_MODE (target) != tmode
16692	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16693	target = gen_reg_rtx (tmode);
16694      pat = GEN_FCN (icode) (target, op0, op1, op2);
16695      if (! pat)
16696	return 0;
16697      emit_insn (pat);
16698      return target;
16699
16700    case IX86_BUILTIN_PSHUFW:
16701    case IX86_BUILTIN_PSHUFD:
16702    case IX86_BUILTIN_PSHUFHW:
16703    case IX86_BUILTIN_PSHUFLW:
16704      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16705	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16706	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16707	       : CODE_FOR_mmx_pshufw);
16708      arg0 = TREE_VALUE (arglist);
16709      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16710      op0 = expand_normal (arg0);
16711      op1 = expand_normal (arg1);
16712      tmode = insn_data[icode].operand[0].mode;
16713      mode1 = insn_data[icode].operand[1].mode;
16714      mode2 = insn_data[icode].operand[2].mode;
16715
16716      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16717	op0 = copy_to_mode_reg (mode1, op0);
16718      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16719	{
16720	  /* @@@ better error message */
16721	  error ("mask must be an immediate");
16722	  return const0_rtx;
16723	}
16724      if (target == 0
16725	  || GET_MODE (target) != tmode
16726	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16727	target = gen_reg_rtx (tmode);
16728      pat = GEN_FCN (icode) (target, op0, op1);
16729      if (! pat)
16730	return 0;
16731      emit_insn (pat);
16732      return target;
16733
16734    case IX86_BUILTIN_PSLLWI128:
16735      icode = CODE_FOR_ashlv8hi3;
16736      goto do_pshifti;
16737    case IX86_BUILTIN_PSLLDI128:
16738      icode = CODE_FOR_ashlv4si3;
16739      goto do_pshifti;
16740    case IX86_BUILTIN_PSLLQI128:
16741      icode = CODE_FOR_ashlv2di3;
16742      goto do_pshifti;
16743    case IX86_BUILTIN_PSRAWI128:
16744      icode = CODE_FOR_ashrv8hi3;
16745      goto do_pshifti;
16746    case IX86_BUILTIN_PSRADI128:
16747      icode = CODE_FOR_ashrv4si3;
16748      goto do_pshifti;
16749    case IX86_BUILTIN_PSRLWI128:
16750      icode = CODE_FOR_lshrv8hi3;
16751      goto do_pshifti;
16752    case IX86_BUILTIN_PSRLDI128:
16753      icode = CODE_FOR_lshrv4si3;
16754      goto do_pshifti;
16755    case IX86_BUILTIN_PSRLQI128:
16756      icode = CODE_FOR_lshrv2di3;
16757      goto do_pshifti;
16758    do_pshifti:
16759      arg0 = TREE_VALUE (arglist);
16760      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16761      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16762      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16763
16764      if (GET_CODE (op1) != CONST_INT)
16765	{
16766	  error ("shift must be an immediate");
16767	  return const0_rtx;
16768	}
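      /* Counts outside 0..255 are canonicalized to 255: any count of at
         least the element width already shifts in zeros (or sign copies
         for the arithmetic right shifts), so 255 is a safe stand-in for
         "shift everything out".  */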
16769      if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
16770	op1 = GEN_INT (255);
16771
16772      tmode = insn_data[icode].operand[0].mode;
16773      mode1 = insn_data[icode].operand[1].mode;
16774      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16775	op0 = copy_to_reg (op0);
16776
16777      target = gen_reg_rtx (tmode);
16778      pat = GEN_FCN (icode) (target, op0, op1);
16779      if (!pat)
16780	return 0;
16781      emit_insn (pat);
16782      return target;
16783
16784    case IX86_BUILTIN_PSLLW128:
16785      icode = CODE_FOR_ashlv8hi3;
16786      goto do_pshift;
16787    case IX86_BUILTIN_PSLLD128:
16788      icode = CODE_FOR_ashlv4si3;
16789      goto do_pshift;
16790    case IX86_BUILTIN_PSLLQ128:
16791      icode = CODE_FOR_ashlv2di3;
16792      goto do_pshift;
16793    case IX86_BUILTIN_PSRAW128:
16794      icode = CODE_FOR_ashrv8hi3;
16795      goto do_pshift;
16796    case IX86_BUILTIN_PSRAD128:
16797      icode = CODE_FOR_ashrv4si3;
16798      goto do_pshift;
16799    case IX86_BUILTIN_PSRLW128:
16800      icode = CODE_FOR_lshrv8hi3;
16801      goto do_pshift;
16802    case IX86_BUILTIN_PSRLD128:
16803      icode = CODE_FOR_lshrv4si3;
16804      goto do_pshift;
16805    case IX86_BUILTIN_PSRLQ128:
16806      icode = CODE_FOR_lshrv2di3;
16807      goto do_pshift;
16808    do_pshift:
16809      arg0 = TREE_VALUE (arglist);
16810      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16811      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16812      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16813
16814      tmode = insn_data[icode].operand[0].mode;
16815      mode1 = insn_data[icode].operand[1].mode;
16816
16817      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16818	op0 = copy_to_reg (op0);
16819
16820      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
16821      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
16822	op1 = copy_to_reg (op1);
16823
16824      target = gen_reg_rtx (tmode);
16825      pat = GEN_FCN (icode) (target, op0, op1);
16826      if (!pat)
16827	return 0;
16828      emit_insn (pat);
16829      return target;
16830
16831    case IX86_BUILTIN_PSLLDQI128:
16832    case IX86_BUILTIN_PSRLDQI128:
16833      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16834	       : CODE_FOR_sse2_lshrti3);
16835      arg0 = TREE_VALUE (arglist);
16836      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16837      op0 = expand_normal (arg0);
16838      op1 = expand_normal (arg1);
16839      tmode = insn_data[icode].operand[0].mode;
16840      mode1 = insn_data[icode].operand[1].mode;
16841      mode2 = insn_data[icode].operand[2].mode;
16842
16843      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16844	{
16845	  op0 = copy_to_reg (op0);
16846	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16847	}
16848      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16849	{
16850	  error ("shift must be an immediate");
16851	  return const0_rtx;
16852	}
16853      target = gen_reg_rtx (V2DImode);
16854      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
16855			     op0, op1);
16856      if (! pat)
16857	return 0;
16858      emit_insn (pat);
16859      return target;
16860
16861    case IX86_BUILTIN_FEMMS:
16862      emit_insn (gen_mmx_femms ());
16863      return NULL_RTX;
16864
16865    case IX86_BUILTIN_PAVGUSB:
16866      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16867
16868    case IX86_BUILTIN_PF2ID:
16869      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16870
16871    case IX86_BUILTIN_PFACC:
16872      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16873
16874    case IX86_BUILTIN_PFADD:
16875     return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16876
16877    case IX86_BUILTIN_PFCMPEQ:
16878      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16879
16880    case IX86_BUILTIN_PFCMPGE:
16881      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16882
16883    case IX86_BUILTIN_PFCMPGT:
16884      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16885
16886    case IX86_BUILTIN_PFMAX:
16887      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16888
16889    case IX86_BUILTIN_PFMIN:
16890      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16891
16892    case IX86_BUILTIN_PFMUL:
16893      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16894
16895    case IX86_BUILTIN_PFRCP:
16896      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16897
16898    case IX86_BUILTIN_PFRCPIT1:
16899      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16900
16901    case IX86_BUILTIN_PFRCPIT2:
16902      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16903
16904    case IX86_BUILTIN_PFRSQIT1:
16905      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16906
16907    case IX86_BUILTIN_PFRSQRT:
16908      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16909
16910    case IX86_BUILTIN_PFSUB:
16911      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16912
16913    case IX86_BUILTIN_PFSUBR:
16914      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16915
16916    case IX86_BUILTIN_PI2FD:
16917      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16918
16919    case IX86_BUILTIN_PMULHRW:
16920      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16921
16922    case IX86_BUILTIN_PF2IW:
16923      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16924
16925    case IX86_BUILTIN_PFNACC:
16926      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16927
16928    case IX86_BUILTIN_PFPNACC:
16929      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16930
16931    case IX86_BUILTIN_PI2FW:
16932      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16933
16934    case IX86_BUILTIN_PSWAPDSI:
16935      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16936
16937    case IX86_BUILTIN_PSWAPDSF:
16938      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16939
16940    case IX86_BUILTIN_SQRTSD:
16941      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16942    case IX86_BUILTIN_LOADUPD:
16943      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16944    case IX86_BUILTIN_STOREUPD:
16945      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16946
16947    case IX86_BUILTIN_MFENCE:
16948	emit_insn (gen_sse2_mfence ());
16949	return 0;
16950    case IX86_BUILTIN_LFENCE:
16951	emit_insn (gen_sse2_lfence ());
16952	return 0;
16953
16954    case IX86_BUILTIN_CLFLUSH:
16955	arg0 = TREE_VALUE (arglist);
16956	op0 = expand_normal (arg0);
16957	icode = CODE_FOR_sse2_clflush;
16958	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16959	    op0 = copy_to_mode_reg (Pmode, op0);
16960
16961	emit_insn (gen_sse2_clflush (op0));
16962	return 0;
16963
16964    case IX86_BUILTIN_MOVNTPD:
16965      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16966    case IX86_BUILTIN_MOVNTDQ:
16967      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16968    case IX86_BUILTIN_MOVNTI:
16969      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16970
16971    case IX86_BUILTIN_LOADDQU:
16972      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16973    case IX86_BUILTIN_STOREDQU:
16974      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16975
16976    case IX86_BUILTIN_MONITOR:
16977      arg0 = TREE_VALUE (arglist);
16978      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16979      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16980      op0 = expand_normal (arg0);
16981      op1 = expand_normal (arg1);
16982      op2 = expand_normal (arg2);
16983      if (!REG_P (op0))
16984	op0 = copy_to_mode_reg (Pmode, op0);
16985      if (!REG_P (op1))
16986	op1 = copy_to_mode_reg (SImode, op1);
16987      if (!REG_P (op2))
16988	op2 = copy_to_mode_reg (SImode, op2);
16989      if (!TARGET_64BIT)
16990	emit_insn (gen_sse3_monitor (op0, op1, op2));
16991      else
16992	emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16993      return 0;
16994
16995    case IX86_BUILTIN_MWAIT:
16996      arg0 = TREE_VALUE (arglist);
16997      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16998      op0 = expand_normal (arg0);
16999      op1 = expand_normal (arg1);
17000      if (!REG_P (op0))
17001	op0 = copy_to_mode_reg (SImode, op0);
17002      if (!REG_P (op1))
17003	op1 = copy_to_mode_reg (SImode, op1);
17004      emit_insn (gen_sse3_mwait (op0, op1));
17005      return 0;
17006
17007    case IX86_BUILTIN_LDDQU:
17008      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
17009				       target, 1);
17010
17011    case IX86_BUILTIN_PALIGNR:
17012    case IX86_BUILTIN_PALIGNR128:
17013      if (fcode == IX86_BUILTIN_PALIGNR)
17014	{
17015	  icode = CODE_FOR_ssse3_palignrdi;
17016	  mode = DImode;
17017	}
17018      else
17019	{
17020	  icode = CODE_FOR_ssse3_palignrti;
17021	  mode = V2DImode;
17022	}
17023      arg0 = TREE_VALUE (arglist);
17024      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17025      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17026      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
17027      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
17028      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
17029      tmode = insn_data[icode].operand[0].mode;
17030      mode1 = insn_data[icode].operand[1].mode;
17031      mode2 = insn_data[icode].operand[2].mode;
17032      mode3 = insn_data[icode].operand[3].mode;
17033
17034      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17035	{
17036	  op0 = copy_to_reg (op0);
17037	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17038	}
17039      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17040	{
17041	  op1 = copy_to_reg (op1);
17042	  op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
17043	}
17044      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17045	{
17046	  error ("shift must be an immediate");
17047	  return const0_rtx;
17048	}
17049      target = gen_reg_rtx (mode);
17050      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
17051			     op0, op1, op2);
17052      if (! pat)
17053	return 0;
17054      emit_insn (pat);
17055      return target;
17056
17057    case IX86_BUILTIN_MOVNTSD:
17058      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
17059
17060    case IX86_BUILTIN_MOVNTSS:
17061      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
17062
17063    case IX86_BUILTIN_INSERTQ:
17064    case IX86_BUILTIN_EXTRQ:
17065      icode = (fcode == IX86_BUILTIN_EXTRQ
17066               ? CODE_FOR_sse4a_extrq
17067               : CODE_FOR_sse4a_insertq);
17068      arg0 = TREE_VALUE (arglist);
17069      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17070      op0 = expand_normal (arg0);
17071      op1 = expand_normal (arg1);
17072      tmode = insn_data[icode].operand[0].mode;
17073      mode1 = insn_data[icode].operand[1].mode;
17074      mode2 = insn_data[icode].operand[2].mode;
17075      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17076        op0 = copy_to_mode_reg (mode1, op0);
17077      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17078        op1 = copy_to_mode_reg (mode2, op1);
17079      if (optimize || target == 0
17080          || GET_MODE (target) != tmode
17081          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17082        target = gen_reg_rtx (tmode);
17083      pat = GEN_FCN (icode) (target, op0, op1);
17084      if (! pat)
17085        return NULL_RTX;
17086      emit_insn (pat);
17087      return target;
17088
17089    case IX86_BUILTIN_EXTRQI:
17090      icode = CODE_FOR_sse4a_extrqi;
17091      arg0 = TREE_VALUE (arglist);
17092      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17093      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17094      op0 = expand_normal (arg0);
17095      op1 = expand_normal (arg1);
17096      op2 = expand_normal (arg2);
17097      tmode = insn_data[icode].operand[0].mode;
17098      mode1 = insn_data[icode].operand[1].mode;
17099      mode2 = insn_data[icode].operand[2].mode;
17100      mode3 = insn_data[icode].operand[3].mode;
17101      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17102        op0 = copy_to_mode_reg (mode1, op0);
17103      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17104        {
17105          error ("index mask must be an immediate");
17106          return gen_reg_rtx (tmode);
17107        }
17108      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17109        {
17110          error ("length mask must be an immediate");
17111          return gen_reg_rtx (tmode);
17112        }
17113      if (optimize || target == 0
17114          || GET_MODE (target) != tmode
17115          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17116        target = gen_reg_rtx (tmode);
17117      pat = GEN_FCN (icode) (target, op0, op1, op2);
17118      if (! pat)
17119        return NULL_RTX;
17120      emit_insn (pat);
17121      return target;
17122
17123    case IX86_BUILTIN_INSERTQI:
17124      icode = CODE_FOR_sse4a_insertqi;
17125      arg0 = TREE_VALUE (arglist);
17126      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17127      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17128      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
17129      op0 = expand_normal (arg0);
17130      op1 = expand_normal (arg1);
17131      op2 = expand_normal (arg2);
17132      op3 = expand_normal (arg3);
17133      tmode = insn_data[icode].operand[0].mode;
17134      mode1 = insn_data[icode].operand[1].mode;
17135      mode2 = insn_data[icode].operand[2].mode;
17136      mode3 = insn_data[icode].operand[3].mode;
17137      mode4 = insn_data[icode].operand[4].mode;
17138
17139      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17140        op0 = copy_to_mode_reg (mode1, op0);
17141
17142      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17143        op1 = copy_to_mode_reg (mode2, op1);
17144
17145      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17146        {
17147          error ("index mask must be an immediate");
17148          return gen_reg_rtx (tmode);
17149        }
17150      if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
17151        {
17152          error ("length mask must be an immediate");
17153          return gen_reg_rtx (tmode);
17154        }
17155      if (optimize || target == 0
17156          || GET_MODE (target) != tmode
17157          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17158        target = gen_reg_rtx (tmode);
17159      pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
17160      if (! pat)
17161        return NULL_RTX;
17162      emit_insn (pat);
17163      return target;
17164
17165    case IX86_BUILTIN_VEC_INIT_V2SI:
17166    case IX86_BUILTIN_VEC_INIT_V4HI:
17167    case IX86_BUILTIN_VEC_INIT_V8QI:
17168      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
17169
17170    case IX86_BUILTIN_VEC_EXT_V2DF:
17171    case IX86_BUILTIN_VEC_EXT_V2DI:
17172    case IX86_BUILTIN_VEC_EXT_V4SF:
17173    case IX86_BUILTIN_VEC_EXT_V4SI:
17174    case IX86_BUILTIN_VEC_EXT_V8HI:
17175    case IX86_BUILTIN_VEC_EXT_V16QI:
17176    case IX86_BUILTIN_VEC_EXT_V2SI:
17177    case IX86_BUILTIN_VEC_EXT_V4HI:
17178      return ix86_expand_vec_ext_builtin (arglist, target);
17179
17180    case IX86_BUILTIN_VEC_SET_V8HI:
17181    case IX86_BUILTIN_VEC_SET_V4HI:
17182      return ix86_expand_vec_set_builtin (arglist);
17183
17184    default:
17185      break;
17186    }
17187
17188  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17189    if (d->code == fcode)
17190      {
17191	/* Compares are treated specially.  */
17192	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17193	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
17194	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
17195	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17196	  return ix86_expand_sse_compare (d, arglist, target);
17197
17198	return ix86_expand_binop_builtin (d->icode, arglist, target);
17199      }
17200
17201  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17202    if (d->code == fcode)
17203      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
17204
17205  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17206    if (d->code == fcode)
17207      return ix86_expand_sse_comi (d, arglist, target);
17208
17209  gcc_unreachable ();
17210}
17211
17212/* Store OPERAND to memory after reload is completed.  This means
17213   that we can't easily use assign_stack_local.  */
17214rtx
17215ix86_force_to_memory (enum machine_mode mode, rtx operand)
17216{
17217  rtx result;
17218
17219  gcc_assert (reload_completed);
17220  if (TARGET_RED_ZONE)
17221    {
17222      result = gen_rtx_MEM (mode,
17223			    gen_rtx_PLUS (Pmode,
17224					  stack_pointer_rtx,
17225					  GEN_INT (-RED_ZONE_SIZE)));
17226      emit_move_insn (result, operand);
17227    }
17228  else if (!TARGET_RED_ZONE && TARGET_64BIT)
17229    {
17230      switch (mode)
17231	{
17232	case HImode:
17233	case SImode:
17234	  operand = gen_lowpart (DImode, operand);
17235	  /* FALLTHRU */
17236	case DImode:
17237	  emit_insn (
17238		      gen_rtx_SET (VOIDmode,
17239				   gen_rtx_MEM (DImode,
17240						gen_rtx_PRE_DEC (DImode,
17241							stack_pointer_rtx)),
17242				   operand));
17243	  break;
17244	default:
17245	  gcc_unreachable ();
17246	}
17247      result = gen_rtx_MEM (mode, stack_pointer_rtx);
17248    }
17249  else
17250    {
17251      switch (mode)
17252	{
17253	case DImode:
17254	  {
17255	    rtx operands[2];
17256	    split_di (&operand, 1, operands, operands + 1);
17257	    emit_insn (
17258			gen_rtx_SET (VOIDmode,
17259				     gen_rtx_MEM (SImode,
17260						  gen_rtx_PRE_DEC (Pmode,
17261							stack_pointer_rtx)),
17262				     operands[1]));
17263	    emit_insn (
17264			gen_rtx_SET (VOIDmode,
17265				     gen_rtx_MEM (SImode,
17266						  gen_rtx_PRE_DEC (Pmode,
17267							stack_pointer_rtx)),
17268				     operands[0]));
17269	  }
17270	  break;
17271	case HImode:
17272	  /* Store HImodes as SImodes.  */
17273	  operand = gen_lowpart (SImode, operand);
17274	  /* FALLTHRU */
17275	case SImode:
17276	  emit_insn (
17277		      gen_rtx_SET (VOIDmode,
17278				   gen_rtx_MEM (GET_MODE (operand),
17279						gen_rtx_PRE_DEC (SImode,
17280							stack_pointer_rtx)),
17281				   operand));
17282	  break;
17283	default:
17284	  gcc_unreachable ();
17285	}
17286      result = gen_rtx_MEM (mode, stack_pointer_rtx);
17287    }
17288  return result;
17289}
17290
17291/* Free the memory slot used by ix86_force_to_memory.  */
17292void
17293ix86_free_from_memory (enum machine_mode mode)
17294{
17295  if (!TARGET_RED_ZONE)
17296    {
17297      int size;
17298
17299      if (mode == DImode || TARGET_64BIT)
17300	size = 8;
17301      else
17302	size = 4;
17303      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
17304         to a pop or add instruction if registers are available.  */
17305      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17306			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17307					    GEN_INT (size))));
17308    }
17309}
17310
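/* For illustration: the two routines above are meant to be used as a
   pair from post-reload splitters in the machine description, roughly

     rtx slot = ix86_force_to_memory (GET_MODE (op), op);
     ... emit insns that use SLOT as a memory operand ...
     ix86_free_from_memory (GET_MODE (op));

   (a sketch only; the actual callers are the patterns in i386.md that
   need a scratch stack slot after reload).  */
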
17311/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
17312   QImode must go into class Q_REGS.
17313   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
17314   movdf to do mem-to-mem moves through integer regs.  */
17315enum reg_class
17316ix86_preferred_reload_class (rtx x, enum reg_class class)
17317{
17318  enum machine_mode mode = GET_MODE (x);
17319
17320  /* We're only allowed to return a subclass of CLASS.  Many of the
17321     following checks fail for NO_REGS, so eliminate that early.  */
17322  if (class == NO_REGS)
17323    return NO_REGS;
17324
17325  /* All classes can load zeros.  */
17326  if (x == CONST0_RTX (mode))
17327    return class;
17328
17329  /* Force constants into memory if we are loading a (nonzero) constant into
17330     an MMX or SSE register.  This is because there are no MMX/SSE instructions
17331     to load from a constant.  */
17332  if (CONSTANT_P (x)
17333      && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
17334    return NO_REGS;
17335
17336  /* Prefer SSE regs only, if we can use them for math.  */
17337  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
17338    return SSE_CLASS_P (class) ? class : NO_REGS;
17339
17340  /* Floating-point constants need more complex checks.  */
17341  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
17342    {
17343      /* General regs can load everything.  */
17344      if (reg_class_subset_p (class, GENERAL_REGS))
17345        return class;
17346
17347      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
17348	 zero above.  We only want to wind up preferring 80387 registers if
17349	 we plan on doing computation with them.  */
17350      if (TARGET_80387
17351	  && standard_80387_constant_p (x))
17352	{
17353	  /* Limit class to non-sse.  */
17354	  if (class == FLOAT_SSE_REGS)
17355	    return FLOAT_REGS;
17356	  if (class == FP_TOP_SSE_REGS)
17357	    return FP_TOP_REG;
17358	  if (class == FP_SECOND_SSE_REGS)
17359	    return FP_SECOND_REG;
17360	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17361	    return class;
17362	}
17363
17364      return NO_REGS;
17365    }
17366
17367  /* Generally when we see PLUS here, it's the function invariant
17368     (plus soft-fp const_int), which can only be computed into general
17369     regs.  */
17370  if (GET_CODE (x) == PLUS)
17371    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17372
17373  /* QImode constants are easy to load, but non-constant QImode data
17374     must go into Q_REGS.  */
17375  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17376    {
17377      if (reg_class_subset_p (class, Q_REGS))
17378	return class;
17379      if (reg_class_subset_p (Q_REGS, class))
17380	return Q_REGS;
17381      return NO_REGS;
17382    }
17383
17384  return class;
17385}
17386
17387/* Discourage putting floating-point values in SSE registers unless
17388   SSE math is being used, and likewise for the 387 registers.  */
17389enum reg_class
17390ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17391{
17392  enum machine_mode mode = GET_MODE (x);
17393
17394  /* Restrict the output reload class to the register bank that we are doing
17395     math on.  If we would like not to return a subset of CLASS, reject this
17396     alternative: if reload cannot do this, it will still use its choice.  */
17397  mode = GET_MODE (x);
17398  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17399    return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17400
17401  if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17402    {
17403      if (class == FP_TOP_SSE_REGS)
17404	return FP_TOP_REG;
17405      else if (class == FP_SECOND_SSE_REGS)
17406	return FP_SECOND_REG;
17407      else
17408	return FLOAT_CLASS_P (class) ? class : NO_REGS;
17409    }
17410
17411  return class;
17412}
17413
17414/* If we are copying between general and FP registers, we need a memory
17415   location. The same is true for SSE and MMX registers.
17416
17417   The macro can't work reliably when one of the CLASSES is a class containing
17418   registers from multiple units (SSE, MMX, integer).  We avoid this by never
17419   combining those units in a single alternative in the machine description.
17420   Ensure that this constraint holds to avoid unexpected surprises.
17421
17422   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17423   enforce these sanity checks.  */
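/* As an illustrative sketch (assuming an SSE1-only target such as
   -march=pentium3, where no movd between general and xmm registers exists),
   an SImode copy from GENERAL_REGS to SSE_REGS is forced through memory,
   roughly:

	movl	%eax, -4(%ebp)
	movss	-4(%ebp), %xmm0

   The actual stack slot and registers are chosen by reload; the sequence
   above is only an example.  */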
17424
17425int
17426ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17427			      enum machine_mode mode, int strict)
17428{
17429  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17430      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17431      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17432      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17433      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17434      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17435    {
17436      gcc_assert (!strict);
17437      return true;
17438    }
17439
17440  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17441    return true;
17442
17443  /* ??? This is a lie.  We do have moves between mmx and general registers, and
17444     between mmx and sse2 registers.  But by saying we need secondary memory we
17445     discourage the register allocator from using the mmx registers unless needed.  */
17446  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17447    return true;
17448
17449  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17450    {
17451      /* SSE1 doesn't have any direct moves from other classes.  */
17452      if (!TARGET_SSE2)
17453	return true;
17454
17455      /* If the target says that inter-unit moves are more expensive
17456	 than moving through memory, then don't generate them.  */
17457      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17458	return true;
17459
17460      /* Between SSE and general, we have moves no larger than word size.  */
17461      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17462	return true;
17463
17464      /* ??? For the cost of one register reformat penalty, we could use
17465	 the same instructions to move SFmode and DFmode data, but the
17466	 relevant move patterns don't support those alternatives.  */
17467      if (mode == SFmode || mode == DFmode)
17468	return true;
17469    }
17470
17471  return false;
17472}
17473
17474/* Return true if the registers in CLASS cannot represent the change from
17475   modes FROM to TO.  */
17476
17477bool
17478ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17479			       enum reg_class class)
17480{
17481  if (from == to)
17482    return false;
17483
17484  /* x87 registers can't do subreg at all, as all values are reformatted
17485     to extended precision.  */
17486  if (MAYBE_FLOAT_CLASS_P (class))
17487    return true;
17488
17489  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17490    {
17491      /* Vector registers do not support QI or HImode loads.  If we don't
17492	 disallow a change to these modes, reload will assume it's ok to
17493	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
17494	 the vec_dupv4hi pattern.  */
17495      if (GET_MODE_SIZE (from) < 4)
17496	return true;
17497
17498      /* Vector registers do not support subreg with nonzero offsets, which
17499	 are otherwise valid for integer registers.  Since we can't see
17500	 whether we have a nonzero offset from here, prohibit all
17501         nonparadoxical subregs changing size.  */
17502      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17503	return true;
17504    }
17505
17506  return false;
17507}
17508
17509/* Return the cost of moving data from a register in class CLASS1 to
17510   one in class CLASS2.
17511
17512   It is not required that the cost always equal 2 when CLASS1 is the same as CLASS2;
17513   on some machines it is expensive to move between registers if they are not
17514   general registers.  */
17515
17516int
17517ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17518			 enum reg_class class2)
17519{
17520  /* In case we require secondary memory, compute cost of the store followed
17521     by load.  In order to avoid bad register allocation choices, we need
17522     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
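  /* Worked example with made-up cost-table figures: copying DImode between
     GENERAL_REGS and SSE_REGS on a 32-bit SSE2 target needs secondary memory
     because the mode is wider than a word.  If the memory cost of the
     GENERAL_REGS side is 12 (two word moves) and of the SSE side 8, the total
     is 1 + 12 + 8 = 21, plus the 20 penalty below when copying from
     GENERAL_REGS, since the two GPR halves are stored separately and then
     reloaded by one wider load.  Real numbers come from ix86_cost.  */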
17523
17524  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17525    {
17526      int cost = 1;
17527
17528      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17529		   MEMORY_MOVE_COST (mode, class1, 1));
17530      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17531		   MEMORY_MOVE_COST (mode, class2, 1));
17532
17533      /* In case of copying from a general purpose register we may emit multiple
17534         stores followed by a single load, causing a memory size mismatch stall.
17535         Count this as an arbitrarily high cost of 20.  */
17536      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17537	cost += 20;
17538
17539      /* In the case of FP/MMX moves, the registers actually overlap, and we
17540	 have to switch modes in order to treat them differently.  */
17541      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17542          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17543	cost += 20;
17544
17545      return cost;
17546    }
17547
17548  /* Moves between SSE/MMX and integer unit are expensive.  */
17549  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17550      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17551    return ix86_cost->mmxsse_to_integer;
17552  if (MAYBE_FLOAT_CLASS_P (class1))
17553    return ix86_cost->fp_move;
17554  if (MAYBE_SSE_CLASS_P (class1))
17555    return ix86_cost->sse_move;
17556  if (MAYBE_MMX_CLASS_P (class1))
17557    return ix86_cost->mmx_move;
17558  return 2;
17559}
17560
17561/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
17562
17563bool
17564ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17565{
17566  /* Flags, and only flags, can hold CCmode values.  */
17567  if (CC_REGNO_P (regno))
17568    return GET_MODE_CLASS (mode) == MODE_CC;
17569  if (GET_MODE_CLASS (mode) == MODE_CC
17570      || GET_MODE_CLASS (mode) == MODE_RANDOM
17571      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17572    return 0;
17573  if (FP_REGNO_P (regno))
17574    return VALID_FP_MODE_P (mode);
17575  if (SSE_REGNO_P (regno))
17576    {
17577      /* We implement the move patterns for all vector modes into and
17578	 out of SSE registers, even when no operation instructions
17579	 are available.  */
17580      return (VALID_SSE_REG_MODE (mode)
17581	      || VALID_SSE2_REG_MODE (mode)
17582	      || VALID_MMX_REG_MODE (mode)
17583	      || VALID_MMX_REG_MODE_3DNOW (mode));
17584    }
17585  if (MMX_REGNO_P (regno))
17586    {
17587      /* We implement the move patterns for 3DNOW modes even in MMX mode,
17588	 so if the register is available at all, then we can move data of
17589	 the given mode into or out of it.  */
17590      return (VALID_MMX_REG_MODE (mode)
17591	      || VALID_MMX_REG_MODE_3DNOW (mode));
17592    }
17593
17594  if (mode == QImode)
17595    {
17596      /* Take care with QImode values - they can be in non-QI regs,
17597	 but then they do cause partial register stalls.  */
17598      if (regno < 4 || TARGET_64BIT)
17599	return 1;
17600      if (!TARGET_PARTIAL_REG_STALL)
17601	return 1;
17602      return reload_in_progress || reload_completed;
17603    }
17604  /* We handle both integers and floats in the general purpose registers.  */
17605  else if (VALID_INT_MODE_P (mode))
17606    return 1;
17607  else if (VALID_FP_MODE_P (mode))
17608    return 1;
17609  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
17610     on to use that value in smaller contexts, this can easily force a
17611     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
17612     supporting DImode, allow it.  */
17613  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17614    return 1;
17615
17616  return 0;
17617}
17618
17619/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
17620   tieable integer mode.  */
17621
17622static bool
17623ix86_tieable_integer_mode_p (enum machine_mode mode)
17624{
17625  switch (mode)
17626    {
17627    case HImode:
17628    case SImode:
17629      return true;
17630
17631    case QImode:
17632      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17633
17634    case DImode:
17635      return TARGET_64BIT;
17636
17637    default:
17638      return false;
17639    }
17640}
17641
17642/* Return true if MODE1 is accessible in a register that can hold MODE2
17643   without copying.  That is, all register classes that can hold MODE2
17644   can also hold MODE1.  */
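/* For example, SFmode ties with DFmode and with XFmode, since every register
   bank that can hold the wider float can also hold SFmode, while QImode ties
   with SImode only when TARGET_64BIT or !TARGET_PARTIAL_REG_STALL.  This is
   an illustrative reading of the rules below, not an exhaustive list.  */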
17645
17646bool
17647ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17648{
17649  if (mode1 == mode2)
17650    return true;
17651
17652  if (ix86_tieable_integer_mode_p (mode1)
17653      && ix86_tieable_integer_mode_p (mode2))
17654    return true;
17655
17656  /* MODE2 being XFmode implies fp stack or general regs, which means we
17657     can tie any smaller floating point modes to it.  Note that we do not
17658     tie this with TFmode.  */
17659  if (mode2 == XFmode)
17660    return mode1 == SFmode || mode1 == DFmode;
17661
17662  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17663     that we can tie it with SFmode.  */
17664  if (mode2 == DFmode)
17665    return mode1 == SFmode;
17666
17667  /* If MODE2 is only appropriate for an SSE register, then tie with
17668     any other mode acceptable to SSE registers.  */
17669  if (GET_MODE_SIZE (mode2) >= 8
17670      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17671    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17672
17673  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17674     with any other mode acceptable to MMX registers.  */
17675  if (GET_MODE_SIZE (mode2) == 8
17676      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17677    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17678
17679  return false;
17680}
17681
17682/* Return the cost of moving data of mode M between a
17683   register and memory.  A value of 2 is the default; this cost is
17684   relative to those in `REGISTER_MOVE_COST'.
17685
17686   If moving between registers and memory is more expensive than
17687   between two registers, you should define this macro to express the
17688   relative cost.
17689
17690   Also model the increased cost of moving QImode registers in non-Q_REGS
17691   classes.
17692 */
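/* For instance, on a 32-bit target a DImode value kept in GENERAL_REGS takes
   the default branch below: the cost is int_load[2] (or int_store[2])
   multiplied by (8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD = 2, i.e. two
   word-sized memory moves.  The concrete figures depend on the active
   ix86_cost table.  */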
17693int
17694ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17695{
17696  if (FLOAT_CLASS_P (class))
17697    {
17698      int index;
17699      switch (mode)
17700	{
17701	  case SFmode:
17702	    index = 0;
17703	    break;
17704	  case DFmode:
17705	    index = 1;
17706	    break;
17707	  case XFmode:
17708	    index = 2;
17709	    break;
17710	  default:
17711	    return 100;
17712	}
17713      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17714    }
17715  if (SSE_CLASS_P (class))
17716    {
17717      int index;
17718      switch (GET_MODE_SIZE (mode))
17719	{
17720	  case 4:
17721	    index = 0;
17722	    break;
17723	  case 8:
17724	    index = 1;
17725	    break;
17726	  case 16:
17727	    index = 2;
17728	    break;
17729	  default:
17730	    return 100;
17731	}
17732      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17733    }
17734  if (MMX_CLASS_P (class))
17735    {
17736      int index;
17737      switch (GET_MODE_SIZE (mode))
17738	{
17739	  case 4:
17740	    index = 0;
17741	    break;
17742	  case 8:
17743	    index = 1;
17744	    break;
17745	  default:
17746	    return 100;
17747	}
17748      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17749    }
17750  switch (GET_MODE_SIZE (mode))
17751    {
17752      case 1:
17753	if (in)
17754	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17755		  : ix86_cost->movzbl_load);
17756	else
17757	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17758		  : ix86_cost->int_store[0] + 4);
17759	break;
17760      case 2:
17761	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17762      default:
17763	/* Compute the number of word-sized moves needed.  TFmode is moved as XFmode.  */
17764	if (mode == TFmode)
17765	  mode = XFmode;
17766	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17767		* (((int) GET_MODE_SIZE (mode)
17768		    + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17769    }
17770}
17771
17772/* Compute a (partial) cost for rtx X.  Return true if the complete
17773   cost has been computed, and false if subexpressions should be
17774   scanned.  In either case, *TOTAL contains the cost result.  */
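/* For example (a sketch of the cases below, not a complete list),
   (plus:SI (mult:SI (reg) (const_int 4)) (reg)) is costed as a single lea,
   while a 64-bit immediate that does not fit in a sign- or zero-extended
   32-bit value gets the comparatively high cost 3.  */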
17775
17776static bool
17777ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17778{
17779  enum machine_mode mode = GET_MODE (x);
17780
17781  switch (code)
17782    {
17783    case CONST_INT:
17784    case CONST:
17785    case LABEL_REF:
17786    case SYMBOL_REF:
17787      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17788	*total = 3;
17789      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17790	*total = 2;
17791      else if (flag_pic && SYMBOLIC_CONST (x)
17792	       && (!TARGET_64BIT
17793		   || (GET_CODE (x) != LABEL_REF
17794		       && (GET_CODE (x) != SYMBOL_REF
17795		           || !SYMBOL_REF_LOCAL_P (x)))))
17796	*total = 1;
17797      else
17798	*total = 0;
17799      return true;
17800
17801    case CONST_DOUBLE:
17802      if (mode == VOIDmode)
17803	*total = 0;
17804      else
17805	switch (standard_80387_constant_p (x))
17806	  {
17807	  case 1: /* 0.0 */
17808	    *total = 1;
17809	    break;
17810	  default: /* Other constants */
17811	    *total = 2;
17812	    break;
17813	  case 0:
17814	  case -1:
17815	    /* Start with (MEM (SYMBOL_REF)), since that's where
17816	       it'll probably end up.  Add a penalty for size.  */
17817	    *total = (COSTS_N_INSNS (1)
17818		      + (flag_pic != 0 && !TARGET_64BIT)
17819		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17820	    break;
17821	  }
17822      return true;
17823
17824    case ZERO_EXTEND:
17825      /* The zero extension is often completely free on x86_64, so make
17826	 it as cheap as possible.  */
17827      if (TARGET_64BIT && mode == DImode
17828	  && GET_MODE (XEXP (x, 0)) == SImode)
17829	*total = 1;
17830      else if (TARGET_ZERO_EXTEND_WITH_AND)
17831	*total = ix86_cost->add;
17832      else
17833	*total = ix86_cost->movzx;
17834      return false;
17835
17836    case SIGN_EXTEND:
17837      *total = ix86_cost->movsx;
17838      return false;
17839
17840    case ASHIFT:
17841      if (GET_CODE (XEXP (x, 1)) == CONST_INT
17842	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17843	{
17844	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17845	  if (value == 1)
17846	    {
17847	      *total = ix86_cost->add;
17848	      return false;
17849	    }
17850	  if ((value == 2 || value == 3)
17851	      && ix86_cost->lea <= ix86_cost->shift_const)
17852	    {
17853	      *total = ix86_cost->lea;
17854	      return false;
17855	    }
17856	}
17857      /* FALLTHRU */
17858
17859    case ROTATE:
17860    case ASHIFTRT:
17861    case LSHIFTRT:
17862    case ROTATERT:
17863      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17864	{
17865	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17866	    {
17867	      if (INTVAL (XEXP (x, 1)) > 32)
17868		*total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17869	      else
17870		*total = ix86_cost->shift_const * 2;
17871	    }
17872	  else
17873	    {
17874	      if (GET_CODE (XEXP (x, 1)) == AND)
17875		*total = ix86_cost->shift_var * 2;
17876	      else
17877		*total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17878	    }
17879	}
17880      else
17881	{
17882	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17883	    *total = ix86_cost->shift_const;
17884	  else
17885	    *total = ix86_cost->shift_var;
17886	}
17887      return false;
17888
17889    case MULT:
17890      if (FLOAT_MODE_P (mode))
17891	{
17892	  *total = ix86_cost->fmul;
17893	  return false;
17894	}
17895      else
17896	{
17897	  rtx op0 = XEXP (x, 0);
17898	  rtx op1 = XEXP (x, 1);
17899	  int nbits;
17900	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17901	    {
17902	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17903	      for (nbits = 0; value != 0; value &= value - 1)
17904	        nbits++;
17905	    }
17906	  else
17907	    /* This is arbitrary.  */
17908	    nbits = 7;
17909
17910	  /* Compute costs correctly for widening multiplication.  */
17911	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17912	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17913	         == GET_MODE_SIZE (mode))
17914	    {
17915	      int is_mulwiden = 0;
17916	      enum machine_mode inner_mode = GET_MODE (op0);
17917
17918	      if (GET_CODE (op0) == GET_CODE (op1))
17919		is_mulwiden = 1, op1 = XEXP (op1, 0);
17920	      else if (GET_CODE (op1) == CONST_INT)
17921		{
17922		  if (GET_CODE (op0) == SIGN_EXTEND)
17923		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17924			          == INTVAL (op1);
17925		  else
17926		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17927	        }
17928
17929	      if (is_mulwiden)
17930	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17931	    }
17932
17933  	  *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17934		    + nbits * ix86_cost->mult_bit
17935	            + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17936
17937          return true;
17938	}
17939
17940    case DIV:
17941    case UDIV:
17942    case MOD:
17943    case UMOD:
17944      if (FLOAT_MODE_P (mode))
17945	*total = ix86_cost->fdiv;
17946      else
17947	*total = ix86_cost->divide[MODE_INDEX (mode)];
17948      return false;
17949
17950    case PLUS:
17951      if (FLOAT_MODE_P (mode))
17952	*total = ix86_cost->fadd;
17953      else if (GET_MODE_CLASS (mode) == MODE_INT
17954	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17955	{
17956	  if (GET_CODE (XEXP (x, 0)) == PLUS
17957	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17958	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17959	      && CONSTANT_P (XEXP (x, 1)))
17960	    {
17961	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17962	      if (val == 2 || val == 4 || val == 8)
17963		{
17964		  *total = ix86_cost->lea;
17965		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17966		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17967				      outer_code);
17968		  *total += rtx_cost (XEXP (x, 1), outer_code);
17969		  return true;
17970		}
17971	    }
17972	  else if (GET_CODE (XEXP (x, 0)) == MULT
17973		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17974	    {
17975	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17976	      if (val == 2 || val == 4 || val == 8)
17977		{
17978		  *total = ix86_cost->lea;
17979		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17980		  *total += rtx_cost (XEXP (x, 1), outer_code);
17981		  return true;
17982		}
17983	    }
17984	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
17985	    {
17986	      *total = ix86_cost->lea;
17987	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17988	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17989	      *total += rtx_cost (XEXP (x, 1), outer_code);
17990	      return true;
17991	    }
17992	}
17993      /* FALLTHRU */
17994
17995    case MINUS:
17996      if (FLOAT_MODE_P (mode))
17997	{
17998	  *total = ix86_cost->fadd;
17999	  return false;
18000	}
18001      /* FALLTHRU */
18002
18003    case AND:
18004    case IOR:
18005    case XOR:
18006      if (!TARGET_64BIT && mode == DImode)
18007	{
18008	  *total = (ix86_cost->add * 2
18009		    + (rtx_cost (XEXP (x, 0), outer_code)
18010		       << (GET_MODE (XEXP (x, 0)) != DImode))
18011		    + (rtx_cost (XEXP (x, 1), outer_code)
18012	               << (GET_MODE (XEXP (x, 1)) != DImode)));
18013	  return true;
18014	}
18015      /* FALLTHRU */
18016
18017    case NEG:
18018      if (FLOAT_MODE_P (mode))
18019	{
18020	  *total = ix86_cost->fchs;
18021	  return false;
18022	}
18023      /* FALLTHRU */
18024
18025    case NOT:
18026      if (!TARGET_64BIT && mode == DImode)
18027	*total = ix86_cost->add * 2;
18028      else
18029	*total = ix86_cost->add;
18030      return false;
18031
18032    case COMPARE:
18033      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
18034	  && XEXP (XEXP (x, 0), 1) == const1_rtx
18035	  && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
18036	  && XEXP (x, 1) == const0_rtx)
18037	{
18038	  /* This kind of construct is implemented using test[bwl].
18039	     Treat it as if we had an AND.  */
18040	  *total = (ix86_cost->add
18041		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
18042		    + rtx_cost (const1_rtx, outer_code));
18043	  return true;
18044	}
18045      return false;
18046
18047    case FLOAT_EXTEND:
18048      if (!TARGET_SSE_MATH
18049	  || mode == XFmode
18050	  || (mode == DFmode && !TARGET_SSE2))
18051	/* For standard 80387 constants, raise the cost to prevent
18052	   compress_float_constant() from generating a load from memory.  */
18053	switch (standard_80387_constant_p (XEXP (x, 0)))
18054	  {
18055	  case -1:
18056	  case 0:
18057	    *total = 0;
18058	    break;
18059	  case 1: /* 0.0 */
18060	    *total = 1;
18061	    break;
18062	  default:
18063	    *total = (x86_ext_80387_constants & TUNEMASK
18064		      || optimize_size
18065		      ? 1 : 0);
18066	  }
18067      return false;
18068
18069    case ABS:
18070      if (FLOAT_MODE_P (mode))
18071	*total = ix86_cost->fabs;
18072      return false;
18073
18074    case SQRT:
18075      if (FLOAT_MODE_P (mode))
18076	*total = ix86_cost->fsqrt;
18077      return false;
18078
18079    case UNSPEC:
18080      if (XINT (x, 1) == UNSPEC_TP)
18081	*total = 0;
18082      return false;
18083
18084    default:
18085      return false;
18086    }
18087}
18088
18089#if TARGET_MACHO
18090
18091static int current_machopic_label_num;
18092
18093/* Given a symbol name and its associated stub, write out the
18094   definition of the stub.  */
18095
18096void
18097machopic_output_stub (FILE *file, const char *symb, const char *stub)
18098{
18099  unsigned int length;
18100  char *binder_name, *symbol_name, lazy_ptr_name[32];
18101  int label = ++current_machopic_label_num;
18102
18103  /* For 64-bit we shouldn't get here.  */
18104  gcc_assert (!TARGET_64BIT);
18105
18106  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
18107  symb = (*targetm.strip_name_encoding) (symb);
18108
18109  length = strlen (stub);
18110  binder_name = alloca (length + 32);
18111  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
18112
18113  length = strlen (symb);
18114  symbol_name = alloca (length + 32);
18115  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
18116
18117  sprintf (lazy_ptr_name, "L%d$lz", label);
18118
18119  if (MACHOPIC_PURE)
18120    switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
18121  else
18122    switch_to_section (darwin_sections[machopic_symbol_stub_section]);
18123
18124  fprintf (file, "%s:\n", stub);
18125  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18126
18127  if (MACHOPIC_PURE)
18128    {
18129      fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
18130      fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
18131      fprintf (file, "\tjmp\t*%%edx\n");
18132    }
18133  else
18134    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
18135
18136  fprintf (file, "%s:\n", binder_name);
18137
18138  if (MACHOPIC_PURE)
18139    {
18140      fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
18141      fprintf (file, "\tpushl\t%%eax\n");
18142    }
18143  else
18144    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
18145
18146  fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
18147
18148  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
18149  fprintf (file, "%s:\n", lazy_ptr_name);
18150  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18151  fprintf (file, "\t.long %s\n", binder_name);
18152}
18153
18154void
18155darwin_x86_file_end (void)
18156{
18157  darwin_file_end ();
18158  ix86_file_end ();
18159}
18160#endif /* TARGET_MACHO */
18161
18162/* Order the registers for the register allocator.  */
18163
18164void
18165x86_order_regs_for_local_alloc (void)
18166{
18167   int pos = 0;
18168   int i;
18169
18170   /* First allocate the local general purpose registers.  */
18171   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18172     if (GENERAL_REGNO_P (i) && call_used_regs[i])
18173	reg_alloc_order [pos++] = i;
18174
18175   /* Global general purpose registers.  */
18176   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18177     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
18178	reg_alloc_order [pos++] = i;
18179
18180   /* x87 registers come first in case we are doing FP math
18181      using them.  */
18182   if (!TARGET_SSE_MATH)
18183     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18184       reg_alloc_order [pos++] = i;
18185
18186   /* SSE registers.  */
18187   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18188     reg_alloc_order [pos++] = i;
18189   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18190     reg_alloc_order [pos++] = i;
18191
18192   /* x87 registers.  */
18193   if (TARGET_SSE_MATH)
18194     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18195       reg_alloc_order [pos++] = i;
18196
18197   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
18198     reg_alloc_order [pos++] = i;
18199
18200   /* Initialize the rest of the array, as we do not allocate some registers
18201      at all.  */
18202   while (pos < FIRST_PSEUDO_REGISTER)
18203     reg_alloc_order [pos++] = 0;
18204}
18205
18206/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
18207   struct attribute_spec.handler.  */
18208static tree
18209ix86_handle_struct_attribute (tree *node, tree name,
18210			      tree args ATTRIBUTE_UNUSED,
18211			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
18212{
18213  tree *type = NULL;
18214  if (DECL_P (*node))
18215    {
18216      if (TREE_CODE (*node) == TYPE_DECL)
18217	type = &TREE_TYPE (*node);
18218    }
18219  else
18220    type = node;
18221
18222  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
18223		 || TREE_CODE (*type) == UNION_TYPE)))
18224    {
18225      warning (OPT_Wattributes, "%qs attribute ignored",
18226	       IDENTIFIER_POINTER (name));
18227      *no_add_attrs = true;
18228    }
18229
18230  else if ((is_attribute_p ("ms_struct", name)
18231	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
18232	   || ((is_attribute_p ("gcc_struct", name)
18233		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
18234    {
18235      warning (OPT_Wattributes, "%qs incompatible attribute ignored",
18236               IDENTIFIER_POINTER (name));
18237      *no_add_attrs = true;
18238    }
18239
18240  return NULL_TREE;
18241}
18242
18243static bool
18244ix86_ms_bitfield_layout_p (tree record_type)
18245{
18246  return (TARGET_MS_BITFIELD_LAYOUT &&
18247	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
18248    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
18249}
18250
18251/* Returns an expression indicating where the this parameter is
18252   located on entry to the FUNCTION.  */
18253
18254static rtx
18255x86_this_parameter (tree function)
18256{
18257  tree type = TREE_TYPE (function);
18258
18259  if (TARGET_64BIT)
18260    {
18261      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
18262      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
18263    }
18264
18265  if (ix86_function_regparm (type, function) > 0)
18266    {
18267      tree parm;
18268
18269      parm = TYPE_ARG_TYPES (type);
18270      /* Figure out whether or not the function has a variable number of
18271	 arguments.  */
18272      for (; parm; parm = TREE_CHAIN (parm))
18273	if (TREE_VALUE (parm) == void_type_node)
18274	  break;
18275      /* If not, the this parameter is in the first argument.  */
18276      if (parm)
18277	{
18278	  int regno = 0;
18279	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
18280	    regno = 2;
18281	  return gen_rtx_REG (SImode, regno);
18282	}
18283    }
18284
18285  if (aggregate_value_p (TREE_TYPE (type), type))
18286    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
18287  else
18288    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
18289}
18290
18291/* Determine whether x86_output_mi_thunk can succeed.  */
18292
18293static bool
18294x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
18295			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
18296			 HOST_WIDE_INT vcall_offset, tree function)
18297{
18298  /* 64-bit can handle anything.  */
18299  if (TARGET_64BIT)
18300    return true;
18301
18302  /* For 32-bit, everything's fine if we have one free register.  */
18303  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
18304    return true;
18305
18306  /* Need a free register for vcall_offset.  */
18307  if (vcall_offset)
18308    return false;
18309
18310  /* Need a free register for GOT references.  */
18311  if (flag_pic && !(*targetm.binds_local_p) (function))
18312    return false;
18313
18314  /* Otherwise ok.  */
18315  return true;
18316}
18317
18318/* Output the assembler code for a thunk function.  THUNK_DECL is the
18319   declaration for the thunk function itself, FUNCTION is the decl for
18320   the target function.  DELTA is an immediate constant offset to be
18321   added to THIS.  If VCALL_OFFSET is nonzero, the word at
18322   *(*this + vcall_offset) should be added to THIS.  */
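/* For the simple 32-bit case (no vcall offset, this passed on the stack) the
   emitted thunk is roughly:

	addl	$DELTA, 4(%esp)
	jmp	TARGET_FUNCTION

   This is only an illustration; PIC, regparm/fastcall and 64-bit targets use
   registers and indirect jumps as handled below.  */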
18323
18324static void
18325x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
18326		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
18327		     HOST_WIDE_INT vcall_offset, tree function)
18328{
18329  rtx xops[3];
18330  rtx this = x86_this_parameter (function);
18331  rtx this_reg, tmp;
18332
18333  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
18334     pull it in now and let DELTA benefit.  */
18335  if (REG_P (this))
18336    this_reg = this;
18337  else if (vcall_offset)
18338    {
18339      /* Put the this parameter into %eax.  */
18340      xops[0] = this;
18341      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
18342      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18343    }
18344  else
18345    this_reg = NULL_RTX;
18346
18347  /* Adjust the this parameter by a fixed constant.  */
18348  if (delta)
18349    {
18350      xops[0] = GEN_INT (delta);
18351      xops[1] = this_reg ? this_reg : this;
18352      if (TARGET_64BIT)
18353	{
18354	  if (!x86_64_general_operand (xops[0], DImode))
18355	    {
18356	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18357	      xops[1] = tmp;
18358	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18359	      xops[0] = tmp;
18360	      xops[1] = this;
18361	    }
18362	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18363	}
18364      else
18365	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18366    }
18367
18368  /* Adjust the this parameter by a value stored in the vtable.  */
18369  if (vcall_offset)
18370    {
18371      if (TARGET_64BIT)
18372	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18373      else
18374	{
18375	  int tmp_regno = 2 /* ECX */;
18376	  if (lookup_attribute ("fastcall",
18377	      TYPE_ATTRIBUTES (TREE_TYPE (function))))
18378	    tmp_regno = 0 /* EAX */;
18379	  tmp = gen_rtx_REG (SImode, tmp_regno);
18380	}
18381
18382      xops[0] = gen_rtx_MEM (Pmode, this_reg);
18383      xops[1] = tmp;
18384      if (TARGET_64BIT)
18385	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18386      else
18387	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18388
18389      /* Adjust the this parameter.  */
18390      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18391      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18392	{
18393	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
18394	  xops[0] = GEN_INT (vcall_offset);
18395	  xops[1] = tmp2;
18396	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18397	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18398	}
18399      xops[1] = this_reg;
18400      if (TARGET_64BIT)
18401	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18402      else
18403	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18404    }
18405
18406  /* If necessary, drop THIS back to its stack slot.  */
18407  if (this_reg && this_reg != this)
18408    {
18409      xops[0] = this_reg;
18410      xops[1] = this;
18411      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18412    }
18413
18414  xops[0] = XEXP (DECL_RTL (function), 0);
18415  if (TARGET_64BIT)
18416    {
18417      if (!flag_pic || (*targetm.binds_local_p) (function))
18418	output_asm_insn ("jmp\t%P0", xops);
18419      else
18420	{
18421	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18422	  tmp = gen_rtx_CONST (Pmode, tmp);
18423	  tmp = gen_rtx_MEM (QImode, tmp);
18424	  xops[0] = tmp;
18425	  output_asm_insn ("jmp\t%A0", xops);
18426	}
18427    }
18428  else
18429    {
18430      if (!flag_pic || (*targetm.binds_local_p) (function))
18431	output_asm_insn ("jmp\t%P0", xops);
18432      else
18433#if TARGET_MACHO
18434	if (TARGET_MACHO)
18435	  {
18436	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
18437	    tmp = (gen_rtx_SYMBOL_REF
18438		   (Pmode,
18439		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18440	    tmp = gen_rtx_MEM (QImode, tmp);
18441	    xops[0] = tmp;
18442	    output_asm_insn ("jmp\t%0", xops);
18443	  }
18444	else
18445#endif /* TARGET_MACHO */
18446	{
18447	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18448	  output_set_got (tmp, NULL_RTX);
18449
18450	  xops[1] = tmp;
18451	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18452	  output_asm_insn ("jmp\t{*}%1", xops);
18453	}
18454    }
18455}
18456
18457static void
18458x86_file_start (void)
18459{
18460  default_file_start ();
18461#if TARGET_MACHO
18462  darwin_file_start ();
18463#endif
18464  if (X86_FILE_START_VERSION_DIRECTIVE)
18465    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18466  if (X86_FILE_START_FLTUSED)
18467    fputs ("\t.global\t__fltused\n", asm_out_file);
18468  if (ix86_asm_dialect == ASM_INTEL)
18469    fputs ("\t.intel_syntax\n", asm_out_file);
18470}
18471
18472int
18473x86_field_alignment (tree field, int computed)
18474{
18475  enum machine_mode mode;
18476  tree type = TREE_TYPE (field);
18477
18478  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18479    return computed;
18480  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18481		    ? get_inner_array_type (type) : type);
18482  if (mode == DFmode || mode == DCmode
18483      || GET_MODE_CLASS (mode) == MODE_INT
18484      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18485    return MIN (32, computed);
18486  return computed;
18487}
18488
18489/* Output assembler code to FILE to increment profiler label # LABELNO
18490   for profiling a function entry.  */
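/* For a non-PIC 32-bit target with profile counters enabled this emits
   something like "movl $LP<labelno>, %<PROFILE_COUNT_REGISTER>" followed by
   "call <MCOUNT_NAME>"; the exact label prefix, register and mcount symbol
   are configuration macros, so the text here is only indicative.  */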
18491void
18492x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18493{
18494  if (TARGET_64BIT)
18495    if (flag_pic)
18496      {
18497#ifndef NO_PROFILE_COUNTERS
18498	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18499#endif
18500	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18501      }
18502    else
18503      {
18504#ifndef NO_PROFILE_COUNTERS
18505	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18506#endif
18507	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18508      }
18509  else if (flag_pic)
18510    {
18511#ifndef NO_PROFILE_COUNTERS
18512      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18513	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18514#endif
18515      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18516    }
18517  else
18518    {
18519#ifndef NO_PROFILE_COUNTERS
18520      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18521	       PROFILE_COUNT_REGISTER);
18522#endif
18523      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18524    }
18525}
18526
18527/* We don't have exact information about the insn sizes, but we may assume
18528   quite safely that we are informed about all 1 byte insns and memory
18529   address sizes.  This is enough to eliminate unnecessary padding in
18530   99% of cases.  */
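/* For example, a direct call to a named function is counted as exactly 5
   bytes (opcode plus rel32), and any other non-jump insn whose operands
   mention a symbol is assumed to need at least 1 + 4 bytes.  These are
   conservative lower bounds rather than exact encodings.  */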
18531
18532static int
18533min_insn_size (rtx insn)
18534{
18535  int l = 0;
18536
18537  if (!INSN_P (insn) || !active_insn_p (insn))
18538    return 0;
18539
18540  /* Discard alignments we've emitted, and jump instructions.  */
18541  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18542      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18543    return 0;
18544  if (GET_CODE (insn) == JUMP_INSN
18545      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18546	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18547    return 0;
18548
18549  /* Important case - calls are always 5 bytes.
18550     It is common to have many calls in a row.  */
18551  if (GET_CODE (insn) == CALL_INSN
18552      && symbolic_reference_mentioned_p (PATTERN (insn))
18553      && !SIBLING_CALL_P (insn))
18554    return 5;
18555  if (get_attr_length (insn) <= 1)
18556    return 1;
18557
18558  /* For normal instructions we may rely on the sizes of addresses
18559     and the presence of a symbol to require 4 bytes of encoding.
18560     This is not the case for jumps where references are PC relative.  */
18561  if (GET_CODE (insn) != JUMP_INSN)
18562    {
18563      l = get_attr_length_address (insn);
18564      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18565	l = 4;
18566    }
18567  if (l)
18568    return 1+l;
18569  else
18570    return 2;
18571}
18572
18573/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18574   window.  */
18575
18576static void
18577ix86_avoid_jump_misspredicts (void)
18578{
18579  rtx insn, start = get_insns ();
18580  int nbytes = 0, njumps = 0;
18581  int isjump = 0;
18582
18583  /* Look for all minimal intervals of instructions containing 4 jumps.
18584     The intervals are bounded by START and INSN.  NBYTES is the total
18585     size of instructions in the interval including INSN and not including
18586     START.  When NBYTES is smaller than 16 bytes, it is possible
18587     that the ends of START and INSN wind up in the same 16-byte page.
18588
18589     The smallest offset at which INSN can start is the case where START
18590     ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
18591     We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
18592     */
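  /* Worked example (numbers invented for illustration): if the interval
     currently ends with its 4th jump INSN, min_insn_size (INSN) is 2, and
     trimming the front leaves nbytes = 12 with the trimmed insn itself a
     jump, we pad before INSN by 15 - 12 + 2 = 5 bytes so the four jumps can
     no longer share one 16-byte window.  */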
18593  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18594    {
18595
18596      nbytes += min_insn_size (insn);
18597      if (dump_file)
18598        fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18599		INSN_UID (insn), min_insn_size (insn));
18600      if ((GET_CODE (insn) == JUMP_INSN
18601	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
18602	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18603	  || GET_CODE (insn) == CALL_INSN)
18604	njumps++;
18605      else
18606	continue;
18607
18608      while (njumps > 3)
18609	{
18610	  start = NEXT_INSN (start);
18611	  if ((GET_CODE (start) == JUMP_INSN
18612	       && GET_CODE (PATTERN (start)) != ADDR_VEC
18613	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18614	      || GET_CODE (start) == CALL_INSN)
18615	    njumps--, isjump = 1;
18616	  else
18617	    isjump = 0;
18618	  nbytes -= min_insn_size (start);
18619	}
18620      gcc_assert (njumps >= 0);
18621      if (dump_file)
18622        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18623		INSN_UID (start), INSN_UID (insn), nbytes);
18624
18625      if (njumps == 3 && isjump && nbytes < 16)
18626	{
18627	  int padsize = 15 - nbytes + min_insn_size (insn);
18628
18629	  if (dump_file)
18630	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18631		     INSN_UID (insn), padsize);
18632          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18633	}
18634    }
18635}
18636
18637/* AMD Athlon works faster
18638   when RET is not the destination of a conditional jump or directly preceded
18639   by another jump instruction.  We avoid the penalty by inserting a NOP just
18640   before the RET instruction in such cases.  */
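/* For example, a "ret" that directly follows a conditional branch is
   rewritten to the longer return_internal_long form (effectively a prefixed
   return), trading a slightly longer encoding for the avoided mispredict.
   This description is only a sketch of the replacement logic below.  */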
18641static void
18642ix86_pad_returns (void)
18643{
18644  edge e;
18645  edge_iterator ei;
18646
18647  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18648    {
18649      basic_block bb = e->src;
18650      rtx ret = BB_END (bb);
18651      rtx prev;
18652      bool replace = false;
18653
18654      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18655	  || !maybe_hot_bb_p (bb))
18656	continue;
18657      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18658	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18659	  break;
18660      if (prev && GET_CODE (prev) == CODE_LABEL)
18661	{
18662	  edge e;
18663	  edge_iterator ei;
18664
18665	  FOR_EACH_EDGE (e, ei, bb->preds)
18666	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
18667		&& !(e->flags & EDGE_FALLTHRU))
18668	      replace = true;
18669	}
18670      if (!replace)
18671	{
18672	  prev = prev_active_insn (ret);
18673	  if (prev
18674	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18675		  || GET_CODE (prev) == CALL_INSN))
18676	    replace = true;
18677	  /* Empty functions get a branch mispredict even when the jump destination
18678	     is not visible to us.  */
18679	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18680	    replace = true;
18681	}
18682      if (replace)
18683	{
18684	  emit_insn_before (gen_return_internal_long (), ret);
18685	  delete_insn (ret);
18686	}
18687    }
18688}
18689
18690/* Implement machine specific optimizations.  We implement padding of returns
18691   for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
18692static void
18693ix86_reorg (void)
18694{
18695  if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18696    ix86_pad_returns ();
18697  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18698    ix86_avoid_jump_misspredicts ();
18699}
18700
18701/* Return nonzero when a QImode register that must be represented via a REX
18702   prefix is used.  */
18703bool
18704x86_extended_QIreg_mentioned_p (rtx insn)
18705{
18706  int i;
18707  extract_insn_cached (insn);
18708  for (i = 0; i < recog_data.n_operands; i++)
18709    if (REG_P (recog_data.operand[i])
18710	&& REGNO (recog_data.operand[i]) >= 4)
18711       return true;
18712  return false;
18713}
18714
18715/* Return nonzero when P points to a register encoded via a REX prefix.
18716   Called via for_each_rtx.  */
18717static int
18718extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18719{
18720   unsigned int regno;
18721   if (!REG_P (*p))
18722     return 0;
18723   regno = REGNO (*p);
18724   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18725}
18726
18727/* Return true when INSN mentions a register that must be encoded using a REX
18728   prefix.  */
18729bool
18730x86_extended_reg_mentioned_p (rtx insn)
18731{
18732  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18733}
18734
18735/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
18736   optabs would emit if we didn't have TFmode patterns.  */
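/* A minimal C sketch of the sequence emitted here, for the DImode case
   (illustrative only):

     if ((int64_t) x >= 0)
       out = (double) x;
     else
       out = 2.0 * (double) ((x >> 1) | (x & 1));

   OR-ing the discarded low bit back in keeps the final rounding correct.  */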
18737
18738void
18739x86_emit_floatuns (rtx operands[2])
18740{
18741  rtx neglab, donelab, i0, i1, f0, in, out;
18742  enum machine_mode mode, inmode;
18743
18744  inmode = GET_MODE (operands[1]);
18745  gcc_assert (inmode == SImode || inmode == DImode);
18746
18747  out = operands[0];
18748  in = force_reg (inmode, operands[1]);
18749  mode = GET_MODE (out);
18750  neglab = gen_label_rtx ();
18751  donelab = gen_label_rtx ();
18752  i1 = gen_reg_rtx (Pmode);
18753  f0 = gen_reg_rtx (mode);
18754
18755  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18756
18757  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18758  emit_jump_insn (gen_jump (donelab));
18759  emit_barrier ();
18760
18761  emit_label (neglab);
18762
18763  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18764  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18765  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18766  expand_float (f0, i0, 0);
18767  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18768
18769  emit_label (donelab);
18770}
18771
18772/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18773   with all elements equal to VAR.  Return true if successful.  */
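/* For example (sketch of the "widen" path below), splatting a QImode value B
   across V8QImode on a plain MMX target first forms the HImode value
   (B << 8) | B, then recurses to splat that across V4HImode, and finally
   views the V4HImode result as V8QImode.  */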
18774
18775static bool
18776ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18777				   rtx target, rtx val)
18778{
18779  enum machine_mode smode, wsmode, wvmode;
18780  rtx x;
18781
18782  switch (mode)
18783    {
18784    case V2SImode:
18785    case V2SFmode:
18786      if (!mmx_ok)
18787	return false;
18788      /* FALLTHRU */
18789
18790    case V2DFmode:
18791    case V2DImode:
18792    case V4SFmode:
18793    case V4SImode:
18794      val = force_reg (GET_MODE_INNER (mode), val);
18795      x = gen_rtx_VEC_DUPLICATE (mode, val);
18796      emit_insn (gen_rtx_SET (VOIDmode, target, x));
18797      return true;
18798
18799    case V4HImode:
18800      if (!mmx_ok)
18801	return false;
18802      if (TARGET_SSE || TARGET_3DNOW_A)
18803	{
18804	  val = gen_lowpart (SImode, val);
18805	  x = gen_rtx_TRUNCATE (HImode, val);
18806	  x = gen_rtx_VEC_DUPLICATE (mode, x);
18807	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
18808	  return true;
18809	}
18810      else
18811	{
18812	  smode = HImode;
18813	  wsmode = SImode;
18814	  wvmode = V2SImode;
18815	  goto widen;
18816	}
18817
18818    case V8QImode:
18819      if (!mmx_ok)
18820	return false;
18821      smode = QImode;
18822      wsmode = HImode;
18823      wvmode = V4HImode;
18824      goto widen;
18825    case V8HImode:
18826      if (TARGET_SSE2)
18827	{
18828	  rtx tmp1, tmp2;
18829	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
18830	  tmp1 = gen_reg_rtx (SImode);
18831	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
18832	  /* Insert the SImode value as low element of V4SImode vector. */
18833	  tmp2 = gen_reg_rtx (V4SImode);
18834	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18835				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18836				    CONST0_RTX (V4SImode),
18837				    const1_rtx);
18838	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18839	  /* Cast the V4SImode vector back to a V8HImode vector.  */
18840	  tmp1 = gen_reg_rtx (V8HImode);
18841	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18842	  /* Duplicate the low short through the whole low SImode word.  */
18843	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18844	  /* Cast the V8HImode vector back to a V4SImode vector.  */
18845	  tmp2 = gen_reg_rtx (V4SImode);
18846	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18847	  /* Replicate the low element of the V4SImode vector.  */
18848	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18849	  /* Cast the V4SImode vector back to V8HImode, and store in target.  */
18850	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18851	  return true;
18852	}
18853      smode = HImode;
18854      wsmode = SImode;
18855      wvmode = V4SImode;
18856      goto widen;
18857    case V16QImode:
18858      if (TARGET_SSE2)
18859	{
18860	  rtx tmp1, tmp2;
18861	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
18862	  tmp1 = gen_reg_rtx (SImode);
18863	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
18864	  /* Insert the SImode value as low element of V4SImode vector. */
18865	  tmp2 = gen_reg_rtx (V4SImode);
18866	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18867				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18868				    CONST0_RTX (V4SImode),
18869				    const1_rtx);
18870	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18871	  /* Cast the V4SImode vector back to a V16QImode vector.  */
18872	  tmp1 = gen_reg_rtx (V16QImode);
18873	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18874	  /* Duplicate the low byte through the whole low SImode word.  */
18875	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18876	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18877	  /* Cast the V16QImode vector back to a V4SImode vector.  */
18878	  tmp2 = gen_reg_rtx (V4SImode);
18879	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18880	  /* Replicate the low element of the V4SImode vector.  */
18881	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18882	  /* Cast the V2SImode back to V16QImode, and store in target.  */
18883	  /* Cast the V4SImode vector back to V16QImode, and store in target.  */
18884	  return true;
18885	}
18886      smode = QImode;
18887      wsmode = HImode;
18888      wvmode = V8HImode;
18889      goto widen;
18890    widen:
18891      /* Replicate the value once into the next wider mode and recurse.  */
18892      val = convert_modes (wsmode, smode, val, true);
18893      x = expand_simple_binop (wsmode, ASHIFT, val,
18894			       GEN_INT (GET_MODE_BITSIZE (smode)),
18895			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
18896      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18897
18898      x = gen_reg_rtx (wvmode);
18899      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18900	gcc_unreachable ();
18901      emit_move_insn (target, gen_lowpart (mode, x));
18902      return true;
18903
18904    default:
18905      return false;
18906    }
18907}
18908
18909/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18910   whose ONE_VAR element is VAR, and other elements are zero.  Return true
18911   if successful.  */
18912
18913static bool
18914ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18915				     rtx target, rtx var, int one_var)
18916{
18917  enum machine_mode vsimode;
18918  rtx new_target;
18919  rtx x, tmp;
18920
18921  switch (mode)
18922    {
18923    case V2SFmode:
18924    case V2SImode:
18925      if (!mmx_ok)
18926	return false;
18927      /* FALLTHRU */
18928
18929    case V2DFmode:
18930    case V2DImode:
18931      if (one_var != 0)
18932	return false;
18933      var = force_reg (GET_MODE_INNER (mode), var);
18934      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18935      emit_insn (gen_rtx_SET (VOIDmode, target, x));
18936      return true;
18937
18938    case V4SFmode:
18939    case V4SImode:
18940      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18941	new_target = gen_reg_rtx (mode);
18942      else
18943	new_target = target;
18944      var = force_reg (GET_MODE_INNER (mode), var);
18945      x = gen_rtx_VEC_DUPLICATE (mode, var);
18946      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18947      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18948      if (one_var != 0)
18949	{
18950	  /* We need to shuffle the value to the correct position, so
18951	     create a new pseudo to store the intermediate result.  */
18952
18953	  /* With SSE2, we can use the integer shuffle insns.  */
18954	  if (mode != V4SFmode && TARGET_SSE2)
18955	    {
18956	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18957					    GEN_INT (1),
18958					    GEN_INT (one_var == 1 ? 0 : 1),
18959					    GEN_INT (one_var == 2 ? 0 : 1),
18960					    GEN_INT (one_var == 3 ? 0 : 1)));
18961	      if (target != new_target)
18962		emit_move_insn (target, new_target);
18963	      return true;
18964	    }
18965
18966	  /* Otherwise convert the intermediate result to V4SFmode and
18967	     use the SSE1 shuffle instructions.  */
18968	  if (mode != V4SFmode)
18969	    {
18970	      tmp = gen_reg_rtx (V4SFmode);
18971	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18972	    }
18973	  else
18974	    tmp = new_target;
18975
18976	  emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18977				       GEN_INT (1),
18978				       GEN_INT (one_var == 1 ? 0 : 1),
18979				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
18980				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18981
18982	  if (mode != V4SFmode)
18983	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18984	  else if (tmp != target)
18985	    emit_move_insn (target, tmp);
18986	}
18987      else if (target != new_target)
18988	emit_move_insn (target, new_target);
18989      return true;
18990
18991    case V8HImode:
18992    case V16QImode:
18993      vsimode = V4SImode;
18994      goto widen;
18995    case V4HImode:
18996    case V8QImode:
18997      if (!mmx_ok)
18998	return false;
18999      vsimode = V2SImode;
19000      goto widen;
19001    widen:
19002      if (one_var != 0)
19003	return false;
19004
19005      /* Zero extend the variable element to SImode and recurse.  */
19006      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
19007
19008      x = gen_reg_rtx (vsimode);
19009      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
19010						var, one_var))
19011	gcc_unreachable ();
19012
19013      emit_move_insn (target, gen_lowpart (mode, x));
19014      return true;
19015
19016    default:
19017      return false;
19018    }
19019}
19020
19021/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
19022   consisting of the values in VALS.  It is known that all elements
19023   except ONE_VAR are constants.  Return true if successful.  */
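/* Sketch of the V8QImode path below: if, say, element 3 is the only variable
   byte, it is merged with the constant byte in element 2 into one HImode
   value ((var << 8) | constant) and the problem is reduced to setting HImode
   element 1 of the corresponding V4HImode vector.  */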
19024
19025static bool
19026ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
19027				 rtx target, rtx vals, int one_var)
19028{
19029  rtx var = XVECEXP (vals, 0, one_var);
19030  enum machine_mode wmode;
19031  rtx const_vec, x;
19032
19033  const_vec = copy_rtx (vals);
19034  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
19035  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
19036
19037  switch (mode)
19038    {
19039    case V2DFmode:
19040    case V2DImode:
19041    case V2SFmode:
19042    case V2SImode:
19043      /* For the two element vectors, it's just as easy to use
19044	 the general case.  */
19045      return false;
19046
19047    case V4SFmode:
19048    case V4SImode:
19049    case V8HImode:
19050    case V4HImode:
19051      break;
19052
19053    case V16QImode:
19054      wmode = V8HImode;
19055      goto widen;
19056    case V8QImode:
19057      wmode = V4HImode;
19058      goto widen;
19059    widen:
19060      /* There's no way to set one QImode entry easily.  Combine
19061	 the variable value with its adjacent constant value, and
19062	 promote to an HImode set.  */
19063      x = XVECEXP (vals, 0, one_var ^ 1);
19064      if (one_var & 1)
19065	{
19066	  var = convert_modes (HImode, QImode, var, true);
19067	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
19068				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
19069	  x = GEN_INT (INTVAL (x) & 0xff);
19070	}
19071      else
19072	{
19073	  var = convert_modes (HImode, QImode, var, true);
19074	  x = gen_int_mode (INTVAL (x) << 8, HImode);
19075	}
19076      if (x != const0_rtx)
19077	var = expand_simple_binop (HImode, IOR, var, x, var,
19078				   1, OPTAB_LIB_WIDEN);
19079
19080      x = gen_reg_rtx (wmode);
19081      emit_move_insn (x, gen_lowpart (wmode, const_vec));
19082      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
19083
19084      emit_move_insn (target, gen_lowpart (mode, x));
19085      return true;
19086
19087    default:
19088      return false;
19089    }
19090
19091  emit_move_insn (target, const_vec);
19092  ix86_expand_vector_set (mmx_ok, target, var, one_var);
19093  return true;
19094}
19095
19096/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
19097   all values variable, and none identical.  */
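/* As an example of the word-building fallback below: V8HImode with all
   elements variable on a 32-bit target packs the elements pairwise into four
   SImode words, (elt[2i+1] << 16) | elt[2i], builds a V4SImode vector from
   those words, and reinterprets the result as V8HImode (sketch only).  */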
19098
19099static void
19100ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
19101				 rtx target, rtx vals)
19102{
19103  enum machine_mode half_mode = GET_MODE_INNER (mode);
19104  rtx op0 = NULL, op1 = NULL;
19105  bool use_vec_concat = false;
19106
19107  switch (mode)
19108    {
19109    case V2SFmode:
19110    case V2SImode:
19111      if (!mmx_ok && !TARGET_SSE)
19112	break;
19113      /* FALLTHRU */
19114
19115    case V2DFmode:
19116    case V2DImode:
19117      /* For the two element vectors, we always implement VEC_CONCAT.  */
19118      op0 = XVECEXP (vals, 0, 0);
19119      op1 = XVECEXP (vals, 0, 1);
19120      use_vec_concat = true;
19121      break;
19122
19123    case V4SFmode:
19124      half_mode = V2SFmode;
19125      goto half;
19126    case V4SImode:
19127      half_mode = V2SImode;
19128      goto half;
19129    half:
19130      {
19131	rtvec v;
19132
19133	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
19134	   Recurse to load the two halves.  */
19135
19136	op0 = gen_reg_rtx (half_mode);
19137	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
19138	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
19139
19140	op1 = gen_reg_rtx (half_mode);
19141	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
19142	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
19143
19144	use_vec_concat = true;
19145      }
19146      break;
19147
19148    case V8HImode:
19149    case V16QImode:
19150    case V4HImode:
19151    case V8QImode:
19152      break;
19153
19154    default:
19155      gcc_unreachable ();
19156    }
19157
19158  if (use_vec_concat)
19159    {
19160      if (!register_operand (op0, half_mode))
19161	op0 = force_reg (half_mode, op0);
19162      if (!register_operand (op1, half_mode))
19163	op1 = force_reg (half_mode, op1);
19164
19165      emit_insn (gen_rtx_SET (VOIDmode, target,
19166			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
19167    }
19168  else
19169    {
19170      int i, j, n_elts, n_words, n_elt_per_word;
19171      enum machine_mode inner_mode;
19172      rtx words[4], shift;
19173
19174      inner_mode = GET_MODE_INNER (mode);
19175      n_elts = GET_MODE_NUNITS (mode);
19176      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
19177      n_elt_per_word = n_elts / n_words;
19178      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
19179
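      /* Build each word of the vector in an integer register: start with
	 the highest-indexed element of the word and shift left as the
	 lower-indexed elements are OR'd in, leaving element 0 of the word
	 in the least significant bits (the little-endian element order).  */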
19180      for (i = 0; i < n_words; ++i)
19181	{
19182	  rtx word = NULL_RTX;
19183
19184	  for (j = 0; j < n_elt_per_word; ++j)
19185	    {
19186	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
19187	      elt = convert_modes (word_mode, inner_mode, elt, true);
19188
19189	      if (j == 0)
19190		word = elt;
19191	      else
19192		{
19193		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
19194					      word, 1, OPTAB_LIB_WIDEN);
19195		  word = expand_simple_binop (word_mode, IOR, word, elt,
19196					      word, 1, OPTAB_LIB_WIDEN);
19197		}
19198	    }
19199
19200	  words[i] = word;
19201	}
19202
19203      if (n_words == 1)
19204	emit_move_insn (target, gen_lowpart (mode, words[0]));
19205      else if (n_words == 2)
19206	{
19207	  rtx tmp = gen_reg_rtx (mode);
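	  /* Clobber TMP first so that the two word-sized writes below are
	     not partial updates of an uninitialized register.  */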
19208	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
19209	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
19210	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
19211	  emit_move_insn (target, tmp);
19212	}
19213      else if (n_words == 4)
19214	{
19215	  rtx tmp = gen_reg_rtx (V4SImode);
19216	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
19217	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
19218	  emit_move_insn (target, gen_lowpart (mode, tmp));
19219	}
19220      else
19221	gcc_unreachable ();
19222    }
19223}
19224
19225/* Initialize vector TARGET via VALS.  Suppress the use of MMX
19226   instructions unless MMX_OK is true.  */
19227
19228void
19229ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
19230{
19231  enum machine_mode mode = GET_MODE (target);
19232  enum machine_mode inner_mode = GET_MODE_INNER (mode);
19233  int n_elts = GET_MODE_NUNITS (mode);
19234  int n_var = 0, one_var = -1;
19235  bool all_same = true, all_const_zero = true;
19236  int i;
19237  rtx x;
19238
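  /* Scan the elements: count the non-constant ones (remembering the
     position of the last), and note whether all elements are identical and
     whether every constant element is zero.  */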
19239  for (i = 0; i < n_elts; ++i)
19240    {
19241      x = XVECEXP (vals, 0, i);
19242      if (!CONSTANT_P (x))
19243	n_var++, one_var = i;
19244      else if (x != CONST0_RTX (inner_mode))
19245	all_const_zero = false;
19246      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
19247	all_same = false;
19248    }
19249
19250  /* Constants are best loaded from the constant pool.  */
19251  if (n_var == 0)
19252    {
19253      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
19254      return;
19255    }
19256
19257  /* If all values are identical, broadcast the value.  */
19258  if (all_same
19259      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
19260					    XVECEXP (vals, 0, 0)))
19261    return;
19262
19263  /* Values where only one field is non-constant are best loaded from
19264     the pool and overwritten via move later.  */
19265  if (n_var == 1)
19266    {
19267      if (all_const_zero
19268	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
19269						  XVECEXP (vals, 0, one_var),
19270						  one_var))
19271	return;
19272
19273      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
19274	return;
19275    }
19276
19277  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
19278}
19279
19280void
19281ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
19282{
19283  enum machine_mode mode = GET_MODE (target);
19284  enum machine_mode inner_mode = GET_MODE_INNER (mode);
19285  bool use_vec_merge = false;
19286  rtx tmp;
19287
19288  switch (mode)
19289    {
19290    case V2SFmode:
19291    case V2SImode:
19292      if (mmx_ok)
19293	{
19294	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
19295	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
19296	  if (elt == 0)
19297	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
19298	  else
19299	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
19300	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19301	  return;
19302	}
19303      break;
19304
19305    case V2DFmode:
19306    case V2DImode:
19307      {
19308	rtx op0, op1;
19309
19310	/* For the two element vectors, we implement a VEC_CONCAT with
19311	   the extraction of the other element.  */
19312
19313	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
19314	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
19315
19316	if (elt == 0)
19317	  op0 = val, op1 = tmp;
19318	else
19319	  op0 = tmp, op1 = val;
19320
19321	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
19322	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19323      }
19324      return;
19325
19326    case V4SFmode:
19327      switch (elt)
19328	{
19329	case 0:
19330	  use_vec_merge = true;
19331	  break;
19332
19333	case 1:
19334	  /* tmp = target = A B C D */
19335	  tmp = copy_to_reg (target);
19336	  /* target = A A B B */
19337	  emit_insn (gen_sse_unpcklps (target, target, target));
19338	  /* target = X A B B */
19339	  ix86_expand_vector_set (false, target, val, 0);
19340	  /* target = A X C D  */
19341	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
19342				       GEN_INT (1), GEN_INT (0),
19343				       GEN_INT (2+4), GEN_INT (3+4)));
19344	  return;
19345
19346	case 2:
19347	  /* tmp = target = A B C D */
19348	  tmp = copy_to_reg (target);
19349	  /* tmp = X B C D */
19350	  ix86_expand_vector_set (false, tmp, val, 0);
19351	  /* target = A B X D */
19352	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
19353				       GEN_INT (0), GEN_INT (1),
19354				       GEN_INT (0+4), GEN_INT (3+4)));
19355	  return;
19356
19357	case 3:
19358	  /* tmp = target = A B C D */
19359	  tmp = copy_to_reg (target);
19360	  /* tmp = X B C D */
19361	  ix86_expand_vector_set (false, tmp, val, 0);
19362	  /* target = A B C X */
19363	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
19364				       GEN_INT (0), GEN_INT (1),
19365				       GEN_INT (2+4), GEN_INT (0+4)));
19366	  return;
19367
19368	default:
19369	  gcc_unreachable ();
19370	}
19371      break;
19372
19373    case V4SImode:
19374      /* Element 0 handled by vec_merge below.  */
19375      if (elt == 0)
19376	{
19377	  use_vec_merge = true;
19378	  break;
19379	}
19380
19381      if (TARGET_SSE2)
19382	{
19383	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
19384	     store into element 0, then shuffle them back.  */
19385
19386	  rtx order[4];
19387
19388	  order[0] = GEN_INT (elt);
19389	  order[1] = const1_rtx;
19390	  order[2] = const2_rtx;
19391	  order[3] = GEN_INT (3);
19392	  order[elt] = const0_rtx;
19393
19394	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19395					order[1], order[2], order[3]));
19396
19397	  ix86_expand_vector_set (false, target, val, 0);
19398
19399	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19400					order[1], order[2], order[3]));
19401	}
19402      else
19403	{
19404	  /* For SSE1, we have to reuse the V4SF code.  */
19405	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19406				  gen_lowpart (SFmode, val), elt);
19407	}
19408      return;
19409
19410    case V8HImode:
19411      use_vec_merge = TARGET_SSE2;
19412      break;
19413    case V4HImode:
19414      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19415      break;
19416
19417    case V16QImode:
19418    case V8QImode:
19419    default:
19420      break;
19421    }
19422
19423  if (use_vec_merge)
19424    {
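      /* Duplicate VAL into every lane and use a vec_merge so that only
	 lane ELT of TARGET is replaced.  */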
19425      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19426      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19427      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19428    }
19429  else
19430    {
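      /* No insertion pattern matched: go through memory.  Spill TARGET to
	 a stack slot, overwrite the selected element there, and reload the
	 whole vector.  */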
19431      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19432
19433      emit_move_insn (mem, target);
19434
19435      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19436      emit_move_insn (tmp, val);
19437
19438      emit_move_insn (target, mem);
19439    }
19440}
19441
19442void
19443ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19444{
19445  enum machine_mode mode = GET_MODE (vec);
19446  enum machine_mode inner_mode = GET_MODE_INNER (mode);
19447  bool use_vec_extr = false;
19448  rtx tmp;
19449
19450  switch (mode)
19451    {
19452    case V2SImode:
19453    case V2SFmode:
19454      if (!mmx_ok)
19455	break;
19456      /* FALLTHRU */
19457
19458    case V2DFmode:
19459    case V2DImode:
19460      use_vec_extr = true;
19461      break;
19462
19463    case V4SFmode:
19464      switch (elt)
19465	{
19466	case 0:
19467	  tmp = vec;
19468	  break;
19469
19470	case 1:
19471	case 3:
19472	  tmp = gen_reg_rtx (mode);
19473	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19474				       GEN_INT (elt), GEN_INT (elt),
19475				       GEN_INT (elt+4), GEN_INT (elt+4)));
19476	  break;
19477
19478	case 2:
19479	  tmp = gen_reg_rtx (mode);
19480	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19481	  break;
19482
19483	default:
19484	  gcc_unreachable ();
19485	}
19486      vec = tmp;
19487      use_vec_extr = true;
19488      elt = 0;
19489      break;
19490
19491    case V4SImode:
19492      if (TARGET_SSE2)
19493	{
19494	  switch (elt)
19495	    {
19496	    case 0:
19497	      tmp = vec;
19498	      break;
19499
19500	    case 1:
19501	    case 3:
19502	      tmp = gen_reg_rtx (mode);
19503	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19504					    GEN_INT (elt), GEN_INT (elt),
19505					    GEN_INT (elt), GEN_INT (elt)));
19506	      break;
19507
19508	    case 2:
19509	      tmp = gen_reg_rtx (mode);
19510	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19511	      break;
19512
19513	    default:
19514	      gcc_unreachable ();
19515	    }
19516	  vec = tmp;
19517	  use_vec_extr = true;
19518	  elt = 0;
19519	}
19520      else
19521	{
19522	  /* For SSE1, we have to reuse the V4SF code.  */
19523	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19524				      gen_lowpart (V4SFmode, vec), elt);
19525	  return;
19526	}
19527      break;
19528
19529    case V8HImode:
19530      use_vec_extr = TARGET_SSE2;
19531      break;
19532    case V4HImode:
19533      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19534      break;
19535
19536    case V16QImode:
19537    case V8QImode:
19538      /* ??? Could extract the appropriate HImode element and shift.  */
19539    default:
19540      break;
19541    }
19542
19543  if (use_vec_extr)
19544    {
19545      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19546      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19547
19548      /* Let the rtl optimizers know about the zero extension performed.  */
19549      if (inner_mode == HImode)
19550	{
19551	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19552	  target = gen_lowpart (SImode, target);
19553	}
19554
19555      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19556    }
19557  else
19558    {
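      /* No direct extraction pattern: spill VEC to a stack slot and load
	 the requested element back from memory.  */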
19559      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19560
19561      emit_move_insn (mem, vec);
19562
19563      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19564      emit_move_insn (target, tmp);
19565    }
19566}
19567
19568/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
19569   pattern to reduce; DEST is the destination; IN is the input vector.  */
19570
19571void
19572ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19573{
19574  rtx tmp1, tmp2, tmp3;
19575
19576  tmp1 = gen_reg_rtx (V4SFmode);
19577  tmp2 = gen_reg_rtx (V4SFmode);
19578  tmp3 = gen_reg_rtx (V4SFmode);
19579
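  /* movhlps copies the high pair of IN onto the low pair of tmp1; FN then
     combines the two pairs, and a final shuffle plus FN folds the two
     partial results (elements 0 and 1 of tmp2) so that element 0 of DEST
     holds the complete reduction.  */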
19580  emit_insn (gen_sse_movhlps (tmp1, in, in));
19581  emit_insn (fn (tmp2, tmp1, in));
19582
19583  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19584			       GEN_INT (1), GEN_INT (1),
19585			       GEN_INT (1+4), GEN_INT (1+4)));
19586  emit_insn (fn (dest, tmp2, tmp3));
19587}
19588
19589/* Target hook for scalar_mode_supported_p.  */
19590static bool
19591ix86_scalar_mode_supported_p (enum machine_mode mode)
19592{
19593  if (DECIMAL_FLOAT_MODE_P (mode))
19594    return true;
19595  else
19596    return default_scalar_mode_supported_p (mode);
19597}
19598
19599/* Implements target hook vector_mode_supported_p.  */
19600static bool
19601ix86_vector_mode_supported_p (enum machine_mode mode)
19602{
19603  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19604    return true;
19605  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19606    return true;
19607  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19608    return true;
19609  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19610    return true;
19611  return false;
19612}
19613
19614/* Worker function for TARGET_MD_ASM_CLOBBERS.
19615
19616   We do this in the new i386 backend to maintain source compatibility
19617   with the old cc0-based compiler.  */
19618
19619static tree
19620ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19621		      tree inputs ATTRIBUTE_UNUSED,
19622		      tree clobbers)
19623{
19624  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19625			clobbers);
19626  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19627			clobbers);
19628  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19629			clobbers);
19630  return clobbers;
19631}
19632
19633/* Return true if this goes in large data/bss.  */
19634
19635static bool
19636ix86_in_large_data_p (tree exp)
19637{
19638  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19639    return false;
19640
19641  /* Functions are never large data.  */
19642  if (TREE_CODE (exp) == FUNCTION_DECL)
19643    return false;
19644
19645  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19646    {
19647      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19648      if (strcmp (section, ".ldata") == 0
19649	  || strcmp (section, ".lbss") == 0)
19650	return true;
19651      return false;
19652    }
19653  else
19654    {
19655      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19656
19657      /* If this is an incomplete type with size 0, then we can't put it
19658	 in data because it might be too big when completed.  */
19659      if (!size || size > ix86_section_threshold)
19660	return true;
19661    }
19662
19663  return false;
19664}
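
/* Record in the symbol's flags that a static or external variable lives
   in the large data/bss area (SYMBOL_FLAG_FAR_ADDR), so that later
   address computations treat it accordingly.  */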
19665static void
19666ix86_encode_section_info (tree decl, rtx rtl, int first)
19667{
19668  default_encode_section_info (decl, rtl, first);
19669
19670  if (TREE_CODE (decl) == VAR_DECL
19671      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19672      && ix86_in_large_data_p (decl))
19673    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19674}
19675
19676/* Worker function for REVERSE_CONDITION.  */
19677
19678enum rtx_code
19679ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19680{
19681  return (mode != CCFPmode && mode != CCFPUmode
19682	  ? reverse_condition (code)
19683	  : reverse_condition_maybe_unordered (code));
19684}
19685
19686/* Output code to perform an x87 FP register move, from OPERANDS[1]
19687   to OPERANDS[0].  */
19688
19689const char *
19690output_387_reg_move (rtx insn, rtx *operands)
19691{
19692  if (REG_P (operands[1])
19693      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19694    {
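      /* The source register dies with this move, so use a form that also
	 pops the x87 stack: ffreep when the destination is %st(0), fstp
	 otherwise.  */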
19695      if (REGNO (operands[0]) == FIRST_STACK_REG)
19696	return output_387_ffreep (operands, 0);
19697      return "fstp\t%y0";
19698    }
19699  if (STACK_TOP_P (operands[0]))
19700    return "fld%z1\t%y1";
19701  return "fst\t%y0";
19702}
19703
19704/* Output code to perform a conditional jump to LABEL, if C2 flag in
19705   FP status register is set.  */
19706
19707void
19708ix86_emit_fp_unordered_jump (rtx label)
19709{
19710  rtx reg = gen_reg_rtx (HImode);
19711  rtx temp;
19712
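  /* fnstsw copies the FP status word into REG; C2 is bit 2 of its high
     byte.  With sahf, that byte is loaded into the flags register, where
     C2 lands in PF (tested as UNORDERED); otherwise the bit is tested
     directly.  */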
19713  emit_insn (gen_x86_fnstsw_1 (reg));
19714
19715  if (TARGET_USE_SAHF)
19716    {
19717      emit_insn (gen_x86_sahf_1 (reg));
19718
19719      temp = gen_rtx_REG (CCmode, FLAGS_REG);
19720      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19721    }
19722  else
19723    {
19724      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19725
19726      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19727      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19728    }
19729
19730  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19731			      gen_rtx_LABEL_REF (VOIDmode, label),
19732			      pc_rtx);
19733  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19734  emit_jump_insn (temp);
19735}
19736
19737/* Output code to perform a log1p XFmode calculation.  */
19738
19739void
ix86_emit_i387_log1p (rtx op0, rtx op1)
19740{
19741  rtx label1 = gen_label_rtx ();
19742  rtx label2 = gen_label_rtx ();
19743
19744  rtx tmp = gen_reg_rtx (XFmode);
19745  rtx tmp2 = gen_reg_rtx (XFmode);
19746
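  /* fyl2xp1 is only accurate for |x| < 1 - sqrt(2)/2 (about 0.2929), so
     compare |op1| against that constant: below it, compute
     ln(2) * log2(1 + op1) with fyl2xp1; otherwise add 1 and use fyl2x.  */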
19747  emit_insn (gen_absxf2 (tmp, op1));
19748  emit_insn (gen_cmpxf (tmp,
19749    CONST_DOUBLE_FROM_REAL_VALUE (
19750       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19751       XFmode)));
19752  emit_jump_insn (gen_bge (label1));
19753
19754  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19755  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19756  emit_jump (label2);
19757
19758  emit_label (label1);
19759  emit_move_insn (tmp, CONST1_RTX (XFmode));
19760  emit_insn (gen_addxf3 (tmp, op1, tmp));
19761  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19762  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19763
19764  emit_label (label2);
19765}
19766
19767/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
19768
19769static void
19770i386_solaris_elf_named_section (const char *name, unsigned int flags,
19771				tree decl)
19772{
19773  /* With Binutils 2.15, the "@unwind" marker must be specified on
19774     every occurrence of the ".eh_frame" section, not just the first
19775     one.  */
19776  if (TARGET_64BIT
19777      && strcmp (name, ".eh_frame") == 0)
19778    {
19779      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19780	       flags & SECTION_WRITE ? "aw" : "a");
19781      return;
19782    }
19783  default_elf_asm_named_section (name, flags, decl);
19784}
19785
19786/* Return the mangling of TYPE if it is an extended fundamental type.  */
19787
19788static const char *
19789ix86_mangle_fundamental_type (tree type)
19790{
19791  switch (TYPE_MODE (type))
19792    {
19793    case TFmode:
19794      /* __float128 is "g".  */
19795      return "g";
19796    case XFmode:
19797      /* "long double" or __float80 is "e".  */
19798      return "e";
19799    default:
19800      return NULL;
19801    }
19802}
19803
19804/* For 32-bit code we can save PIC register setup by using
19805   the __stack_chk_fail_local hidden function instead of calling
19806   __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
19807   register, so it is better to call __stack_chk_fail directly.  */
19808
19809static tree
19810ix86_stack_protect_fail (void)
19811{
19812  return TARGET_64BIT
19813	 ? default_external_stack_protect_fail ()
19814	 : default_hidden_stack_protect_fail ();
19815}
19816
19817/* Select a format to encode pointers in exception handling data.  CODE
19818   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
19819   true if the symbol may be affected by dynamic relocations.
19820
19821   ??? All x86 object file formats are capable of representing this.
19822   After all, the relocation needed is the same as for the call insn.
19823   Whether or not a particular assembler allows us to enter such, I
19824   guess we'll have to see.  */
19825int
19826asm_preferred_eh_data_format (int code, int global)
19827{
19828  if (flag_pic)
19829    {
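      /* With PIC, use pc-relative encodings: a 4-byte signed offset
	 suffices for 32-bit code and for the small/medium PIC models,
	 otherwise an 8-byte offset is needed.  */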
19830      int type = DW_EH_PE_sdata8;
19831      if (!TARGET_64BIT
19832	  || ix86_cmodel == CM_SMALL_PIC
19833	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19834	type = DW_EH_PE_sdata4;
19835      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19836    }
19837  if (ix86_cmodel == CM_SMALL
19838      || (ix86_cmodel == CM_MEDIUM && code))
19839    return DW_EH_PE_udata4;
19840  return DW_EH_PE_absptr;
19841}
19842
19843#include "gt-i386.h"
19844