/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */

/* $FreeBSD$ */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"
#include "dwarf2.h"
#include "tm-constrs.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of the given mode in the mult and division cost tables.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
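
/* Illustrative sketch only (a hypothetical helper, not used elsewhere in
   this file): MODE_INDEX picks the per-mode slot in the multiply/divide
   cost arrays of a cost table, e.g. the cost of starting an SImode
   multiply.  The field name mult_init is assumed from the initializers
   below and may differ from the actual struct processor_costs layout in
   i386.h.  */
static inline int
example_simode_mult_cost (const struct processor_costs *costs)
{
  return costs->mult_init[MODE_INDEX (SImode)];
}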

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
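/* Worked example (assuming, as stated above, COSTS_N_INSNS (N) == (N) * 4
   and a 2-byte add): an add costs COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),
   so byte counts and insn counts stay on a comparable scale when tuning for
   size.  */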

static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/*                               HI */
   COSTS_N_BYTES (3),			/*                               SI */
   COSTS_N_BYTES (3),			/*                               DI */
   COSTS_N_BYTES (5)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/*                          HI */
   COSTS_N_BYTES (3),			/*                          SI */
   COSTS_N_BYTES (3),			/*                          DI */
   COSTS_N_BYTES (5)},			/*                       other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/*                               HI */
   COSTS_N_INSNS (6),			/*                               SI */
   COSTS_N_INSNS (6),			/*                               DI */
   COSTS_N_INSNS (6)},			/*                               other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*                          HI */
   COSTS_N_INSNS (23),			/*                          SI */
   COSTS_N_INSNS (23),			/*                          DI */
   COSTS_N_INSNS (23)},			/*                          other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/*                               HI */
   COSTS_N_INSNS (12),			/*                               SI */
   COSTS_N_INSNS (12),			/*                               DI */
   COSTS_N_INSNS (12)},			/*                               other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/*                          HI */
   COSTS_N_INSNS (40),			/*                          SI */
   COSTS_N_INSNS (40),			/*                          DI */
   COSTS_N_INSNS (40)},			/*                          other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/*                               HI */
   COSTS_N_INSNS (11),			/*                               SI */
   COSTS_N_INSNS (11),			/*                               DI */
   COSTS_N_INSNS (11)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/*                          HI */
   COSTS_N_INSNS (25),			/*                          SI */
   COSTS_N_INSNS (25),			/*                          DI */
   COSTS_N_INSNS (25)},			/*                          other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (4),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (4)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/*                          HI */
   COSTS_N_INSNS (17),			/*                          SI */
   COSTS_N_INSNS (17),			/*                          DI */
   COSTS_N_INSNS (17)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (7),			/*                               SI */
   COSTS_N_INSNS (7),			/*                               DI */
   COSTS_N_INSNS (7)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*                          HI */
   COSTS_N_INSNS (39),			/*                          SI */
   COSTS_N_INSNS (39),			/*                          DI */
   COSTS_N_INSNS (39)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (3),			/*                               DI */
   COSTS_N_INSNS (3)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/*                          HI */
   COSTS_N_INSNS (18),			/*                          SI */
   COSTS_N_INSNS (18),			/*                          DI */
   COSTS_N_INSNS (18)},			/*                          other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/*                               HI */
   COSTS_N_INSNS (5),			/*                               SI */
   COSTS_N_INSNS (5),			/*                               DI */
   COSTS_N_INSNS (5)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*                          HI */
   COSTS_N_INSNS (42),			/*                          SI */
   COSTS_N_INSNS (74),			/*                          DI */
   COSTS_N_INSNS (74)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (5)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*                          HI */
   COSTS_N_INSNS (42),			/*                          SI */
   COSTS_N_INSNS (74),			/*                          DI */
   COSTS_N_INSNS (74)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (5)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*                          HI */
   COSTS_N_INSNS (51),			/*                          SI */
   COSTS_N_INSNS (83),			/*                          DI */
   COSTS_N_INSNS (83)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  					/* On K8
  					    MOVD reg64, xmmreg 	Double	FSTORE 4
					    MOVD reg32, xmmreg 	Double	FSTORE 4
					   On AMDFAM10
					    MOVD reg64, xmmreg 	Double	FADD 3
                                                                1/1  1/1
					    MOVD reg32, xmmreg 	Double	FADD 3
                                                                1/1  1/1 */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/*                               HI */
   COSTS_N_INSNS (15),			/*                               SI */
   COSTS_N_INSNS (15),			/*                               DI */
   COSTS_N_INSNS (15)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/*                          HI */
   COSTS_N_INSNS (56),			/*                          SI */
   COSTS_N_INSNS (56),			/*                          DI */
   COSTS_N_INSNS (56)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/*                               HI */
   COSTS_N_INSNS (10),			/*                               SI */
   COSTS_N_INSNS (10),			/*                               DI */
   COSTS_N_INSNS (10)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/*                          HI */
   COSTS_N_INSNS (66),			/*                          SI */
   COSTS_N_INSNS (66),			/*                          DI */
   COSTS_N_INSNS (66)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (3),			/*                               DI */
   COSTS_N_INSNS (3)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),			/*                          HI */
   COSTS_N_INSNS (22),			/*                          SI */
   COSTS_N_INSNS (22),			/*                          DI */
   COSTS_N_INSNS (22)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  16,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {6, 6, 6},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {6, 6},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {6, 6, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),			/* cost of FSQRT instruction.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (2)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*                          HI */
   COSTS_N_INSNS (42),			/*                          SI */
   COSTS_N_INSNS (74),			/*                          DI */
   COSTS_N_INSNS (74)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
};

/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (2)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*                          HI */
   COSTS_N_INSNS (42),			/*                          SI */
   COSTS_N_INSNS (74),			/*                          DI */
   COSTS_N_INSNS (74)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_GEODE  (1<<PROCESSOR_GEODE)
#define m_K6_GEODE  (m_K6 | m_GEODE)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
#define m_K8  (1<<PROCESSOR_K8)
#define m_ATHLON_K8  (m_K8 | m_ATHLON)
#define m_AMDFAM10  (1<<PROCESSOR_AMDFAM10)
#define m_NOCONA  (1<<PROCESSOR_NOCONA)
#define m_CORE2  (1<<PROCESSOR_CORE2)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
#define m_ATHLON_K8_AMDFAM10  (m_K8 | m_ATHLON | m_AMDFAM10)
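
/* Illustrative sketch only (a hypothetical helper): the feature masks below
   are consumed by ANDing with the bit of the active tuning target; the real
   TARGET_* convenience macros live in i386.h and may differ in detail.
   ix86_tune is the active PROCESSOR_* enumerator declared in i386.h.  */
static inline int
example_tuning_enabled (int feature_mask)
{
  return (feature_mask & (1 << ix86_tune)) != 0;
}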

/* Generic instruction choice should be a common subset of the supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */

/* Using leave does not affect Nocona SPEC2000 results negatively, so enabling
   it for Generic64 seems like a good code size tradeoff.  We can't enable it
   for 32bit generic because it does not work well with PPro based chips.  */
const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
                          | m_GENERIC64;
const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
                            | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_zero_extend_with_and = m_486 | m_PENT;
/* Enable to zero extend integer registers to avoid partial dependencies */
const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
                     | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
                              | m_K6 | m_CORE2 | m_GENERIC;
const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
                      | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
                            | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Branch hints were put in P4 based on simulation results.  But after P4 was
   made, no performance benefit was observed with branch hints; they also
   increase code size.  As a result, icc never generates branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
                         /*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on the Generic32
   compilation setting as well.  However, in the current implementation partial
   register stalls are not eliminated very well - they can be introduced via
   subregs synthesized by combine and can happen in caller/callee saving
   sequences.
   Because this option pays back little on PPro based chips and conflicts with
   the partial register dependencies used by Athlon/P4 based chips, it is
   better to leave it off for generic32 for now.  */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall =  m_CORE2 | m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
                                  | m_CORE2 | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
                               | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
                               /* m_PENT4 ? */
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls.  Just like
   x86_partial_reg_stall, this option might be considered for Generic32 if our
   scheme for avoiding partial stalls were more effective.  */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
/* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
                          | m_CORE2 | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
                          | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
                          | m_CORE2 | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
                          | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Enable if integer moves are preferred for DFmode copies */
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
                                       | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
                                       | m_CORE2 | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
                                      | m_CORE2 | m_GENERIC;
/* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
   for outgoing arguments will be computed and placed into the variable
   `current_function_outgoing_args_size'. No space will be pushed onto the stack
   for each call; instead, the function prologue should increase the stack frame
   size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
   not proper. */
const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
                                         | m_NOCONA | m_PPRO | m_CORE2
                                         | m_GENERIC;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
                                           | m_ATHLON_K8_AMDFAM10 | m_PENT4
                                           | m_NOCONA | m_CORE2 | m_GENERIC;
/* In the Generic model we have a conflict here between PPro/Pentium4 based
   chips that treat 128bit SSE registers as single units and K8 based chips
   that divide SSE registers into two 64bit halves.
   x86_sse_partial_reg_dependency promotes all store destinations to be 128bit
   to allow register renaming on 128bit SSE units, but usually results in one
   extra microop on 64bit SSE units.  Experimental results show that disabling
   this option on P4 brings over a 20% SPECfp regression, while enabling it on
   K8 brings a roughly 2.4% regression that can be partly masked by careful
   scheduling of moves.  */
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
                                           | m_GENERIC | m_AMDFAM10;
/* Set for machines where types and dependencies are resolved on SSE register
   parts instead of whole registers, so we may maintain just the lower part of
   scalar values in the proper format, leaving the upper part undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }
 */
const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);

/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers, which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;

const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
                                    | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
                                | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
                         | m_CORE2 | m_GENERIC;
const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Compare and exchange 8 bytes was added for pentium.  */
const int x86_cmpxchg8b = ~(m_386 | m_486);
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;
/* Byteswap was added for 80486.  */
const int x86_bswap = ~m_386;
const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;

/* If the average insn count for a single function invocation is lower than
   this constant, emit fast (but longer) prologue and epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
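
/* Illustrative sketch only (hypothetical helper; the real consumer is the
   frame layout code later in this file, whose exact logic may differ):
   short functions prefer the fast prologue/epilogue.  */
static inline int
example_prefer_fast_prologue (int insn_count)
{
  return insn_count < FAST_PROLOGUE_INSN_COUNT;
}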

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
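
/* Illustrative use only (hypothetical helper): REGNO_REG_CLASS in i386.h is
   the real accessor, as noted in the comment above; it simply indexes this
   table by hard register number.  */
static inline enum reg_class
example_regno_class (int regno)
{
  return regclass_map[regno];
}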

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
1167
1168/* Define the register numbers to be used in Dwarf debugging information.
1169   The SVR4 reference port C compiler uses the following register numbers
1170   in its Dwarf output code:
1171	0 for %eax (gcc regno = 0)
1172	1 for %ecx (gcc regno = 2)
1173	2 for %edx (gcc regno = 1)
1174	3 for %ebx (gcc regno = 3)
1175	4 for %esp (gcc regno = 7)
1176	5 for %ebp (gcc regno = 6)
1177	6 for %esi (gcc regno = 4)
1178	7 for %edi (gcc regno = 5)
1179   The following three DWARF register numbers are never generated by
1180   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1181   believes these numbers have these meanings.
1182	8  for %eip    (no gcc equivalent)
1183	9  for %eflags (gcc regno = 17)
1184	10 for %trapno (no gcc equivalent)
1185   It is not at all clear how we should number the FP stack registers
1186   for the x86 architecture.  If the version of SDB on x86/svr4 were
1187   a bit less brain dead with respect to floating-point then we would
1188   have a precedent to follow with respect to DWARF register numbers
1189   for x86 FP registers, but the SDB on x86/svr4 is so completely
1190   broken with respect to FP registers that it is hardly worth thinking
1191   of it as something to strive for compatibility with.
1192   The version of x86/svr4 SDB I have at the moment does (partially)
1193   seem to believe that DWARF register number 11 is associated with
1194   the x86 register %st(0), but that's about all.  Higher DWARF
1195   register numbers don't seem to be associated with anything in
1196   particular, and even for DWARF regno 11, SDB only seems to under-
1197   stand that it should say that a variable lives in %st(0) (when
1198   asked via an `=' command) if we said it was in DWARF regno 11,
1199   but SDB still prints garbage when asked for the value of the
1200   variable in question (via a `/' command).
1201   (Also note that the labels SDB prints for various FP stack regs
1202   when doing an `x' command are all wrong.)
1203   Note that these problems generally don't affect the native SVR4
1204   C compiler because it doesn't allow the use of -O with -g and
1205   because when it is *not* optimizing, it allocates a memory
1206   location for each floating-point variable, and the memory
1207   location is what gets described in the DWARF AT_location
1208   attribute for the variable in question.
1209   Regardless of the severe mental illness of the x86/svr4 SDB, we
1210   do something sensible here and we use the following DWARF
1211   register numbers.  Note that these are all stack-top-relative
1212   numbers.
1213	11 for %st(0) (gcc regno = 8)
1214	12 for %st(1) (gcc regno = 9)
1215	13 for %st(2) (gcc regno = 10)
1216	14 for %st(3) (gcc regno = 11)
1217	15 for %st(4) (gcc regno = 12)
1218	16 for %st(5) (gcc regno = 13)
1219	17 for %st(6) (gcc regno = 14)
1220	18 for %st(7) (gcc regno = 15)
1221*/
1222int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1223{
1224  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
1225  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
1226  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
1227  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
1228  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
1229  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
1230  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
1231};
1232
1233/* Test and compare insns in i386.md store the information needed to
1234   generate branch and scc insns here.  */
1235
1236rtx ix86_compare_op0 = NULL_RTX;
1237rtx ix86_compare_op1 = NULL_RTX;
1238rtx ix86_compare_emitted = NULL_RTX;
1239
1240/* Size of the register save area.  */
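/* That is, REGPARM_MAX integer registers of UNITS_PER_WORD bytes each,
   followed by SSE_REGPARM_MAX SSE registers of 16 bytes each.  */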
1241#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1242
1243/* Define the structure for the machine field in struct function.  */
1244
1245struct stack_local_entry GTY(())
1246{
1247  unsigned short mode;
1248  unsigned short n;
1249  rtx rtl;
1250  struct stack_local_entry *next;
1251};
1252
1253/* Structure describing stack frame layout.
1254   Stack grows downward:
1255
1256   [arguments]
1257					      <- ARG_POINTER
1258   saved pc
1259
1260   saved frame pointer if frame_pointer_needed
1261					      <- HARD_FRAME_POINTER
1262   [saved regs]
1263
1264   [padding1]          \
1265		        )
1266   [va_arg registers]  (
1267		        > to_allocate	      <- FRAME_POINTER
1268   [frame]	       (
1269		        )
1270   [padding2]	       /
1271  */
1272struct ix86_frame
1273{
1274  int nregs;
1275  int padding1;
1276  int va_arg_size;
1277  HOST_WIDE_INT frame;
1278  int padding2;
1279  int outgoing_arguments_size;
1280  int red_zone_size;
1281
1282  HOST_WIDE_INT to_allocate;
1283  /* The offsets relative to ARG_POINTER.  */
1284  HOST_WIDE_INT frame_pointer_offset;
1285  HOST_WIDE_INT hard_frame_pointer_offset;
1286  HOST_WIDE_INT stack_pointer_offset;
1287
1288  /* When save_regs_using_mov is set, emit prologue using
1289     move instead of push instructions.  */
1290  bool save_regs_using_mov;
1291};
1292
1293/* Code model option.  */
1294enum cmodel ix86_cmodel;
1295/* Asm dialect.  */
1296enum asm_dialect ix86_asm_dialect = ASM_ATT;
1297/* TLS dialects.  */
1298enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1299
1300/* Which unit we are generating floating point math for.  */
1301enum fpmath_unit ix86_fpmath;
1302
1303/* Which cpu are we scheduling for.  */
1304enum processor_type ix86_tune;
1305/* Which instruction set architecture to use.  */
1306enum processor_type ix86_arch;
1307
/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* True if the cmpxchg16b instruction is supported.  */
int x86_cmpxchg16b;
1313
1314/* ix86_regparm_string as a number */
1315static int ix86_regparm;
1316
1317/* -mstackrealign option */
1318extern int ix86_force_align_arg_pointer;
1319static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1320
1321/* Preferred alignment for stack boundary in bits.  */
1322unsigned int ix86_preferred_stack_boundary;
1323
1324/* Values 1-5: see jump.c */
1325int ix86_branch_cost;
1326
1327/* Variables which are this size or smaller are put in the data/bss
1328   or ldata/lbss sections.  */
1329
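/* The default threshold; it can be changed with -mlarge-data-threshold
   (validated in override_options below).  */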
1330int ix86_section_threshold = 65536;
1331
1332/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
1333char internal_label_prefix[16];
1334int internal_label_prefix_len;
1335
1336static bool ix86_handle_option (size_t, const char *, int);
1337static void output_pic_addr_const (FILE *, rtx, int);
1338static void put_condition_code (enum rtx_code, enum machine_mode,
1339				int, int, FILE *);
1340static const char *get_some_local_dynamic_name (void);
1341static int get_some_local_dynamic_name_1 (rtx *, void *);
1342static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1343static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1344						   rtx *);
1345static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1346static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1347						   enum machine_mode);
1348static rtx get_thread_pointer (int);
1349static rtx legitimize_tls_address (rtx, enum tls_model, int);
1350static void get_pc_thunk_name (char [32], unsigned int);
1351static rtx gen_push (rtx);
1352static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1353static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1354static struct machine_function * ix86_init_machine_status (void);
1355static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1356static int ix86_nsaved_regs (void);
1357static void ix86_emit_save_regs (void);
1358static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1359static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1360static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1361static HOST_WIDE_INT ix86_GOT_alias_set (void);
1362static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1363static rtx ix86_expand_aligntest (rtx, int);
1364static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1365static int ix86_issue_rate (void);
1366static int ix86_adjust_cost (rtx, rtx, rtx, int);
1367static int ia32_multipass_dfa_lookahead (void);
1368static void ix86_init_mmx_sse_builtins (void);
1369static rtx x86_this_parameter (tree);
1370static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1371				 HOST_WIDE_INT, tree);
1372static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1373static void x86_file_start (void);
1374static void ix86_reorg (void);
1375static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1376static tree ix86_build_builtin_va_list (void);
1377static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1378					 tree, int *, int);
1379static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1380static bool ix86_scalar_mode_supported_p (enum machine_mode);
1381static bool ix86_vector_mode_supported_p (enum machine_mode);
1382
1383static int ix86_address_cost (rtx);
1384static bool ix86_cannot_force_const_mem (rtx);
1385static rtx ix86_delegitimize_address (rtx);
1386
1387static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1388
1389struct builtin_description;
1390static rtx ix86_expand_sse_comi (const struct builtin_description *,
1391				 tree, rtx);
1392static rtx ix86_expand_sse_compare (const struct builtin_description *,
1393				    tree, rtx);
1394static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1395static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1396static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1397static rtx ix86_expand_store_builtin (enum insn_code, tree);
1398static rtx safe_vector_operand (rtx, enum machine_mode);
1399static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1400static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1401static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1402static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1403static int ix86_fp_comparison_cost (enum rtx_code code);
1404static unsigned int ix86_select_alt_pic_regnum (void);
1405static int ix86_save_reg (unsigned int, int);
1406static void ix86_compute_frame_layout (struct ix86_frame *);
1407static int ix86_comp_type_attributes (tree, tree);
1408static int ix86_function_regparm (tree, tree);
1409const struct attribute_spec ix86_attribute_table[];
1410static bool ix86_function_ok_for_sibcall (tree, tree);
1411static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1412static int ix86_value_regno (enum machine_mode, tree, tree);
1413static bool contains_128bit_aligned_vector_p (tree);
1414static rtx ix86_struct_value_rtx (tree, int);
1415static bool ix86_ms_bitfield_layout_p (tree);
1416static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1417static int extended_reg_mentioned_1 (rtx *, void *);
1418static bool ix86_rtx_costs (rtx, int, int, int *);
1419static int min_insn_size (rtx);
1420static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1421static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1422static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1423				    tree, bool);
1424static void ix86_init_builtins (void);
1425static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1426static const char *ix86_mangle_fundamental_type (tree);
1427static tree ix86_stack_protect_fail (void);
1428static rtx ix86_internal_arg_pointer (void);
1429static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1430
1431/* This function is only used on Solaris.  */
1432static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1433  ATTRIBUTE_UNUSED;
1434
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class,
   except that gcc uses SFmode or DFmode moves instead of DImode moves
   to avoid reformatting penalties.

   Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half of the slot is then just
   padding).  */
1443enum x86_64_reg_class
1444  {
1445    X86_64_NO_CLASS,
1446    X86_64_INTEGER_CLASS,
1447    X86_64_INTEGERSI_CLASS,
1448    X86_64_SSE_CLASS,
1449    X86_64_SSESF_CLASS,
1450    X86_64_SSEDF_CLASS,
1451    X86_64_SSEUP_CLASS,
1452    X86_64_X87_CLASS,
1453    X86_64_X87UP_CLASS,
1454    X86_64_COMPLEX_X87_CLASS,
1455    X86_64_MEMORY_CLASS
1456  };
1457static const char * const x86_64_reg_class_name[] = {
1458  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1459  "sseup", "x87", "x87up", "cplx87", "no"
1460};
1461
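/* Maximum number of the above classes needed to describe how a single
   argument is passed.  */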
1462#define MAX_CLASSES 4
1463
/* Table of constants used by fldpi, fldln2, etc.  */
1465static REAL_VALUE_TYPE ext_80387_constants_table [5];
1466static bool ext_80387_constants_init = 0;
1467static void init_ext_80387_constants (void);
1468static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1469static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1470static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1471static section *x86_64_elf_select_section (tree decl, int reloc,
1472					   unsigned HOST_WIDE_INT align)
1473					     ATTRIBUTE_UNUSED;
1474
1475/* Initialize the GCC target structure.  */
1476#undef TARGET_ATTRIBUTE_TABLE
1477#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1478#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1479#  undef TARGET_MERGE_DECL_ATTRIBUTES
1480#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1481#endif
1482
1483#undef TARGET_COMP_TYPE_ATTRIBUTES
1484#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1485
1486#undef TARGET_INIT_BUILTINS
1487#define TARGET_INIT_BUILTINS ix86_init_builtins
1488#undef TARGET_EXPAND_BUILTIN
1489#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1490
1491#undef TARGET_ASM_FUNCTION_EPILOGUE
1492#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1493
1494#undef TARGET_ENCODE_SECTION_INFO
1495#ifndef SUBTARGET_ENCODE_SECTION_INFO
1496#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1497#else
1498#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1499#endif
1500
1501#undef TARGET_ASM_OPEN_PAREN
1502#define TARGET_ASM_OPEN_PAREN ""
1503#undef TARGET_ASM_CLOSE_PAREN
1504#define TARGET_ASM_CLOSE_PAREN ""
1505
1506#undef TARGET_ASM_ALIGNED_HI_OP
1507#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1508#undef TARGET_ASM_ALIGNED_SI_OP
1509#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1510#ifdef ASM_QUAD
1511#undef TARGET_ASM_ALIGNED_DI_OP
1512#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1513#endif
1514
1515#undef TARGET_ASM_UNALIGNED_HI_OP
1516#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1517#undef TARGET_ASM_UNALIGNED_SI_OP
1518#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1519#undef TARGET_ASM_UNALIGNED_DI_OP
1520#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1521
1522#undef TARGET_SCHED_ADJUST_COST
1523#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1524#undef TARGET_SCHED_ISSUE_RATE
1525#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1526#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1527#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1528  ia32_multipass_dfa_lookahead
1529
1530#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1531#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1532
1533#ifdef HAVE_AS_TLS
1534#undef TARGET_HAVE_TLS
1535#define TARGET_HAVE_TLS true
1536#endif
1537#undef TARGET_CANNOT_FORCE_CONST_MEM
1538#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1539#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1540#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1541
1542#undef TARGET_DELEGITIMIZE_ADDRESS
1543#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1544
1545#undef TARGET_MS_BITFIELD_LAYOUT_P
1546#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1547
1548#if TARGET_MACHO
1549#undef TARGET_BINDS_LOCAL_P
1550#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1551#endif
1552
1553#undef TARGET_ASM_OUTPUT_MI_THUNK
1554#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1555#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1556#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1557
1558#undef TARGET_ASM_FILE_START
1559#define TARGET_ASM_FILE_START x86_file_start
1560
1561#undef TARGET_DEFAULT_TARGET_FLAGS
1562#define TARGET_DEFAULT_TARGET_FLAGS	\
1563  (TARGET_DEFAULT			\
1564   | TARGET_64BIT_DEFAULT		\
1565   | TARGET_SUBTARGET_DEFAULT		\
1566   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1567
1568#undef TARGET_HANDLE_OPTION
1569#define TARGET_HANDLE_OPTION ix86_handle_option
1570
1571#undef TARGET_RTX_COSTS
1572#define TARGET_RTX_COSTS ix86_rtx_costs
1573#undef TARGET_ADDRESS_COST
1574#define TARGET_ADDRESS_COST ix86_address_cost
1575
1576#undef TARGET_FIXED_CONDITION_CODE_REGS
1577#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1578#undef TARGET_CC_MODES_COMPATIBLE
1579#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1580
1581#undef TARGET_MACHINE_DEPENDENT_REORG
1582#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1583
1584#undef TARGET_BUILD_BUILTIN_VA_LIST
1585#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1586
1587#undef TARGET_MD_ASM_CLOBBERS
1588#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1589
1590#undef TARGET_PROMOTE_PROTOTYPES
1591#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1592#undef TARGET_STRUCT_VALUE_RTX
1593#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1594#undef TARGET_SETUP_INCOMING_VARARGS
1595#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1596#undef TARGET_MUST_PASS_IN_STACK
1597#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1598#undef TARGET_PASS_BY_REFERENCE
1599#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1600#undef TARGET_INTERNAL_ARG_POINTER
1601#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1602#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1603#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1604
1605#undef TARGET_GIMPLIFY_VA_ARG_EXPR
1606#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1607
1608#undef TARGET_SCALAR_MODE_SUPPORTED_P
1609#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1610
1611#undef TARGET_VECTOR_MODE_SUPPORTED_P
1612#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1613
1614#ifdef HAVE_AS_TLS
1615#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1616#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1617#endif
1618
1619#ifdef SUBTARGET_INSERT_ATTRIBUTES
1620#undef TARGET_INSERT_ATTRIBUTES
1621#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1622#endif
1623
1624#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1625#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1626
1627#undef TARGET_STACK_PROTECT_FAIL
1628#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1629
1630#undef TARGET_FUNCTION_VALUE
1631#define TARGET_FUNCTION_VALUE ix86_function_value
1632
1633struct gcc_target targetm = TARGET_INITIALIZER;
1634
1635
1636/* The svr4 ABI for the i386 says that records and unions are returned
1637   in memory.  */
1638#ifndef DEFAULT_PCC_STRUCT_RETURN
1639#define DEFAULT_PCC_STRUCT_RETURN 1
1640#endif
1641
1642/* Implement TARGET_HANDLE_OPTION.  */
1643
1644static bool
1645ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1646{
1647  switch (code)
1648    {
1649    case OPT_m3dnow:
1650      if (!value)
1651	{
1652	  target_flags &= ~MASK_3DNOW_A;
1653	  target_flags_explicit |= MASK_3DNOW_A;
1654	}
1655      return true;
1656
1657    case OPT_mmmx:
1658      if (!value)
1659	{
1660	  target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1661	  target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1662	}
1663      return true;
1664
1665    case OPT_msse:
1666      if (!value)
1667	{
1668	  target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1669	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1670	}
1671      return true;
1672
1673    case OPT_msse2:
1674      if (!value)
1675	{
1676	  target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1677	  target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1678	}
1679      return true;
1680
1681    case OPT_msse3:
1682      if (!value)
1683	{
1684	  target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
1685	  target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A;
1686	}
1687      return true;
1688
1689    case OPT_maes:
1690      if (!value)
1691	{
1692	  target_flags &= ~MASK_AES;
1693	  target_flags_explicit |= MASK_AES;
1694	}
1695      return true;
1696
1697    default:
1698      return true;
1699    }
1700}
1701
1702/* Sometimes certain combinations of command options do not make
1703   sense on a particular target machine.  You can define a macro
1704   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
1705   defined, is executed once just after all the command options have
1706   been parsed.
1707
1708   Don't use this macro to turn on various extra optimizations for
1709   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
1710
1711void
1712override_options (void)
1713{
1714  int i;
1715  int ix86_tune_defaulted = 0;
1716
1717  /* Comes from final.c -- no real reason to change it.  */
1718#define MAX_CODE_ALIGN 16
1719
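  /* Per-processor code generation parameters.  The table below is indexed
     by enum processor_type (the value of ix86_tune), so its rows must stay
     in that order.  */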
1720  static struct ptt
1721    {
1722      const struct processor_costs *cost;	/* Processor costs */
1723      const int target_enable;			/* Target flags to enable.  */
1724      const int target_disable;			/* Target flags to disable.  */
1725      const int align_loop;			/* Default alignments.  */
1726      const int align_loop_max_skip;
1727      const int align_jump;
1728      const int align_jump_max_skip;
1729      const int align_func;
1730    }
1731  const processor_target_table[PROCESSOR_max] =
1732    {
1733      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1734      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1735      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1736      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1737      {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1738      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1739      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1740      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1741      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1742      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1743      {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1744      {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1745      {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1746      {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32}
1747    };
1748
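  /* CPU names corresponding to the TARGET_CPU_DEFAULT_* values;
     cpu_names[TARGET_CPU_DEFAULT] is used when no -mtune/-march is
     given (see below).  */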
1749  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1750  static struct pta
1751    {
1752      const char *const name;		/* processor name or nickname.  */
1753      const enum processor_type processor;
1754      const enum pta_flags
1755	{
1756	  PTA_SSE = 1,
1757	  PTA_SSE2 = 2,
1758	  PTA_SSE3 = 4,
1759	  PTA_MMX = 8,
1760	  PTA_PREFETCH_SSE = 16,
1761	  PTA_3DNOW = 32,
1762	  PTA_3DNOW_A = 64,
1763	  PTA_64BIT = 128,
1764	  PTA_SSSE3 = 256,
1765	  PTA_CX16 = 512,
1766	  PTA_POPCNT = 1024,
1767	  PTA_ABM = 2048,
1768 	  PTA_SSE4A = 4096
1769	} flags;
1770    }
1771  const processor_alias_table[] =
1772    {
1773      {"i386", PROCESSOR_I386, 0},
1774      {"i486", PROCESSOR_I486, 0},
1775      {"i586", PROCESSOR_PENTIUM, 0},
1776      {"pentium", PROCESSOR_PENTIUM, 0},
1777      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1778      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1779      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1780      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1781      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1782      {"i686", PROCESSOR_PENTIUMPRO, 0},
1783      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1784      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1785      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1786      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1787      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1788      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1789				       | PTA_MMX | PTA_PREFETCH_SSE},
1790      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1791				        | PTA_MMX | PTA_PREFETCH_SSE},
1792      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1793				        | PTA_MMX | PTA_PREFETCH_SSE},
1794      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1795					| PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1796      {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1797                                        | PTA_64BIT | PTA_MMX
1798					| PTA_PREFETCH_SSE | PTA_CX16},
1799      {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1800				   | PTA_3DNOW_A},
1801      {"k6", PROCESSOR_K6, PTA_MMX},
1802      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1803      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1804      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1805				   | PTA_3DNOW_A},
1806      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1807					 | PTA_3DNOW | PTA_3DNOW_A},
1808      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1809				    | PTA_3DNOW_A | PTA_SSE},
1810      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1811				      | PTA_3DNOW_A | PTA_SSE},
1812      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1813				      | PTA_3DNOW_A | PTA_SSE},
1814      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1815			       | PTA_SSE | PTA_SSE2 },
1816      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1817				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1818      {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1819				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1820				      | PTA_SSE3 },
1821      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1822				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1823      {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1824				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1825				      | PTA_SSE3 },
1826      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1827				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1828      {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1829				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1830				      | PTA_SSE3 },
1831      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1832				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1833      {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1834                                       | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1835                                       | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1836                                       | PTA_ABM | PTA_SSE4A | PTA_CX16},
1837      {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1838                                       | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1839                                       | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1840                                       | PTA_ABM | PTA_SSE4A | PTA_CX16},
1841      {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
1842      {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
1843    };
1844
1845  int const pta_size = ARRAY_SIZE (processor_alias_table);
1846
1847#ifdef SUBTARGET_OVERRIDE_OPTIONS
1848  SUBTARGET_OVERRIDE_OPTIONS;
1849#endif
1850
1851#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1852  SUBSUBTARGET_OVERRIDE_OPTIONS;
1853#endif
1854
  /* -fPIC is the default for 64-bit Mach-O (Darwin).  */
1856  if (TARGET_MACHO && TARGET_64BIT)
1857    flag_pic = 2;
1858
  /* Set the default values for switches whose default depends on TARGET_64BIT,
     in case they weren't overridden by command-line options.  */
1861  if (TARGET_64BIT)
1862    {
1863      /* Mach-O doesn't support omitting the frame pointer for now.  */
1864      if (flag_omit_frame_pointer == 2)
1865	flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1866      if (flag_asynchronous_unwind_tables == 2)
1867	flag_asynchronous_unwind_tables = 1;
1868      if (flag_pcc_struct_return == 2)
1869	flag_pcc_struct_return = 0;
1870    }
1871  else
1872    {
1873      if (flag_omit_frame_pointer == 2)
1874	flag_omit_frame_pointer = 0;
1875      if (flag_asynchronous_unwind_tables == 2)
1876	flag_asynchronous_unwind_tables = 0;
1877      if (flag_pcc_struct_return == 2)
1878	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1879    }
1880
1881  /* Need to check -mtune=generic first.  */
1882  if (ix86_tune_string)
1883    {
1884      if (!strcmp (ix86_tune_string, "generic")
1885	  || !strcmp (ix86_tune_string, "i686")
1886	  /* As special support for cross compilers we read -mtune=native
1887	     as -mtune=generic.  With native compilers we won't see the
1888	     -mtune=native, as it was changed by the driver.  */
1889	  || !strcmp (ix86_tune_string, "native"))
1890	{
1891	  if (TARGET_64BIT)
1892	    ix86_tune_string = "generic64";
1893	  else
1894	    ix86_tune_string = "generic32";
1895	}
1896      else if (!strncmp (ix86_tune_string, "generic", 7))
1897	error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1898    }
1899  else
1900    {
1901      if (ix86_arch_string)
1902	ix86_tune_string = ix86_arch_string;
1903      if (!ix86_tune_string)
1904	{
1905	  ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1906	  ix86_tune_defaulted = 1;
1907	}
1908
1909      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
1910	 need to use a sensible tune option.  */
1911      if (!strcmp (ix86_tune_string, "generic")
1912	  || !strcmp (ix86_tune_string, "x86-64")
1913	  || !strcmp (ix86_tune_string, "i686"))
1914	{
1915	  if (TARGET_64BIT)
1916	    ix86_tune_string = "generic64";
1917	  else
1918	    ix86_tune_string = "generic32";
1919	}
1920    }
1921  if (!strcmp (ix86_tune_string, "x86-64"))
1922    warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated.  Use -mtune=k8 or "
1923	     "-mtune=generic instead as appropriate.");
1924
1925  if (!ix86_arch_string)
1926    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1927  if (!strcmp (ix86_arch_string, "generic"))
1928    error ("generic CPU can be used only for -mtune= switch");
1929  if (!strncmp (ix86_arch_string, "generic", 7))
1930    error ("bad value (%s) for -march= switch", ix86_arch_string);
1931
1932  if (ix86_cmodel_string != 0)
1933    {
1934      if (!strcmp (ix86_cmodel_string, "small"))
1935	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1936      else if (!strcmp (ix86_cmodel_string, "medium"))
1937	ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1938      else if (flag_pic)
1939	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1940      else if (!strcmp (ix86_cmodel_string, "32"))
1941	ix86_cmodel = CM_32;
1942      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1943	ix86_cmodel = CM_KERNEL;
1944      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1945	ix86_cmodel = CM_LARGE;
1946      else
1947	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1948    }
1949  else
1950    {
1951      ix86_cmodel = CM_32;
1952      if (TARGET_64BIT)
1953	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1954    }
1955  if (ix86_asm_string != 0)
1956    {
1957      if (! TARGET_MACHO
1958	  && !strcmp (ix86_asm_string, "intel"))
1959	ix86_asm_dialect = ASM_INTEL;
1960      else if (!strcmp (ix86_asm_string, "att"))
1961	ix86_asm_dialect = ASM_ATT;
1962      else
1963	error ("bad value (%s) for -masm= switch", ix86_asm_string);
1964    }
1965  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1966    error ("code model %qs not supported in the %s bit mode",
1967	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1968  if (ix86_cmodel == CM_LARGE)
1969    sorry ("code model %<large%> not supported yet");
1970  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1971    sorry ("%i-bit mode not compiled in",
1972	   (target_flags & MASK_64BIT) ? 64 : 32);
1973
1974  for (i = 0; i < pta_size; i++)
1975    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1976      {
1977	ix86_arch = processor_alias_table[i].processor;
1978	/* Default cpu tuning to the architecture.  */
1979	ix86_tune = ix86_arch;
1980	if (processor_alias_table[i].flags & PTA_MMX
1981	    && !(target_flags_explicit & MASK_MMX))
1982	  target_flags |= MASK_MMX;
1983	if (processor_alias_table[i].flags & PTA_3DNOW
1984	    && !(target_flags_explicit & MASK_3DNOW))
1985	  target_flags |= MASK_3DNOW;
1986	if (processor_alias_table[i].flags & PTA_3DNOW_A
1987	    && !(target_flags_explicit & MASK_3DNOW_A))
1988	  target_flags |= MASK_3DNOW_A;
1989	if (processor_alias_table[i].flags & PTA_SSE
1990	    && !(target_flags_explicit & MASK_SSE))
1991	  target_flags |= MASK_SSE;
1992	if (processor_alias_table[i].flags & PTA_SSE2
1993	    && !(target_flags_explicit & MASK_SSE2))
1994	  target_flags |= MASK_SSE2;
1995	if (processor_alias_table[i].flags & PTA_SSE3
1996	    && !(target_flags_explicit & MASK_SSE3))
1997	  target_flags |= MASK_SSE3;
1998	if (processor_alias_table[i].flags & PTA_SSSE3
1999	    && !(target_flags_explicit & MASK_SSSE3))
2000	  target_flags |= MASK_SSSE3;
2001	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2002	  x86_prefetch_sse = true;
2003	if (processor_alias_table[i].flags & PTA_CX16)
2004	  x86_cmpxchg16b = true;
2005	if (processor_alias_table[i].flags & PTA_POPCNT
2006	    && !(target_flags_explicit & MASK_POPCNT))
2007	  target_flags |= MASK_POPCNT;
2008	if (processor_alias_table[i].flags & PTA_ABM
2009	    && !(target_flags_explicit & MASK_ABM))
2010	  target_flags |= MASK_ABM;
2011	if (processor_alias_table[i].flags & PTA_SSE4A
2012	    && !(target_flags_explicit & MASK_SSE4A))
2013	  target_flags |= MASK_SSE4A;
2014	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2015	  error ("CPU you selected does not support x86-64 "
2016		 "instruction set");
2017	break;
2018      }
2019
2020  if (i == pta_size)
2021    error ("bad value (%s) for -march= switch", ix86_arch_string);
2022
2023  for (i = 0; i < pta_size; i++)
2024    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2025      {
2026	ix86_tune = processor_alias_table[i].processor;
2027	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2028	  {
2029	    if (ix86_tune_defaulted)
2030	      {
2031		ix86_tune_string = "x86-64";
2032		for (i = 0; i < pta_size; i++)
2033		  if (! strcmp (ix86_tune_string,
2034				processor_alias_table[i].name))
2035		    break;
2036		ix86_tune = processor_alias_table[i].processor;
2037	      }
2038	    else
2039	      error ("CPU you selected does not support x86-64 "
2040		     "instruction set");
2041	  }
2042        /* Intel CPUs have always interpreted SSE prefetch instructions as
2043	   NOPs; so, we can enable SSE prefetch instructions even when
2044	   -mtune (rather than -march) points us to a processor that has them.
2045	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2046	   higher processors.  */
2047	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2048	  x86_prefetch_sse = true;
2049	break;
2050      }
2051  if (i == pta_size)
2052    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2053
2054  if (optimize_size)
2055    ix86_cost = &size_cost;
2056  else
2057    ix86_cost = processor_target_table[ix86_tune].cost;
2058  target_flags |= processor_target_table[ix86_tune].target_enable;
2059  target_flags &= ~processor_target_table[ix86_tune].target_disable;
2060
2061  /* Arrange to set up i386_stack_locals for all functions.  */
2062  init_machine_status = ix86_init_machine_status;
2063
2064  /* Validate -mregparm= value.  */
2065  if (ix86_regparm_string)
2066    {
2067      i = atoi (ix86_regparm_string);
2068      if (i < 0 || i > REGPARM_MAX)
2069	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2070      else
2071	ix86_regparm = i;
2072    }
2073  else
2074   if (TARGET_64BIT)
2075     ix86_regparm = REGPARM_MAX;
2076
2077  /* If the user has provided any of the -malign-* options,
2078     warn and use that value only if -falign-* is not set.
2079     Remove this code in GCC 3.2 or later.  */
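  /* Note that the -malign-* arguments are log2 values, hence the
     "1 << i" conversions below.  */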
2080  if (ix86_align_loops_string)
2081    {
2082      warning (0, "-malign-loops is obsolete, use -falign-loops");
2083      if (align_loops == 0)
2084	{
2085	  i = atoi (ix86_align_loops_string);
2086	  if (i < 0 || i > MAX_CODE_ALIGN)
2087	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2088	  else
2089	    align_loops = 1 << i;
2090	}
2091    }
2092
2093  if (ix86_align_jumps_string)
2094    {
2095      warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2096      if (align_jumps == 0)
2097	{
2098	  i = atoi (ix86_align_jumps_string);
2099	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2101	  else
2102	    align_jumps = 1 << i;
2103	}
2104    }
2105
2106  if (ix86_align_funcs_string)
2107    {
2108      warning (0, "-malign-functions is obsolete, use -falign-functions");
2109      if (align_functions == 0)
2110	{
2111	  i = atoi (ix86_align_funcs_string);
2112	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2114	  else
2115	    align_functions = 1 << i;
2116	}
2117    }
2118
2119  /* Default align_* from the processor table.  */
2120  if (align_loops == 0)
2121    {
2122      align_loops = processor_target_table[ix86_tune].align_loop;
2123      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2124    }
2125  if (align_jumps == 0)
2126    {
2127      align_jumps = processor_target_table[ix86_tune].align_jump;
2128      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2129    }
2130  if (align_functions == 0)
2131    {
2132      align_functions = processor_target_table[ix86_tune].align_func;
2133    }
2134
2135  /* Validate -mbranch-cost= value, or provide default.  */
2136  ix86_branch_cost = ix86_cost->branch_cost;
2137  if (ix86_branch_cost_string)
2138    {
2139      i = atoi (ix86_branch_cost_string);
2140      if (i < 0 || i > 5)
2141	error ("-mbranch-cost=%d is not between 0 and 5", i);
2142      else
2143	ix86_branch_cost = i;
2144    }
2145  if (ix86_section_threshold_string)
2146    {
2147      i = atoi (ix86_section_threshold_string);
2148      if (i < 0)
2149	error ("-mlarge-data-threshold=%d is negative", i);
2150      else
2151	ix86_section_threshold = i;
2152    }
2153
2154  if (ix86_tls_dialect_string)
2155    {
2156      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2157	ix86_tls_dialect = TLS_DIALECT_GNU;
2158      else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2159	ix86_tls_dialect = TLS_DIALECT_GNU2;
2160      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2161	ix86_tls_dialect = TLS_DIALECT_SUN;
2162      else
2163	error ("bad value (%s) for -mtls-dialect= switch",
2164	       ix86_tls_dialect_string);
2165    }
2166
2167  /* Keep nonleaf frame pointers.  */
2168  if (flag_omit_frame_pointer)
2169    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2170  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2171    flag_omit_frame_pointer = 1;
2172
  /* If we're doing fast math, we don't care about comparison order
     with respect to NaNs.  This lets us use a shorter comparison sequence.  */
2175  if (flag_finite_math_only)
2176    target_flags &= ~MASK_IEEE_FP;
2177
2178  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2179     since the insns won't need emulation.  */
2180  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2181    target_flags &= ~MASK_NO_FANCY_MATH_387;
2182
2183  /* Likewise, if the target doesn't have a 387, or we've specified
2184     software floating point, don't use 387 inline intrinsics.  */
2185  if (!TARGET_80387)
2186    target_flags |= MASK_NO_FANCY_MATH_387;
2187
2188  /* Turn on SSE3 builtins for -mssse3.  */
2189  if (TARGET_SSSE3)
2190    target_flags |= MASK_SSE3;
2191
2192  /* Turn on SSE3 builtins for -msse4a.  */
2193  if (TARGET_SSE4A)
2194    target_flags |= MASK_SSE3;
2195
2196  /* Turn on SSE2 builtins for -msse3.  */
2197  if (TARGET_SSE3)
2198    target_flags |= MASK_SSE2;
2199
2200  /* Turn on SSE2 builtins for -maes.  */
2201  if (TARGET_AES)
2202    target_flags |= MASK_SSE2;
2203
2204  /* Turn on SSE builtins for -msse2.  */
2205  if (TARGET_SSE2)
2206    target_flags |= MASK_SSE;
2207
  /* Turn on MMX builtins for -msse, unless MMX was explicitly disabled.  */
2209  if (TARGET_SSE)
2210    {
2211      target_flags |= MASK_MMX & ~target_flags_explicit;
2212      x86_prefetch_sse = true;
2213    }
2214
2215  /* Turn on MMX builtins for 3Dnow.  */
2216  if (TARGET_3DNOW)
2217    target_flags |= MASK_MMX;
2218
2219  /* Turn on POPCNT builtins for -mabm.  */
2220  if (TARGET_ABM)
2221    target_flags |= MASK_POPCNT;
2222
2223  if (TARGET_64BIT)
2224    {
2225      if (TARGET_ALIGN_DOUBLE)
2226	error ("-malign-double makes no sense in the 64bit mode");
2227      if (TARGET_RTD)
2228	error ("-mrtd calling convention not supported in the 64bit mode");
2229
2230      /* Enable by default the SSE and MMX builtins.  Do allow the user to
2231	 explicitly disable any of these.  In particular, disabling SSE and
2232	 MMX for kernel code is extremely useful.  */
2233      target_flags
2234	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2235	    & ~target_flags_explicit);
2236     }
2237  else
2238    {
      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use one when the programmer takes care to keep the stack from being
	 destroyed.  */
2241      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2242        target_flags |= MASK_NO_RED_ZONE;
2243    }
2244
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  We can't
     lower it for optimize_size, since otherwise object files compiled
     with -Os could not be mixed with ones compiled at other optimization
     levels.  */
2249  ix86_preferred_stack_boundary = 128;
2250  if (ix86_preferred_stack_boundary_string)
2251    {
2252      i = atoi (ix86_preferred_stack_boundary_string);
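      /* The option value is the log2 of the boundary in bytes; the range
	 check and the conversion to bits below rely on that.  */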
2253      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2254	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2255	       TARGET_64BIT ? 4 : 2);
2256      else
2257	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2258    }
2259
2260  /* Accept -msseregparm only if at least SSE support is enabled.  */
2261  if (TARGET_SSEREGPARM
2262      && ! TARGET_SSE)
2263    error ("-msseregparm used without SSE enabled");
2264
2265  ix86_fpmath = TARGET_FPMATH_DEFAULT;
2266
2267  if (ix86_fpmath_string != 0)
2268    {
2269      if (! strcmp (ix86_fpmath_string, "387"))
2270	ix86_fpmath = FPMATH_387;
2271      else if (! strcmp (ix86_fpmath_string, "sse"))
2272	{
2273	  if (!TARGET_SSE)
2274	    {
2275	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
2276	      ix86_fpmath = FPMATH_387;
2277	    }
2278	  else
2279	    ix86_fpmath = FPMATH_SSE;
2280	}
2281      else if (! strcmp (ix86_fpmath_string, "387,sse")
2282	       || ! strcmp (ix86_fpmath_string, "sse,387"))
2283	{
2284	  if (!TARGET_SSE)
2285	    {
2286	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
2287	      ix86_fpmath = FPMATH_387;
2288	    }
2289	  else if (!TARGET_80387)
2290	    {
2291	      warning (0, "387 instruction set disabled, using SSE arithmetics");
2292	      ix86_fpmath = FPMATH_SSE;
2293	    }
2294	  else
2295	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
2296	}
2297      else
2298	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2299    }
2300
2301  /* If the i387 is disabled, then do not return values in it. */
2302  if (!TARGET_80387)
2303    target_flags &= ~MASK_FLOAT_RETURNS;
2304
2305  if ((x86_accumulate_outgoing_args & TUNEMASK)
2306      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2307      && !optimize_size)
2308    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2309
2310  /* ??? Unwind info is not correct around the CFG unless either a frame
2311     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
2312     unwind info generation to be aware of the CFG and propagating states
2313     around edges.  */
2314  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2315       || flag_exceptions || flag_non_call_exceptions)
2316      && flag_omit_frame_pointer
2317      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2318    {
2319      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2320	warning (0, "unwind tables currently require either a frame pointer "
2321		 "or -maccumulate-outgoing-args for correctness");
2322      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2323    }
2324
2325  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
2326  {
2327    char *p;
2328    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2329    p = strchr (internal_label_prefix, 'X');
2330    internal_label_prefix_len = p - internal_label_prefix;
2331    *p = '\0';
2332  }
2333
  /* When no scheduling description is available, disable the scheduler
     passes so they won't slow down the compilation and make x87 code
     slower.  */
2336  if (!TARGET_SCHEDULE)
2337    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2338}
2339
/* Switch to the appropriate section for output of DECL.
2341   DECL is either a `VAR_DECL' node or a constant of some sort.
2342   RELOC indicates whether forming the initial value of DECL requires
2343   link-time relocations.  */
2344
2345static section *
2346x86_64_elf_select_section (tree decl, int reloc,
2347			   unsigned HOST_WIDE_INT align)
2348{
2349  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2350      && ix86_in_large_data_p (decl))
2351    {
2352      const char *sname = NULL;
2353      unsigned int flags = SECTION_WRITE;
2354      switch (categorize_decl_for_section (decl, reloc))
2355	{
2356	case SECCAT_DATA:
2357	  sname = ".ldata";
2358	  break;
2359	case SECCAT_DATA_REL:
2360	  sname = ".ldata.rel";
2361	  break;
2362	case SECCAT_DATA_REL_LOCAL:
2363	  sname = ".ldata.rel.local";
2364	  break;
2365	case SECCAT_DATA_REL_RO:
2366	  sname = ".ldata.rel.ro";
2367	  break;
2368	case SECCAT_DATA_REL_RO_LOCAL:
2369	  sname = ".ldata.rel.ro.local";
2370	  break;
2371	case SECCAT_BSS:
2372	  sname = ".lbss";
2373	  flags |= SECTION_BSS;
2374	  break;
2375	case SECCAT_RODATA:
2376	case SECCAT_RODATA_MERGE_STR:
2377	case SECCAT_RODATA_MERGE_STR_INIT:
2378	case SECCAT_RODATA_MERGE_CONST:
2379	  sname = ".lrodata";
2380	  flags = 0;
2381	  break;
2382	case SECCAT_SRODATA:
2383	case SECCAT_SDATA:
2384	case SECCAT_SBSS:
2385	  gcc_unreachable ();
2386	case SECCAT_TEXT:
2387	case SECCAT_TDATA:
2388	case SECCAT_TBSS:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
2391	  break;
2392	}
2393      if (sname)
2394	{
2395	  /* We might get called with string constants, but get_named_section
2396	     doesn't like them as they are not DECLs.  Also, we need to set
2397	     flags in that case.  */
2398	  if (!DECL_P (decl))
2399	    return get_section (sname, flags, NULL);
2400	  return get_named_section (decl, sname, reloc);
2401	}
2402    }
2403  return default_elf_select_section (decl, reloc, align);
2404}
2405
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of DECL requires
   link-time relocations.  */
2410
2411static void
2412x86_64_elf_unique_section (tree decl, int reloc)
2413{
2414  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2415      && ix86_in_large_data_p (decl))
2416    {
2417      const char *prefix = NULL;
2418      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
2419      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2420
2421      switch (categorize_decl_for_section (decl, reloc))
2422	{
2423	case SECCAT_DATA:
2424	case SECCAT_DATA_REL:
2425	case SECCAT_DATA_REL_LOCAL:
2426	case SECCAT_DATA_REL_RO:
2427	case SECCAT_DATA_REL_RO_LOCAL:
2428          prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2429	  break;
2430	case SECCAT_BSS:
2431          prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2432	  break;
2433	case SECCAT_RODATA:
2434	case SECCAT_RODATA_MERGE_STR:
2435	case SECCAT_RODATA_MERGE_STR_INIT:
2436	case SECCAT_RODATA_MERGE_CONST:
2437          prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2438	  break;
2439	case SECCAT_SRODATA:
2440	case SECCAT_SDATA:
2441	case SECCAT_SBSS:
2442	  gcc_unreachable ();
2443	case SECCAT_TEXT:
2444	case SECCAT_TDATA:
2445	case SECCAT_TBSS:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
2448	  break;
2449	}
2450      if (prefix)
2451	{
2452	  const char *name;
2453	  size_t nlen, plen;
2454	  char *string;
2455	  plen = strlen (prefix);
2456
2457	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2458	  name = targetm.strip_name_encoding (name);
2459	  nlen = strlen (name);
2460
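	  /* Build "<prefix><name>" in a temporary buffer and use it as the
	     section name.  */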
2461	  string = alloca (nlen + plen + 1);
2462	  memcpy (string, prefix, plen);
2463	  memcpy (string + plen, name, nlen + 1);
2464
2465	  DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2466	  return;
2467	}
2468    }
2469  default_unique_section (decl, reloc);
2470}
2471
2472#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external-linkage data object.

   For the x86-64 medium model we need to use the .largecomm directive
   for large objects.  */
2478void
2479x86_elf_aligned_common (FILE *file,
2480			const char *name, unsigned HOST_WIDE_INT size,
2481			int align)
2482{
2483  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2484      && size > (unsigned int)ix86_section_threshold)
2485    fprintf (file, ".largecomm\t");
2486  else
2487    fprintf (file, "%s", COMMON_ASM_OP);
2488  assemble_name (file, name);
2489  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2490	   size, align / BITS_PER_UNIT);
2491}
2492
2493/* Utility function for targets to use in implementing
2494   ASM_OUTPUT_ALIGNED_BSS.  */
2495
2496void
2497x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2498			const char *name, unsigned HOST_WIDE_INT size,
2499			int align)
2500{
2501  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2502      && size > (unsigned int)ix86_section_threshold)
2503    switch_to_section (get_named_section (decl, ".lbss", 0));
2504  else
2505    switch_to_section (bss_section);
2506  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2507#ifdef ASM_DECLARE_OBJECT_NAME
2508  last_assemble_variable_decl = decl;
2509  ASM_DECLARE_OBJECT_NAME (file, name, decl);
2510#else
  /* The standard thing is just to output a label for the object.  */
2512  ASM_OUTPUT_LABEL (file, name);
2513#endif /* ASM_DECLARE_OBJECT_NAME */
2514  ASM_OUTPUT_SKIP (file, size ? size : 1);
2515}
2516#endif
2517
2518void
2519optimization_options (int level, int size ATTRIBUTE_UNUSED)
2520{
2521  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
2522     make the problem with not enough registers even worse.  */
2523#ifdef INSN_SCHEDULING
2524  if (level > 1)
2525    flag_schedule_insns = 0;
2526#endif
2527
2528  if (TARGET_MACHO)
2529    /* The Darwin libraries never set errno, so we might as well
2530       avoid calling them when that's the only reason we would.  */
2531    flag_errno_math = 0;
2532
  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this point.  Mark these values with 2 and
     let the user override them.  In case there is no command line option
     specifying them, we will set the defaults in override_options.  */
2537  if (optimize >= 1)
2538    flag_omit_frame_pointer = 2;
2539  flag_pcc_struct_return = 2;
2540  flag_asynchronous_unwind_tables = 2;
2541#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2542  SUBTARGET_OPTIMIZATION_OPTIONS;
2543#endif
2544}
2545
2546/* Table of valid machine attributes.  */
2547const struct attribute_spec ix86_attribute_table[] =
2548{
2549  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2550  /* Stdcall attribute says callee is responsible for popping arguments
2551     if they are not variable.  */
2552  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2553  /* Fastcall attribute says callee is responsible for popping arguments
2554     if they are not variable.  */
2555  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
2557  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2558  /* Regparm attribute specifies how many integer arguments are to be
2559     passed in registers.  */
2560  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
2561  /* Sseregparm attribute says we are using x86_64 calling conventions
2562     for FP arguments.  */
2563  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2564  /* force_align_arg_pointer says this function realigns the stack at entry.  */
2565  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2566    false, true,  true, ix86_handle_cconv_attribute },
2567#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2568  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2569  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2570  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
2571#endif
2572  { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
2573  { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
2574#ifdef SUBTARGET_ATTRIBUTE_TABLE
2575  SUBTARGET_ATTRIBUTE_TABLE,
2576#endif
2577  { NULL,        0, 0, false, false, false, NULL }
2578};
2579
2580/* Decide whether we can make a sibling call to a function.  DECL is the
2581   declaration of the function being targeted by the call and EXP is the
2582   CALL_EXPR representing the call.  */
2583
2584static bool
2585ix86_function_ok_for_sibcall (tree decl, tree exp)
2586{
2587  tree func;
2588  rtx a, b;
2589
2590  /* If we are generating position-independent code, we cannot sibcall
2591     optimize any indirect call, or a direct call to a global function,
2592     as the PLT requires %ebx be live.  */
2593  if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2594    return false;
2595
2596  if (decl)
2597    func = decl;
2598  else
2599    {
2600      func = TREE_TYPE (TREE_OPERAND (exp, 0));
2601      if (POINTER_TYPE_P (func))
2602        func = TREE_TYPE (func);
2603    }
2604
  /* Check that the return value locations are the same.  For example,
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also where we notice differences
     in the return value ABI.  Note that it is OK for one of the
     functions to have a void return type as long as the return value
     of the other is passed in a register.  */
2614  a = ix86_function_value (TREE_TYPE (exp), func, false);
2615  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2616			   cfun->decl, false);
2617  if (STACK_REG_P (a) || STACK_REG_P (b))
2618    {
2619      if (!rtx_equal_p (a, b))
2620	return false;
2621    }
2622  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2623    ;
2624  else if (!rtx_equal_p (a, b))
2625    return false;
2626
2627  /* If this call is indirect, we'll need to be able to use a call-clobbered
2628     register for the address of the target function.  Make sure that all
2629     such registers are not used for passing parameters.  */
2630  if (!decl && !TARGET_64BIT)
2631    {
2632      tree type;
2633
2634      /* We're looking at the CALL_EXPR, we need the type of the function.  */
2635      type = TREE_OPERAND (exp, 0);		/* pointer expression */
2636      type = TREE_TYPE (type);			/* pointer type */
2637      type = TREE_TYPE (type);			/* function type */
2638
2639      if (ix86_function_regparm (type, NULL) >= 3)
2640	{
2641	  /* ??? Need to count the actual number of registers to be used,
2642	     not the possible number of registers.  Fix later.  */
2643	  return false;
2644	}
2645    }
2646
2647#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2648  /* Dllimport'd functions are also called indirectly.  */
2649  if (decl && DECL_DLLIMPORT_P (decl)
2650      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2651    return false;
2652#endif
2653
2654  /* If we force-aligned the stack, then sibcalling would unalign the
2655     stack, which may break the called function.  */
2656  if (cfun->machine->force_align_arg_pointer)
2657    return false;
2658
2659  /* Otherwise okay.  That also includes certain types of indirect calls.  */
2660  return true;
2661}
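
/* Illustrative example (hypothetical code, not taken from this file): with
   -fPIC on ia32,

     extern int bar (int);
     int foo (int x) { return bar (x); }

   the call to bar cannot become a sibcall, because bar does not bind
   locally and reaching it through the PLT requires %ebx to be live.  */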
2662
2663/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2664   calling convention attributes;
2665   arguments as in struct attribute_spec.handler.  */
2666
2667static tree
2668ix86_handle_cconv_attribute (tree *node, tree name,
2669				   tree args,
2670				   int flags ATTRIBUTE_UNUSED,
2671				   bool *no_add_attrs)
2672{
2673  if (TREE_CODE (*node) != FUNCTION_TYPE
2674      && TREE_CODE (*node) != METHOD_TYPE
2675      && TREE_CODE (*node) != FIELD_DECL
2676      && TREE_CODE (*node) != TYPE_DECL)
2677    {
2678      warning (OPT_Wattributes, "%qs attribute only applies to functions",
2679	       IDENTIFIER_POINTER (name));
2680      *no_add_attrs = true;
2681      return NULL_TREE;
2682    }
2683
2684  /* Can combine regparm with all attributes but fastcall.  */
2685  if (is_attribute_p ("regparm", name))
2686    {
2687      tree cst;
2688
2689      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2690        {
2691	  error ("fastcall and regparm attributes are not compatible");
2692	}
2693
2694      cst = TREE_VALUE (args);
2695      if (TREE_CODE (cst) != INTEGER_CST)
2696	{
2697	  warning (OPT_Wattributes,
2698		   "%qs attribute requires an integer constant argument",
2699		   IDENTIFIER_POINTER (name));
2700	  *no_add_attrs = true;
2701	}
2702      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2703	{
2704	  warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2705		   IDENTIFIER_POINTER (name), REGPARM_MAX);
2706	  *no_add_attrs = true;
2707	}
2708
2709      if (!TARGET_64BIT
2710	  && lookup_attribute (ix86_force_align_arg_pointer_string,
2711			       TYPE_ATTRIBUTES (*node))
2712	  && compare_tree_int (cst, REGPARM_MAX-1) > 0)
2713	{
2714	  error ("%s functions limited to %d register parameters",
2715		 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2716	}
2717
2718      return NULL_TREE;
2719    }
2720
2721  if (TARGET_64BIT)
2722    {
2723      warning (OPT_Wattributes, "%qs attribute ignored",
2724	       IDENTIFIER_POINTER (name));
2725      *no_add_attrs = true;
2726      return NULL_TREE;
2727    }
2728
2729  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
2730  if (is_attribute_p ("fastcall", name))
2731    {
2732      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2733        {
2734	  error ("fastcall and cdecl attributes are not compatible");
2735	}
2736      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2737        {
2738	  error ("fastcall and stdcall attributes are not compatible");
2739	}
2740      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2741        {
2742	  error ("fastcall and regparm attributes are not compatible");
2743	}
2744    }
2745
2746  /* Can combine stdcall with fastcall (redundant), regparm and
2747     sseregparm.  */
2748  else if (is_attribute_p ("stdcall", name))
2749    {
2750      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2751        {
2752	  error ("stdcall and cdecl attributes are not compatible");
2753	}
2754      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2755        {
2756	  error ("stdcall and fastcall attributes are not compatible");
2757	}
2758    }
2759
2760  /* Can combine cdecl with regparm and sseregparm.  */
2761  else if (is_attribute_p ("cdecl", name))
2762    {
2763      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2764        {
2765	  error ("stdcall and cdecl attributes are not compatible");
2766	}
2767      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2768        {
2769	  error ("fastcall and cdecl attributes are not compatible");
2770	}
2771    }
2772
2773  /* Can combine sseregparm with all attributes.  */
2774
2775  return NULL_TREE;
2776}
2777
2778/* Return 0 if the attributes for two types are incompatible, 1 if they
2779   are compatible, and 2 if they are nearly compatible (which causes a
2780   warning to be generated).  */
2781
2782static int
2783ix86_comp_type_attributes (tree type1, tree type2)
2784{
2785  /* Check for mismatch of non-default calling convention.  */
2786  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2787
2788  if (TREE_CODE (type1) != FUNCTION_TYPE)
2789    return 1;
2790
2791  /* Check for mismatched fastcall/regparm types.  */
2792  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2793       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2794      || (ix86_function_regparm (type1, NULL)
2795	  != ix86_function_regparm (type2, NULL)))
2796    return 0;
2797
2798  /* Check for mismatched sseregparm types.  */
2799  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2800      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2801    return 0;
2802
2803  /* Check for mismatched return types (cdecl vs stdcall).  */
2804  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2805      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2806    return 0;
2807
2808  return 1;
2809}
2810
2811/* Return the regparm value for a function with the indicated TYPE and DECL.
2812   DECL may be NULL when calling a function indirectly
2813   or considering a libcall.  */
2814
2815static int
2816ix86_function_regparm (tree type, tree decl)
2817{
2818  tree attr;
2819  int regparm = ix86_regparm;
2820  bool user_convention = false;
2821
2822  if (!TARGET_64BIT)
2823    {
2824      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2825      if (attr)
2826	{
2827	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2828	  user_convention = true;
2829	}
2830
2831      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2832	{
2833	  regparm = 2;
2834	  user_convention = true;
2835	}
2836
2837      /* Use register calling convention for local functions when possible.  */
2838      if (!TARGET_64BIT && !user_convention && decl
2839	  && flag_unit_at_a_time && !profile_flag)
2840	{
2841	  struct cgraph_local_info *i = cgraph_local_info (decl);
2842	  if (i && i->local)
2843	    {
2844	      int local_regparm, globals = 0, regno;
2845
2846	      /* Make sure no regparm register is taken by a global register
2847		 variable.  */
2848	      for (local_regparm = 0; local_regparm < 3; local_regparm++)
2849		if (global_regs[local_regparm])
2850		  break;
2851	      /* We can't use regparm(3) for nested functions as these use
2852		 the static chain pointer in the third argument.  */
2853	      if (local_regparm == 3
2854		  && decl_function_context (decl)
2855		  && !DECL_NO_STATIC_CHAIN (decl))
2856		local_regparm = 2;
2857	      /* If the function realigns its stack pointer, the
2858		 prologue will clobber %ecx.  If we've already
2859		 generated code for the callee, the callee
2860		 DECL_STRUCT_FUNCTION is gone, so we fall back to
2861		 scanning the attributes for the self-realigning
2862		 property.  */
2863	      if ((DECL_STRUCT_FUNCTION (decl)
2864		   && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2865		  || (!DECL_STRUCT_FUNCTION (decl)
2866		      && lookup_attribute (ix86_force_align_arg_pointer_string,
2867					   TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2868		local_regparm = 2;
2869	      /* Each global register variable increases register pressure,
2870		 so the more global reg vars there are, the less useful the
2871		 regparm optimization is, unless the user requested it explicitly.  */
2872	      for (regno = 0; regno < 6; regno++)
2873		if (global_regs[regno])
2874		  globals++;
2875	      local_regparm
2876		= globals < local_regparm ? local_regparm - globals : 0;
2877
2878	      if (local_regparm > regparm)
2879		regparm = local_regparm;
2880	    }
2881	}
2882    }
2883  return regparm;
2884}
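
/* Illustrative example (hypothetical code): with -funit-at-a-time and no
   profiling, a function that is local to the compilation unit, e.g.

     static int sum3 (int a, int b, int c) { return a + b + c; }

   whose address never escapes may be promoted to regparm (3) by the code
   above, provided no global register variable occupies %eax, %edx or %ecx,
   the function needs no static chain, and it does not realign its stack.  */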
2885
2886/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2887   DFmode (2) arguments in SSE registers for a function with the
2888   indicated TYPE and DECL.  DECL may be NULL when calling function
2889   indirectly or considering a libcall.  Otherwise return 0.  */
2890
2891static int
2892ix86_function_sseregparm (tree type, tree decl)
2893{
2894  /* Use SSE registers to pass SFmode and DFmode arguments if requested
2895     by the sseregparm attribute.  */
2896  if (TARGET_SSEREGPARM
2897      || (type
2898	  && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2899    {
2900      if (!TARGET_SSE)
2901	{
2902	  if (decl)
2903	    error ("Calling %qD with attribute sseregparm without "
2904		   "SSE/SSE2 enabled", decl);
2905	  else
2906	    error ("Calling %qT with attribute sseregparm without "
2907		   "SSE/SSE2 enabled", type);
2908	  return 0;
2909	}
2910
2911      return 2;
2912    }
2913
2914  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2915     (and DFmode for SSE2) arguments in SSE registers,
2916     even for 32-bit targets.  */
2917  if (!TARGET_64BIT && decl
2918      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2919    {
2920      struct cgraph_local_info *i = cgraph_local_info (decl);
2921      if (i && i->local)
2922	return TARGET_SSE2 ? 2 : 1;
2923    }
2924
2925  return 0;
2926}
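
/* Illustrative example (hypothetical declaration): with SSE enabled,

     float __attribute__((sseregparm)) dot (float x, float y);

   passes x and y in SSE registers rather than on the stack; if SSE is
   disabled, consulting this function for such a declaration triggers the
   error above.  */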
2927
2928/* Return true if EAX is live at the start of the function.  Used by
2929   ix86_expand_prologue to determine if we need special help before
2930   calling allocate_stack_worker.  */
2931
2932static bool
2933ix86_eax_live_at_start_p (void)
2934{
2935  /* Cheat.  Don't bother working forward from ix86_function_regparm
2936     to the function type to whether an actual argument is located in
2937     eax.  Instead just look at cfg info, which is still close enough
2938     to correct at this point.  This gives false positives for broken
2939     functions that might use uninitialized data that happens to be
2940     allocated in eax, but who cares?  */
2941  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2942}
2943
2944/* Value is the number of bytes of arguments automatically
2945   popped when returning from a subroutine call.
2946   FUNDECL is the declaration node of the function (as a tree),
2947   FUNTYPE is the data type of the function (as a tree),
2948   or for a library call it is an identifier node for the subroutine name.
2949   SIZE is the number of bytes of arguments passed on the stack.
2950
2951   On the 80386, the RTD insn may be used to pop them if the number
2952     of args is fixed, but if the number is variable then the caller
2953     must pop them all.  RTD can't be used for library calls now
2954     because the library is compiled with the Unix compiler.
2955   Use of RTD is a selectable option, since it is incompatible with
2956   standard Unix calling sequences.  If the option is not selected,
2957   the caller must always pop the args.
2958
2959   The attribute stdcall is equivalent to RTD on a per module basis.  */
2960
2961int
2962ix86_return_pops_args (tree fundecl, tree funtype, int size)
2963{
2964  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2965
2966  /* Cdecl functions override -mrtd, and never pop the stack.  */
2967  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2968
2969    /* Stdcall and fastcall functions will pop the stack if not
2970       variable args.  */
2971    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2972        || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2973      rtd = 1;
2974
2975    if (rtd
2976        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2977	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2978		== void_type_node)))
2979      return size;
2980  }
2981
2982  /* Lose any fake structure return argument if it is passed on the stack.  */
2983  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2984      && !TARGET_64BIT
2985      && !KEEP_AGGREGATE_RETURN_POINTER)
2986    {
2987      int nregs = ix86_function_regparm (funtype, fundecl);
2988
2989      if (!nregs)
2990	return GET_MODE_SIZE (Pmode);
2991    }
2992
2993  return 0;
2994}
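
/* Illustration (an assumption about typical code generation, not recorded
   in this file): for

     void __attribute__((stdcall)) f (int a, int b);

   SIZE is 8, so f pops its own arguments and returns with "ret $8"; a
   cdecl f returns with a plain "ret" and the caller adjusts %esp.  */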
2995
2996/* Argument support functions.  */
2997
2998/* Return true when register may be used to pass function parameters.  */
2999bool
3000ix86_function_arg_regno_p (int regno)
3001{
3002  int i;
3003  if (!TARGET_64BIT)
3004    {
3005      if (TARGET_MACHO)
3006        return (regno < REGPARM_MAX
3007                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3008      else
3009        return (regno < REGPARM_MAX
3010	        || (TARGET_MMX && MMX_REGNO_P (regno)
3011	  	    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3012	        || (TARGET_SSE && SSE_REGNO_P (regno)
3013		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3014    }
3015
3016  if (TARGET_MACHO)
3017    {
3018      if (SSE_REGNO_P (regno) && TARGET_SSE)
3019        return true;
3020    }
3021  else
3022    {
3023      if (TARGET_SSE && SSE_REGNO_P (regno)
3024          && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3025        return true;
3026    }
3027  /* RAX is used as a hidden argument to va_arg functions.  */
3028  if (!regno)
3029    return true;
3030  for (i = 0; i < REGPARM_MAX; i++)
3031    if (regno == x86_64_int_parameter_registers[i])
3032      return true;
3033  return false;
3034}
3035
3036/* Return true if we do not know how to pass TYPE solely in registers.  */
3037
3038static bool
3039ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3040{
3041  if (must_pass_in_stack_var_size_or_pad (mode, type))
3042    return true;
3043
3044  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
3045     The layout_type routine is crafty and tries to trick us into passing
3046     currently unsupported vector types on the stack by using TImode.  */
3047  return (!TARGET_64BIT && mode == TImode
3048	  && type && TREE_CODE (type) != VECTOR_TYPE);
3049}
3050
3051/* Initialize a variable CUM of type CUMULATIVE_ARGS
3052   for a call to a function whose data type is FNTYPE.
3053   For a library call, FNTYPE is 0.  */
3054
3055void
3056init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
3057		      tree fntype,	/* tree ptr for function decl */
3058		      rtx libname,	/* SYMBOL_REF of library name or 0 */
3059		      tree fndecl)
3060{
3061  static CUMULATIVE_ARGS zero_cum;
3062  tree param, next_param;
3063
3064  if (TARGET_DEBUG_ARG)
3065    {
3066      fprintf (stderr, "\ninit_cumulative_args (");
3067      if (fntype)
3068	fprintf (stderr, "fntype code = %s, ret code = %s",
3069		 tree_code_name[(int) TREE_CODE (fntype)],
3070		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3071      else
3072	fprintf (stderr, "no fntype");
3073
3074      if (libname)
3075	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3076    }
3077
3078  *cum = zero_cum;
3079
3080  /* Set up the number of registers to use for passing arguments.  */
3081  cum->nregs = ix86_regparm;
3082  if (TARGET_SSE)
3083    cum->sse_nregs = SSE_REGPARM_MAX;
3084  if (TARGET_MMX)
3085    cum->mmx_nregs = MMX_REGPARM_MAX;
3086  cum->warn_sse = true;
3087  cum->warn_mmx = true;
3088  cum->maybe_vaarg = false;
3089
3090  /* Use ecx and edx registers if function has fastcall attribute,
3091     else look for regparm information.  */
3092  if (fntype && !TARGET_64BIT)
3093    {
3094      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3095	{
3096	  cum->nregs = 2;
3097	  cum->fastcall = 1;
3098	}
3099      else
3100	cum->nregs = ix86_function_regparm (fntype, fndecl);
3101    }
3102
3103  /* Set up the number of SSE registers used for passing SFmode
3104     and DFmode arguments.  Warn for mismatching ABI.  */
3105  cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3106
3107  /* Determine if this function has variable arguments.  This is
3108     indicated by the last argument being 'void_type_node' if there
3109     are no variable arguments.  If there are variable arguments, then
3110     we won't pass anything in registers in 32-bit mode. */
3111
3112  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3113    {
3114      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3115	   param != 0; param = next_param)
3116	{
3117	  next_param = TREE_CHAIN (param);
3118	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3119	    {
3120	      if (!TARGET_64BIT)
3121		{
3122		  cum->nregs = 0;
3123		  cum->sse_nregs = 0;
3124		  cum->mmx_nregs = 0;
3125		  cum->warn_sse = 0;
3126		  cum->warn_mmx = 0;
3127		  cum->fastcall = 0;
3128		  cum->float_in_sse = 0;
3129		}
3130	      cum->maybe_vaarg = true;
3131	    }
3132	}
3133    }
3134  if ((!fntype && !libname)
3135      || (fntype && !TYPE_ARG_TYPES (fntype)))
3136    cum->maybe_vaarg = true;
3137
3138  if (TARGET_DEBUG_ARG)
3139    fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3140
3141  return;
3142}
3143
3144/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
3145   But in the case of vector types, it is some vector mode.
3146
3147   When we have only some of our vector isa extensions enabled, then there
3148   are some modes for which vector_mode_supported_p is false.  For these
3149   modes, the generic vector support in gcc will choose some non-vector mode
3150   in order to implement the type.  By computing the natural mode, we'll
3151   select the proper ABI location for the operand and not depend on whatever
3152   the middle-end decides to do with these vector types.  */
3153
3154static enum machine_mode
3155type_natural_mode (tree type)
3156{
3157  enum machine_mode mode = TYPE_MODE (type);
3158
3159  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3160    {
3161      HOST_WIDE_INT size = int_size_in_bytes (type);
3162      if ((size == 8 || size == 16)
3163	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
3164	  && TYPE_VECTOR_SUBPARTS (type) > 1)
3165	{
3166	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3167
3168	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3169	    mode = MIN_MODE_VECTOR_FLOAT;
3170	  else
3171	    mode = MIN_MODE_VECTOR_INT;
3172
3173	  /* Get the mode which has this inner mode and number of units.  */
3174	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3175	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3176		&& GET_MODE_INNER (mode) == innermode)
3177	      return mode;
3178
3179	  gcc_unreachable ();
3180	}
3181    }
3182
3183  return mode;
3184}
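
/* Illustrative example (hypothetical type): for

     typedef int v4si __attribute__((vector_size (16)));

   the middle end may fall back to a non-vector mode when SSE is disabled,
   but type_natural_mode still returns V4SImode, so the ABI location of a
   v4si argument does not depend on which vector extensions are enabled.  */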
3185
3186/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
3187   this may not agree with the mode that the type system has chosen for the
3188   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
3189   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
3190
3191static rtx
3192gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3193		     unsigned int regno)
3194{
3195  rtx tmp;
3196
3197  if (orig_mode != BLKmode)
3198    tmp = gen_rtx_REG (orig_mode, regno);
3199  else
3200    {
3201      tmp = gen_rtx_REG (mode, regno);
3202      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3203      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3204    }
3205
3206  return tmp;
3207}
3208
3209/* x86-64 register passing implementation.  See the x86-64 ABI for details.
3210   The goal of this code is to classify each eightbyte of an incoming argument
3211   by register class and assign registers accordingly.  */
3212
3213/* Return the union class of CLASS1 and CLASS2.
3214   See the x86-64 PS ABI for details.  */
3215
3216static enum x86_64_reg_class
3217merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3218{
3219  /* Rule #1: If both classes are equal, this is the resulting class.  */
3220  if (class1 == class2)
3221    return class1;
3222
3223  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3224     the other class.  */
3225  if (class1 == X86_64_NO_CLASS)
3226    return class2;
3227  if (class2 == X86_64_NO_CLASS)
3228    return class1;
3229
3230  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
3231  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3232    return X86_64_MEMORY_CLASS;
3233
3234  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
3235  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3236      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3237    return X86_64_INTEGERSI_CLASS;
3238  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3239      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3240    return X86_64_INTEGER_CLASS;
3241
3242  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3243     MEMORY is used.  */
3244  if (class1 == X86_64_X87_CLASS
3245      || class1 == X86_64_X87UP_CLASS
3246      || class1 == X86_64_COMPLEX_X87_CLASS
3247      || class2 == X86_64_X87_CLASS
3248      || class2 == X86_64_X87UP_CLASS
3249      || class2 == X86_64_COMPLEX_X87_CLASS)
3250    return X86_64_MEMORY_CLASS;
3251
3252  /* Rule #6: Otherwise class SSE is used.  */
3253  return X86_64_SSE_CLASS;
3254}
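
/* Worked examples (illustrative, hypothetical types): for
   "struct { float x; float y; }" the single eightbyte merges SSESF with
   SSE, which by rule #6 yields SSE, so the struct travels in one %xmm
   register; for "struct { float x; int y; }" merging SSESF with INTEGER by
   rule #4 yields INTEGER, so the struct travels in one general register.  */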
3255
3256/* Classify the argument of type TYPE and mode MODE.
3257   CLASSES will be filled by the register class used to pass each word
3258   of the operand.  The number of words is returned.  In case the parameter
3259   should be passed in memory, 0 is returned. As a special case for zero
3260   sized containers, classes[0] will be NO_CLASS and 1 is returned.
3261
3262   BIT_OFFSET is used internally for handling records; it specifies the
3263   offset in bits modulo 256 to avoid overflow cases.
3264
3265   See the x86-64 PS ABI for details.
3266*/
3267
3268static int
3269classify_argument (enum machine_mode mode, tree type,
3270		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3271{
3272  HOST_WIDE_INT bytes =
3273    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3274  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3275
3276  /* Variable sized entities are always passed/returned in memory.  */
3277  if (bytes < 0)
3278    return 0;
3279
3280  if (mode != VOIDmode
3281      && targetm.calls.must_pass_in_stack (mode, type))
3282    return 0;
3283
3284  if (type && AGGREGATE_TYPE_P (type))
3285    {
3286      int i;
3287      tree field;
3288      enum x86_64_reg_class subclasses[MAX_CLASSES];
3289
3290      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
3291      if (bytes > 16)
3292	return 0;
3293
3294      for (i = 0; i < words; i++)
3295	classes[i] = X86_64_NO_CLASS;
3296
3297      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
3298	 signal the memory class, so handle this as a special case.  */
3299      if (!words)
3300	{
3301	  classes[0] = X86_64_NO_CLASS;
3302	  return 1;
3303	}
3304
3305      /* Classify each field of record and merge classes.  */
3306      switch (TREE_CODE (type))
3307	{
3308	case RECORD_TYPE:
3309	  /* For C++ classes, first merge in the fields of the base classes.  */
3310	  if (TYPE_BINFO (type))
3311	    {
3312	      tree binfo, base_binfo;
3313	      int basenum;
3314
3315	      for (binfo = TYPE_BINFO (type), basenum = 0;
3316		   BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
3317		{
3318		   int num;
3319		   int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
3320		   tree type = BINFO_TYPE (base_binfo);
3321
3322		   num = classify_argument (TYPE_MODE (type),
3323					    type, subclasses,
3324					    (offset + bit_offset) % 256);
3325		   if (!num)
3326		     return 0;
3327		   for (i = 0; i < num; i++)
3328		     {
3329		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
3330		       classes[i + pos] =
3331			 merge_classes (subclasses[i], classes[i + pos]);
3332		     }
3333		}
3334	    }
3335	  /* And now merge the fields of the structure.  */
3336	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3337	    {
3338	      if (TREE_CODE (field) == FIELD_DECL)
3339		{
3340		  int num;
3341
3342		  if (TREE_TYPE (field) == error_mark_node)
3343		    continue;
3344
3345		  /* Bitfields are always classified as integer.  Handle them
3346		     early, since later code would consider them to be
3347		     misaligned integers.  */
3348		  if (DECL_BIT_FIELD (field))
3349		    {
3350		      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3351			   i < ((int_bit_position (field) + (bit_offset % 64))
3352			        + tree_low_cst (DECL_SIZE (field), 0)
3353				+ 63) / 8 / 8; i++)
3354			classes[i] =
3355			  merge_classes (X86_64_INTEGER_CLASS,
3356					 classes[i]);
3357		    }
3358		  else
3359		    {
3360		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3361					       TREE_TYPE (field), subclasses,
3362					       (int_bit_position (field)
3363						+ bit_offset) % 256);
3364		      if (!num)
3365			return 0;
3366		      for (i = 0; i < num; i++)
3367			{
3368			  int pos =
3369			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3370			  classes[i + pos] =
3371			    merge_classes (subclasses[i], classes[i + pos]);
3372			}
3373		    }
3374		}
3375	    }
3376	  break;
3377
3378	case ARRAY_TYPE:
3379	  /* Arrays are handled as small records.  */
3380	  {
3381	    int num;
3382	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3383				     TREE_TYPE (type), subclasses, bit_offset);
3384	    if (!num)
3385	      return 0;
3386
3387	    /* The partial classes are now full classes.  */
3388	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3389	      subclasses[0] = X86_64_SSE_CLASS;
3390	    if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3391	      subclasses[0] = X86_64_INTEGER_CLASS;
3392
3393	    for (i = 0; i < words; i++)
3394	      classes[i] = subclasses[i % num];
3395
3396	    break;
3397	  }
3398	case UNION_TYPE:
3399	case QUAL_UNION_TYPE:
3400	  /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
3402
3403	  /* Unions are not derived.  */
3404	  gcc_assert (!TYPE_BINFO (type)
3405		      || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3406	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3407	    {
3408	      if (TREE_CODE (field) == FIELD_DECL)
3409		{
3410		  int num;
3411
3412		  if (TREE_TYPE (field) == error_mark_node)
3413		    continue;
3414
3415		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3416					   TREE_TYPE (field), subclasses,
3417					   bit_offset);
3418		  if (!num)
3419		    return 0;
3420		  for (i = 0; i < num; i++)
3421		    classes[i] = merge_classes (subclasses[i], classes[i]);
3422		}
3423	    }
3424	  break;
3425
3426	default:
3427	  gcc_unreachable ();
3428	}
3429
3430      /* Final merger cleanup.  */
3431      for (i = 0; i < words; i++)
3432	{
3433	  /* If one class is MEMORY, everything should be passed in
3434	     memory.  */
3435	  if (classes[i] == X86_64_MEMORY_CLASS)
3436	    return 0;
3437
3438	  /* The X86_64_SSEUP_CLASS should be always preceded by
3439	     X86_64_SSE_CLASS.  */
3440	  if (classes[i] == X86_64_SSEUP_CLASS
3441	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3442	    classes[i] = X86_64_SSE_CLASS;
3443
3444	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
3445	  if (classes[i] == X86_64_X87UP_CLASS
3446	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3447	    classes[i] = X86_64_SSE_CLASS;
3448	}
3449      return words;
3450    }
3451
3452  /* Compute the alignment needed.  We align all types to their natural
3453     boundaries, with the exception of XFmode, which is aligned to 128 bits.  */
3454  if (mode != VOIDmode && mode != BLKmode)
3455    {
3456      int mode_alignment = GET_MODE_BITSIZE (mode);
3457
3458      if (mode == XFmode)
3459	mode_alignment = 128;
3460      else if (mode == XCmode)
3461	mode_alignment = 256;
3462      if (COMPLEX_MODE_P (mode))
3463	mode_alignment /= 2;
3464      /* Misaligned fields are always returned in memory.  */
3465      if (bit_offset % mode_alignment)
3466	return 0;
3467    }
3468
3469  /* For V1xx modes, just use the base mode.  */
3470  if (VECTOR_MODE_P (mode)
3471      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3472    mode = GET_MODE_INNER (mode);
3473
3474  /* Classification of atomic types.  */
3475  switch (mode)
3476    {
3477    case SDmode:
3478    case DDmode:
3479      classes[0] = X86_64_SSE_CLASS;
3480      return 1;
3481    case TDmode:
3482      classes[0] = X86_64_SSE_CLASS;
3483      classes[1] = X86_64_SSEUP_CLASS;
3484      return 2;
3485    case DImode:
3486    case SImode:
3487    case HImode:
3488    case QImode:
3489    case CSImode:
3490    case CHImode:
3491    case CQImode:
3492      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3493	classes[0] = X86_64_INTEGERSI_CLASS;
3494      else
3495	classes[0] = X86_64_INTEGER_CLASS;
3496      return 1;
3497    case CDImode:
3498    case TImode:
3499      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3500      return 2;
3501    case CTImode:
3502      return 0;
3503    case SFmode:
3504      if (!(bit_offset % 64))
3505	classes[0] = X86_64_SSESF_CLASS;
3506      else
3507	classes[0] = X86_64_SSE_CLASS;
3508      return 1;
3509    case DFmode:
3510      classes[0] = X86_64_SSEDF_CLASS;
3511      return 1;
3512    case XFmode:
3513      classes[0] = X86_64_X87_CLASS;
3514      classes[1] = X86_64_X87UP_CLASS;
3515      return 2;
3516    case TFmode:
3517      classes[0] = X86_64_SSE_CLASS;
3518      classes[1] = X86_64_SSEUP_CLASS;
3519      return 2;
3520    case SCmode:
3521      classes[0] = X86_64_SSE_CLASS;
3522      return 1;
3523    case DCmode:
3524      classes[0] = X86_64_SSEDF_CLASS;
3525      classes[1] = X86_64_SSEDF_CLASS;
3526      return 2;
3527    case XCmode:
3528      classes[0] = X86_64_COMPLEX_X87_CLASS;
3529      return 1;
3530    case TCmode:
3531      /* This mode is larger than 16 bytes.  */
3532      return 0;
3533    case V4SFmode:
3534    case V4SImode:
3535    case V16QImode:
3536    case V8HImode:
3537    case V2DFmode:
3538    case V2DImode:
3539      classes[0] = X86_64_SSE_CLASS;
3540      classes[1] = X86_64_SSEUP_CLASS;
3541      return 2;
3542    case V2SFmode:
3543    case V2SImode:
3544    case V4HImode:
3545    case V8QImode:
3546      classes[0] = X86_64_SSE_CLASS;
3547      return 1;
3548    case BLKmode:
3549    case VOIDmode:
3550      return 0;
3551    default:
3552      gcc_assert (VECTOR_MODE_P (mode));
3553
3554      if (bytes > 16)
3555	return 0;
3556
3557      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3558
3559      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3560	classes[0] = X86_64_INTEGERSI_CLASS;
3561      else
3562	classes[0] = X86_64_INTEGER_CLASS;
3563      classes[1] = X86_64_INTEGER_CLASS;
3564      return 1 + (bytes > 8);
3565    }
3566}
3567
3568/* Examine the argument and set the number of registers required in each
3569   class.  Return 0 iff the parameter should be passed in memory.  */
3570static int
3571examine_argument (enum machine_mode mode, tree type, int in_return,
3572		  int *int_nregs, int *sse_nregs)
3573{
3574  enum x86_64_reg_class class[MAX_CLASSES];
3575  int n = classify_argument (mode, type, class, 0);
3576
3577  *int_nregs = 0;
3578  *sse_nregs = 0;
3579  if (!n)
3580    return 0;
3581  for (n--; n >= 0; n--)
3582    switch (class[n])
3583      {
3584      case X86_64_INTEGER_CLASS:
3585      case X86_64_INTEGERSI_CLASS:
3586	(*int_nregs)++;
3587	break;
3588      case X86_64_SSE_CLASS:
3589      case X86_64_SSESF_CLASS:
3590      case X86_64_SSEDF_CLASS:
3591	(*sse_nregs)++;
3592	break;
3593      case X86_64_NO_CLASS:
3594      case X86_64_SSEUP_CLASS:
3595	break;
3596      case X86_64_X87_CLASS:
3597      case X86_64_X87UP_CLASS:
3598	if (!in_return)
3599	  return 0;
3600	break;
3601      case X86_64_COMPLEX_X87_CLASS:
3602	return in_return ? 2 : 0;
3603      case X86_64_MEMORY_CLASS:
3604	gcc_unreachable ();
3605      }
3606  return 1;
3607}
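
/* Illustrative results (hypothetical types): "struct { long a; long b; }"
   occupies two INTEGER eightbytes, giving *int_nregs = 2 and *sse_nregs = 0;
   "struct { double a; double b; }" occupies two SSEDF eightbytes, giving
   *int_nregs = 0 and *sse_nregs = 2; a 32-byte struct is classified as
   memory, so examine_argument returns 0.  */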
3608
3609/* Construct container for the argument used by GCC interface.  See
3610   FUNCTION_ARG for the detailed description.  */
3611
3612static rtx
3613construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3614		     tree type, int in_return, int nintregs, int nsseregs,
3615		     const int *intreg, int sse_regno)
3616{
3617  /* The following variables hold the static issued_error state.  */
3618  static bool issued_sse_arg_error;
3619  static bool issued_sse_ret_error;
3620  static bool issued_x87_ret_error;
3621
3622  enum machine_mode tmpmode;
3623  int bytes =
3624    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3625  enum x86_64_reg_class class[MAX_CLASSES];
3626  int n;
3627  int i;
3628  int nexps = 0;
3629  int needed_sseregs, needed_intregs;
3630  rtx exp[MAX_CLASSES];
3631  rtx ret;
3632
3633  n = classify_argument (mode, type, class, 0);
3634  if (TARGET_DEBUG_ARG)
3635    {
3636      if (!n)
3637	fprintf (stderr, "Memory class\n");
3638      else
3639	{
3640	  fprintf (stderr, "Classes:");
3641	  for (i = 0; i < n; i++)
3642	    {
3643	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3644	    }
3645	   fprintf (stderr, "\n");
3646	}
3647    }
3648  if (!n)
3649    return NULL;
3650  if (!examine_argument (mode, type, in_return, &needed_intregs,
3651			 &needed_sseregs))
3652    return NULL;
3653  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3654    return NULL;
3655
3656  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
3657     some less clueful developer tries to use floating-point anyway.  */
3658  if (needed_sseregs && !TARGET_SSE)
3659    {
3660      if (in_return)
3661	{
3662	  if (!issued_sse_ret_error)
3663	    {
3664	      error ("SSE register return with SSE disabled");
3665	      issued_sse_ret_error = true;
3666	    }
3667	}
3668      else if (!issued_sse_arg_error)
3669	{
3670	  error ("SSE register argument with SSE disabled");
3671	  issued_sse_arg_error = true;
3672	}
3673      return NULL;
3674    }
3675
3676  /* Likewise, error if the ABI requires us to return values in the
3677     x87 registers and the user specified -mno-80387.  */
3678  if (!TARGET_80387 && in_return)
3679    for (i = 0; i < n; i++)
3680      if (class[i] == X86_64_X87_CLASS
3681	  || class[i] == X86_64_X87UP_CLASS
3682	  || class[i] == X86_64_COMPLEX_X87_CLASS)
3683	{
3684	  if (!issued_x87_ret_error)
3685	    {
3686	      error ("x87 register return with x87 disabled");
3687	      issued_x87_ret_error = true;
3688	    }
3689	  return NULL;
3690	}
3691
3692  /* First construct simple cases.  Avoid SCmode, since we want to use
3693     a single register to pass this type.  */
3694  if (n == 1 && mode != SCmode)
3695    switch (class[0])
3696      {
3697      case X86_64_INTEGER_CLASS:
3698      case X86_64_INTEGERSI_CLASS:
3699	return gen_rtx_REG (mode, intreg[0]);
3700      case X86_64_SSE_CLASS:
3701      case X86_64_SSESF_CLASS:
3702      case X86_64_SSEDF_CLASS:
3703	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3704      case X86_64_X87_CLASS:
3705      case X86_64_COMPLEX_X87_CLASS:
3706	return gen_rtx_REG (mode, FIRST_STACK_REG);
3707      case X86_64_NO_CLASS:
3708	/* Zero sized array, struct or class.  */
3709	return NULL;
3710      default:
3711	gcc_unreachable ();
3712      }
3713  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3714      && mode != BLKmode)
3715    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3716  if (n == 2
3717      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3718    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3719  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3720      && class[1] == X86_64_INTEGER_CLASS
3721      && (mode == CDImode || mode == TImode || mode == TFmode)
3722      && intreg[0] + 1 == intreg[1])
3723    return gen_rtx_REG (mode, intreg[0]);
3724
3725  /* Otherwise figure out the entries of the PARALLEL.  */
3726  for (i = 0; i < n; i++)
3727    {
3728      switch (class[i])
3729        {
3730	  case X86_64_NO_CLASS:
3731	    break;
3732	  case X86_64_INTEGER_CLASS:
3733	  case X86_64_INTEGERSI_CLASS:
3734	    /* Merge TImodes on aligned occasions here too.  */
3735	    if (i * 8 + 8 > bytes)
3736	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3737	    else if (class[i] == X86_64_INTEGERSI_CLASS)
3738	      tmpmode = SImode;
3739	    else
3740	      tmpmode = DImode;
3741	    /* We've requested 24 bytes for which we have no mode.  Use DImode.  */
3742	    if (tmpmode == BLKmode)
3743	      tmpmode = DImode;
3744	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3745					       gen_rtx_REG (tmpmode, *intreg),
3746					       GEN_INT (i*8));
3747	    intreg++;
3748	    break;
3749	  case X86_64_SSESF_CLASS:
3750	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3751					       gen_rtx_REG (SFmode,
3752							    SSE_REGNO (sse_regno)),
3753					       GEN_INT (i*8));
3754	    sse_regno++;
3755	    break;
3756	  case X86_64_SSEDF_CLASS:
3757	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3758					       gen_rtx_REG (DFmode,
3759							    SSE_REGNO (sse_regno)),
3760					       GEN_INT (i*8));
3761	    sse_regno++;
3762	    break;
3763	  case X86_64_SSE_CLASS:
3764	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3765	      tmpmode = TImode;
3766	    else
3767	      tmpmode = DImode;
3768	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3769					       gen_rtx_REG (tmpmode,
3770							    SSE_REGNO (sse_regno)),
3771					       GEN_INT (i*8));
3772	    if (tmpmode == TImode)
3773	      i++;
3774	    sse_regno++;
3775	    break;
3776	  default:
3777	    gcc_unreachable ();
3778	}
3779    }
3780
3781  /* Empty aligned struct, union or class.  */
3782  if (nexps == 0)
3783    return NULL;
3784
3785  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3786  for (i = 0; i < nexps; i++)
3787    XVECEXP (ret, 0, i) = exp [i];
3788  return ret;
3789}
3790
3791/* Update the data in CUM to advance over an argument
3792   of mode MODE and data type TYPE.
3793   (TYPE is null for libcalls where that information may not be available.)  */
3794
3795void
3796function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3797		      tree type, int named)
3798{
3799  int bytes =
3800    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3801  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3802
3803  if (type)
3804    mode = type_natural_mode (type);
3805
3806  if (TARGET_DEBUG_ARG)
3807    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3808	     "mode=%s, named=%d)\n\n",
3809	     words, cum->words, cum->nregs, cum->sse_nregs,
3810	     GET_MODE_NAME (mode), named);
3811
3812  if (TARGET_64BIT)
3813    {
3814      int int_nregs, sse_nregs;
3815      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3816	cum->words += words;
3817      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3818	{
3819	  cum->nregs -= int_nregs;
3820	  cum->sse_nregs -= sse_nregs;
3821	  cum->regno += int_nregs;
3822	  cum->sse_regno += sse_nregs;
3823	}
3824      else
3825	cum->words += words;
3826    }
3827  else
3828    {
3829      switch (mode)
3830	{
3831	default:
3832	  break;
3833
3834	case BLKmode:
3835	  if (bytes < 0)
3836	    break;
3837	  /* FALLTHRU */
3838
3839	case DImode:
3840	case SImode:
3841	case HImode:
3842	case QImode:
3843	  cum->words += words;
3844	  cum->nregs -= words;
3845	  cum->regno += words;
3846
3847	  if (cum->nregs <= 0)
3848	    {
3849	      cum->nregs = 0;
3850	      cum->regno = 0;
3851	    }
3852	  break;
3853
3854	case DFmode:
3855	  if (cum->float_in_sse < 2)
3856	    break;
3857	case SFmode:
3858	  if (cum->float_in_sse < 1)
3859	    break;
3860	  /* FALLTHRU */
3861
3862	case TImode:
3863	case V16QImode:
3864	case V8HImode:
3865	case V4SImode:
3866	case V2DImode:
3867	case V4SFmode:
3868	case V2DFmode:
3869	  if (!type || !AGGREGATE_TYPE_P (type))
3870	    {
3871	      cum->sse_words += words;
3872	      cum->sse_nregs -= 1;
3873	      cum->sse_regno += 1;
3874	      if (cum->sse_nregs <= 0)
3875		{
3876		  cum->sse_nregs = 0;
3877		  cum->sse_regno = 0;
3878		}
3879	    }
3880	  break;
3881
3882	case V8QImode:
3883	case V4HImode:
3884	case V2SImode:
3885	case V2SFmode:
3886	  if (!type || !AGGREGATE_TYPE_P (type))
3887	    {
3888	      cum->mmx_words += words;
3889	      cum->mmx_nregs -= 1;
3890	      cum->mmx_regno += 1;
3891	      if (cum->mmx_nregs <= 0)
3892		{
3893		  cum->mmx_nregs = 0;
3894		  cum->mmx_regno = 0;
3895		}
3896	    }
3897	  break;
3898	}
3899    }
3900}
3901
3902/* Define where to put the arguments to a function.
3903   Value is zero to push the argument on the stack,
3904   or a hard register in which to store the argument.
3905
3906   MODE is the argument's machine mode.
3907   TYPE is the data type of the argument (as a tree).
3908    This is null for libcalls where that information may
3909    not be available.
3910   CUM is a variable of type CUMULATIVE_ARGS which gives info about
3911    the preceding args and about the function being called.
3912   NAMED is nonzero if this argument is a named parameter
3913    (otherwise it is an extra parameter matching an ellipsis).  */
3914
3915rtx
3916function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3917	      tree type, int named)
3918{
3919  enum machine_mode mode = orig_mode;
3920  rtx ret = NULL_RTX;
3921  int bytes =
3922    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3923  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3924  static bool warnedsse, warnedmmx;
3925
3926  /* To simplify the code below, represent vector types with a vector mode
3927     even if MMX/SSE are not active.  */
3928  if (type && TREE_CODE (type) == VECTOR_TYPE)
3929    mode = type_natural_mode (type);
3930
3931  /* Handle a hidden AL argument containing the number of SSE registers
3932     used by varargs x86-64 functions.  For the i386 ABI just return
3933     constm1_rtx to avoid any AL settings.  */
3934  if (mode == VOIDmode)
3935    {
3936      if (TARGET_64BIT)
3937	return GEN_INT (cum->maybe_vaarg
3938			? (cum->sse_nregs < 0
3939			   ? SSE_REGPARM_MAX
3940			   : cum->sse_regno)
3941			: -1);
3942      else
3943	return constm1_rtx;
3944    }
3945  if (TARGET_64BIT)
3946    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3947			       cum->sse_nregs,
3948			       &x86_64_int_parameter_registers [cum->regno],
3949			       cum->sse_regno);
3950  else
3951    switch (mode)
3952      {
3953	/* For now, pass fp/complex values on the stack.  */
3954      default:
3955	break;
3956
3957      case BLKmode:
3958	if (bytes < 0)
3959	  break;
3960	/* FALLTHRU */
3961      case DImode:
3962      case SImode:
3963      case HImode:
3964      case QImode:
3965	if (words <= cum->nregs)
3966	  {
3967	    int regno = cum->regno;
3968
3969	    /* Fastcall allocates the first two DWORD (SImode) or
3970	       smaller arguments to ECX and EDX.  */
3971	    if (cum->fastcall)
3972	      {
3973	        if (mode == BLKmode || mode == DImode)
3974	          break;
3975
3976	        /* ECX not EAX is the first allocated register.  */
3977	        if (regno == 0)
3978		  regno = 2;
3979	      }
3980	    ret = gen_rtx_REG (mode, regno);
3981	  }
3982	break;
3983      case DFmode:
3984	if (cum->float_in_sse < 2)
3985	  break;
3986      case SFmode:
3987	if (cum->float_in_sse < 1)
3988	  break;
3989	/* FALLTHRU */
3990      case TImode:
3991      case V16QImode:
3992      case V8HImode:
3993      case V4SImode:
3994      case V2DImode:
3995      case V4SFmode:
3996      case V2DFmode:
3997	if (!type || !AGGREGATE_TYPE_P (type))
3998	  {
3999	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4000	      {
4001		warnedsse = true;
4002		warning (0, "SSE vector argument without SSE enabled "
4003			 "changes the ABI");
4004	      }
4005	    if (cum->sse_nregs)
4006	      ret = gen_reg_or_parallel (mode, orig_mode,
4007					 cum->sse_regno + FIRST_SSE_REG);
4008	  }
4009	break;
4010      case V8QImode:
4011      case V4HImode:
4012      case V2SImode:
4013      case V2SFmode:
4014	if (!type || !AGGREGATE_TYPE_P (type))
4015	  {
4016	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4017	      {
4018		warnedmmx = true;
4019		warning (0, "MMX vector argument without MMX enabled "
4020			 "changes the ABI");
4021	      }
4022	    if (cum->mmx_nregs)
4023	      ret = gen_reg_or_parallel (mode, orig_mode,
4024					 cum->mmx_regno + FIRST_MMX_REG);
4025	  }
4026	break;
4027      }
4028
4029  if (TARGET_DEBUG_ARG)
4030    {
4031      fprintf (stderr,
4032	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4033	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4034
4035      if (ret)
4036	print_simple_rtl (stderr, ret);
4037      else
4038	fprintf (stderr, ", stack");
4039
4040      fprintf (stderr, " )\n");
4041    }
4042
4043  return ret;
4044}
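
/* Illustrative example (hypothetical declaration): on ia32,

     void __attribute__((fastcall)) f (int a, int b, int c);

   receives a in %ecx, b in %edx and c on the stack, per the fastcall
   handling above; without an attribute and without -mregparm all three
   arguments arrive on the stack.  */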
4045
4046/* A C expression that indicates when an argument must be passed by
4047   reference.  If nonzero for an argument, a copy of that argument is
4048   made in memory and a pointer to the argument is passed instead of
4049   the argument itself.  The pointer is passed in whatever way is
4050   appropriate for passing a pointer to that type.  */
4051
4052static bool
4053ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4054			enum machine_mode mode ATTRIBUTE_UNUSED,
4055			tree type, bool named ATTRIBUTE_UNUSED)
4056{
4057  if (!TARGET_64BIT)
4058    return 0;
4059
4060  if (type && int_size_in_bytes (type) == -1)
4061    {
4062      if (TARGET_DEBUG_ARG)
4063	fprintf (stderr, "function_arg_pass_by_reference\n");
4064      return 1;
4065    }
4066
4067  return 0;
4068}
4069
4070/* Return true when TYPE should be 128bit aligned for 32bit argument passing
4071   ABI.  Only called if TARGET_SSE.  */
4072static bool
4073contains_128bit_aligned_vector_p (tree type)
4074{
4075  enum machine_mode mode = TYPE_MODE (type);
4076  if (SSE_REG_MODE_P (mode)
4077      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4078    return true;
4079  if (TYPE_ALIGN (type) < 128)
4080    return false;
4081
4082  if (AGGREGATE_TYPE_P (type))
4083    {
4084      /* Walk the aggregates recursively.  */
4085      switch (TREE_CODE (type))
4086	{
4087	case RECORD_TYPE:
4088	case UNION_TYPE:
4089	case QUAL_UNION_TYPE:
4090	  {
4091	    tree field;
4092
4093	    if (TYPE_BINFO (type))
4094	      {
4095		tree binfo, base_binfo;
4096		int i;
4097
4098		for (binfo = TYPE_BINFO (type), i = 0;
4099		     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
4100		  if (contains_128bit_aligned_vector_p
4101		      (BINFO_TYPE (base_binfo)))
4102		    return true;
4103	      }
4104	    /* And now check the fields of the structure.  */
4105	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4106	      {
4107		if (TREE_CODE (field) == FIELD_DECL
4108		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4109		  return true;
4110	      }
4111	    break;
4112	  }
4113
4114	case ARRAY_TYPE:
4115	  /* Just in case some language passes arrays by value.  */
4116	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4117	    return true;
4118	  break;
4119
4120	default:
4121	  gcc_unreachable ();
4122	}
4123    }
4124  return false;
4125}
4126
4127/* Gives the alignment boundary, in bits, of an argument with the
4128   specified mode and type.  */
4129
4130int
4131ix86_function_arg_boundary (enum machine_mode mode, tree type)
4132{
4133  int align;
4134  if (type)
4135    align = TYPE_ALIGN (type);
4136  else
4137    align = GET_MODE_ALIGNMENT (mode);
4138  if (align < PARM_BOUNDARY)
4139    align = PARM_BOUNDARY;
4140  if (!TARGET_64BIT)
4141    {
4142      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
4143	 make an exception for SSE modes since these require 128bit
4144	 alignment.
4145
4146	 The handling here differs from field_alignment.  ICC aligns MMX
4147	 arguments to 4 byte boundaries, while structure fields are aligned
4148	 to 8 byte boundaries.  */
4149      if (!TARGET_SSE)
4150	align = PARM_BOUNDARY;
4151      else if (!type)
4152	{
4153	  if (!SSE_REG_MODE_P (mode))
4154	    align = PARM_BOUNDARY;
4155	}
4156      else
4157	{
4158	  if (!contains_128bit_aligned_vector_p (type))
4159	    align = PARM_BOUNDARY;
4160	}
4161    }
4162  if (align > 128)
4163    align = 128;
4164  return align;
4165}
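
/* Illustrative boundaries (an assumption about common cases): on ia32 with
   SSE enabled, an argument whose type contains a 128-bit aligned vector
   (e.g. __m128) gets a 128-bit boundary from the code above, while plain
   int or double arguments are forced back to PARM_BOUNDARY (32 bits).  */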
4166
4167/* Return true if N is a possible register number of function value.  */
4168bool
4169ix86_function_value_regno_p (int regno)
4170{
4171  if (TARGET_MACHO)
4172    {
4173      if (!TARGET_64BIT)
4174        {
4175          return ((regno) == 0
4176                  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4177                  || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4178        }
4179      return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4180              || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4181              || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4182      }
4183  else
4184    {
4185      if (regno == 0
4186          || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4187          || (regno == FIRST_SSE_REG && TARGET_SSE))
4188        return true;
4189
4190      if (!TARGET_64BIT
4191          && (regno == FIRST_MMX_REG && TARGET_MMX))
4192	    return true;
4193
4194      return false;
4195    }
4196}
4197
4198/* Define how to find the value returned by a function.
4199   VALTYPE is the data type of the value (as a tree).
4200   If the precise function being called is known, FUNC is its FUNCTION_DECL;
4201   otherwise, FUNC is 0.  */
4202rtx
4203ix86_function_value (tree valtype, tree fntype_or_decl,
4204		     bool outgoing ATTRIBUTE_UNUSED)
4205{
4206  enum machine_mode natmode = type_natural_mode (valtype);
4207
4208  if (TARGET_64BIT)
4209    {
4210      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4211				     1, REGPARM_MAX, SSE_REGPARM_MAX,
4212				     x86_64_int_return_registers, 0);
4213      /* For zero sized structures, construct_container returns NULL, but we
4214	 need to keep the rest of the compiler happy by returning a meaningful value.  */
4215      if (!ret)
4216	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4217      return ret;
4218    }
4219  else
4220    {
4221      tree fn = NULL_TREE, fntype;
4222      if (fntype_or_decl
4223	  && DECL_P (fntype_or_decl))
4224        fn = fntype_or_decl;
4225      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4226      return gen_rtx_REG (TYPE_MODE (valtype),
4227			  ix86_value_regno (natmode, fn, fntype));
4228    }
4229}
4230
4231/* Return true iff type is returned in memory.  */
4232int
4233ix86_return_in_memory (tree type)
4234{
4235  int needed_intregs, needed_sseregs, size;
4236  enum machine_mode mode = type_natural_mode (type);
4237
4238  if (TARGET_64BIT)
4239    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4240
4241  if (mode == BLKmode)
4242    return 1;
4243
4244  size = int_size_in_bytes (type);
4245
4246  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4247    return 0;
4248
4249  if (VECTOR_MODE_P (mode) || mode == TImode)
4250    {
4251      /* User-created vectors small enough to fit in EAX.  */
4252      if (size < 8)
4253	return 0;
4254
4255      /* MMX/3dNow values are returned in MM0,
4256	 except when it doesn't exist.  */
4257      if (size == 8)
4258	return (TARGET_MMX ? 0 : 1);
4259
4260      /* SSE values are returned in XMM0, except when it doesn't exist.  */
4261      if (size == 16)
4262	return (TARGET_SSE ? 0 : 1);
4263    }
4264
4265  if (mode == XFmode)
4266    return 0;
4267
4268  if (mode == TDmode)
4269    return 1;
4270
4271  if (size > 12)
4272    return 1;
4273  return 0;
4274}
4275
4276/* When returning SSE vector types, we have a choice of either
4277     (1) being ABI incompatible with a -march switch, or
4278     (2) generating an error.
4279   Given no good solution, I think the safest thing is one warning.
4280   The user won't be able to use -Werror, but....
4281
4282   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4283   called in response to actually generating a caller or callee that
4284   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
4285   via aggregate_value_p for general type probing from tree-ssa.  */
4286
4287static rtx
4288ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4289{
4290  static bool warnedsse, warnedmmx;
4291
4292  if (type)
4293    {
4294      /* Look at the return type of the function, not the function type.  */
4295      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4296
4297      if (!TARGET_SSE && !warnedsse)
4298	{
4299	  if (mode == TImode
4300	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4301	    {
4302	      warnedsse = true;
4303	      warning (0, "SSE vector return without SSE enabled "
4304		       "changes the ABI");
4305	    }
4306	}
4307
4308      if (!TARGET_MMX && !warnedmmx)
4309	{
4310	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4311	    {
4312	      warnedmmx = true;
4313	      warning (0, "MMX vector return without MMX enabled "
4314		       "changes the ABI");
4315	    }
4316	}
4317    }
4318
4319  return NULL;
4320}
4321
4322/* Define how to find the value returned by a library function
4323   assuming the value has mode MODE.  */
4324rtx
4325ix86_libcall_value (enum machine_mode mode)
4326{
4327  if (TARGET_64BIT)
4328    {
4329      switch (mode)
4330	{
4331	case SFmode:
4332	case SCmode:
4333	case DFmode:
4334	case DCmode:
4335	case TFmode:
4336	case SDmode:
4337	case DDmode:
4338	case TDmode:
4339	  return gen_rtx_REG (mode, FIRST_SSE_REG);
4340	case XFmode:
4341	case XCmode:
4342	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4343	case TCmode:
4344	  return NULL;
4345	default:
4346	  return gen_rtx_REG (mode, 0);
4347	}
4348    }
4349  else
4350    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4351}
4352
4353/* Given a mode, return the register to use for a return value.  */
4354
4355static int
4356ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4357{
4358  gcc_assert (!TARGET_64BIT);
4359
4360  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4361     we normally prevent this case when mmx is not available.  However
4362     some ABIs may require the result to be returned like DImode.  */
4363  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4364    return TARGET_MMX ? FIRST_MMX_REG : 0;
4365
4366  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
4367     we prevent this case when sse is not available.  However some ABIs
4368     may require the result to be returned like integer TImode.  */
4369  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4370    return TARGET_SSE ? FIRST_SSE_REG : 0;
4371
4372  /* Decimal floating point values can go in %eax, unlike other float modes.  */
4373  if (DECIMAL_FLOAT_MODE_P (mode))
4374    return 0;
4375
4376  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
4377  if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4378    return 0;
4379
4380  /* Floating point return values in %st(0), except for local functions when
4381     SSE math is enabled or for functions with sseregparm attribute.  */
4382  if ((func || fntype)
4383      && (mode == SFmode || mode == DFmode))
4384    {
4385      int sse_level = ix86_function_sseregparm (fntype, func);
4386      if ((sse_level >= 1 && mode == SFmode)
4387	  || (sse_level == 2 && mode == DFmode))
4388        return FIRST_SSE_REG;
4389    }
4390
4391  return FIRST_FLOAT_REG;
4392}
4393
4394/* Create the va_list data type.  */
4395
4396static tree
4397ix86_build_builtin_va_list (void)
4398{
4399  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4400
4401  /* For i386 we use plain pointer to argument area.  */
4402  if (!TARGET_64BIT)
4403    return build_pointer_type (char_type_node);
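
  /* For 64-bit, the record built below corresponds (as a rough C-level
     sketch) to the va_list type specified by the x86-64 psABI:

	 typedef struct {
	   unsigned int gp_offset;
	   unsigned int fp_offset;
	   void *overflow_arg_area;
	   void *reg_save_area;
	 } __va_list_tag, va_list[1];  */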
4404
4405  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4406  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4407
4408  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4409		      unsigned_type_node);
4410  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4411		      unsigned_type_node);
4412  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4413		      ptr_type_node);
4414  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4415		      ptr_type_node);
4416
4417  va_list_gpr_counter_field = f_gpr;
4418  va_list_fpr_counter_field = f_fpr;
4419
4420  DECL_FIELD_CONTEXT (f_gpr) = record;
4421  DECL_FIELD_CONTEXT (f_fpr) = record;
4422  DECL_FIELD_CONTEXT (f_ovf) = record;
4423  DECL_FIELD_CONTEXT (f_sav) = record;
4424
4425  TREE_CHAIN (record) = type_decl;
4426  TYPE_NAME (record) = type_decl;
4427  TYPE_FIELDS (record) = f_gpr;
4428  TREE_CHAIN (f_gpr) = f_fpr;
4429  TREE_CHAIN (f_fpr) = f_ovf;
4430  TREE_CHAIN (f_ovf) = f_sav;
4431
4432  layout_type (record);
4433
4434  /* The correct type is an array type of one element.  */
4435  return build_array_type (record, build_index_type (size_zero_node));
4436}
4437
4438/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
4439
4440static void
4441ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4442			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
4443			     int no_rtl)
4444{
4445  CUMULATIVE_ARGS next_cum;
4446  rtx save_area = NULL_RTX, mem;
4447  rtx label;
4448  rtx label_ref;
4449  rtx tmp_reg;
4450  rtx nsse_reg;
4451  int set;
4452  tree fntype;
4453  int stdarg_p;
4454  int i;
4455
4456  if (!TARGET_64BIT)
4457    return;
4458
4459  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4460    return;
4461
  /* Indicate that we need to allocate space on the stack for the varargs
     save area.  */
4463  ix86_save_varrargs_registers = 1;
4464
4465  cfun->stack_alignment_needed = 128;
4466
4467  fntype = TREE_TYPE (current_function_decl);
4468  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4469	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4470		  != void_type_node));
4471
4472  /* For varargs, we do not want to skip the dummy va_dcl argument.
4473     For stdargs, we do want to skip the last named argument.  */
4474  next_cum = *cum;
4475  if (stdarg_p)
4476    function_arg_advance (&next_cum, mode, type, 1);
4477
4478  if (!no_rtl)
4479    save_area = frame_pointer_rtx;
4480
4481  set = get_varargs_alias_set ();
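
  /* Save the integer argument registers not consumed by named arguments
     into the first REGPARM_MAX * UNITS_PER_WORD bytes of the save area,
     limited by how much of the va_list this function actually uses
     (cfun->va_list_gpr_size).  */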
4482
4483  for (i = next_cum.regno;
4484       i < ix86_regparm
4485       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4486       i++)
4487    {
4488      mem = gen_rtx_MEM (Pmode,
4489			 plus_constant (save_area, i * UNITS_PER_WORD));
4490      MEM_NOTRAP_P (mem) = 1;
4491      set_mem_alias_set (mem, set);
4492      emit_move_insn (mem, gen_rtx_REG (Pmode,
4493					x86_64_int_parameter_registers[i]));
4494    }
4495
4496  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4497    {
      /* Now emit code to save SSE registers.  The AX parameter contains the
	 number of SSE parameter registers used to call this function.  We use
	 the sse_prologue_save insn template, which produces a computed jump
	 across the SSE saves.  We need some preparation work to get this
	 working.  */
4502
4503      label = gen_label_rtx ();
4504      label_ref = gen_rtx_LABEL_REF (Pmode, label);
4505
      /* Compute the address to jump to:
         label - 4*eax + nnamed_sse_arguments*4
	 (each SSE register save emitted below is 4 bytes long).  */
4508      tmp_reg = gen_reg_rtx (Pmode);
4509      nsse_reg = gen_reg_rtx (Pmode);
4510      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4511      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4512			      gen_rtx_MULT (Pmode, nsse_reg,
4513					    GEN_INT (4))));
4514      if (next_cum.sse_regno)
4515	emit_move_insn
4516	  (nsse_reg,
4517	   gen_rtx_CONST (DImode,
4518			  gen_rtx_PLUS (DImode,
4519					label_ref,
4520					GEN_INT (next_cum.sse_regno * 4))));
4521      else
4522	emit_move_insn (nsse_reg, label_ref);
4523      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4524
      /* Compute the address of the memory block we save into.  We always use
	 a pointer pointing 127 bytes past the first byte to be stored - this
	 keeps each save instruction within 4 bytes, since every offset then
	 fits in a signed 8-bit displacement.  */
4528      tmp_reg = gen_reg_rtx (Pmode);
4529      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4530			      plus_constant (save_area,
4531					     8 * REGPARM_MAX + 127)));
4532      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4533      MEM_NOTRAP_P (mem) = 1;
4534      set_mem_alias_set (mem, set);
4535      set_mem_align (mem, BITS_PER_WORD);
4536
4537      /* And finally do the dirty job!  */
4538      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4539					GEN_INT (next_cum.sse_regno), label));
4540    }
4541
4542}
4543
4544/* Implement va_start.  */
4545
4546void
4547ix86_va_start (tree valist, rtx nextarg)
4548{
4549  HOST_WIDE_INT words, n_gpr, n_fpr;
4550  tree f_gpr, f_fpr, f_ovf, f_sav;
4551  tree gpr, fpr, ovf, sav, t;
4552  tree type;
4553
  /* Only the 64-bit target needs anything special.  */
4555  if (!TARGET_64BIT)
4556    {
4557      std_expand_builtin_va_start (valist, nextarg);
4558      return;
4559    }
4560
4561  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4562  f_fpr = TREE_CHAIN (f_gpr);
4563  f_ovf = TREE_CHAIN (f_fpr);
4564  f_sav = TREE_CHAIN (f_ovf);
4565
4566  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4567  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4568  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4569  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4570  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4571
4572  /* Count number of gp and fp argument registers used.  */
4573  words = current_function_args_info.words;
4574  n_gpr = current_function_args_info.regno;
4575  n_fpr = current_function_args_info.sse_regno;
4576
4577  if (TARGET_DEBUG_ARG)
4578    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4579	     (int) words, (int) n_gpr, (int) n_fpr);
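
  /* In summary, the assignments expanded below are:
       gp_offset         = n_gpr * 8
       fp_offset         = REGPARM_MAX * 8 + n_fpr * 16
       overflow_arg_area = incoming argument area + words * UNITS_PER_WORD
       reg_save_area     = the register save area set up by the prologue  */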
4580
4581  if (cfun->va_list_gpr_size)
4582    {
4583      type = TREE_TYPE (gpr);
4584      t = build2 (MODIFY_EXPR, type, gpr,
4585		  build_int_cst (type, n_gpr * 8));
4586      TREE_SIDE_EFFECTS (t) = 1;
4587      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4588    }
4589
4590  if (cfun->va_list_fpr_size)
4591    {
4592      type = TREE_TYPE (fpr);
4593      t = build2 (MODIFY_EXPR, type, fpr,
4594		  build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4595      TREE_SIDE_EFFECTS (t) = 1;
4596      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4597    }
4598
4599  /* Find the overflow area.  */
4600  type = TREE_TYPE (ovf);
4601  t = make_tree (type, virtual_incoming_args_rtx);
4602  if (words != 0)
4603    t = build2 (PLUS_EXPR, type, t,
4604	        build_int_cst (type, words * UNITS_PER_WORD));
4605  t = build2 (MODIFY_EXPR, type, ovf, t);
4606  TREE_SIDE_EFFECTS (t) = 1;
4607  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4608
4609  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4610    {
      /* Find the register save area.
	 The function prologue places it right above the stack frame.  */
4613      type = TREE_TYPE (sav);
4614      t = make_tree (type, frame_pointer_rtx);
4615      t = build2 (MODIFY_EXPR, type, sav, t);
4616      TREE_SIDE_EFFECTS (t) = 1;
4617      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4618    }
4619}
4620
4621/* Implement va_arg.  */
4622
4623tree
4624ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4625{
4626  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4627  tree f_gpr, f_fpr, f_ovf, f_sav;
4628  tree gpr, fpr, ovf, sav, t;
4629  int size, rsize;
4630  tree lab_false, lab_over = NULL_TREE;
4631  tree addr, t2;
4632  rtx container;
4633  int indirect_p = 0;
4634  tree ptrtype;
4635  enum machine_mode nat_mode;
4636
  /* Only the 64-bit target needs anything special.  */
4638  if (!TARGET_64BIT)
4639    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4640
4641  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4642  f_fpr = TREE_CHAIN (f_gpr);
4643  f_ovf = TREE_CHAIN (f_fpr);
4644  f_sav = TREE_CHAIN (f_ovf);
4645
4646  valist = build_va_arg_indirect_ref (valist);
4647  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4648  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4649  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4650  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4651
4652  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4653  if (indirect_p)
4654    type = build_pointer_type (type);
4655  size = int_size_in_bytes (type);
4656  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4657
4658  nat_mode = type_natural_mode (type);
4659  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4660				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4661
4662  /* Pull the value out of the saved registers.  */
4663
4664  addr = create_tmp_var (ptr_type_node, "addr");
4665  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4666
4667  if (container)
4668    {
4669      int needed_intregs, needed_sseregs;
4670      bool need_temp;
4671      tree int_addr, sse_addr;
4672
4673      lab_false = create_artificial_label ();
4674      lab_over = create_artificial_label ();
4675
4676      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4677
4678      need_temp = (!REG_P (container)
4679		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
4680		       || TYPE_ALIGN (type) > 128));
4681
      /* In case we are passing a structure, verify that it occupies consecutive
         slots in the register save area.  If not, we need to do moves.  */
4684      if (!need_temp && !REG_P (container))
4685	{
	  /* Verify that all registers are strictly consecutive.  */
4687	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4688	    {
4689	      int i;
4690
4691	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4692		{
4693		  rtx slot = XVECEXP (container, 0, i);
4694		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4695		      || INTVAL (XEXP (slot, 1)) != i * 16)
4696		    need_temp = 1;
4697		}
4698	    }
4699	  else
4700	    {
4701	      int i;
4702
4703	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4704		{
4705		  rtx slot = XVECEXP (container, 0, i);
4706		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4707		      || INTVAL (XEXP (slot, 1)) != i * 8)
4708		    need_temp = 1;
4709		}
4710	    }
4711	}
4712      if (!need_temp)
4713	{
4714	  int_addr = addr;
4715	  sse_addr = addr;
4716	}
4717      else
4718	{
4719	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
4720	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4721	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4722	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4723	}
4724
4725      /* First ensure that we fit completely in registers.  */
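      /* In pseudo-C, the checks emitted below are roughly:
	   if (gp_offset >= (REGPARM_MAX - needed_intregs + 1) * 8)
	     goto lab_false;
	   if (fp_offset >= REGPARM_MAX * 8
			    + (SSE_REGPARM_MAX - needed_sseregs + 1) * 16)
	     goto lab_false;
	 i.e. fall through to the overflow area when not enough argument
	 registers remain.  */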
4726      if (needed_intregs)
4727	{
4728	  t = build_int_cst (TREE_TYPE (gpr),
4729			     (REGPARM_MAX - needed_intregs + 1) * 8);
4730	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4731	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4732	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4733	  gimplify_and_add (t, pre_p);
4734	}
4735      if (needed_sseregs)
4736	{
4737	  t = build_int_cst (TREE_TYPE (fpr),
4738			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4739			     + REGPARM_MAX * 8);
4740	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4741	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4742	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4743	  gimplify_and_add (t, pre_p);
4744	}
4745
4746      /* Compute index to start of area used for integer regs.  */
4747      if (needed_intregs)
4748	{
4749	  /* int_addr = gpr + sav; */
4750	  t = fold_convert (ptr_type_node, gpr);
4751	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4752	  t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4753	  gimplify_and_add (t, pre_p);
4754	}
4755      if (needed_sseregs)
4756	{
4757	  /* sse_addr = fpr + sav; */
4758	  t = fold_convert (ptr_type_node, fpr);
4759	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4760	  t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4761	  gimplify_and_add (t, pre_p);
4762	}
4763      if (need_temp)
4764	{
4765	  int i;
4766	  tree temp = create_tmp_var (type, "va_arg_tmp");
4767
4768	  /* addr = &temp; */
4769	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4770	  t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4771	  gimplify_and_add (t, pre_p);
4772
4773	  for (i = 0; i < XVECLEN (container, 0); i++)
4774	    {
4775	      rtx slot = XVECEXP (container, 0, i);
4776	      rtx reg = XEXP (slot, 0);
4777	      enum machine_mode mode = GET_MODE (reg);
4778	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4779	      tree addr_type = build_pointer_type (piece_type);
4780	      tree src_addr, src;
4781	      int src_offset;
4782	      tree dest_addr, dest;
4783
4784	      if (SSE_REGNO_P (REGNO (reg)))
4785		{
4786		  src_addr = sse_addr;
4787		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4788		}
4789	      else
4790		{
4791		  src_addr = int_addr;
4792		  src_offset = REGNO (reg) * 8;
4793		}
4794	      src_addr = fold_convert (addr_type, src_addr);
4795	      src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4796				       size_int (src_offset)));
4797	      src = build_va_arg_indirect_ref (src_addr);
4798
4799	      dest_addr = fold_convert (addr_type, addr);
4800	      dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4801					size_int (INTVAL (XEXP (slot, 1)))));
4802	      dest = build_va_arg_indirect_ref (dest_addr);
4803
4804	      t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4805	      gimplify_and_add (t, pre_p);
4806	    }
4807	}
4808
4809      if (needed_intregs)
4810	{
4811	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4812		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4813	  t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4814	  gimplify_and_add (t, pre_p);
4815	}
4816      if (needed_sseregs)
4817	{
4818	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4819		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4820	  t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4821	  gimplify_and_add (t, pre_p);
4822	}
4823
4824      t = build1 (GOTO_EXPR, void_type_node, lab_over);
4825      gimplify_and_add (t, pre_p);
4826
4827      t = build1 (LABEL_EXPR, void_type_node, lab_false);
4828      append_to_statement_list (t, pre_p);
4829    }
4830
4831  /* ... otherwise out of the overflow area.  */
4832
4833  /* Care for on-stack alignment if needed.  */
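  /* The computation below is, in effect,
       t = (ovf + align - 1) & -align
     rounding the overflow-area pointer up to the argument's boundary.  */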
4834  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4835      || integer_zerop (TYPE_SIZE (type)))
4836    t = ovf;
4837  else
4838    {
4839      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4840      t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4841		  build_int_cst (TREE_TYPE (ovf), align - 1));
4842      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4843		  build_int_cst (TREE_TYPE (t), -align));
4844    }
4845  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4846
4847  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4848  gimplify_and_add (t2, pre_p);
4849
4850  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4851	      build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4852  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4853  gimplify_and_add (t, pre_p);
4854
4855  if (container)
4856    {
4857      t = build1 (LABEL_EXPR, void_type_node, lab_over);
4858      append_to_statement_list (t, pre_p);
4859    }
4860
4861  ptrtype = build_pointer_type (type);
4862  addr = fold_convert (ptrtype, addr);
4863
4864  if (indirect_p)
4865    addr = build_va_arg_indirect_ref (addr);
4866  return build_va_arg_indirect_ref (addr);
4867}
4868
4869/* Return nonzero if OPNUM's MEM should be matched
4870   in movabs* patterns.  */
4871
4872int
4873ix86_check_movabs (rtx insn, int opnum)
4874{
4875  rtx set, mem;
4876
4877  set = PATTERN (insn);
4878  if (GET_CODE (set) == PARALLEL)
4879    set = XVECEXP (set, 0, 0);
4880  gcc_assert (GET_CODE (set) == SET);
4881  mem = XEXP (set, opnum);
4882  while (GET_CODE (mem) == SUBREG)
4883    mem = SUBREG_REG (mem);
4884  gcc_assert (GET_CODE (mem) == MEM);
4885  return (volatile_ok || !MEM_VOLATILE_P (mem));
4886}
4887
4888/* Initialize the table of extra 80387 mathematical constants.  */
4889
4890static void
4891init_ext_80387_constants (void)
4892{
4893  static const char * cst[5] =
4894  {
4895    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
4896    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
4897    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
4898    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
4899    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
4900  };
4901  int i;
4902
4903  for (i = 0; i < 5; i++)
4904    {
4905      real_from_string (&ext_80387_constants_table[i], cst[i]);
4906      /* Ensure each constant is rounded to XFmode precision.  */
4907      real_convert (&ext_80387_constants_table[i],
4908		    XFmode, &ext_80387_constants_table[i]);
4909    }
4910
4911  ext_80387_constants_init = 1;
4912}
4913
4914/* Return true if the constant is something that can be loaded with
4915   a special instruction.  */
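/* The return value encodes the instruction to use:
     -1  not a floating point CONST_DOUBLE we handle
      0  no special instruction; the constant must be loaded from memory
      1  fldz    2  fld1    3  fldlg2    4  fldln2
      5  fldl2e  6  fldl2t  7  fldpi
   (see standard_80387_constant_opcode below).  */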
4916
4917int
4918standard_80387_constant_p (rtx x)
4919{
4920  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4921    return -1;
4922
4923  if (x == CONST0_RTX (GET_MODE (x)))
4924    return 1;
4925  if (x == CONST1_RTX (GET_MODE (x)))
4926    return 2;
4927
4928  /* For XFmode constants, try to find a special 80387 instruction when
4929     optimizing for size or on those CPUs that benefit from them.  */
4930  if (GET_MODE (x) == XFmode
4931      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4932    {
4933      REAL_VALUE_TYPE r;
4934      int i;
4935
4936      if (! ext_80387_constants_init)
4937	init_ext_80387_constants ();
4938
4939      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4940      for (i = 0; i < 5; i++)
4941        if (real_identical (&r, &ext_80387_constants_table[i]))
4942	  return i + 3;
4943    }
4944
4945  return 0;
4946}
4947
4948/* Return the opcode of the special instruction to be used to load
4949   the constant X.  */
4950
4951const char *
4952standard_80387_constant_opcode (rtx x)
4953{
4954  switch (standard_80387_constant_p (x))
4955    {
4956    case 1:
4957      return "fldz";
4958    case 2:
4959      return "fld1";
4960    case 3:
4961      return "fldlg2";
4962    case 4:
4963      return "fldln2";
4964    case 5:
4965      return "fldl2e";
4966    case 6:
4967      return "fldl2t";
4968    case 7:
4969      return "fldpi";
4970    default:
4971      gcc_unreachable ();
4972    }
4973}
4974
4975/* Return the CONST_DOUBLE representing the 80387 constant that is
4976   loaded by the specified special instruction.  The argument IDX
4977   matches the return value from standard_80387_constant_p.  */
4978
4979rtx
4980standard_80387_constant_rtx (int idx)
4981{
4982  int i;
4983
4984  if (! ext_80387_constants_init)
4985    init_ext_80387_constants ();
4986
4987  switch (idx)
4988    {
4989    case 3:
4990    case 4:
4991    case 5:
4992    case 6:
4993    case 7:
4994      i = idx - 3;
4995      break;
4996
4997    default:
4998      gcc_unreachable ();
4999    }
5000
5001  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5002				       XFmode);
5003}
5004
/* Return 1 if MODE is a valid mode for SSE.  */
5006static int
5007standard_sse_mode_p (enum machine_mode mode)
5008{
5009  switch (mode)
5010    {
5011    case V16QImode:
5012    case V8HImode:
5013    case V4SImode:
5014    case V2DImode:
5015    case V4SFmode:
5016    case V2DFmode:
5017      return 1;
5018
5019    default:
5020      return 0;
5021    }
5022}
5023
/* Return 1 if X is an FP constant we can load into an SSE register
   without using memory.  */
5026int
5027standard_sse_constant_p (rtx x)
5028{
5029  enum machine_mode mode = GET_MODE (x);
5030
5031  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5032    return 1;
5033  if (vector_all_ones_operand (x, mode)
5034      && standard_sse_mode_p (mode))
5035    return TARGET_SSE2 ? 2 : -1;
5036
5037  return 0;
5038}
5039
5040/* Return the opcode of the special instruction to be used to load
5041   the constant X.  */
5042
5043const char *
5044standard_sse_constant_opcode (rtx insn, rtx x)
5045{
5046  switch (standard_sse_constant_p (x))
5047    {
5048    case 1:
5049      if (get_attr_mode (insn) == MODE_V4SF)
5050        return "xorps\t%0, %0";
5051      else if (get_attr_mode (insn) == MODE_V2DF)
5052        return "xorpd\t%0, %0";
5053      else
5054        return "pxor\t%0, %0";
5055    case 2:
5056      return "pcmpeqd\t%0, %0";
5057    }
5058  gcc_unreachable ();
5059}
5060
/* Return 1 if OP contains a symbol reference.  */
5062
5063int
5064symbolic_reference_mentioned_p (rtx op)
5065{
5066  const char *fmt;
5067  int i;
5068
5069  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5070    return 1;
5071
5072  fmt = GET_RTX_FORMAT (GET_CODE (op));
5073  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5074    {
5075      if (fmt[i] == 'E')
5076	{
5077	  int j;
5078
5079	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5080	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5081	      return 1;
5082	}
5083
5084      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5085	return 1;
5086    }
5087
5088  return 0;
5089}
5090
5091/* Return 1 if it is appropriate to emit `ret' instructions in the
5092   body of a function.  Do this only if the epilogue is simple, needing a
5093   couple of insns.  Prior to reloading, we can't tell how many registers
5094   must be saved, so return 0 then.  Return 0 if there is no frame
5095   marker to de-allocate.  */
5096
5097int
5098ix86_can_use_return_insn_p (void)
5099{
5100  struct ix86_frame frame;
5101
5102  if (! reload_completed || frame_pointer_needed)
5103    return 0;
5104
  /* Don't allow more than 32768 bytes of arguments to be popped, since
     that's the limit we handle with a single return instruction.  */
5107  if (current_function_pops_args
5108      && current_function_args_size >= 32768)
5109    return 0;
5110
5111  ix86_compute_frame_layout (&frame);
5112  return frame.to_allocate == 0 && frame.nregs == 0;
5113}
5114
5115/* Value should be nonzero if functions must have frame pointers.
5116   Zero means the frame pointer need not be set up (and parms may
5117   be accessed via the stack pointer) in functions that seem suitable.  */
5118
5119int
5120ix86_frame_pointer_required (void)
5121{
5122  /* If we accessed previous frames, then the generated code expects
5123     to be able to access the saved ebp value in our frame.  */
5124  if (cfun->machine->accesses_prev_frame)
5125    return 1;
5126
  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
5129  if (SUBTARGET_FRAME_POINTER_REQUIRED)
5130    return 1;
5131
5132  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5133     the frame pointer by default.  Turn it back on now if we've not
5134     got a leaf function.  */
5135  if (TARGET_OMIT_LEAF_FRAME_POINTER
5136      && (!current_function_is_leaf
5137	  || ix86_current_function_calls_tls_descriptor))
5138    return 1;
5139
5140  if (current_function_profile)
5141    return 1;
5142
5143  return 0;
5144}
5145
5146/* Record that the current function accesses previous call frames.  */
5147
5148void
5149ix86_setup_frame_addresses (void)
5150{
5151  cfun->machine->accesses_prev_frame = 1;
5152}
5153
5154#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5155# define USE_HIDDEN_LINKONCE 1
5156#else
5157# define USE_HIDDEN_LINKONCE 0
5158#endif
5159
5160static int pic_labels_used;
5161
5162/* Fills in the label name that should be used for a pc thunk for
5163   the given register.  */
5164
5165static void
5166get_pc_thunk_name (char name[32], unsigned int regno)
5167{
5168  gcc_assert (!TARGET_64BIT);
5169
5170  if (USE_HIDDEN_LINKONCE)
5171    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5172  else
5173    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5174}
5175
5176
/* This function generates, for -fpic, the "get PC" thunks: each one loads
   its register with the return address of the caller and then returns.  */
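/* For example, the thunk emitted for %ebx is just (a sketch):

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   so "call __i686.get_pc_thunk.bx" leaves the address of the insn
   following the call in %ebx.  */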
5179
5180void
5181ix86_file_end (void)
5182{
5183  rtx xops[2];
5184  int regno;
5185
5186  for (regno = 0; regno < 8; ++regno)
5187    {
5188      char name[32];
5189
5190      if (! ((pic_labels_used >> regno) & 1))
5191	continue;
5192
5193      get_pc_thunk_name (name, regno);
5194
5195#if TARGET_MACHO
5196      if (TARGET_MACHO)
5197	{
5198	  switch_to_section (darwin_sections[text_coal_section]);
5199	  fputs ("\t.weak_definition\t", asm_out_file);
5200	  assemble_name (asm_out_file, name);
5201	  fputs ("\n\t.private_extern\t", asm_out_file);
5202	  assemble_name (asm_out_file, name);
5203	  fputs ("\n", asm_out_file);
5204	  ASM_OUTPUT_LABEL (asm_out_file, name);
5205	}
5206      else
5207#endif
5208      if (USE_HIDDEN_LINKONCE)
5209	{
5210	  tree decl;
5211
5212	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
5213			     error_mark_node);
5214	  TREE_PUBLIC (decl) = 1;
5215	  TREE_STATIC (decl) = 1;
5216	  DECL_ONE_ONLY (decl) = 1;
5217
5218	  (*targetm.asm_out.unique_section) (decl, 0);
5219	  switch_to_section (get_named_section (decl, NULL, 0));
5220
5221	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
5222	  fputs ("\t.hidden\t", asm_out_file);
5223	  assemble_name (asm_out_file, name);
5224	  fputc ('\n', asm_out_file);
5225	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5226	}
5227      else
5228	{
5229	  switch_to_section (text_section);
5230	  ASM_OUTPUT_LABEL (asm_out_file, name);
5231	}
5232
5233      xops[0] = gen_rtx_REG (SImode, regno);
5234      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5235      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5236      output_asm_insn ("ret", xops);
5237    }
5238
5239  if (NEED_INDICATE_EXEC_STACK)
5240    file_end_indicate_exec_stack ();
5241}
5242
5243/* Emit code for the SET_GOT patterns.  */
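/* With -fpic and TARGET_DEEP_BRANCH_PREDICTION the emitted sequence is
   roughly (a sketch, for %ebx as the destination):

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   otherwise a call to a local label followed by a pop of the return
   address is used instead of the thunk.  */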
5244
5245const char *
5246output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5247{
5248  rtx xops[3];
5249
5250  xops[0] = dest;
5251  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5252
5253  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5254    {
5255      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5256
5257      if (!flag_pic)
5258	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5259      else
5260	output_asm_insn ("call\t%a2", xops);
5261
5262#if TARGET_MACHO
5263      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
5264         is what will be referenced by the Mach-O PIC subsystem.  */
5265      if (!label)
5266	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5267#endif
5268
5269      (*targetm.asm_out.internal_label) (asm_out_file, "L",
5270				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5271
5272      if (flag_pic)
5273	output_asm_insn ("pop{l}\t%0", xops);
5274    }
5275  else
5276    {
5277      char name[32];
5278      get_pc_thunk_name (name, REGNO (dest));
5279      pic_labels_used |= 1 << REGNO (dest);
5280
5281      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5282      xops[2] = gen_rtx_MEM (QImode, xops[2]);
5283      output_asm_insn ("call\t%X2", xops);
5284      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
5285         is what will be referenced by the Mach-O PIC subsystem.  */
5286#if TARGET_MACHO
5287      if (!label)
5288	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5289      else
5290        targetm.asm_out.internal_label (asm_out_file, "L",
5291					   CODE_LABEL_NUMBER (label));
5292#endif
5293    }
5294
5295  if (TARGET_MACHO)
5296    return "";
5297
5298  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5299    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5300  else
5301    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5302
5303  return "";
5304}
5305
/* Generate a "push" pattern for input ARG.  */
5307
5308static rtx
5309gen_push (rtx arg)
5310{
5311  return gen_rtx_SET (VOIDmode,
5312		      gen_rtx_MEM (Pmode,
5313				   gen_rtx_PRE_DEC (Pmode,
5314						    stack_pointer_rtx)),
5315		      arg);
5316}
5317
/* Return the number of an unused call-clobbered register if one is
   available for the entire function, or INVALID_REGNUM otherwise.  */
5320
5321static unsigned int
5322ix86_select_alt_pic_regnum (void)
5323{
5324  if (current_function_is_leaf && !current_function_profile
5325      && !ix86_current_function_calls_tls_descriptor)
5326    {
5327      int i;
5328      for (i = 2; i >= 0; --i)
5329        if (!regs_ever_live[i])
5330	  return i;
5331    }
5332
5333  return INVALID_REGNUM;
5334}
5335
5336/* Return 1 if we need to save REGNO.  */
5337static int
5338ix86_save_reg (unsigned int regno, int maybe_eh_return)
5339{
5340  if (pic_offset_table_rtx
5341      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5342      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5343	  || current_function_profile
5344	  || current_function_calls_eh_return
5345	  || current_function_uses_const_pool))
5346    {
5347      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5348	return 0;
5349      return 1;
5350    }
5351
5352  if (current_function_calls_eh_return && maybe_eh_return)
5353    {
5354      unsigned i;
5355      for (i = 0; ; i++)
5356	{
5357	  unsigned test = EH_RETURN_DATA_REGNO (i);
5358	  if (test == INVALID_REGNUM)
5359	    break;
5360	  if (test == regno)
5361	    return 1;
5362	}
5363    }
5364
5365  if (cfun->machine->force_align_arg_pointer
5366      && regno == REGNO (cfun->machine->force_align_arg_pointer))
5367    return 1;
5368
5369  return (regs_ever_live[regno]
5370	  && !call_used_regs[regno]
5371	  && !fixed_regs[regno]
5372	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5373}
5374
5375/* Return number of registers to be saved on the stack.  */
5376
5377static int
5378ix86_nsaved_regs (void)
5379{
5380  int nregs = 0;
5381  int regno;
5382
5383  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5384    if (ix86_save_reg (regno, true))
5385      nregs++;
5386  return nregs;
5387}
5388
5389/* Return the offset between two registers, one to be eliminated, and the other
5390   its replacement, at the start of a routine.  */
5391
5392HOST_WIDE_INT
5393ix86_initial_elimination_offset (int from, int to)
5394{
5395  struct ix86_frame frame;
5396  ix86_compute_frame_layout (&frame);
5397
5398  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5399    return frame.hard_frame_pointer_offset;
5400  else if (from == FRAME_POINTER_REGNUM
5401	   && to == HARD_FRAME_POINTER_REGNUM)
5402    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5403  else
5404    {
5405      gcc_assert (to == STACK_POINTER_REGNUM);
5406
5407      if (from == ARG_POINTER_REGNUM)
5408	return frame.stack_pointer_offset;
5409
5410      gcc_assert (from == FRAME_POINTER_REGNUM);
5411      return frame.stack_pointer_offset - frame.frame_pointer_offset;
5412    }
5413}
5414
/* Fill in the ix86_frame structure describing the frame of the function
   currently being compiled.  */
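/* Roughly, from higher to lower addresses, the frame laid out here is:

	return address
	saved frame pointer		(if frame_pointer_needed)
	callee-saved registers
	va-arg register save area	(if ix86_save_varrargs_registers)
	padding1
	local variables
	outgoing argument area
	padding2

   with the red zone, when usable, subtracted from the amount the prologue
   actually has to allocate.  */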
5416
5417static void
5418ix86_compute_frame_layout (struct ix86_frame *frame)
5419{
5420  HOST_WIDE_INT total_size;
5421  unsigned int stack_alignment_needed;
5422  HOST_WIDE_INT offset;
5423  unsigned int preferred_alignment;
5424  HOST_WIDE_INT size = get_frame_size ();
5425
5426  frame->nregs = ix86_nsaved_regs ();
5427  total_size = size;
5428
5429  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5430  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5431
  /* During reload iteration the number of registers saved can change.
     Recompute the value as needed.  Do not recompute when the number of
     registers didn't change, as reload makes multiple calls to this function
     and does not expect the decision to change within a single iteration.  */
5436  if (!optimize_size
5437      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5438    {
5439      int count = frame->nregs;
5440
5441      cfun->machine->use_fast_prologue_epilogue_nregs = count;
5442      /* The fast prologue uses move instead of push to save registers.  This
5443         is significantly longer, but also executes faster as modern hardware
5444         can execute the moves in parallel, but can't do that for push/pop.
5445
	 Be careful about choosing which prologue to emit: when a function
	 takes many instructions to execute we may as well use the slow
	 version, likewise when the function is known to be outside a hot
	 spot (something we only know with profile feedback).  Weight the
	 size of the function by the number of registers to save, as it is
	 cheap to use one or two push instructions but very slow to use
	 many of them.  */
5452      if (count)
5453	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5454      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5455	  || (flag_branch_probabilities
5456	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5457        cfun->machine->use_fast_prologue_epilogue = false;
5458      else
5459        cfun->machine->use_fast_prologue_epilogue
5460	   = !expensive_function_p (count);
5461    }
5462  if (TARGET_PROLOGUE_USING_MOVE
5463      && cfun->machine->use_fast_prologue_epilogue)
5464    frame->save_regs_using_mov = true;
5465  else
5466    frame->save_regs_using_mov = false;
5467
5468
5469  /* Skip return address and saved base pointer.  */
5470  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5471
5472  frame->hard_frame_pointer_offset = offset;
5473
  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using these
     features, and they may break easily.  */
5477
5478  gcc_assert (!size || stack_alignment_needed);
5479  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5480  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5481  gcc_assert (stack_alignment_needed
5482	      <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5483
5484  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5485    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5486
5487  /* Register save area */
5488  offset += frame->nregs * UNITS_PER_WORD;
5489
5490  /* Va-arg area */
5491  if (ix86_save_varrargs_registers)
5492    {
5493      offset += X86_64_VARARGS_SIZE;
5494      frame->va_arg_size = X86_64_VARARGS_SIZE;
5495    }
5496  else
5497    frame->va_arg_size = 0;
5498
5499  /* Align start of frame for local function.  */
5500  frame->padding1 = ((offset + stack_alignment_needed - 1)
5501		     & -stack_alignment_needed) - offset;
5502
5503  offset += frame->padding1;
5504
5505  /* Frame pointer points here.  */
5506  frame->frame_pointer_offset = offset;
5507
5508  offset += size;
5509
  /* Add the outgoing arguments area.  It can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when the function calls alloca.  The
     alloca expander assumes that the last current_function_outgoing_args_size
     bytes of the stack frame are unused.  */
5515  if (ACCUMULATE_OUTGOING_ARGS
5516      && (!current_function_is_leaf || current_function_calls_alloca
5517	  || ix86_current_function_calls_tls_descriptor))
5518    {
5519      offset += current_function_outgoing_args_size;
5520      frame->outgoing_arguments_size = current_function_outgoing_args_size;
5521    }
5522  else
5523    frame->outgoing_arguments_size = 0;
5524
5525  /* Align stack boundary.  Only needed if we're calling another function
5526     or using alloca.  */
5527  if (!current_function_is_leaf || current_function_calls_alloca
5528      || ix86_current_function_calls_tls_descriptor)
5529    frame->padding2 = ((offset + preferred_alignment - 1)
5530		       & -preferred_alignment) - offset;
5531  else
5532    frame->padding2 = 0;
5533
5534  offset += frame->padding2;
5535
5536  /* We've reached end of stack frame.  */
5537  frame->stack_pointer_offset = offset;
5538
5539  /* Size prologue needs to allocate.  */
5540  frame->to_allocate =
5541    (size + frame->padding1 + frame->padding2
5542     + frame->outgoing_arguments_size + frame->va_arg_size);
5543
5544  if ((!frame->to_allocate && frame->nregs <= 1)
5545      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5546    frame->save_regs_using_mov = false;
5547
5548  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5549      && current_function_is_leaf
5550      && !ix86_current_function_calls_tls_descriptor)
5551    {
5552      frame->red_zone_size = frame->to_allocate;
5553      if (frame->save_regs_using_mov)
5554	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5555      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5556	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5557    }
5558  else
5559    frame->red_zone_size = 0;
5560  frame->to_allocate -= frame->red_zone_size;
5561  frame->stack_pointer_offset -= frame->red_zone_size;
5562#if 0
5563  fprintf (stderr, "nregs: %i\n", frame->nregs);
5564  fprintf (stderr, "size: %i\n", size);
5565  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5566  fprintf (stderr, "padding1: %i\n", frame->padding1);
5567  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5568  fprintf (stderr, "padding2: %i\n", frame->padding2);
5569  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5570  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5571  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5572  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5573	   frame->hard_frame_pointer_offset);
5574  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5575#endif
5576}
5577
5578/* Emit code to save registers in the prologue.  */
5579
5580static void
5581ix86_emit_save_regs (void)
5582{
5583  unsigned int regno;
5584  rtx insn;
5585
5586  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5587    if (ix86_save_reg (regno, true))
5588      {
5589	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5590	RTX_FRAME_RELATED_P (insn) = 1;
5591      }
5592}
5593
/* Emit code to save registers using MOV insns.  The first register
   is saved at POINTER + OFFSET.  */
5596static void
5597ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5598{
5599  unsigned int regno;
5600  rtx insn;
5601
5602  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5603    if (ix86_save_reg (regno, true))
5604      {
5605	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5606					       Pmode, offset),
5607			       gen_rtx_REG (Pmode, regno));
5608	RTX_FRAME_RELATED_P (insn) = 1;
5609	offset += UNITS_PER_WORD;
5610      }
5611}
5612
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive otherwise.  */
5618
5619static void
5620pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5621{
5622  rtx insn;
5623
5624  if (! TARGET_64BIT)
5625    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5626  else if (x86_64_immediate_operand (offset, DImode))
5627    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5628  else
5629    {
5630      rtx r11;
5631      /* r11 is used by indirect sibcall return as well, set before the
5632	 epilogue and used after the epilogue.  ATM indirect sibcall
5633	 shouldn't be used together with huge frame sizes in one
5634	 function because of the frame_size check in sibcall.c.  */
5635      gcc_assert (style);
5636      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5637      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5638      if (style < 0)
5639	RTX_FRAME_RELATED_P (insn) = 1;
5640      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5641							       offset));
5642    }
5643  if (style < 0)
5644    RTX_FRAME_RELATED_P (insn) = 1;
5645}
5646
5647/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
5648
5649static rtx
5650ix86_internal_arg_pointer (void)
5651{
5652  bool has_force_align_arg_pointer =
5653    (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5654			    TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5655  if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5656       && DECL_NAME (current_function_decl)
5657       && MAIN_NAME_P (DECL_NAME (current_function_decl))
5658       && DECL_FILE_SCOPE_P (current_function_decl))
5659      || ix86_force_align_arg_pointer
5660      || has_force_align_arg_pointer)
5661    {
5662      /* Nested functions can't realign the stack due to a register
5663	 conflict.  */
5664      if (DECL_CONTEXT (current_function_decl)
5665	  && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5666	{
5667	  if (ix86_force_align_arg_pointer)
5668	    warning (0, "-mstackrealign ignored for nested functions");
5669	  if (has_force_align_arg_pointer)
5670	    error ("%s not supported for nested functions",
5671		   ix86_force_align_arg_pointer_string);
5672	  return virtual_incoming_args_rtx;
5673	}
5674      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5675      return copy_to_reg (cfun->machine->force_align_arg_pointer);
5676    }
5677  else
5678    return virtual_incoming_args_rtx;
5679}
5680
5681/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5682   This is called from dwarf2out.c to emit call frame instructions
5683   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5684static void
5685ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5686{
5687  rtx unspec = SET_SRC (pattern);
5688  gcc_assert (GET_CODE (unspec) == UNSPEC);
5689
5690  switch (index)
5691    {
5692    case UNSPEC_REG_SAVE:
5693      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5694			      SET_DEST (pattern));
5695      break;
5696    case UNSPEC_DEF_CFA:
5697      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5698			 INTVAL (XVECEXP (unspec, 0, 0)));
5699      break;
5700    default:
5701      gcc_unreachable ();
5702    }
5703}
5704
5705/* Expand the prologue into a bunch of separate insns.  */
5706
5707void
5708ix86_expand_prologue (void)
5709{
5710  rtx insn;
5711  bool pic_reg_used;
5712  struct ix86_frame frame;
5713  HOST_WIDE_INT allocate;
5714
5715  ix86_compute_frame_layout (&frame);
5716
5717  if (cfun->machine->force_align_arg_pointer)
5718    {
5719      rtx x, y;
5720
5721      /* Grab the argument pointer.  */
5722      x = plus_constant (stack_pointer_rtx, 4);
5723      y = cfun->machine->force_align_arg_pointer;
5724      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5725      RTX_FRAME_RELATED_P (insn) = 1;
5726
      /* The unwind info consists of two parts: install the fafp as the cfa,
	 and record the fafp as the "save register" of the stack pointer.
	 The latter is there so that the unwinder can see where it should
	 restore the stack pointer across the "and" insn that aligns the
	 stack below.  */
5731      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5732      x = gen_rtx_SET (VOIDmode, y, x);
5733      RTX_FRAME_RELATED_P (x) = 1;
5734      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5735			  UNSPEC_REG_SAVE);
5736      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5737      RTX_FRAME_RELATED_P (y) = 1;
5738      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5739      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5740      REG_NOTES (insn) = x;
5741
5742      /* Align the stack.  */
5743      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5744			     GEN_INT (-16)));
5745
5746      /* And here we cheat like madmen with the unwind info.  We force the
5747	 cfa register back to sp+4, which is exactly what it was at the
5748	 start of the function.  Re-pushing the return address results in
5749	 the return at the same spot relative to the cfa, and thus is
5750	 correct wrt the unwind info.  */
5751      x = cfun->machine->force_align_arg_pointer;
5752      x = gen_frame_mem (Pmode, plus_constant (x, -4));
5753      insn = emit_insn (gen_push (x));
5754      RTX_FRAME_RELATED_P (insn) = 1;
5755
5756      x = GEN_INT (4);
5757      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5758      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5759      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5760      REG_NOTES (insn) = x;
5761    }
5762
5763  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
5764     slower on all targets.  Also sdb doesn't like it.  */
5765
5766  if (frame_pointer_needed)
5767    {
5768      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5769      RTX_FRAME_RELATED_P (insn) = 1;
5770
5771      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5772      RTX_FRAME_RELATED_P (insn) = 1;
5773    }
5774
5775  allocate = frame.to_allocate;
5776
5777  if (!frame.save_regs_using_mov)
5778    ix86_emit_save_regs ();
5779  else
5780    allocate += frame.nregs * UNITS_PER_WORD;
5781
  /* When using the red zone we may start saving registers before allocating
     the stack frame, saving one cycle of the prologue.  */
5784  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5785    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5786				   : stack_pointer_rtx,
5787				   -frame.nregs * UNITS_PER_WORD);
5788
5789  if (allocate == 0)
5790    ;
5791  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5792    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5793			       GEN_INT (-allocate), -1);
5794  else
5795    {
5796      /* Only valid for Win32.  */
5797      rtx eax = gen_rtx_REG (SImode, 0);
5798      bool eax_live = ix86_eax_live_at_start_p ();
5799      rtx t;
5800
5801      gcc_assert (!TARGET_64BIT);
5802
5803      if (eax_live)
5804	{
5805	  emit_insn (gen_push (eax));
5806	  allocate -= 4;
5807	}
5808
5809      emit_move_insn (eax, GEN_INT (allocate));
5810
5811      insn = emit_insn (gen_allocate_stack_worker (eax));
5812      RTX_FRAME_RELATED_P (insn) = 1;
5813      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5814      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5815      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5816					    t, REG_NOTES (insn));
5817
5818      if (eax_live)
5819	{
5820	  if (frame_pointer_needed)
5821	    t = plus_constant (hard_frame_pointer_rtx,
5822			       allocate
5823			       - frame.to_allocate
5824			       - frame.nregs * UNITS_PER_WORD);
5825	  else
5826	    t = plus_constant (stack_pointer_rtx, allocate);
5827	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5828	}
5829    }
5830
5831  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5832    {
5833      if (!frame_pointer_needed || !frame.to_allocate)
5834        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5835      else
5836        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5837				       -frame.nregs * UNITS_PER_WORD);
5838    }
5839
5840  pic_reg_used = false;
5841  if (pic_offset_table_rtx
5842      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5843	  || current_function_profile))
5844    {
5845      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5846
5847      if (alt_pic_reg_used != INVALID_REGNUM)
5848	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5849
5850      pic_reg_used = true;
5851    }
5852
5853  if (pic_reg_used)
5854    {
5855      if (TARGET_64BIT)
5856        insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5857      else
5858        insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5859
5860      /* Even with accurate pre-reload life analysis, we can wind up
5861	 deleting all references to the pic register after reload.
5862	 Consider if cross-jumping unifies two sides of a branch
5863	 controlled by a comparison vs the only read from a global.
5864	 In which case, allow the set_got to be deleted, though we're
5865	 too late to do anything about the ebx save in the prologue.  */
5866      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5867    }
5868
  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the got load isn't deleted.  */
5871  if (current_function_profile)
5872    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5873}
5874
5875/* Emit code to restore saved registers using MOV insns.  First register
5876   is restored from POINTER + OFFSET.  */
5877static void
5878ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5879				  int maybe_eh_return)
5880{
5881  int regno;
5882  rtx base_address = gen_rtx_MEM (Pmode, pointer);
5883
5884  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5885    if (ix86_save_reg (regno, maybe_eh_return))
5886      {
	/* Ensure that adjust_address won't be forced to produce a pointer
	   outside the range allowed by the x86-64 instruction set.  */
5889	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5890	  {
5891	    rtx r11;
5892
5893	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5894	    emit_move_insn (r11, GEN_INT (offset));
5895	    emit_insn (gen_adddi3 (r11, r11, pointer));
5896	    base_address = gen_rtx_MEM (Pmode, r11);
5897	    offset = 0;
5898	  }
5899	emit_move_insn (gen_rtx_REG (Pmode, regno),
5900			adjust_address (base_address, Pmode, offset));
5901	offset += UNITS_PER_WORD;
5902      }
5903}
5904
5905/* Restore function stack, frame, and registers.  */
5906
5907void
5908ix86_expand_epilogue (int style)
5909{
5910  int regno;
5911  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5912  struct ix86_frame frame;
5913  HOST_WIDE_INT offset;
5914
5915  ix86_compute_frame_layout (&frame);
5916
5917  /* Calculate start of saved registers relative to ebp.  Special care
5918     must be taken for the normal return case of a function using
5919     eh_return: the eax and edx registers are marked as saved, but not
5920     restored along this path.  */
5921  offset = frame.nregs;
5922  if (current_function_calls_eh_return && style != 2)
5923    offset -= 2;
5924  offset *= -UNITS_PER_WORD;
5925
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or discrete
     equivalent), so it is profitable in some other cases as well, especially
     when there are no registers to restore.  We also use this code when
     TARGET_USE_LEAVE is set and there is exactly one register to pop.  This
     heuristic may need some tuning in the future.  */
5936  if ((!sp_valid && frame.nregs <= 1)
5937      || (TARGET_EPILOGUE_USING_MOVE
5938	  && cfun->machine->use_fast_prologue_epilogue
5939	  && (frame.nregs > 1 || frame.to_allocate))
5940      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5941      || (frame_pointer_needed && TARGET_USE_LEAVE
5942	  && cfun->machine->use_fast_prologue_epilogue
5943	  && frame.nregs == 1)
5944      || current_function_calls_eh_return)
5945    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  The only exception is when esp points
	 directly to the end of the block of saved registers, where we may
	 simplify the addressing mode.  */
5951
5952      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5953	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5954					  frame.to_allocate, style == 2);
5955      else
5956	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5957					  offset, style == 2);
5958
5959      /* eh_return epilogues need %ecx added to the stack pointer.  */
5960      if (style == 2)
5961	{
5962	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5963
5964	  if (frame_pointer_needed)
5965	    {
5966	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5967	      tmp = plus_constant (tmp, UNITS_PER_WORD);
5968	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5969
5970	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5971	      emit_move_insn (hard_frame_pointer_rtx, tmp);
5972
5973	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5974					 const0_rtx, style);
5975	    }
5976	  else
5977	    {
5978	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5979	      tmp = plus_constant (tmp, (frame.to_allocate
5980                                         + frame.nregs * UNITS_PER_WORD));
5981	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5982	    }
5983	}
5984      else if (!frame_pointer_needed)
5985	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5986				   GEN_INT (frame.to_allocate
5987					    + frame.nregs * UNITS_PER_WORD),
5988				   style);
5989      /* If not an i386, mov & pop is faster than "leave".  */
5990      else if (TARGET_USE_LEAVE || optimize_size
5991	       || !cfun->machine->use_fast_prologue_epilogue)
5992	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5993      else
5994	{
5995	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5996				     hard_frame_pointer_rtx,
5997				     const0_rtx, style);
5998	  if (TARGET_64BIT)
5999	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6000	  else
6001	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6002	}
6003    }
6004  else
6005    {
6006      /* First step is to deallocate the stack frame so that we can
6007	 pop the registers.  */
6008      if (!sp_valid)
6009	{
6010	  gcc_assert (frame_pointer_needed);
6011	  pro_epilogue_adjust_stack (stack_pointer_rtx,
6012				     hard_frame_pointer_rtx,
6013				     GEN_INT (offset), style);
6014	}
6015      else if (frame.to_allocate)
6016	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6017				   GEN_INT (frame.to_allocate), style);
6018
6019      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6020	if (ix86_save_reg (regno, false))
6021	  {
6022	    if (TARGET_64BIT)
6023	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6024	    else
6025	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6026	  }
6027      if (frame_pointer_needed)
6028	{
	  /* LEAVE results in shorter dependency chains on CPUs that are
	     able to grok it fast.  */
6031	  if (TARGET_USE_LEAVE)
6032	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6033	  else if (TARGET_64BIT)
6034	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6035	  else
6036	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6037	}
6038    }
6039
6040  if (cfun->machine->force_align_arg_pointer)
6041    {
6042      emit_insn (gen_addsi3 (stack_pointer_rtx,
6043			     cfun->machine->force_align_arg_pointer,
6044			     GEN_INT (-4)));
6045    }
6046
6047  /* Sibcall epilogues don't want a return instruction.  */
6048  if (style == 0)
6049    return;
6050
6051  if (current_function_pops_args && current_function_args_size)
6052    {
6053      rtx popc = GEN_INT (current_function_pops_args);
6054
6055      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
6056	 return address, do an explicit add, and jump indirectly to the
6057	 caller.  */
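
      /* That is, emit roughly
	   popl  %ecx
	   addl  $N, %esp
	   jmp   *%ecx
	 where N is the (too large) number of bytes to pop.  */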
6058
6059      if (current_function_pops_args >= 65536)
6060	{
6061	  rtx ecx = gen_rtx_REG (SImode, 2);
6062
6063	  /* There is no "pascal" calling convention in the 64bit ABI.  */
6064	  gcc_assert (!TARGET_64BIT);
6065
6066	  emit_insn (gen_popsi1 (ecx));
6067	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6068	  emit_jump_insn (gen_return_indirect_internal (ecx));
6069	}
6070      else
6071	emit_jump_insn (gen_return_pop_internal (popc));
6072    }
6073  else
6074    emit_jump_insn (gen_return_internal ());
6075}
6076
6077/* Reset state from the function's potential modifications, such as the PIC register number.  */
6078
6079static void
6080ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6081			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6082{
6083  if (pic_offset_table_rtx)
6084    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6085#if TARGET_MACHO
6086  /* Mach-O doesn't support labels at the end of objects, so if
6087     it looks like we might want one, insert a NOP.  */
6088  {
6089    rtx insn = get_last_insn ();
6090    while (insn
6091	   && NOTE_P (insn)
6092	   && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6093      insn = PREV_INSN (insn);
6094    if (insn
6095	&& (LABEL_P (insn)
6096	    || (NOTE_P (insn)
6097		&& NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6098      fputs ("\tnop\n", file);
6099  }
6100#endif
6101
6102}
6103
6104/* Extract the parts of an RTL expression that is a valid memory address
6105   for an instruction.  Return 0 if the structure of the address is
6106   grossly off.  Return -1 if the address contains ASHIFT, so it is not
6107   strictly valid, but still used for computing the length of an lea instruction.  */
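
/* For illustration, the address 8(%eax,%ecx,4) (AT&T syntax), i.e.
   (plus (plus (mult (reg ecx) (const_int 4)) (reg eax)) (const_int 8)),
   is expected to decompose into base == %eax, index == %ecx, scale == 4
   and disp == (const_int 8), with seg left as SEG_DEFAULT.  */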
6108
6109int
6110ix86_decompose_address (rtx addr, struct ix86_address *out)
6111{
6112  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6113  rtx base_reg, index_reg;
6114  HOST_WIDE_INT scale = 1;
6115  rtx scale_rtx = NULL_RTX;
6116  int retval = 1;
6117  enum ix86_address_seg seg = SEG_DEFAULT;
6118
6119  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
6120    base = addr;
6121  else if (GET_CODE (addr) == PLUS)
6122    {
6123      rtx addends[4], op;
6124      int n = 0, i;
6125
6126      op = addr;
6127      do
6128	{
6129	  if (n >= 4)
6130	    return 0;
6131	  addends[n++] = XEXP (op, 1);
6132	  op = XEXP (op, 0);
6133	}
6134      while (GET_CODE (op) == PLUS);
6135      if (n >= 4)
6136	return 0;
6137      addends[n] = op;
6138
6139      for (i = n; i >= 0; --i)
6140	{
6141	  op = addends[i];
6142	  switch (GET_CODE (op))
6143	    {
6144	    case MULT:
6145	      if (index)
6146		return 0;
6147	      index = XEXP (op, 0);
6148	      scale_rtx = XEXP (op, 1);
6149	      break;
6150
6151	    case UNSPEC:
6152	      if (XINT (op, 1) == UNSPEC_TP
6153	          && TARGET_TLS_DIRECT_SEG_REFS
6154	          && seg == SEG_DEFAULT)
6155		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6156	      else
6157		return 0;
6158	      break;
6159
6160	    case REG:
6161	    case SUBREG:
6162	      if (!base)
6163		base = op;
6164	      else if (!index)
6165		index = op;
6166	      else
6167		return 0;
6168	      break;
6169
6170	    case CONST:
6171	    case CONST_INT:
6172	    case SYMBOL_REF:
6173	    case LABEL_REF:
6174	      if (disp)
6175		return 0;
6176	      disp = op;
6177	      break;
6178
6179	    default:
6180	      return 0;
6181	    }
6182	}
6183    }
6184  else if (GET_CODE (addr) == MULT)
6185    {
6186      index = XEXP (addr, 0);		/* index*scale */
6187      scale_rtx = XEXP (addr, 1);
6188    }
6189  else if (GET_CODE (addr) == ASHIFT)
6190    {
6191      rtx tmp;
6192
6193      /* We're called for lea too, which implements ashift on occasion.  */
6194      index = XEXP (addr, 0);
6195      tmp = XEXP (addr, 1);
6196      if (GET_CODE (tmp) != CONST_INT)
6197	return 0;
6198      scale = INTVAL (tmp);
6199      if ((unsigned HOST_WIDE_INT) scale > 3)
6200	return 0;
6201      scale = 1 << scale;
6202      retval = -1;
6203    }
6204  else
6205    disp = addr;			/* displacement */
6206
6207  /* Extract the integral value of scale.  */
6208  if (scale_rtx)
6209    {
6210      if (GET_CODE (scale_rtx) != CONST_INT)
6211	return 0;
6212      scale = INTVAL (scale_rtx);
6213    }
6214
6215  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6216  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6217
6218  /* Allow the arg pointer and stack pointer as an index if there is no scaling.  */
6219  if (base_reg && index_reg && scale == 1
6220      && (index_reg == arg_pointer_rtx
6221	  || index_reg == frame_pointer_rtx
6222	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6223    {
6224      rtx tmp;
6225      tmp = base, base = index, index = tmp;
6226      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6227    }
6228
6229  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
6230  if ((base_reg == hard_frame_pointer_rtx
6231       || base_reg == frame_pointer_rtx
6232       || base_reg == arg_pointer_rtx) && !disp)
6233    disp = const0_rtx;
6234
6235  /* Special case: on the K6, [%esi] causes the instruction to be vector
6236     decoded.  Avoid this by transforming it to [%esi+0].  */
6237  if (ix86_tune == PROCESSOR_K6 && !optimize_size
6238      && base_reg && !index_reg && !disp
6239      && REG_P (base_reg)
6240      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6241    disp = const0_rtx;
6242
6243  /* Special case: encode reg+reg instead of reg*2.  */
6244  if (!base && index && scale && scale == 2)
6245    base = index, base_reg = index_reg, scale = 1;
6246
6247  /* Special case: scaling cannot be encoded without base or displacement.  */
6248  if (!base && !disp && index && scale != 1)
6249    disp = const0_rtx;
6250
6251  out->base = base;
6252  out->index = index;
6253  out->disp = disp;
6254  out->scale = scale;
6255  out->seg = seg;
6256
6257  return retval;
6258}
6259
6260/* Return cost of the memory address x.
6261   For i386, it is better to use a complex address than let gcc copy
6262   the address into a reg and make a new pseudo.  But not if the address
6263   requires two regs - that would mean more pseudos with longer
6264   lifetimes.  */
6265static int
6266ix86_address_cost (rtx x)
6267{
6268  struct ix86_address parts;
6269  int cost = 1;
6270  int ok = ix86_decompose_address (x, &parts);
6271
6272  gcc_assert (ok);
6273
6274  if (parts.base && GET_CODE (parts.base) == SUBREG)
6275    parts.base = SUBREG_REG (parts.base);
6276  if (parts.index && GET_CODE (parts.index) == SUBREG)
6277    parts.index = SUBREG_REG (parts.index);
6278
6279  /* More complex memory references are better.  */
6280  if (parts.disp && parts.disp != const0_rtx)
6281    cost--;
6282  if (parts.seg != SEG_DEFAULT)
6283    cost--;
6284
6285  /* Attempt to minimize number of registers in the address.  */
6286  if ((parts.base
6287       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6288      || (parts.index
6289	  && (!REG_P (parts.index)
6290	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6291    cost++;
6292
6293  if (parts.base
6294      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6295      && parts.index
6296      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6297      && parts.base != parts.index)
6298    cost++;
6299
6300  /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6301     since its predecode logic can't detect the length of such instructions
6302     and they degenerate to vector decoding.  Increase the cost of such
6303     addresses here.  The penalty is a minimum of 2 cycles.  It may be
6304     worthwhile to split such addresses or even refuse them altogether.
6305
6306     The following addressing modes are affected:
6307      [base+scale*index]
6308      [scale*index+disp]
6309      [base+index]
6310
6311     The first and last cases may be avoidable by explicitly coding the zero
6312     in the memory address, but I don't have an AMD-K6 machine handy to check
6313     this theory.  */
6314
6315  if (TARGET_K6
6316      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6317	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6318	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6319    cost += 10;
6320
6321  return cost;
6322}
6323
6324/* If X is a machine specific address (i.e. a symbol or label being
6325   referenced as a displacement from the GOT implemented using an
6326   UNSPEC), then return the base term.  Otherwise return X.  */
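
/* For example, in 64bit PIC code a GOT load typically uses an address of
   the form (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL)), possibly
   with a constant offset added; the base term returned for it is the
   (symbol_ref "foo") itself.  */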
6327
6328rtx
6329ix86_find_base_term (rtx x)
6330{
6331  rtx term;
6332
6333  if (TARGET_64BIT)
6334    {
6335      if (GET_CODE (x) != CONST)
6336	return x;
6337      term = XEXP (x, 0);
6338      if (GET_CODE (term) == PLUS
6339	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
6340	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6341	term = XEXP (term, 0);
6342      if (GET_CODE (term) != UNSPEC
6343	  || XINT (term, 1) != UNSPEC_GOTPCREL)
6344	return x;
6345
6346      term = XVECEXP (term, 0, 0);
6347
6348      if (GET_CODE (term) != SYMBOL_REF
6349	  && GET_CODE (term) != LABEL_REF)
6350	return x;
6351
6352      return term;
6353    }
6354
6355  term = ix86_delegitimize_address (x);
6356
6357  if (GET_CODE (term) != SYMBOL_REF
6358      && GET_CODE (term) != LABEL_REF)
6359    return x;
6360
6361  return term;
6362}
6363
6364/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6365   this is used to form addresses to local data when -fPIC is in
6366   use.  */
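
/* That is, accept expressions of the form
     (minus (symbol_ref "foo") (symbol_ref "<pic base>"))
   with a LABEL_REF also allowed as the first operand.  */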
6367
6368static bool
6369darwin_local_data_pic (rtx disp)
6370{
6371  if (GET_CODE (disp) == MINUS)
6372    {
6373      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6374          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6375        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6376          {
6377            const char *sym_name = XSTR (XEXP (disp, 1), 0);
6378            if (! strcmp (sym_name, "<pic base>"))
6379              return true;
6380          }
6381    }
6382
6383  return false;
6384}
6385
6386/* Determine if a given RTX is a valid constant.  We already know this
6387   satisfies CONSTANT_P.  */
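
/* For example, (const (plus (symbol_ref "foo") (const_int 4))) is accepted
   here, whereas any SYMBOL_REF with a TLS model, and a nonzero TImode
   CONST_DOUBLE on a 32bit target, are rejected.  */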
6388
6389bool
6390legitimate_constant_p (rtx x)
6391{
6392  switch (GET_CODE (x))
6393    {
6394    case CONST:
6395      x = XEXP (x, 0);
6396
6397      if (GET_CODE (x) == PLUS)
6398	{
6399	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6400	    return false;
6401	  x = XEXP (x, 0);
6402	}
6403
6404      if (TARGET_MACHO && darwin_local_data_pic (x))
6405	return true;
6406
6407      /* Only some unspecs are valid as "constants".  */
6408      if (GET_CODE (x) == UNSPEC)
6409	switch (XINT (x, 1))
6410	  {
6411	  case UNSPEC_GOTOFF:
6412	    return TARGET_64BIT;
6413	  case UNSPEC_TPOFF:
6414	  case UNSPEC_NTPOFF:
6415	    x = XVECEXP (x, 0, 0);
6416	    return (GET_CODE (x) == SYMBOL_REF
6417		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6418	  case UNSPEC_DTPOFF:
6419	    x = XVECEXP (x, 0, 0);
6420	    return (GET_CODE (x) == SYMBOL_REF
6421		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6422	  default:
6423	    return false;
6424	  }
6425
6426      /* We must have drilled down to a symbol.  */
6427      if (GET_CODE (x) == LABEL_REF)
6428	return true;
6429      if (GET_CODE (x) != SYMBOL_REF)
6430	return false;
6431      /* FALLTHRU */
6432
6433    case SYMBOL_REF:
6434      /* TLS symbols are never valid.  */
6435      if (SYMBOL_REF_TLS_MODEL (x))
6436	return false;
6437      break;
6438
6439    case CONST_DOUBLE:
6440      if (GET_MODE (x) == TImode
6441	  && x != CONST0_RTX (TImode)
6442          && !TARGET_64BIT)
6443	return false;
6444      break;
6445
6446    case CONST_VECTOR:
6447      if (x == CONST0_RTX (GET_MODE (x)))
6448	return true;
6449      return false;
6450
6451    default:
6452      break;
6453    }
6454
6455  /* Otherwise we handle everything else in the move patterns.  */
6456  return true;
6457}
6458
6459/* Determine if it's legal to put X into the constant pool.  This
6460   is not possible for the address of thread-local symbols, which
6461   is checked above.  */
6462
6463static bool
6464ix86_cannot_force_const_mem (rtx x)
6465{
6466  /* We can always put integral constants and vectors in memory.  */
6467  switch (GET_CODE (x))
6468    {
6469    case CONST_INT:
6470    case CONST_DOUBLE:
6471    case CONST_VECTOR:
6472      return false;
6473
6474    default:
6475      break;
6476    }
6477  return !legitimate_constant_p (x);
6478}
6479
6480/* Determine if a given RTX is a valid constant address.  */
6481
6482bool
6483constant_address_p (rtx x)
6484{
6485  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6486}
6487
6488/* Nonzero if the constant value X is a legitimate general operand
6489   when generating PIC code.  It is given that flag_pic is on and
6490   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
6491
6492bool
6493legitimate_pic_operand_p (rtx x)
6494{
6495  rtx inner;
6496
6497  switch (GET_CODE (x))
6498    {
6499    case CONST:
6500      inner = XEXP (x, 0);
6501      if (GET_CODE (inner) == PLUS
6502	  && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6503	inner = XEXP (inner, 0);
6504
6505      /* Only some unspecs are valid as "constants".  */
6506      if (GET_CODE (inner) == UNSPEC)
6507	switch (XINT (inner, 1))
6508	  {
6509	  case UNSPEC_GOTOFF:
6510	    return TARGET_64BIT;
6511	  case UNSPEC_TPOFF:
6512	    x = XVECEXP (inner, 0, 0);
6513	    return (GET_CODE (x) == SYMBOL_REF
6514		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6515	  default:
6516	    return false;
6517	  }
6518      /* FALLTHRU */
6519
6520    case SYMBOL_REF:
6521    case LABEL_REF:
6522      return legitimate_pic_address_disp_p (x);
6523
6524    default:
6525      return true;
6526    }
6527}
6528
6529/* Determine if a given CONST RTX is a valid memory displacement
6530   in PIC mode.  */
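
/* For 32bit PIC, typical accepted displacements are
     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))   for local data and
     (const (unspec [(symbol_ref "foo")] UNSPEC_GOT))      for a GOT slot;
   a CONST_INT addend may additionally appear in the @GOTOFF form.  */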
6531
6532int
6533legitimate_pic_address_disp_p (rtx disp)
6534{
6535  bool saw_plus;
6536
6537  /* In 64bit mode we can allow direct addresses of symbols and labels
6538     when they are not dynamic symbols.  */
6539  if (TARGET_64BIT)
6540    {
6541      rtx op0 = disp, op1;
6542
6543      switch (GET_CODE (disp))
6544	{
6545	case LABEL_REF:
6546	  return true;
6547
6548	case CONST:
6549	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
6550	    break;
6551	  op0 = XEXP (XEXP (disp, 0), 0);
6552	  op1 = XEXP (XEXP (disp, 0), 1);
6553	  if (GET_CODE (op1) != CONST_INT
6554	      || INTVAL (op1) >= 16*1024*1024
6555	      || INTVAL (op1) < -16*1024*1024)
6556            break;
6557	  if (GET_CODE (op0) == LABEL_REF)
6558	    return true;
6559	  if (GET_CODE (op0) != SYMBOL_REF)
6560	    break;
6561	  /* FALLTHRU */
6562
6563	case SYMBOL_REF:
6564	  /* TLS references should always be enclosed in UNSPEC.  */
6565	  if (SYMBOL_REF_TLS_MODEL (op0))
6566	    return false;
6567	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6568	    return true;
6569	  break;
6570
6571	default:
6572	  break;
6573	}
6574    }
6575  if (GET_CODE (disp) != CONST)
6576    return 0;
6577  disp = XEXP (disp, 0);
6578
6579  if (TARGET_64BIT)
6580    {
6581      /* It is unsafe to allow PLUS expressions.  This limits the allowed
6582         distance from the GOT table.  We should not need these anyway.  */
6583      if (GET_CODE (disp) != UNSPEC
6584	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
6585	      && XINT (disp, 1) != UNSPEC_GOTOFF))
6586	return 0;
6587
6588      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6589	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6590	return 0;
6591      return 1;
6592    }
6593
6594  saw_plus = false;
6595  if (GET_CODE (disp) == PLUS)
6596    {
6597      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6598	return 0;
6599      disp = XEXP (disp, 0);
6600      saw_plus = true;
6601    }
6602
6603  if (TARGET_MACHO && darwin_local_data_pic (disp))
6604    return 1;
6605
6606  if (GET_CODE (disp) != UNSPEC)
6607    return 0;
6608
6609  switch (XINT (disp, 1))
6610    {
6611    case UNSPEC_GOT:
6612      if (saw_plus)
6613	return false;
6614      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6615    case UNSPEC_GOTOFF:
6616      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6617	 While the ABI also specifies a 32bit relocation, we don't produce it
6618	 in the small PIC model at all.  */
6619      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6620	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6621	  && !TARGET_64BIT)
6622        return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6623      return false;
6624    case UNSPEC_GOTTPOFF:
6625    case UNSPEC_GOTNTPOFF:
6626    case UNSPEC_INDNTPOFF:
6627      if (saw_plus)
6628	return false;
6629      disp = XVECEXP (disp, 0, 0);
6630      return (GET_CODE (disp) == SYMBOL_REF
6631	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6632    case UNSPEC_NTPOFF:
6633      disp = XVECEXP (disp, 0, 0);
6634      return (GET_CODE (disp) == SYMBOL_REF
6635	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6636    case UNSPEC_DTPOFF:
6637      disp = XVECEXP (disp, 0, 0);
6638      return (GET_CODE (disp) == SYMBOL_REF
6639	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6640    }
6641
6642  return 0;
6643}
6644
6645/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6646   memory address for an instruction.  The MODE argument is the machine mode
6647   for the MEM expression that wants to use this address.
6648
6649   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
6650   convert common non-canonical forms to canonical form so that they will
6651   be recognized.  */
6652
6653int
6654legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6655{
6656  struct ix86_address parts;
6657  rtx base, index, disp;
6658  HOST_WIDE_INT scale;
6659  const char *reason = NULL;
6660  rtx reason_rtx = NULL_RTX;
6661
6662  if (TARGET_DEBUG_ADDR)
6663    {
6664      fprintf (stderr,
6665	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6666	       GET_MODE_NAME (mode), strict);
6667      debug_rtx (addr);
6668    }
6669
6670  if (ix86_decompose_address (addr, &parts) <= 0)
6671    {
6672      reason = "decomposition failed";
6673      goto report_error;
6674    }
6675
6676  base = parts.base;
6677  index = parts.index;
6678  disp = parts.disp;
6679  scale = parts.scale;
6680
6681  /* Validate base register.
6682
6683     Don't allow SUBREG's that span more than a word here.  It can lead to spill
6684     failures when the base is one word out of a two word structure, which is
6685     represented internally as a DImode int.  */
6686
6687  if (base)
6688    {
6689      rtx reg;
6690      reason_rtx = base;
6691
6692      if (REG_P (base))
6693  	reg = base;
6694      else if (GET_CODE (base) == SUBREG
6695	       && REG_P (SUBREG_REG (base))
6696	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6697		  <= UNITS_PER_WORD)
6698  	reg = SUBREG_REG (base);
6699      else
6700	{
6701	  reason = "base is not a register";
6702	  goto report_error;
6703	}
6704
6705      if (GET_MODE (base) != Pmode)
6706	{
6707	  reason = "base is not in Pmode";
6708	  goto report_error;
6709	}
6710
6711      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6712	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6713	{
6714	  reason = "base is not valid";
6715	  goto report_error;
6716	}
6717    }
6718
6719  /* Validate index register.
6720
6721     Don't allow SUBREG's that span more than a word here -- same as above.  */
6722
6723  if (index)
6724    {
6725      rtx reg;
6726      reason_rtx = index;
6727
6728      if (REG_P (index))
6729  	reg = index;
6730      else if (GET_CODE (index) == SUBREG
6731	       && REG_P (SUBREG_REG (index))
6732	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6733		  <= UNITS_PER_WORD)
6734  	reg = SUBREG_REG (index);
6735      else
6736	{
6737	  reason = "index is not a register";
6738	  goto report_error;
6739	}
6740
6741      if (GET_MODE (index) != Pmode)
6742	{
6743	  reason = "index is not in Pmode";
6744	  goto report_error;
6745	}
6746
6747      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6748	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6749	{
6750	  reason = "index is not valid";
6751	  goto report_error;
6752	}
6753    }
6754
6755  /* Validate scale factor.  */
6756  if (scale != 1)
6757    {
6758      reason_rtx = GEN_INT (scale);
6759      if (!index)
6760	{
6761	  reason = "scale without index";
6762	  goto report_error;
6763	}
6764
6765      if (scale != 2 && scale != 4 && scale != 8)
6766	{
6767	  reason = "scale is not a valid multiplier";
6768	  goto report_error;
6769	}
6770    }
6771
6772  /* Validate displacement.  */
6773  if (disp)
6774    {
6775      reason_rtx = disp;
6776
6777      if (GET_CODE (disp) == CONST
6778	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6779	switch (XINT (XEXP (disp, 0), 1))
6780	  {
6781	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6782	     used.  While the ABI also specifies 32bit relocations, we don't
6783	     produce them at all and use IP-relative addressing instead.  */
6784	  case UNSPEC_GOT:
6785	  case UNSPEC_GOTOFF:
6786	    gcc_assert (flag_pic);
6787	    if (!TARGET_64BIT)
6788	      goto is_legitimate_pic;
6789	    reason = "64bit address unspec";
6790	    goto report_error;
6791
6792	  case UNSPEC_GOTPCREL:
6793	    gcc_assert (flag_pic);
6794	    goto is_legitimate_pic;
6795
6796	  case UNSPEC_GOTTPOFF:
6797	  case UNSPEC_GOTNTPOFF:
6798	  case UNSPEC_INDNTPOFF:
6799	  case UNSPEC_NTPOFF:
6800	  case UNSPEC_DTPOFF:
6801	    break;
6802
6803	  default:
6804	    reason = "invalid address unspec";
6805	    goto report_error;
6806	  }
6807
6808      else if (SYMBOLIC_CONST (disp)
6809	       && (flag_pic
6810		   || (TARGET_MACHO
6811#if TARGET_MACHO
6812		       && MACHOPIC_INDIRECT
6813		       && !machopic_operand_p (disp)
6814#endif
6815	       )))
6816	{
6817
6818	is_legitimate_pic:
6819	  if (TARGET_64BIT && (index || base))
6820	    {
6821	      /* foo@dtpoff(%rX) is ok.  */
6822	      if (GET_CODE (disp) != CONST
6823		  || GET_CODE (XEXP (disp, 0)) != PLUS
6824		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6825		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6826		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6827		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6828		{
6829		  reason = "non-constant pic memory reference";
6830		  goto report_error;
6831		}
6832	    }
6833	  else if (! legitimate_pic_address_disp_p (disp))
6834	    {
6835	      reason = "displacement is an invalid pic construct";
6836	      goto report_error;
6837	    }
6838
6839          /* This code used to verify that a symbolic pic displacement
6840	     includes the pic_offset_table_rtx register.
6841
6842	     While this is a good idea, unfortunately these constructs may
6843	     be created by the "adds using lea" optimization for incorrect
6844	     code like:
6845
6846	     int a;
6847	     int foo(int i)
6848	       {
6849	         return *(&a+i);
6850	       }
6851
6852	     This code is nonsensical, but results in addressing the
6853	     GOT table with the pic_offset_table_rtx base.  We can't
6854	     just refuse it easily, since it gets matched by the
6855	     "addsi3" pattern, which later gets split to lea when the
6856	     output register differs from the input.  While this
6857	     could be handled by a separate addsi pattern for this case
6858	     that never results in lea, disabling this test seems to be
6859	     the easier and correct fix for the crash.  */
6860	}
6861      else if (GET_CODE (disp) != LABEL_REF
6862	       && GET_CODE (disp) != CONST_INT
6863	       && (GET_CODE (disp) != CONST
6864		   || !legitimate_constant_p (disp))
6865	       && (GET_CODE (disp) != SYMBOL_REF
6866		   || !legitimate_constant_p (disp)))
6867	{
6868	  reason = "displacement is not constant";
6869	  goto report_error;
6870	}
6871      else if (TARGET_64BIT
6872	       && !x86_64_immediate_operand (disp, VOIDmode))
6873	{
6874	  reason = "displacement is out of range";
6875	  goto report_error;
6876	}
6877    }
6878
6879  /* Everything looks valid.  */
6880  if (TARGET_DEBUG_ADDR)
6881    fprintf (stderr, "Success.\n");
6882  return TRUE;
6883
6884 report_error:
6885  if (TARGET_DEBUG_ADDR)
6886    {
6887      fprintf (stderr, "Error: %s\n", reason);
6888      debug_rtx (reason_rtx);
6889    }
6890  return FALSE;
6891}
6892
6893/* Return a unique alias set for the GOT.  */
6894
6895static HOST_WIDE_INT
6896ix86_GOT_alias_set (void)
6897{
6898  static HOST_WIDE_INT set = -1;
6899  if (set == -1)
6900    set = new_alias_set ();
6901  return set;
6902}
6903
6904/* Return a legitimate reference for ORIG (an address) using the
6905   register REG.  If REG is 0, a new pseudo is generated.
6906
6907   There are two types of references that must be handled:
6908
6909   1. Global data references must load the address from the GOT, via
6910      the PIC reg.  An insn is emitted to do this load, and the reg is
6911      returned.
6912
6913   2. Static data references, constant pool addresses, and code labels
6914      compute the address as an offset from the GOT, whose base is in
6915      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
6916      differentiate them from global data objects.  The returned
6917      address is the PIC reg + an unspec constant.
6918
6919   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6920   reg also appears in the address.  */
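
/* In 32bit code this typically materializes as either
     movl  foo@GOT(%ebx), %reg     (global data: load the address from the GOT)
   or
     leal  foo@GOTOFF(%ebx), %reg  (local data: offset from the GOT base)
   with the PIC register in %ebx.  */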
6921
6922static rtx
6923legitimize_pic_address (rtx orig, rtx reg)
6924{
6925  rtx addr = orig;
6926  rtx new = orig;
6927  rtx base;
6928
6929#if TARGET_MACHO
6930  if (TARGET_MACHO && !TARGET_64BIT)
6931    {
6932      if (reg == 0)
6933	reg = gen_reg_rtx (Pmode);
6934      /* Use the generic Mach-O PIC machinery.  */
6935      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6936    }
6937#endif
6938
6939  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6940    new = addr;
6941  else if (TARGET_64BIT
6942	   && ix86_cmodel != CM_SMALL_PIC
6943	   && local_symbolic_operand (addr, Pmode))
6944    {
6945      rtx tmpreg;
6946      /* This symbol may be referenced via a displacement from the PIC
6947	 base address (@GOTOFF).  */
6948
6949      if (reload_in_progress)
6950	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6951      if (GET_CODE (addr) == CONST)
6952	addr = XEXP (addr, 0);
6953      if (GET_CODE (addr) == PLUS)
6954	{
6955	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6956	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6957	}
6958      else
6959	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6960      new = gen_rtx_CONST (Pmode, new);
6961      if (!reg)
6962        tmpreg = gen_reg_rtx (Pmode);
6963      else
6964	tmpreg = reg;
6965      emit_move_insn (tmpreg, new);
6966
6967      if (reg != 0)
6968	{
6969	  new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6970				     tmpreg, 1, OPTAB_DIRECT);
6971	  new = reg;
6972	}
6973      else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6974    }
6975  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6976    {
6977      /* This symbol may be referenced via a displacement from the PIC
6978	 base address (@GOTOFF).  */
6979
6980      if (reload_in_progress)
6981	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6982      if (GET_CODE (addr) == CONST)
6983	addr = XEXP (addr, 0);
6984      if (GET_CODE (addr) == PLUS)
6985	{
6986	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6987	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6988	}
6989      else
6990	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6991      new = gen_rtx_CONST (Pmode, new);
6992      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6993
6994      if (reg != 0)
6995	{
6996	  emit_move_insn (reg, new);
6997	  new = reg;
6998	}
6999    }
7000  else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7001    {
7002      if (TARGET_64BIT)
7003	{
7004	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7005	  new = gen_rtx_CONST (Pmode, new);
7006	  new = gen_const_mem (Pmode, new);
7007	  set_mem_alias_set (new, ix86_GOT_alias_set ());
7008
7009	  if (reg == 0)
7010	    reg = gen_reg_rtx (Pmode);
7011	  /* Use gen_movsi directly; otherwise the address is loaded
7012	     into a register for CSE.  We don't want to CSE these addresses;
7013	     instead we CSE addresses from the GOT table, so skip this.  */
7014	  emit_insn (gen_movsi (reg, new));
7015	  new = reg;
7016	}
7017      else
7018	{
7019	  /* This symbol must be referenced via a load from the
7020	     Global Offset Table (@GOT).  */
7021
7022	  if (reload_in_progress)
7023	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7024	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7025	  new = gen_rtx_CONST (Pmode, new);
7026	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7027	  new = gen_const_mem (Pmode, new);
7028	  set_mem_alias_set (new, ix86_GOT_alias_set ());
7029
7030	  if (reg == 0)
7031	    reg = gen_reg_rtx (Pmode);
7032	  emit_move_insn (reg, new);
7033	  new = reg;
7034	}
7035    }
7036  else
7037    {
7038      if (GET_CODE (addr) == CONST_INT
7039	  && !x86_64_immediate_operand (addr, VOIDmode))
7040	{
7041	  if (reg)
7042	    {
7043	      emit_move_insn (reg, addr);
7044	      new = reg;
7045	    }
7046	  else
7047	    new = force_reg (Pmode, addr);
7048	}
7049      else if (GET_CODE (addr) == CONST)
7050	{
7051	  addr = XEXP (addr, 0);
7052
7053	  /* We must match stuff we generated before.  Assume the only
7054	     unspecs that can get here are ours.  Not that we could do
7055	     anything with them anyway....  */
7056	  if (GET_CODE (addr) == UNSPEC
7057	      || (GET_CODE (addr) == PLUS
7058		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7059	    return orig;
7060	  gcc_assert (GET_CODE (addr) == PLUS);
7061	}
7062      if (GET_CODE (addr) == PLUS)
7063	{
7064	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7065
7066	  /* Check first to see if this is a constant offset from a @GOTOFF
7067	     symbol reference.  */
7068	  if (local_symbolic_operand (op0, Pmode)
7069	      && GET_CODE (op1) == CONST_INT)
7070	    {
7071	      if (!TARGET_64BIT)
7072		{
7073		  if (reload_in_progress)
7074		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7075		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7076					UNSPEC_GOTOFF);
7077		  new = gen_rtx_PLUS (Pmode, new, op1);
7078		  new = gen_rtx_CONST (Pmode, new);
7079		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7080
7081		  if (reg != 0)
7082		    {
7083		      emit_move_insn (reg, new);
7084		      new = reg;
7085		    }
7086		}
7087	      else
7088		{
7089		  if (INTVAL (op1) < -16*1024*1024
7090		      || INTVAL (op1) >= 16*1024*1024)
7091		    {
7092		      if (!x86_64_immediate_operand (op1, Pmode))
7093			op1 = force_reg (Pmode, op1);
7094		      new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7095		    }
7096		}
7097	    }
7098	  else
7099	    {
7100	      base = legitimize_pic_address (XEXP (addr, 0), reg);
7101	      new  = legitimize_pic_address (XEXP (addr, 1),
7102					     base == reg ? NULL_RTX : reg);
7103
7104	      if (GET_CODE (new) == CONST_INT)
7105		new = plus_constant (base, INTVAL (new));
7106	      else
7107		{
7108		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7109		    {
7110		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7111		      new = XEXP (new, 1);
7112		    }
7113		  new = gen_rtx_PLUS (Pmode, base, new);
7114		}
7115	    }
7116	}
7117    }
7118  return new;
7119}
7120
7121/* Load the thread pointer.  If TO_REG is true, force it into a register.  */
7122
7123static rtx
7124get_thread_pointer (int to_reg)
7125{
7126  rtx tp, reg, insn;
7127
7128  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7129  if (!to_reg)
7130    return tp;
7131
7132  reg = gen_reg_rtx (Pmode);
7133  insn = gen_rtx_SET (VOIDmode, reg, tp);
7134  insn = emit_insn (insn);
7135
7136  return reg;
7137}
7138
7139/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
7140   false if we expect this to be used for a memory address and true if
7141   we expect to load the address into a register.  */
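
/* As an illustration, with GNU TLS on ia32 the local-exec model resolves to
   the thread pointer plus foo@NTPOFF, so a memory access can use a %gs
   segment override directly, while without GNU TLS it is computed as the
   thread pointer minus foo@TPOFF.  */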
7142
7143static rtx
7144legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7145{
7146  rtx dest, base, off, pic, tp;
7147  int type;
7148
7149  switch (model)
7150    {
7151    case TLS_MODEL_GLOBAL_DYNAMIC:
7152      dest = gen_reg_rtx (Pmode);
7153      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7154
7155      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7156	{
7157	  rtx rax = gen_rtx_REG (Pmode, 0), insns;
7158
7159	  start_sequence ();
7160	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7161	  insns = get_insns ();
7162	  end_sequence ();
7163
7164	  emit_libcall_block (insns, dest, rax, x);
7165	}
7166      else if (TARGET_64BIT && TARGET_GNU2_TLS)
7167	emit_insn (gen_tls_global_dynamic_64 (dest, x));
7168      else
7169	emit_insn (gen_tls_global_dynamic_32 (dest, x));
7170
7171      if (TARGET_GNU2_TLS)
7172	{
7173	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7174
7175	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7176	}
7177      break;
7178
7179    case TLS_MODEL_LOCAL_DYNAMIC:
7180      base = gen_reg_rtx (Pmode);
7181      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7182
7183      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7184	{
7185	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7186
7187	  start_sequence ();
7188	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7189	  insns = get_insns ();
7190	  end_sequence ();
7191
7192	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7193	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7194	  emit_libcall_block (insns, base, rax, note);
7195	}
7196      else if (TARGET_64BIT && TARGET_GNU2_TLS)
7197	emit_insn (gen_tls_local_dynamic_base_64 (base));
7198      else
7199	emit_insn (gen_tls_local_dynamic_base_32 (base));
7200
7201      if (TARGET_GNU2_TLS)
7202	{
7203	  rtx x = ix86_tls_module_base ();
7204
7205	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
7206			       gen_rtx_MINUS (Pmode, x, tp));
7207	}
7208
7209      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7210      off = gen_rtx_CONST (Pmode, off);
7211
7212      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7213
7214      if (TARGET_GNU2_TLS)
7215	{
7216	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7217
7218	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7219	}
7220
7221      break;
7222
7223    case TLS_MODEL_INITIAL_EXEC:
7224      if (TARGET_64BIT)
7225	{
7226	  pic = NULL;
7227	  type = UNSPEC_GOTNTPOFF;
7228	}
7229      else if (flag_pic)
7230	{
7231	  if (reload_in_progress)
7232	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7233	  pic = pic_offset_table_rtx;
7234	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7235	}
7236      else if (!TARGET_ANY_GNU_TLS)
7237	{
7238	  pic = gen_reg_rtx (Pmode);
7239	  emit_insn (gen_set_got (pic));
7240	  type = UNSPEC_GOTTPOFF;
7241	}
7242      else
7243	{
7244	  pic = NULL;
7245	  type = UNSPEC_INDNTPOFF;
7246	}
7247
7248      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7249      off = gen_rtx_CONST (Pmode, off);
7250      if (pic)
7251	off = gen_rtx_PLUS (Pmode, pic, off);
7252      off = gen_const_mem (Pmode, off);
7253      set_mem_alias_set (off, ix86_GOT_alias_set ());
7254
7255      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7256	{
7257          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7258	  off = force_reg (Pmode, off);
7259	  return gen_rtx_PLUS (Pmode, base, off);
7260	}
7261      else
7262	{
7263	  base = get_thread_pointer (true);
7264	  dest = gen_reg_rtx (Pmode);
7265	  emit_insn (gen_subsi3 (dest, base, off));
7266	}
7267      break;
7268
7269    case TLS_MODEL_LOCAL_EXEC:
7270      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7271			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7272			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7273      off = gen_rtx_CONST (Pmode, off);
7274
7275      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7276	{
7277	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7278	  return gen_rtx_PLUS (Pmode, base, off);
7279	}
7280      else
7281	{
7282	  base = get_thread_pointer (true);
7283	  dest = gen_reg_rtx (Pmode);
7284	  emit_insn (gen_subsi3 (dest, base, off));
7285	}
7286      break;
7287
7288    default:
7289      gcc_unreachable ();
7290    }
7291
7292  return dest;
7293}
7294
7295/* Try machine-dependent ways of modifying an illegitimate address
7296   to be legitimate.  If we find one, return the new, valid address.
7297   This macro is used in only one place: `memory_address' in explow.c.
7298
7299   OLDX is the address as it was before break_out_memory_refs was called.
7300   In some cases it is useful to look at this to decide what needs to be done.
7301
7302   MODE and WIN are passed so that this macro can use
7303   GO_IF_LEGITIMATE_ADDRESS.
7304
7305   It is always safe for this macro to do nothing.  It exists to recognize
7306   opportunities to optimize the output.
7307
7308   For the 80386, we handle X+REG by loading X into a register R and
7309   using R+REG.  R will go in a general reg and indexing will be used.
7310   However, if REG is a broken-out memory address or multiplication,
7311   nothing needs to be done because REG can certainly go in a general reg.
7312
7313   When -fpic is used, special handling is needed for symbolic references.
7314   See comments by legitimize_pic_address in i386.c for details.  */
7315
7316rtx
7317legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7318{
7319  int changed = 0;
7320  unsigned log;
7321
7322  if (TARGET_DEBUG_ADDR)
7323    {
7324      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7325	       GET_MODE_NAME (mode));
7326      debug_rtx (x);
7327    }
7328
7329  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7330  if (log)
7331    return legitimize_tls_address (x, log, false);
7332  if (GET_CODE (x) == CONST
7333      && GET_CODE (XEXP (x, 0)) == PLUS
7334      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7335      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7336    {
7337      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7338      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7339    }
7340
7341  if (flag_pic && SYMBOLIC_CONST (x))
7342    return legitimize_pic_address (x, 0);
7343
7344  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7345  if (GET_CODE (x) == ASHIFT
7346      && GET_CODE (XEXP (x, 1)) == CONST_INT
7347      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7348    {
7349      changed = 1;
7350      log = INTVAL (XEXP (x, 1));
7351      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7352			GEN_INT (1 << log));
7353    }
7354
7355  if (GET_CODE (x) == PLUS)
7356    {
7357      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7358
7359      if (GET_CODE (XEXP (x, 0)) == ASHIFT
7360	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7361	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7362	{
7363	  changed = 1;
7364	  log = INTVAL (XEXP (XEXP (x, 0), 1));
7365	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
7366				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7367				      GEN_INT (1 << log));
7368	}
7369
7370      if (GET_CODE (XEXP (x, 1)) == ASHIFT
7371	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7372	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7373	{
7374	  changed = 1;
7375	  log = INTVAL (XEXP (XEXP (x, 1), 1));
7376	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
7377				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7378				      GEN_INT (1 << log));
7379	}
7380
7381      /* Put multiply first if it isn't already.  */
7382      if (GET_CODE (XEXP (x, 1)) == MULT)
7383	{
7384	  rtx tmp = XEXP (x, 0);
7385	  XEXP (x, 0) = XEXP (x, 1);
7386	  XEXP (x, 1) = tmp;
7387	  changed = 1;
7388	}
7389
7390      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7391	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
7392	 created by virtual register instantiation, register elimination, and
7393	 similar optimizations.  */
7394      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7395	{
7396	  changed = 1;
7397	  x = gen_rtx_PLUS (Pmode,
7398			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
7399					  XEXP (XEXP (x, 1), 0)),
7400			    XEXP (XEXP (x, 1), 1));
7401	}
7402
7403      /* Canonicalize
7404	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7405	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
7406      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7407	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7408	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7409	       && CONSTANT_P (XEXP (x, 1)))
7410	{
7411	  rtx constant;
7412	  rtx other = NULL_RTX;
7413
7414	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7415	    {
7416	      constant = XEXP (x, 1);
7417	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7418	    }
7419	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7420	    {
7421	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7422	      other = XEXP (x, 1);
7423	    }
7424	  else
7425	    constant = 0;
7426
7427	  if (constant)
7428	    {
7429	      changed = 1;
7430	      x = gen_rtx_PLUS (Pmode,
7431				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7432					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
7433				plus_constant (other, INTVAL (constant)));
7434	    }
7435	}
7436
7437      if (changed && legitimate_address_p (mode, x, FALSE))
7438	return x;
7439
7440      if (GET_CODE (XEXP (x, 0)) == MULT)
7441	{
7442	  changed = 1;
7443	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7444	}
7445
7446      if (GET_CODE (XEXP (x, 1)) == MULT)
7447	{
7448	  changed = 1;
7449	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7450	}
7451
7452      if (changed
7453	  && GET_CODE (XEXP (x, 1)) == REG
7454	  && GET_CODE (XEXP (x, 0)) == REG)
7455	return x;
7456
7457      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7458	{
7459	  changed = 1;
7460	  x = legitimize_pic_address (x, 0);
7461	}
7462
7463      if (changed && legitimate_address_p (mode, x, FALSE))
7464	return x;
7465
7466      if (GET_CODE (XEXP (x, 0)) == REG)
7467	{
7468	  rtx temp = gen_reg_rtx (Pmode);
7469	  rtx val  = force_operand (XEXP (x, 1), temp);
7470	  if (val != temp)
7471	    emit_move_insn (temp, val);
7472
7473	  XEXP (x, 1) = temp;
7474	  return x;
7475	}
7476
7477      else if (GET_CODE (XEXP (x, 1)) == REG)
7478	{
7479	  rtx temp = gen_reg_rtx (Pmode);
7480	  rtx val  = force_operand (XEXP (x, 0), temp);
7481	  if (val != temp)
7482	    emit_move_insn (temp, val);
7483
7484	  XEXP (x, 0) = temp;
7485	  return x;
7486	}
7487    }
7488
7489  return x;
7490}
7491
7492/* Print an integer constant expression in assembler syntax.  Addition
7493   and subtraction are the only arithmetic that may appear in these
7494   expressions.  FILE is the stdio stream to write to, X is the rtx, and
7495   CODE is the operand print code from the output string.  */
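
/* For instance, (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is
   printed as "foo@GOTOFF", and (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTPCREL)) as "foo@GOTPCREL(%rip)".  */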
7496
7497static void
7498output_pic_addr_const (FILE *file, rtx x, int code)
7499{
7500  char buf[256];
7501
7502  switch (GET_CODE (x))
7503    {
7504    case PC:
7505      gcc_assert (flag_pic);
7506      putc ('.', file);
7507      break;
7508
7509    case SYMBOL_REF:
7510      if (! TARGET_MACHO || TARGET_64BIT)
7511	output_addr_const (file, x);
7512      else
7513	{
7514	  const char *name = XSTR (x, 0);
7515
7516	  /* Mark the decl as referenced so that cgraph will output the function.  */
7517	  if (SYMBOL_REF_DECL (x))
7518	    mark_decl_referenced (SYMBOL_REF_DECL (x));
7519
7520#if TARGET_MACHO
7521	  if (MACHOPIC_INDIRECT
7522	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7523	    name = machopic_indirection_name (x, /*stub_p=*/true);
7524#endif
7525	  assemble_name (file, name);
7526	}
7527      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7528	fputs ("@PLT", file);
7529      break;
7530
7531    case LABEL_REF:
7532      x = XEXP (x, 0);
7533      /* FALLTHRU */
7534    case CODE_LABEL:
7535      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7536      assemble_name (asm_out_file, buf);
7537      break;
7538
7539    case CONST_INT:
7540      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7541      break;
7542
7543    case CONST:
7544      /* This used to output parentheses around the expression,
7545	 but that does not work on the 386 (either ATT or BSD assembler).  */
7546      output_pic_addr_const (file, XEXP (x, 0), code);
7547      break;
7548
7549    case CONST_DOUBLE:
7550      if (GET_MODE (x) == VOIDmode)
7551	{
7552	  /* We can use %d if the number is <32 bits and positive.  */
7553	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7554	    fprintf (file, "0x%lx%08lx",
7555		     (unsigned long) CONST_DOUBLE_HIGH (x),
7556		     (unsigned long) CONST_DOUBLE_LOW (x));
7557	  else
7558	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7559	}
7560      else
7561	/* We can't handle floating point constants;
7562	   PRINT_OPERAND must handle them.  */
7563	output_operand_lossage ("floating constant misused");
7564      break;
7565
7566    case PLUS:
7567      /* Some assemblers need integer constants to appear first.  */
7568      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7569	{
7570	  output_pic_addr_const (file, XEXP (x, 0), code);
7571	  putc ('+', file);
7572	  output_pic_addr_const (file, XEXP (x, 1), code);
7573	}
7574      else
7575	{
7576	  gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7577	  output_pic_addr_const (file, XEXP (x, 1), code);
7578	  putc ('+', file);
7579	  output_pic_addr_const (file, XEXP (x, 0), code);
7580	}
7581      break;
7582
7583    case MINUS:
7584      if (!TARGET_MACHO)
7585	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7586      output_pic_addr_const (file, XEXP (x, 0), code);
7587      putc ('-', file);
7588      output_pic_addr_const (file, XEXP (x, 1), code);
7589      if (!TARGET_MACHO)
7590	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7591      break;
7592
7593    case UNSPEC:
7594      gcc_assert (XVECLEN (x, 0) == 1);
7595      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7596      switch (XINT (x, 1))
7597	{
7598	case UNSPEC_GOT:
7599	  fputs ("@GOT", file);
7600	  break;
7601	case UNSPEC_GOTOFF:
7602	  fputs ("@GOTOFF", file);
7603	  break;
7604	case UNSPEC_GOTPCREL:
7605	  fputs ("@GOTPCREL(%rip)", file);
7606	  break;
7607	case UNSPEC_GOTTPOFF:
7608	  /* FIXME: This might be @TPOFF in Sun ld too.  */
7609	  fputs ("@GOTTPOFF", file);
7610	  break;
7611	case UNSPEC_TPOFF:
7612	  fputs ("@TPOFF", file);
7613	  break;
7614	case UNSPEC_NTPOFF:
7615	  if (TARGET_64BIT)
7616	    fputs ("@TPOFF", file);
7617	  else
7618	    fputs ("@NTPOFF", file);
7619	  break;
7620	case UNSPEC_DTPOFF:
7621	  fputs ("@DTPOFF", file);
7622	  break;
7623	case UNSPEC_GOTNTPOFF:
7624	  if (TARGET_64BIT)
7625	    fputs ("@GOTTPOFF(%rip)", file);
7626	  else
7627	    fputs ("@GOTNTPOFF", file);
7628	  break;
7629	case UNSPEC_INDNTPOFF:
7630	  fputs ("@INDNTPOFF", file);
7631	  break;
7632	default:
7633	  output_operand_lossage ("invalid UNSPEC as operand");
7634	  break;
7635	}
7636      break;
7637
7638    default:
7639      output_operand_lossage ("invalid expression as operand");
7640    }
7641}
7642
7643/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7644   We need to emit DTP-relative relocations.  */
7645
7646static void
7647i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7648{
7649  fputs (ASM_LONG, file);
7650  output_addr_const (file, x);
7651  fputs ("@DTPOFF", file);
7652  switch (size)
7653    {
7654    case 4:
7655      break;
7656    case 8:
7657      fputs (", 0", file);
7658      break;
7659    default:
7660      gcc_unreachable ();
7661   }
7662}
7663
7664/* In the name of slightly smaller debug output, and to cater to
7665   general assembler lossage, recognize PIC+GOTOFF and turn it back
7666   into a direct symbol reference.
7667
7668   On Darwin, this is necessary to avoid a crash, because Darwin
7669   has a different PIC label for each routine but the DWARF debugging
7670   information is not associated with any particular routine, so it's
7671   necessary to remove references to the PIC label from RTL stored by
7672   the DWARF output code.  */
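
/* For instance, (plus (reg ebx) (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTOFF))) is turned back into (symbol_ref "foo"), and a @GOT
   memory reference similarly yields the referenced symbol, with any
   register or constant addend re-applied to the result.  */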
7673
7674static rtx
7675ix86_delegitimize_address (rtx orig_x)
7676{
7677  rtx x = orig_x;
7678  /* reg_addend is NULL or a multiple of some register.  */
7679  rtx reg_addend = NULL_RTX;
7680  /* const_addend is NULL or a const_int.  */
7681  rtx const_addend = NULL_RTX;
7682  /* This is the result, or NULL.  */
7683  rtx result = NULL_RTX;
7684
7685  if (GET_CODE (x) == MEM)
7686    x = XEXP (x, 0);
7687
7688  if (TARGET_64BIT)
7689    {
7690      if (GET_CODE (x) != CONST
7691	  || GET_CODE (XEXP (x, 0)) != UNSPEC
7692	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7693	  || GET_CODE (orig_x) != MEM)
7694	return orig_x;
7695      return XVECEXP (XEXP (x, 0), 0, 0);
7696    }
7697
7698  if (GET_CODE (x) != PLUS
7699      || GET_CODE (XEXP (x, 1)) != CONST)
7700    return orig_x;
7701
7702  if (GET_CODE (XEXP (x, 0)) == REG
7703      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7704    /* %ebx + GOT/GOTOFF */
7705    ;
7706  else if (GET_CODE (XEXP (x, 0)) == PLUS)
7707    {
7708      /* %ebx + %reg * scale + GOT/GOTOFF */
7709      reg_addend = XEXP (x, 0);
7710      if (GET_CODE (XEXP (reg_addend, 0)) == REG
7711	  && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7712	reg_addend = XEXP (reg_addend, 1);
7713      else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7714	       && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7715	reg_addend = XEXP (reg_addend, 0);
7716      else
7717	return orig_x;
7718      if (GET_CODE (reg_addend) != REG
7719	  && GET_CODE (reg_addend) != MULT
7720	  && GET_CODE (reg_addend) != ASHIFT)
7721	return orig_x;
7722    }
7723  else
7724    return orig_x;
7725
7726  x = XEXP (XEXP (x, 1), 0);
7727  if (GET_CODE (x) == PLUS
7728      && GET_CODE (XEXP (x, 1)) == CONST_INT)
7729    {
7730      const_addend = XEXP (x, 1);
7731      x = XEXP (x, 0);
7732    }
7733
7734  if (GET_CODE (x) == UNSPEC
7735      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7736	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7737    result = XVECEXP (x, 0, 0);
7738
7739  if (TARGET_MACHO && darwin_local_data_pic (x)
7740      && GET_CODE (orig_x) != MEM)
7741    result = XEXP (x, 0);
7742
7743  if (! result)
7744    return orig_x;
7745
7746  if (const_addend)
7747    result = gen_rtx_PLUS (Pmode, result, const_addend);
7748  if (reg_addend)
7749    result = gen_rtx_PLUS (Pmode, reg_addend, result);
7750  return result;
7751}
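
/* Print to FILE the one- or two-letter condition suffix (e.g. "e", "g",
   "b") for comparison CODE in mode MODE.  If REVERSE is nonzero the sense
   of the condition is inverted.  FP nonzero selects the spellings needed
   for fcmov-style operands.  */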
7752
7753static void
7754put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7755		    int fp, FILE *file)
7756{
7757  const char *suffix;
7758
7759  if (mode == CCFPmode || mode == CCFPUmode)
7760    {
7761      enum rtx_code second_code, bypass_code;
7762      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7763      gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7764      code = ix86_fp_compare_code_to_integer (code);
7765      mode = CCmode;
7766    }
7767  if (reverse)
7768    code = reverse_condition (code);
7769
7770  switch (code)
7771    {
7772    case EQ:
7773      suffix = "e";
7774      break;
7775    case NE:
7776      suffix = "ne";
7777      break;
7778    case GT:
7779      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7780      suffix = "g";
7781      break;
7782    case GTU:
7783      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7784	 Those same assemblers have the same but opposite lossage on cmov.  */
7785      gcc_assert (mode == CCmode);
7786      suffix = fp ? "nbe" : "a";
7787      break;
7788    case LT:
7789      switch (mode)
7790	{
7791	case CCNOmode:
7792	case CCGOCmode:
7793	  suffix = "s";
7794	  break;
7795
7796	case CCmode:
7797	case CCGCmode:
7798	  suffix = "l";
7799	  break;
7800
7801	default:
7802	  gcc_unreachable ();
7803	}
7804      break;
7805    case LTU:
7806      gcc_assert (mode == CCmode);
7807      suffix = "b";
7808      break;
7809    case GE:
7810      switch (mode)
7811	{
7812	case CCNOmode:
7813	case CCGOCmode:
7814	  suffix = "ns";
7815	  break;
7816
7817	case CCmode:
7818	case CCGCmode:
7819	  suffix = "ge";
7820	  break;
7821
7822	default:
7823	  gcc_unreachable ();
7824	}
7825      break;
7826    case GEU:
7827      /* ??? As above.  */
7828      gcc_assert (mode == CCmode);
7829      suffix = fp ? "nb" : "ae";
7830      break;
7831    case LE:
7832      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7833      suffix = "le";
7834      break;
7835    case LEU:
7836      gcc_assert (mode == CCmode);
7837      suffix = "be";
7838      break;
7839    case UNORDERED:
7840      suffix = fp ? "u" : "p";
7841      break;
7842    case ORDERED:
7843      suffix = fp ? "nu" : "np";
7844      break;
7845    default:
7846      gcc_unreachable ();
7847    }
7848  fputs (suffix, file);
7849}
7850
7851/* Print the name of register X to FILE based on its machine mode and number.
7852   If CODE is 'w', pretend the mode is HImode.
7853   If CODE is 'b', pretend the mode is QImode.
7854   If CODE is 'k', pretend the mode is SImode.
7855   If CODE is 'q', pretend the mode is DImode.
7856   If CODE is 'h', pretend the reg is the 'high' byte register.
7857   If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.  */
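
/* For example, for (reg:SI 0) this emits "%eax"; with CODE 'w' it emits
   "%ax", with 'b' "%al", with 'h' "%ah", and with 'q' "%rax" (the latter
   only on 64bit targets).  */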
7858
7859void
7860print_reg (rtx x, int code, FILE *file)
7861{
7862  gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7863	      && REGNO (x) != FRAME_POINTER_REGNUM
7864	      && REGNO (x) != FLAGS_REG
7865	      && REGNO (x) != FPSR_REG);
7866
7867  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7868    putc ('%', file);
7869
7870  if (code == 'w' || MMX_REG_P (x))
7871    code = 2;
7872  else if (code == 'b')
7873    code = 1;
7874  else if (code == 'k')
7875    code = 4;
7876  else if (code == 'q')
7877    code = 8;
7878  else if (code == 'y')
7879    code = 3;
7880  else if (code == 'h')
7881    code = 0;
7882  else
7883    code = GET_MODE_SIZE (GET_MODE (x));
7884
7885  /* Irritatingly, AMD extended registers use a different naming convention
7886     from the normal registers.  */
7887  if (REX_INT_REG_P (x))
7888    {
7889      gcc_assert (TARGET_64BIT);
7890      switch (code)
7891	{
7892	  case 0:
7893	    error ("extended registers have no high halves");
7894	    break;
7895	  case 1:
7896	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7897	    break;
7898	  case 2:
7899	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7900	    break;
7901	  case 4:
7902	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7903	    break;
7904	  case 8:
7905	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7906	    break;
7907	  default:
7908	    error ("unsupported operand size for extended register");
7909	    break;
7910	}
7911      return;
7912    }
7913  switch (code)
7914    {
7915    case 3:
7916      if (STACK_TOP_P (x))
7917	{
7918	  fputs ("st(0)", file);
7919	  break;
7920	}
7921      /* FALLTHRU */
7922    case 8:
7923    case 4:
7924    case 12:
7925      if (! ANY_FP_REG_P (x))
7926	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7927      /* FALLTHRU */
7928    case 16:
7929    case 2:
7930    normal:
7931      fputs (hi_reg_name[REGNO (x)], file);
7932      break;
7933    case 1:
7934      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7935	goto normal;
7936      fputs (qi_reg_name[REGNO (x)], file);
7937      break;
7938    case 0:
7939      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7940	goto normal;
7941      fputs (qi_high_reg_name[REGNO (x)], file);
7942      break;
7943    default:
7944      gcc_unreachable ();
7945    }
7946}
7947
7948/* Locate some local-dynamic symbol still in use by this function
7949   so that we can print its name in some tls_local_dynamic_base
7950   pattern.  */
7951
7952static const char *
7953get_some_local_dynamic_name (void)
7954{
7955  rtx insn;
7956
7957  if (cfun->machine->some_ld_name)
7958    return cfun->machine->some_ld_name;
7959
7960  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7961    if (INSN_P (insn)
7962	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7963      return cfun->machine->some_ld_name;
7964
7965  gcc_unreachable ();
7966}
7967
7968static int
7969get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7970{
7971  rtx x = *px;
7972
7973  if (GET_CODE (x) == SYMBOL_REF
7974      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7975    {
7976      cfun->machine->some_ld_name = XSTR (x, 0);
7977      return 1;
7978    }
7979
7980  return 0;
7981}
7982
7983/* Meaning of CODE:
7984   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7985   C -- print opcode suffix for set/cmov insn.
7986   c -- like C, but print reversed condition
7987   F,f -- likewise, but for floating-point.
7988   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7989        otherwise nothing
7990   R -- print the prefix for register names.
7991   z -- print the opcode suffix for the size of the current operand.
7992   * -- print a star (in certain assembler syntax)
7993   A -- print an absolute memory reference.
7994   w -- print the operand as if it's a "word" (HImode) even if it isn't.
7995   s -- print a shift double count, followed by the assembler's argument
7996	delimiter.
7997   b -- print the QImode name of the register for the indicated operand.
7998	%b0 would print %al if operands[0] is reg 0.
7999   w --  likewise, print the HImode name of the register.
8000   k --  likewise, print the SImode name of the register.
8001   q --  likewise, print the DImode name of the register.
8002   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8003   y -- print "st(0)" instead of "st" as a register.
8004   D -- print condition for SSE cmp instruction.
8005   P -- if PIC, print an @PLT suffix.
8006   X -- don't print any sort of PIC '@' suffix for a symbol.
8007   & -- print some in-use local-dynamic symbol name.
8008   H -- print a memory address offset by 8; used for SSE high parts.
8009   + -- print a "cs" or "ds" branch-hint prefix, if enabled.  */
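/* A small illustration of the codes above, assuming hypothetical operands
   and the AT&T dialect: if operands[0] were the DImode register rax,
   "%b0" would print "%al", "%w0" "%ax", "%k0" "%eax", "%q0" "%rax"
   and "%h0" "%ah".  */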
8010
8011void
8012print_operand (FILE *file, rtx x, int code)
8013{
8014  if (code)
8015    {
8016      switch (code)
8017	{
8018	case '*':
8019	  if (ASSEMBLER_DIALECT == ASM_ATT)
8020	    putc ('*', file);
8021	  return;
8022
8023	case '&':
8024	  assemble_name (file, get_some_local_dynamic_name ());
8025	  return;
8026
8027	case 'A':
8028	  switch (ASSEMBLER_DIALECT)
8029	    {
8030	    case ASM_ATT:
8031	      putc ('*', file);
8032	      break;
8033
8034	    case ASM_INTEL:
8035	      /* Intel syntax.  For absolute addresses, registers should not
8036		 be surrounded by brackets.  */
8037	      if (GET_CODE (x) != REG)
8038		{
8039		  putc ('[', file);
8040		  PRINT_OPERAND (file, x, 0);
8041		  putc (']', file);
8042		  return;
8043		}
8044	      break;
8045
8046	    default:
8047	      gcc_unreachable ();
8048	    }
8049
8050	  PRINT_OPERAND (file, x, 0);
8051	  return;
8052
8053
8054	case 'L':
8055	  if (ASSEMBLER_DIALECT == ASM_ATT)
8056	    putc ('l', file);
8057	  return;
8058
8059	case 'W':
8060	  if (ASSEMBLER_DIALECT == ASM_ATT)
8061	    putc ('w', file);
8062	  return;
8063
8064	case 'B':
8065	  if (ASSEMBLER_DIALECT == ASM_ATT)
8066	    putc ('b', file);
8067	  return;
8068
8069	case 'Q':
8070	  if (ASSEMBLER_DIALECT == ASM_ATT)
8071	    putc ('l', file);
8072	  return;
8073
8074	case 'S':
8075	  if (ASSEMBLER_DIALECT == ASM_ATT)
8076	    putc ('s', file);
8077	  return;
8078
8079	case 'T':
8080	  if (ASSEMBLER_DIALECT == ASM_ATT)
8081	    putc ('t', file);
8082	  return;
8083
8084	case 'z':
8085	  /* 387 opcodes don't get size suffixes if the operands are
8086	     registers.  */
8087	  if (STACK_REG_P (x))
8088	    return;
8089
8090	  /* Likewise if using Intel opcodes.  */
8091	  if (ASSEMBLER_DIALECT == ASM_INTEL)
8092	    return;
8093
8094	  /* Derive the opcode suffix from the size of the operand.  */
8095	  switch (GET_MODE_SIZE (GET_MODE (x)))
8096	    {
8097	    case 2:
8098#ifdef HAVE_GAS_FILDS_FISTS
8099	      putc ('s', file);
8100#endif
8101	      return;
8102
8103	    case 4:
8104	      if (GET_MODE (x) == SFmode)
8105		{
8106		  putc ('s', file);
8107		  return;
8108		}
8109	      else
8110		putc ('l', file);
8111	      return;
8112
8113	    case 12:
8114	    case 16:
8115	      putc ('t', file);
8116	      return;
8117
8118	    case 8:
8119	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8120		{
8121#ifdef GAS_MNEMONICS
8122		  putc ('q', file);
8123#else
8124		  putc ('l', file);
8125		  putc ('l', file);
8126#endif
8127		}
8128	      else
8129	        putc ('l', file);
8130	      return;
8131
8132	    default:
8133	      gcc_unreachable ();
8134	    }
8135
8136	case 'b':
8137	case 'w':
8138	case 'k':
8139	case 'q':
8140	case 'h':
8141	case 'y':
8142	case 'X':
8143	case 'P':
8144	  break;
8145
8146	case 's':
8147	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
8148	    {
8149	      PRINT_OPERAND (file, x, 0);
8150	      putc (',', file);
8151	    }
8152	  return;
8153
8154	case 'D':
8155	  /* Little bit of braindamage here.  The SSE compare instructions
8156	     use completely different names for the comparisons than the
8157	     fp conditional moves do.  */
8158	  switch (GET_CODE (x))
8159	    {
8160	    case EQ:
8161	    case UNEQ:
8162	      fputs ("eq", file);
8163	      break;
8164	    case LT:
8165	    case UNLT:
8166	      fputs ("lt", file);
8167	      break;
8168	    case LE:
8169	    case UNLE:
8170	      fputs ("le", file);
8171	      break;
8172	    case UNORDERED:
8173	      fputs ("unord", file);
8174	      break;
8175	    case NE:
8176	    case LTGT:
8177	      fputs ("neq", file);
8178	      break;
8179	    case UNGE:
8180	    case GE:
8181	      fputs ("nlt", file);
8182	      break;
8183	    case UNGT:
8184	    case GT:
8185	      fputs ("nle", file);
8186	      break;
8187	    case ORDERED:
8188	      fputs ("ord", file);
8189	      break;
8190	    default:
8191	      gcc_unreachable ();
8192	    }
8193	  return;
8194	case 'O':
8195#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8196	  if (ASSEMBLER_DIALECT == ASM_ATT)
8197	    {
8198	      switch (GET_MODE (x))
8199		{
8200		case HImode: putc ('w', file); break;
8201		case SImode:
8202		case SFmode: putc ('l', file); break;
8203		case DImode:
8204		case DFmode: putc ('q', file); break;
8205		default: gcc_unreachable ();
8206		}
8207	      putc ('.', file);
8208	    }
8209#endif
8210	  return;
8211	case 'C':
8212	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8213	  return;
8214	case 'F':
8215#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8216	  if (ASSEMBLER_DIALECT == ASM_ATT)
8217	    putc ('.', file);
8218#endif
8219	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8220	  return;
8221
8222	  /* Like above, but reverse condition */
8223	case 'c':
8224	  /* Check to see if argument to %c is really a constant
8225	     and not a condition code which needs to be reversed.  */
8226	  if (!COMPARISON_P (x))
8227	    {
8228	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8229	      return;
8230	    }
8231	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8232	  return;
8233	case 'f':
8234#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8235	  if (ASSEMBLER_DIALECT == ASM_ATT)
8236	    putc ('.', file);
8237#endif
8238	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8239	  return;
8240
8241	case 'H':
8242	  /* It doesn't actually matter what mode we use here, as we're
8243	     only going to use this for printing.  */
8244	  x = adjust_address_nv (x, DImode, 8);
8245	  break;
8246
8247	case '+':
8248	  {
8249	    rtx x;
8250
8251	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8252	      return;
8253
8254	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8255	    if (x)
8256	      {
8257		int pred_val = INTVAL (XEXP (x, 0));
8258
8259		if (pred_val < REG_BR_PROB_BASE * 45 / 100
8260		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
8261		  {
8262		    int taken = pred_val > REG_BR_PROB_BASE / 2;
8263		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
8264
8265		    /* Emit hints only in the case default branch prediction
8266		       heuristics would fail.  */
8267		    if (taken != cputaken)
8268		      {
8269			/* We use 3e (DS) prefix for taken branches and
8270			   2e (CS) prefix for not taken branches.  */
8271			if (taken)
8272			  fputs ("ds ; ", file);
8273			else
8274			  fputs ("cs ; ", file);
8275		      }
8276		  }
8277	      }
8278	    return;
8279	  }
8280	default:
8281	  output_operand_lossage ("invalid operand code '%c'", code);
8282	}
8283    }
8284
8285  if (GET_CODE (x) == REG)
8286    print_reg (x, code, file);
8287
8288  else if (GET_CODE (x) == MEM)
8289    {
8290      /* No `byte ptr' prefix for call instructions.  */
8291      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8292	{
8293	  const char * size;
8294	  switch (GET_MODE_SIZE (GET_MODE (x)))
8295	    {
8296	    case 1: size = "BYTE"; break;
8297	    case 2: size = "WORD"; break;
8298	    case 4: size = "DWORD"; break;
8299	    case 8: size = "QWORD"; break;
8300	    case 12: size = "XWORD"; break;
8301	    case 16: size = "XMMWORD"; break;
8302	    default:
8303	      gcc_unreachable ();
8304	    }
8305
8306	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
8307	  if (code == 'b')
8308	    size = "BYTE";
8309	  else if (code == 'w')
8310	    size = "WORD";
8311	  else if (code == 'k')
8312	    size = "DWORD";
8313
8314	  fputs (size, file);
8315	  fputs (" PTR ", file);
8316	}
8317
8318      x = XEXP (x, 0);
8319      /* Avoid (%rip) for call operands.  */
8320      if (CONSTANT_ADDRESS_P (x) && code == 'P'
8321	       && GET_CODE (x) != CONST_INT)
8322	output_addr_const (file, x);
8323      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8324	output_operand_lossage ("invalid constraints for operand");
8325      else
8326	output_address (x);
8327    }
8328
8329  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8330    {
8331      REAL_VALUE_TYPE r;
8332      long l;
8333
8334      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8335      REAL_VALUE_TO_TARGET_SINGLE (r, l);
8336
8337      if (ASSEMBLER_DIALECT == ASM_ATT)
8338	putc ('$', file);
8339      fprintf (file, "0x%08lx", l);
8340    }
8341
8342  /* These float cases don't actually occur as immediate operands.  */
8343  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8344    {
8345      char dstr[30];
8346
8347      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8348      fprintf (file, "%s", dstr);
8349    }
8350
8351  else if (GET_CODE (x) == CONST_DOUBLE
8352	   && GET_MODE (x) == XFmode)
8353    {
8354      char dstr[30];
8355
8356      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8357      fprintf (file, "%s", dstr);
8358    }
8359
8360  else
8361    {
8362      /* We have patterns that allow zero sets of memory, for instance.
8363	 In 64-bit mode, we should probably support all 8-byte vectors,
8364	 since we can in fact encode that into an immediate.  */
8365      if (GET_CODE (x) == CONST_VECTOR)
8366	{
8367	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8368	  x = const0_rtx;
8369	}
8370
8371      if (code != 'P')
8372	{
8373	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8374	    {
8375	      if (ASSEMBLER_DIALECT == ASM_ATT)
8376		putc ('$', file);
8377	    }
8378	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8379		   || GET_CODE (x) == LABEL_REF)
8380	    {
8381	      if (ASSEMBLER_DIALECT == ASM_ATT)
8382		putc ('$', file);
8383	      else
8384		fputs ("OFFSET FLAT:", file);
8385	    }
8386	}
8387      if (GET_CODE (x) == CONST_INT)
8388	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8389      else if (flag_pic)
8390	output_pic_addr_const (file, x, code);
8391      else
8392	output_addr_const (file, x);
8393    }
8394}
8395
8396/* Print a memory operand whose address is ADDR.  */
8397
8398void
8399print_operand_address (FILE *file, rtx addr)
8400{
8401  struct ix86_address parts;
8402  rtx base, index, disp;
8403  int scale;
8404  int ok = ix86_decompose_address (addr, &parts);
8405
8406  gcc_assert (ok);
8407
8408  base = parts.base;
8409  index = parts.index;
8410  disp = parts.disp;
8411  scale = parts.scale;
8412
8413  switch (parts.seg)
8414    {
8415    case SEG_DEFAULT:
8416      break;
8417    case SEG_FS:
8418    case SEG_GS:
8419      if (USER_LABEL_PREFIX[0] == 0)
8420	putc ('%', file);
8421      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8422      break;
8423    default:
8424      gcc_unreachable ();
8425    }
8426
8427  if (!base && !index)
8428    {
8429      /* A displacement-only address requires special attention.  */
8430
8431      if (GET_CODE (disp) == CONST_INT)
8432	{
8433	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8434	    {
8435	      if (USER_LABEL_PREFIX[0] == 0)
8436		putc ('%', file);
8437	      fputs ("ds:", file);
8438	    }
8439	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8440	}
8441      else if (flag_pic)
8442	output_pic_addr_const (file, disp, 0);
8443      else
8444	output_addr_const (file, disp);
8445
8446      /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode.  */
8447      if (TARGET_64BIT)
8448	{
8449	  if (GET_CODE (disp) == CONST
8450	      && GET_CODE (XEXP (disp, 0)) == PLUS
8451	      && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8452	    disp = XEXP (XEXP (disp, 0), 0);
8453	  if (GET_CODE (disp) == LABEL_REF
8454	      || (GET_CODE (disp) == SYMBOL_REF
8455		  && SYMBOL_REF_TLS_MODEL (disp) == 0))
8456	    fputs ("(%rip)", file);
8457	}
8458    }
8459  else
8460    {
8461      if (ASSEMBLER_DIALECT == ASM_ATT)
8462	{
8463	  if (disp)
8464	    {
8465	      if (flag_pic)
8466		output_pic_addr_const (file, disp, 0);
8467	      else if (GET_CODE (disp) == LABEL_REF)
8468		output_asm_label (disp);
8469	      else
8470		output_addr_const (file, disp);
8471	    }
8472
8473	  putc ('(', file);
8474	  if (base)
8475	    print_reg (base, 0, file);
8476	  if (index)
8477	    {
8478	      putc (',', file);
8479	      print_reg (index, 0, file);
8480	      if (scale != 1)
8481		fprintf (file, ",%d", scale);
8482	    }
8483	  putc (')', file);
8484	}
8485      else
8486	{
8487	  rtx offset = NULL_RTX;
8488
8489	  if (disp)
8490	    {
8491	      /* Pull out the offset of a symbol; print any symbol itself.  */
8492	      if (GET_CODE (disp) == CONST
8493		  && GET_CODE (XEXP (disp, 0)) == PLUS
8494		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8495		{
8496		  offset = XEXP (XEXP (disp, 0), 1);
8497		  disp = gen_rtx_CONST (VOIDmode,
8498					XEXP (XEXP (disp, 0), 0));
8499		}
8500
8501	      if (flag_pic)
8502		output_pic_addr_const (file, disp, 0);
8503	      else if (GET_CODE (disp) == LABEL_REF)
8504		output_asm_label (disp);
8505	      else if (GET_CODE (disp) == CONST_INT)
8506		offset = disp;
8507	      else
8508		output_addr_const (file, disp);
8509	    }
8510
8511	  putc ('[', file);
8512	  if (base)
8513	    {
8514	      print_reg (base, 0, file);
8515	      if (offset)
8516		{
8517		  if (INTVAL (offset) >= 0)
8518		    putc ('+', file);
8519		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8520		}
8521	    }
8522	  else if (offset)
8523	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8524	  else
8525	    putc ('0', file);
8526
8527	  if (index)
8528	    {
8529	      putc ('+', file);
8530	      print_reg (index, 0, file);
8531	      if (scale != 1)
8532		fprintf (file, "*%d", scale);
8533	    }
8534	  putc (']', file);
8535	}
8536    }
8537}
8538
8539bool
8540output_addr_const_extra (FILE *file, rtx x)
8541{
8542  rtx op;
8543
8544  if (GET_CODE (x) != UNSPEC)
8545    return false;
8546
8547  op = XVECEXP (x, 0, 0);
8548  switch (XINT (x, 1))
8549    {
8550    case UNSPEC_GOTTPOFF:
8551      output_addr_const (file, op);
8552      /* FIXME: This might be @TPOFF in Sun ld.  */
8553      fputs ("@GOTTPOFF", file);
8554      break;
8555    case UNSPEC_TPOFF:
8556      output_addr_const (file, op);
8557      fputs ("@TPOFF", file);
8558      break;
8559    case UNSPEC_NTPOFF:
8560      output_addr_const (file, op);
8561      if (TARGET_64BIT)
8562	fputs ("@TPOFF", file);
8563      else
8564	fputs ("@NTPOFF", file);
8565      break;
8566    case UNSPEC_DTPOFF:
8567      output_addr_const (file, op);
8568      fputs ("@DTPOFF", file);
8569      break;
8570    case UNSPEC_GOTNTPOFF:
8571      output_addr_const (file, op);
8572      if (TARGET_64BIT)
8573	fputs ("@GOTTPOFF(%rip)", file);
8574      else
8575	fputs ("@GOTNTPOFF", file);
8576      break;
8577    case UNSPEC_INDNTPOFF:
8578      output_addr_const (file, op);
8579      fputs ("@INDNTPOFF", file);
8580      break;
8581
8582    default:
8583      return false;
8584    }
8585
8586  return true;
8587}
8588
8589/* Split one or more DImode RTL references into pairs of SImode
8590   references.  The RTL can be REG, offsettable MEM, integer constant, or
8591   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
8592   split and "num" is its length.  lo_half and hi_half are output arrays
8593   that parallel "operands".  */
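/* For instance, splitting a DImode register operand yields two SImode
   subregs at byte offsets 0 and 4, while an offsettable MEM is split with
   adjust_address into its low and high words.  */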
8594
8595void
8596split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8597{
8598  while (num--)
8599    {
8600      rtx op = operands[num];
8601
8602      /* simplify_subreg refuses to split volatile memory addresses,
8603         but we still have to handle them.  */
8604      if (GET_CODE (op) == MEM)
8605	{
8606	  lo_half[num] = adjust_address (op, SImode, 0);
8607	  hi_half[num] = adjust_address (op, SImode, 4);
8608	}
8609      else
8610	{
8611	  lo_half[num] = simplify_gen_subreg (SImode, op,
8612					      GET_MODE (op) == VOIDmode
8613					      ? DImode : GET_MODE (op), 0);
8614	  hi_half[num] = simplify_gen_subreg (SImode, op,
8615					      GET_MODE (op) == VOIDmode
8616					      ? DImode : GET_MODE (op), 4);
8617	}
8618    }
8619}
8620/* Split one or more TImode RTL references into pairs of DImode
8621   references.  The RTL can be REG, offsettable MEM, integer constant, or
8622   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
8623   split and "num" is its length.  lo_half and hi_half are output arrays
8624   that parallel "operands".  */
8625
8626void
8627split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8628{
8629  while (num--)
8630    {
8631      rtx op = operands[num];
8632
8633      /* simplify_subreg refuses to split volatile memory addresses, but we
8634         still have to handle them.  */
8635      if (GET_CODE (op) == MEM)
8636	{
8637	  lo_half[num] = adjust_address (op, DImode, 0);
8638	  hi_half[num] = adjust_address (op, DImode, 8);
8639	}
8640      else
8641	{
8642	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8643	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8644	}
8645    }
8646}
8647
8648/* Output code to perform a 387 binary operation in INSN, one of PLUS,
8649   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
8650   is the expression of the binary operation.  The output may either be
8651   emitted here, or returned to the caller, like all output_* functions.
8652
8653   There is no guarantee that the operands are the same mode, as they
8654   might be within FLOAT or FLOAT_EXTEND expressions.  */
8655
8656#ifndef SYSV386_COMPAT
8657/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
8658   wants to fix the assemblers because that causes incompatibility
8659   with gcc.  No-one wants to fix gcc because that causes
8660   incompatibility with assemblers...  You can use the option of
8661   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
8662#define SYSV386_COMPAT 1
8663#endif
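/* Illustrative note (see also the comment inside output_387_binary_op):
   AT&T-derived assemblers reverse the direction of fsub{r} and fdiv{r}
   when the destination register is not st(0), so the same mnemonic can
   denote st(1) - st(0) or st(0) - st(1) depending on the assembler;
   SYSV386_COMPAT selects which convention the templates below assume.  */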
8664
8665const char *
8666output_387_binary_op (rtx insn, rtx *operands)
8667{
8668  static char buf[30];
8669  const char *p;
8670  const char *ssep;
8671  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8672
8673#ifdef ENABLE_CHECKING
8674  /* Even if we do not want to check the inputs, this documents the input
8675     constraints, which helps in understanding the following code.  */
8676  if (STACK_REG_P (operands[0])
8677      && ((REG_P (operands[1])
8678	   && REGNO (operands[0]) == REGNO (operands[1])
8679	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8680	  || (REG_P (operands[2])
8681	      && REGNO (operands[0]) == REGNO (operands[2])
8682	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8683      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8684    ; /* ok */
8685  else
8686    gcc_assert (is_sse);
8687#endif
8688
8689  switch (GET_CODE (operands[3]))
8690    {
8691    case PLUS:
8692      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8693	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8694	p = "fiadd";
8695      else
8696	p = "fadd";
8697      ssep = "add";
8698      break;
8699
8700    case MINUS:
8701      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8702	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8703	p = "fisub";
8704      else
8705	p = "fsub";
8706      ssep = "sub";
8707      break;
8708
8709    case MULT:
8710      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8711	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8712	p = "fimul";
8713      else
8714	p = "fmul";
8715      ssep = "mul";
8716      break;
8717
8718    case DIV:
8719      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8720	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8721	p = "fidiv";
8722      else
8723	p = "fdiv";
8724      ssep = "div";
8725      break;
8726
8727    default:
8728      gcc_unreachable ();
8729    }
8730
8731  if (is_sse)
8732   {
8733      strcpy (buf, ssep);
8734      if (GET_MODE (operands[0]) == SFmode)
8735	strcat (buf, "ss\t{%2, %0|%0, %2}");
8736      else
8737	strcat (buf, "sd\t{%2, %0|%0, %2}");
8738      return buf;
8739   }
8740  strcpy (buf, p);
8741
8742  switch (GET_CODE (operands[3]))
8743    {
8744    case MULT:
8745    case PLUS:
8746      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8747	{
8748	  rtx temp = operands[2];
8749	  operands[2] = operands[1];
8750	  operands[1] = temp;
8751	}
8752
8753      /* We now know operands[0] == operands[1].  */
8754
8755      if (GET_CODE (operands[2]) == MEM)
8756	{
8757	  p = "%z2\t%2";
8758	  break;
8759	}
8760
8761      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8762	{
8763	  if (STACK_TOP_P (operands[0]))
8764	    /* How is it that we are storing to a dead operand[2]?
8765	       Well, presumably operands[1] is dead too.  We can't
8766	       store the result to st(0) as st(0) gets popped on this
8767	       instruction.  Instead store to operands[2] (which I
8768	       think has to be st(1)).  st(1) will be popped later.
8769	       gcc <= 2.8.1 didn't have this check and generated
8770	       assembly code that the Unixware assembler rejected.  */
8771	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8772	  else
8773	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8774	  break;
8775	}
8776
8777      if (STACK_TOP_P (operands[0]))
8778	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8779      else
8780	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8781      break;
8782
8783    case MINUS:
8784    case DIV:
8785      if (GET_CODE (operands[1]) == MEM)
8786	{
8787	  p = "r%z1\t%1";
8788	  break;
8789	}
8790
8791      if (GET_CODE (operands[2]) == MEM)
8792	{
8793	  p = "%z2\t%2";
8794	  break;
8795	}
8796
8797      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8798	{
8799#if SYSV386_COMPAT
8800	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8801	     derived assemblers, confusingly reverse the direction of
8802	     the operation for fsub{r} and fdiv{r} when the
8803	     destination register is not st(0).  The Intel assembler
8804	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
8805	     figure out what the hardware really does.  */
8806	  if (STACK_TOP_P (operands[0]))
8807	    p = "{p\t%0, %2|rp\t%2, %0}";
8808	  else
8809	    p = "{rp\t%2, %0|p\t%0, %2}";
8810#else
8811	  if (STACK_TOP_P (operands[0]))
8812	    /* As above for fmul/fadd, we can't store to st(0).  */
8813	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8814	  else
8815	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8816#endif
8817	  break;
8818	}
8819
8820      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8821	{
8822#if SYSV386_COMPAT
8823	  if (STACK_TOP_P (operands[0]))
8824	    p = "{rp\t%0, %1|p\t%1, %0}";
8825	  else
8826	    p = "{p\t%1, %0|rp\t%0, %1}";
8827#else
8828	  if (STACK_TOP_P (operands[0]))
8829	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
8830	  else
8831	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
8832#endif
8833	  break;
8834	}
8835
8836      if (STACK_TOP_P (operands[0]))
8837	{
8838	  if (STACK_TOP_P (operands[1]))
8839	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8840	  else
8841	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
8842	  break;
8843	}
8844      else if (STACK_TOP_P (operands[1]))
8845	{
8846#if SYSV386_COMPAT
8847	  p = "{\t%1, %0|r\t%0, %1}";
8848#else
8849	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
8850#endif
8851	}
8852      else
8853	{
8854#if SYSV386_COMPAT
8855	  p = "{r\t%2, %0|\t%0, %2}";
8856#else
8857	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8858#endif
8859	}
8860      break;
8861
8862    default:
8863      gcc_unreachable ();
8864    }
8865
8866  strcat (buf, p);
8867  return buf;
8868}
8869
8870/* Return the mode needed for an entity in the optimize_mode_switching pass.  */
8871
8872int
8873ix86_mode_needed (int entity, rtx insn)
8874{
8875  enum attr_i387_cw mode;
8876
8877  /* The mode UNINITIALIZED is used to store the control word after a
8878     function call or ASM pattern.  The mode ANY specifies that the insn
8879     has no requirements on the control word and makes no changes in the
8880     bits we are interested in.  */
8881
8882  if (CALL_P (insn)
8883      || (NONJUMP_INSN_P (insn)
8884	  && (asm_noperands (PATTERN (insn)) >= 0
8885	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8886    return I387_CW_UNINITIALIZED;
8887
8888  if (recog_memoized (insn) < 0)
8889    return I387_CW_ANY;
8890
8891  mode = get_attr_i387_cw (insn);
8892
8893  switch (entity)
8894    {
8895    case I387_TRUNC:
8896      if (mode == I387_CW_TRUNC)
8897	return mode;
8898      break;
8899
8900    case I387_FLOOR:
8901      if (mode == I387_CW_FLOOR)
8902	return mode;
8903      break;
8904
8905    case I387_CEIL:
8906      if (mode == I387_CW_CEIL)
8907	return mode;
8908      break;
8909
8910    case I387_MASK_PM:
8911      if (mode == I387_CW_MASK_PM)
8912	return mode;
8913      break;
8914
8915    default:
8916      gcc_unreachable ();
8917    }
8918
8919  return I387_CW_ANY;
8920}
8921
8922/* Output code to initialize the control word copies used by the trunc?f?i
8923   and rounding patterns.  MODE selects which modified control word to
8924   store into its dedicated stack slot.  */
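/* For reference when reading the constants below (standard x87 control word
   layout): bits 10-11 select the rounding mode -- 0x0000 round to nearest,
   0x0400 round down, 0x0800 round up, 0x0c00 round toward zero -- and
   bit 5 (0x0020) masks the precision exception used for nearbyint().  */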
8925
8926void
8927emit_i387_cw_initialization (int mode)
8928{
8929  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8930  rtx new_mode;
8931
8932  int slot;
8933
8934  rtx reg = gen_reg_rtx (HImode);
8935
8936  emit_insn (gen_x86_fnstcw_1 (stored_mode));
8937  emit_move_insn (reg, stored_mode);
8938
8939  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8940    {
8941      switch (mode)
8942	{
8943	case I387_CW_TRUNC:
8944	  /* round toward zero (truncate) */
8945	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8946	  slot = SLOT_CW_TRUNC;
8947	  break;
8948
8949	case I387_CW_FLOOR:
8950	  /* round down toward -oo */
8951	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8952	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8953	  slot = SLOT_CW_FLOOR;
8954	  break;
8955
8956	case I387_CW_CEIL:
8957	  /* round up toward +oo */
8958	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8959	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8960	  slot = SLOT_CW_CEIL;
8961	  break;
8962
8963	case I387_CW_MASK_PM:
8964	  /* mask precision exception for nearbyint() */
8965	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8966	  slot = SLOT_CW_MASK_PM;
8967	  break;
8968
8969	default:
8970	  gcc_unreachable ();
8971	}
8972    }
8973  else
8974    {
8975      switch (mode)
8976	{
8977	case I387_CW_TRUNC:
8978	  /* round toward zero (truncate) */
8979	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8980	  slot = SLOT_CW_TRUNC;
8981	  break;
8982
8983	case I387_CW_FLOOR:
8984	  /* round down toward -oo */
8985	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8986	  slot = SLOT_CW_FLOOR;
8987	  break;
8988
8989	case I387_CW_CEIL:
8990	  /* round up toward +oo */
8991	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8992	  slot = SLOT_CW_CEIL;
8993	  break;
8994
8995	case I387_CW_MASK_PM:
8996	  /* mask precision exception for nearbyint() */
8997	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8998	  slot = SLOT_CW_MASK_PM;
8999	  break;
9000
9001	default:
9002	  gcc_unreachable ();
9003	}
9004    }
9005
9006  gcc_assert (slot < MAX_386_STACK_LOCALS);
9007
9008  new_mode = assign_386_stack_local (HImode, slot);
9009  emit_move_insn (new_mode, reg);
9010}
9011
9012/* Output code for INSN to convert a float to a signed int.  OPERANDS
9013   are the insn operands.  The output may be [HSD]Imode and the input
9014   operand may be [SDX]Fmode.  */
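/* Illustrative output when a control word change is required and fisttp is
   not used: "fldcw %3" switches to the required rounding control word,
   "fistp%z0 %0" (or "fist%z0 %0") converts and stores, and "fldcw %2"
   restores the original control word.  */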
9015
9016const char *
9017output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9018{
9019  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9020  int dimode_p = GET_MODE (operands[0]) == DImode;
9021  int round_mode = get_attr_i387_cw (insn);
9022
9023  /* Jump through a hoop or two for DImode, since the hardware has no
9024     non-popping instruction.  We used to do this a different way, but
9025     that was somewhat fragile and broke with post-reload splitters.  */
9026  if ((dimode_p || fisttp) && !stack_top_dies)
9027    output_asm_insn ("fld\t%y1", operands);
9028
9029  gcc_assert (STACK_TOP_P (operands[1]));
9030  gcc_assert (GET_CODE (operands[0]) == MEM);
9031
9032  if (fisttp)
9033      output_asm_insn ("fisttp%z0\t%0", operands);
9034  else
9035    {
9036      if (round_mode != I387_CW_ANY)
9037	output_asm_insn ("fldcw\t%3", operands);
9038      if (stack_top_dies || dimode_p)
9039	output_asm_insn ("fistp%z0\t%0", operands);
9040      else
9041	output_asm_insn ("fist%z0\t%0", operands);
9042      if (round_mode != I387_CW_ANY)
9043	output_asm_insn ("fldcw\t%2", operands);
9044    }
9045
9046  return "";
9047}
9048
9049/* Output code for x87 ffreep insn.  The OPNO argument, which may only
9050   have the values zero or one, indicates the ffreep insn's operand
9051   from the OPERANDS array.  */
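/* Note: the .word fallback below emits the raw encoding of ffreep %st(N),
   i.e. the byte pair 0xdf 0xc0+N written as a little-endian 16-bit word,
   for assemblers that lack the ffreep mnemonic.  */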
9052
9053static const char *
9054output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9055{
9056  if (TARGET_USE_FFREEP)
9057#if HAVE_AS_IX86_FFREEP
9058    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9059#else
9060    switch (REGNO (operands[opno]))
9061      {
9062      case FIRST_STACK_REG + 0: return ".word\t0xc0df";
9063      case FIRST_STACK_REG + 1: return ".word\t0xc1df";
9064      case FIRST_STACK_REG + 2: return ".word\t0xc2df";
9065      case FIRST_STACK_REG + 3: return ".word\t0xc3df";
9066      case FIRST_STACK_REG + 4: return ".word\t0xc4df";
9067      case FIRST_STACK_REG + 5: return ".word\t0xc5df";
9068      case FIRST_STACK_REG + 6: return ".word\t0xc6df";
9069      case FIRST_STACK_REG + 7: return ".word\t0xc7df";
9070      }
9071#endif
9072
9073  return opno ? "fstp\t%y1" : "fstp\t%y0";
9074}
9075
9076
9077/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
9078   should be used.  UNORDERED_P is true when fucom should be used.  */
9079
9080const char *
9081output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9082{
9083  int stack_top_dies;
9084  rtx cmp_op0, cmp_op1;
9085  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9086
9087  if (eflags_p)
9088    {
9089      cmp_op0 = operands[0];
9090      cmp_op1 = operands[1];
9091    }
9092  else
9093    {
9094      cmp_op0 = operands[1];
9095      cmp_op1 = operands[2];
9096    }
9097
9098  if (is_sse)
9099    {
9100      if (GET_MODE (operands[0]) == SFmode)
9101	if (unordered_p)
9102	  return "ucomiss\t{%1, %0|%0, %1}";
9103	else
9104	  return "comiss\t{%1, %0|%0, %1}";
9105      else
9106	if (unordered_p)
9107	  return "ucomisd\t{%1, %0|%0, %1}";
9108	else
9109	  return "comisd\t{%1, %0|%0, %1}";
9110    }
9111
9112  gcc_assert (STACK_TOP_P (cmp_op0));
9113
9114  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9115
9116  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9117    {
9118      if (stack_top_dies)
9119	{
9120	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9121	  return output_387_ffreep (operands, 1);
9122	}
9123      else
9124	return "ftst\n\tfnstsw\t%0";
9125    }
9126
9127  if (STACK_REG_P (cmp_op1)
9128      && stack_top_dies
9129      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9130      && REGNO (cmp_op1) != FIRST_STACK_REG)
9131    {
9132      /* If the top of the 387 stack dies, and the other operand
9133	 is also a stack register that dies, then this must be an
9134	 `fcompp' float compare.  */
9135
9136      if (eflags_p)
9137	{
9138	  /* There is no double-popping fcomi variant.  Fortunately,
9139	     eflags is immune to the fstp's cc clobbering.  */
9140	  if (unordered_p)
9141	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9142	  else
9143	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9144	  return output_387_ffreep (operands, 0);
9145	}
9146      else
9147	{
9148	  if (unordered_p)
9149	    return "fucompp\n\tfnstsw\t%0";
9150	  else
9151	    return "fcompp\n\tfnstsw\t%0";
9152	}
9153    }
9154  else
9155    {
9156      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
9157
9158      static const char * const alt[16] =
9159      {
9160	"fcom%z2\t%y2\n\tfnstsw\t%0",
9161	"fcomp%z2\t%y2\n\tfnstsw\t%0",
9162	"fucom%z2\t%y2\n\tfnstsw\t%0",
9163	"fucomp%z2\t%y2\n\tfnstsw\t%0",
9164
9165	"ficom%z2\t%y2\n\tfnstsw\t%0",
9166	"ficomp%z2\t%y2\n\tfnstsw\t%0",
9167	NULL,
9168	NULL,
9169
9170	"fcomi\t{%y1, %0|%0, %y1}",
9171	"fcomip\t{%y1, %0|%0, %y1}",
9172	"fucomi\t{%y1, %0|%0, %y1}",
9173	"fucomip\t{%y1, %0|%0, %y1}",
9174
9175	NULL,
9176	NULL,
9177	NULL,
9178	NULL
9179      };
9180
9181      int mask;
9182      const char *ret;
9183
9184      mask  = eflags_p << 3;
9185      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9186      mask |= unordered_p << 1;
9187      mask |= stack_top_dies;
9188
9189      gcc_assert (mask < 16);
9190      ret = alt[mask];
9191      gcc_assert (ret);
9192
9193      return ret;
9194    }
9195}
9196
9197void
9198ix86_output_addr_vec_elt (FILE *file, int value)
9199{
9200  const char *directive = ASM_LONG;
9201
9202#ifdef ASM_QUAD
9203  if (TARGET_64BIT)
9204    directive = ASM_QUAD;
9205#else
9206  gcc_assert (!TARGET_64BIT);
9207#endif
9208
9209  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9210}
9211
9212void
9213ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9214{
9215  if (TARGET_64BIT)
9216    fprintf (file, "%s%s%d-%s%d\n",
9217	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
9218  else if (HAVE_AS_GOTOFF_IN_DATA)
9219    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9220#if TARGET_MACHO
9221  else if (TARGET_MACHO)
9222    {
9223      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9224      machopic_output_function_base_name (file);
9225      fprintf(file, "\n");
9226    }
9227#endif
9228  else
9229    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9230		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9231}
9232
9233/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9234   for the target.  */
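/* For example, "xorl %eax, %eax" is two bytes while "movl $0, %eax" is
   five, but the xor form clobbers the condition codes, which is why a
   flags-register CLOBBER is attached to the SET below when xor is used.  */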
9235
9236void
9237ix86_expand_clear (rtx dest)
9238{
9239  rtx tmp;
9240
9241  /* We play register width games, which are only valid after reload.  */
9242  gcc_assert (reload_completed);
9243
9244  /* Avoid HImode and its attendant prefix byte.  */
9245  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9246    dest = gen_rtx_REG (SImode, REGNO (dest));
9247
9248  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9249
9250  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
9251  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9252    {
9253      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9254      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9255    }
9256
9257  emit_insn (tmp);
9258}
9259
9260/* X is an unchanging MEM.  If it is a constant pool reference, return
9261   the constant pool rtx, else NULL.  */
9262
9263rtx
9264maybe_get_pool_constant (rtx x)
9265{
9266  x = ix86_delegitimize_address (XEXP (x, 0));
9267
9268  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9269    return get_pool_constant (x);
9270
9271  return NULL_RTX;
9272}
9273
9274void
9275ix86_expand_move (enum machine_mode mode, rtx operands[])
9276{
9277  int strict = (reload_in_progress || reload_completed);
9278  rtx op0, op1;
9279  enum tls_model model;
9280
9281  op0 = operands[0];
9282  op1 = operands[1];
9283
9284  if (GET_CODE (op1) == SYMBOL_REF)
9285    {
9286      model = SYMBOL_REF_TLS_MODEL (op1);
9287      if (model)
9288	{
9289	  op1 = legitimize_tls_address (op1, model, true);
9290	  op1 = force_operand (op1, op0);
9291	  if (op1 == op0)
9292	    return;
9293	}
9294    }
9295  else if (GET_CODE (op1) == CONST
9296	   && GET_CODE (XEXP (op1, 0)) == PLUS
9297	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9298    {
9299      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9300      if (model)
9301	{
9302	  rtx addend = XEXP (XEXP (op1, 0), 1);
9303	  op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9304	  op1 = force_operand (op1, NULL);
9305	  op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9306				     op0, 1, OPTAB_DIRECT);
9307	  if (op1 == op0)
9308	    return;
9309	}
9310    }
9311
9312  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9313    {
9314      if (TARGET_MACHO && !TARGET_64BIT)
9315	{
9316#if TARGET_MACHO
9317	  if (MACHOPIC_PURE)
9318	    {
9319	      rtx temp = ((reload_in_progress
9320			   || ((op0 && GET_CODE (op0) == REG)
9321			       && mode == Pmode))
9322			  ? op0 : gen_reg_rtx (Pmode));
9323	      op1 = machopic_indirect_data_reference (op1, temp);
9324	      op1 = machopic_legitimize_pic_address (op1, mode,
9325						     temp == op1 ? 0 : temp);
9326	    }
9327	  else if (MACHOPIC_INDIRECT)
9328	    op1 = machopic_indirect_data_reference (op1, 0);
9329	  if (op0 == op1)
9330	    return;
9331#endif
9332	}
9333      else
9334	{
9335	  if (GET_CODE (op0) == MEM)
9336	    op1 = force_reg (Pmode, op1);
9337	  else
9338	    op1 = legitimize_address (op1, op1, Pmode);
9339	}
9340    }
9341  else
9342    {
9343      if (GET_CODE (op0) == MEM
9344	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9345	      || !push_operand (op0, mode))
9346	  && GET_CODE (op1) == MEM)
9347	op1 = force_reg (mode, op1);
9348
9349      if (push_operand (op0, mode)
9350	  && ! general_no_elim_operand (op1, mode))
9351	op1 = copy_to_mode_reg (mode, op1);
9352
9353      /* Force large constants in 64-bit compilation into a register
9354	 to get them CSEed.  */
9355      if (TARGET_64BIT && mode == DImode
9356	  && immediate_operand (op1, mode)
9357	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
9358	  && !register_operand (op0, mode)
9359	  && optimize && !reload_completed && !reload_in_progress)
9360	op1 = copy_to_mode_reg (mode, op1);
9361
9362      if (FLOAT_MODE_P (mode))
9363	{
9364	  /* If we are loading a floating point constant into a register,
9365	     force the value to memory now, since we'll get better code
9366	     out of the back end.  */
9367
9368	  if (strict)
9369	    ;
9370	  else if (GET_CODE (op1) == CONST_DOUBLE)
9371	    {
9372	      op1 = validize_mem (force_const_mem (mode, op1));
9373	      if (!register_operand (op0, mode))
9374		{
9375		  rtx temp = gen_reg_rtx (mode);
9376		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9377		  emit_move_insn (op0, temp);
9378		  return;
9379		}
9380	    }
9381	}
9382    }
9383
9384  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9385}
9386
9387void
9388ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9389{
9390  rtx op0 = operands[0], op1 = operands[1];
9391
9392  /* Force constants other than zero into memory.  We do not know how
9393     the instructions used to build constants modify the upper 64 bits
9394     of the register; once we have that information we may be able
9395     to handle some of them more efficiently.  */
9396  if ((reload_in_progress | reload_completed) == 0
9397      && register_operand (op0, mode)
9398      && CONSTANT_P (op1)
9399      && standard_sse_constant_p (op1) <= 0)
9400    op1 = validize_mem (force_const_mem (mode, op1));
9401
9402  /* Make operand1 a register if it isn't already.  */
9403  if (!no_new_pseudos
9404      && !register_operand (op0, mode)
9405      && !register_operand (op1, mode))
9406    {
9407      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9408      return;
9409    }
9410
9411  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9412}
9413
9414/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
9415   straight to ix86_expand_vector_move.  */
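/* Rough outline of the load strategy below: a single movups/movupd/movdqu
   is used when optimizing for size or when unaligned moves are cheap
   (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL); otherwise V2DF loads are split into
   loadlpd/loadhpd and other loads into loadlps/loadhps, with the destination
   cleared or clobbered first depending on TARGET_SSE_SPLIT_REGS and
   TARGET_SSE_PARTIAL_REG_DEPENDENCY.  Stores are split symmetrically.  */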
9416
9417void
9418ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9419{
9420  rtx op0, op1, m;
9421
9422  op0 = operands[0];
9423  op1 = operands[1];
9424
9425  if (MEM_P (op1))
9426    {
9427      /* If we're optimizing for size, movups is the smallest.  */
9428      if (optimize_size)
9429	{
9430	  op0 = gen_lowpart (V4SFmode, op0);
9431	  op1 = gen_lowpart (V4SFmode, op1);
9432	  emit_insn (gen_sse_movups (op0, op1));
9433	  return;
9434	}
9435
9436      /* ??? If we have typed data, then it would appear that using
9437	 movdqu is the only way to get unaligned data loaded with
9438	 integer type.  */
9439      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9440	{
9441	  op0 = gen_lowpart (V16QImode, op0);
9442	  op1 = gen_lowpart (V16QImode, op1);
9443	  emit_insn (gen_sse2_movdqu (op0, op1));
9444	  return;
9445	}
9446
9447      if (TARGET_SSE2 && mode == V2DFmode)
9448        {
9449          rtx zero;
9450
9451          if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9452            {
9453              op0 = gen_lowpart (V2DFmode, op0);
9454              op1 = gen_lowpart (V2DFmode, op1);
9455              emit_insn (gen_sse2_movupd (op0, op1));
9456              return;
9457            }
9458
9459	  /* When SSE registers are split into halves, we can avoid
9460	     writing to the top half twice.  */
9461	  if (TARGET_SSE_SPLIT_REGS)
9462	    {
9463	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9464	      zero = op0;
9465	    }
9466	  else
9467	    {
9468	      /* ??? Not sure about the best option for the Intel chips.
9469		 The following would seem to satisfy; the register is
9470		 entirely cleared, breaking the dependency chain.  We
9471		 then store to the upper half, with a dependency depth
9472		 of one.  A rumor has it that Intel recommends two movsd
9473		 followed by an unpacklpd, but this is unconfirmed.  And
9474		 given that the dependency depth of the unpacklpd would
9475		 still be one, I'm not sure why this would be better.  */
9476	      zero = CONST0_RTX (V2DFmode);
9477	    }
9478
9479	  m = adjust_address (op1, DFmode, 0);
9480	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
9481	  m = adjust_address (op1, DFmode, 8);
9482	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
9483	}
9484      else
9485        {
9486          if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9487            {
9488              op0 = gen_lowpart (V4SFmode, op0);
9489              op1 = gen_lowpart (V4SFmode, op1);
9490              emit_insn (gen_sse_movups (op0, op1));
9491              return;
9492            }
9493
9494	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9495	    emit_move_insn (op0, CONST0_RTX (mode));
9496	  else
9497	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9498
9499	  if (mode != V4SFmode)
9500	    op0 = gen_lowpart (V4SFmode, op0);
9501	  m = adjust_address (op1, V2SFmode, 0);
9502	  emit_insn (gen_sse_loadlps (op0, op0, m));
9503	  m = adjust_address (op1, V2SFmode, 8);
9504	  emit_insn (gen_sse_loadhps (op0, op0, m));
9505	}
9506    }
9507  else if (MEM_P (op0))
9508    {
9509      /* If we're optimizing for size, movups is the smallest.  */
9510      if (optimize_size)
9511	{
9512	  op0 = gen_lowpart (V4SFmode, op0);
9513	  op1 = gen_lowpart (V4SFmode, op1);
9514	  emit_insn (gen_sse_movups (op0, op1));
9515	  return;
9516	}
9517
9518      /* ??? Similar to above, only less clear because of the
9519	 "typeless stores" issue.  */
9520      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9521	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9522        {
9523	  op0 = gen_lowpart (V16QImode, op0);
9524	  op1 = gen_lowpart (V16QImode, op1);
9525	  emit_insn (gen_sse2_movdqu (op0, op1));
9526	  return;
9527	}
9528
9529      if (TARGET_SSE2 && mode == V2DFmode)
9530	{
9531	  m = adjust_address (op0, DFmode, 0);
9532	  emit_insn (gen_sse2_storelpd (m, op1));
9533	  m = adjust_address (op0, DFmode, 8);
9534	  emit_insn (gen_sse2_storehpd (m, op1));
9535	}
9536      else
9537	{
9538	  if (mode != V4SFmode)
9539	    op1 = gen_lowpart (V4SFmode, op1);
9540	  m = adjust_address (op0, V2SFmode, 0);
9541	  emit_insn (gen_sse_storelps (m, op1));
9542	  m = adjust_address (op0, V2SFmode, 8);
9543	  emit_insn (gen_sse_storehps (m, op1));
9544	}
9545    }
9546  else
9547    gcc_unreachable ();
9548}
9549
9550/* Expand a push in MODE.  This is some mode for which we do not support
9551   proper push instructions, at least from the registers that we expect
9552   the value to live in.  */
9553
9554void
9555ix86_expand_push (enum machine_mode mode, rtx x)
9556{
9557  rtx tmp;
9558
9559  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9560			     GEN_INT (-GET_MODE_SIZE (mode)),
9561			     stack_pointer_rtx, 1, OPTAB_DIRECT);
9562  if (tmp != stack_pointer_rtx)
9563    emit_move_insn (stack_pointer_rtx, tmp);
9564
9565  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9566  emit_move_insn (tmp, x);
9567}
9568
9569/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
9570   destination to use for the operation.  If different from the true
9571   destination in operands[0], a copy operation will be required.  */
9572
9573rtx
9574ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9575			    rtx operands[])
9576{
9577  int matching_memory;
9578  rtx src1, src2, dst;
9579
9580  dst = operands[0];
9581  src1 = operands[1];
9582  src2 = operands[2];
9583
9584  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9585  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9586      && (rtx_equal_p (dst, src2)
9587	  || immediate_operand (src1, mode)))
9588    {
9589      rtx temp = src1;
9590      src1 = src2;
9591      src2 = temp;
9592    }
9593
9594  /* If the destination is memory, and we do not have matching source
9595     operands, do things in registers.  */
9596  matching_memory = 0;
9597  if (GET_CODE (dst) == MEM)
9598    {
9599      if (rtx_equal_p (dst, src1))
9600	matching_memory = 1;
9601      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9602	       && rtx_equal_p (dst, src2))
9603	matching_memory = 2;
9604      else
9605	dst = gen_reg_rtx (mode);
9606    }
9607
9608  /* Both source operands cannot be in memory.  */
9609  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9610    {
9611      if (matching_memory != 2)
9612	src2 = force_reg (mode, src2);
9613      else
9614	src1 = force_reg (mode, src1);
9615    }
9616
9617  /* If the operation is not commutable, source 1 cannot be a constant
9618     or non-matching memory.  */
9619  if ((CONSTANT_P (src1)
9620       || (!matching_memory && GET_CODE (src1) == MEM))
9621      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9622    src1 = force_reg (mode, src1);
9623
9624  operands[1] = src1;
9625  operands[2] = src2;
9626  return dst;
9627}
9628
9629/* Similarly, but assume that the destination has already been
9630   set up properly.  */
9631
9632void
9633ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9634				    enum machine_mode mode, rtx operands[])
9635{
9636  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9637  gcc_assert (dst == operands[0]);
9638}
9639
9640/* Attempt to expand a binary operator.  Make the expansion closer to the
9641   actual machine than just general_operand, which would allow 3 separate
9642   memory references (one output, two inputs) in a single insn.  */
9643
9644void
9645ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9646			     rtx operands[])
9647{
9648  rtx src1, src2, dst, op, clob;
9649
9650  dst = ix86_fixup_binary_operands (code, mode, operands);
9651  src1 = operands[1];
9652  src2 = operands[2];
9653
9654  /* Emit the instruction.  */
9655
9656  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9657  if (reload_in_progress)
9658    {
9659      /* Reload doesn't know about the flags register, and doesn't know that
9660         it doesn't want to clobber it.  We can only do this with PLUS.  */
9661      gcc_assert (code == PLUS);
9662      emit_insn (op);
9663    }
9664  else
9665    {
9666      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9667      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9668    }
9669
9670  /* Fix up the destination if needed.  */
9671  if (dst != operands[0])
9672    emit_move_insn (operands[0], dst);
9673}
9674
9675/* Return TRUE or FALSE depending on whether the binary operator meets the
9676   appropriate constraints.  */
9677
9678int
9679ix86_binary_operator_ok (enum rtx_code code,
9680			 enum machine_mode mode ATTRIBUTE_UNUSED,
9681			 rtx operands[3])
9682{
9683  /* Both source operands cannot be in memory.  */
9684  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9685    return 0;
9686  /* If the operation is not commutable, source 1 cannot be a constant.  */
9687  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9688    return 0;
9689  /* If the destination is memory, we must have a matching source operand.  */
9690  if (GET_CODE (operands[0]) == MEM
9691      && ! (rtx_equal_p (operands[0], operands[1])
9692	    || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9693		&& rtx_equal_p (operands[0], operands[2]))))
9694    return 0;
9695  /* If the operation is not commutable and the source 1 is memory, we must
9696     have a matching destination.  */
9697  if (GET_CODE (operands[1]) == MEM
9698      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9699      && ! rtx_equal_p (operands[0], operands[1]))
9700    return 0;
9701  return 1;
9702}
9703
9704/* Attempt to expand a unary operator.  Make the expansion closer to the
9705   actual machine than just general_operand, which would allow 2 separate
9706   memory references (one output, one input) in a single insn.  */
9707
9708void
9709ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9710			    rtx operands[])
9711{
9712  int matching_memory;
9713  rtx src, dst, op, clob;
9714
9715  dst = operands[0];
9716  src = operands[1];
9717
9718  /* If the destination is memory, and we do not have matching source
9719     operands, do things in registers.  */
9720  matching_memory = 0;
9721  if (MEM_P (dst))
9722    {
9723      if (rtx_equal_p (dst, src))
9724	matching_memory = 1;
9725      else
9726	dst = gen_reg_rtx (mode);
9727    }
9728
9729  /* When source operand is memory, destination must match.  */
9730  if (MEM_P (src) && !matching_memory)
9731    src = force_reg (mode, src);
9732
9733  /* Emit the instruction.  */
9734
9735  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9736  if (reload_in_progress || code == NOT)
9737    {
9738      /* Reload doesn't know about the flags register, and doesn't know that
9739         it doesn't want to clobber it.  */
9740      gcc_assert (code == NOT);
9741      emit_insn (op);
9742    }
9743  else
9744    {
9745      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9746      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9747    }
9748
9749  /* Fix up the destination if needed.  */
9750  if (dst != operands[0])
9751    emit_move_insn (operands[0], dst);
9752}
9753
9754/* Return TRUE or FALSE depending on whether the unary operator meets the
9755   appropriate constraints.  */
9756
9757int
9758ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9759			enum machine_mode mode ATTRIBUTE_UNUSED,
9760			rtx operands[2] ATTRIBUTE_UNUSED)
9761{
9762  /* If one of operands is memory, source and destination must match.  */
9763  if ((GET_CODE (operands[0]) == MEM
9764       || GET_CODE (operands[1]) == MEM)
9765      && ! rtx_equal_p (operands[0], operands[1]))
9766    return FALSE;
9767  return TRUE;
9768}
9769
9770/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9771   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
9772   true, then replicate the mask for all elements of the vector register.
9773   If INVERT is true, then create a mask excluding the sign bit.  */
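/* For example (implied by the code below): for DFmode with INVERT false the
   low vector element holds the bit pattern 0x8000000000000000, and with
   INVERT true its complement 0x7fffffffffffffff; the SFmode patterns are
   0x80000000 and 0x7fffffff respectively.  */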
9774
9775rtx
9776ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9777{
9778  enum machine_mode vec_mode;
9779  HOST_WIDE_INT hi, lo;
9780  int shift = 63;
9781  rtvec v;
9782  rtx mask;
9783
9784  /* Find the sign bit, sign extended to 2*HWI.  */
9785  if (mode == SFmode)
9786    lo = 0x80000000, hi = lo < 0;
9787  else if (HOST_BITS_PER_WIDE_INT >= 64)
9788    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9789  else
9790    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9791
9792  if (invert)
9793    lo = ~lo, hi = ~hi;
9794
9795  /* Force this value into the low part of a fp vector constant.  */
9796  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9797  mask = gen_lowpart (mode, mask);
9798
9799  if (mode == SFmode)
9800    {
9801      if (vect)
9802	v = gen_rtvec (4, mask, mask, mask, mask);
9803      else
9804	v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9805		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9806      vec_mode = V4SFmode;
9807    }
9808  else
9809    {
9810      if (vect)
9811	v = gen_rtvec (2, mask, mask);
9812      else
9813	v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9814      vec_mode = V2DFmode;
9815    }
9816
9817  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9818}
9819
9820/* Generate code for floating point ABS or NEG.  */
9821
9822void
9823ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9824				rtx operands[])
9825{
9826  rtx mask, set, use, clob, dst, src;
9827  bool matching_memory;
9828  bool use_sse = false;
9829  bool vector_mode = VECTOR_MODE_P (mode);
9830  enum machine_mode elt_mode = mode;
9831
9832  if (vector_mode)
9833    {
9834      elt_mode = GET_MODE_INNER (mode);
9835      use_sse = true;
9836    }
9837  else if (TARGET_SSE_MATH)
9838    use_sse = SSE_FLOAT_MODE_P (mode);
9839
9840  /* NEG and ABS performed with SSE use bitwise mask operations.
9841     Create the appropriate mask now.  */
9842  if (use_sse)
9843    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9844  else
9845    mask = NULL_RTX;
9846
9847  dst = operands[0];
9848  src = operands[1];
9849
9850  /* If the destination is memory, and we don't have matching source
9851     operands or we're using the x87, do things in registers.  */
9852  matching_memory = false;
9853  if (MEM_P (dst))
9854    {
9855      if (use_sse && rtx_equal_p (dst, src))
9856	matching_memory = true;
9857      else
9858	dst = gen_reg_rtx (mode);
9859    }
9860  if (MEM_P (src) && !matching_memory)
9861    src = force_reg (mode, src);
9862
9863  if (vector_mode)
9864    {
9865      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9866      set = gen_rtx_SET (VOIDmode, dst, set);
9867      emit_insn (set);
9868    }
9869  else
9870    {
9871      set = gen_rtx_fmt_e (code, mode, src);
9872      set = gen_rtx_SET (VOIDmode, dst, set);
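      /* When an SSE mask is available, wrap the set in a PARALLEL with a
	 USE of the mask and a clobber of the flags register; the plain
	 x87 case only needs the simple set.  */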
9873      if (mask)
9874        {
9875          use = gen_rtx_USE (VOIDmode, mask);
9876          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9877          emit_insn (gen_rtx_PARALLEL (VOIDmode,
9878				       gen_rtvec (3, set, use, clob)));
9879        }
9880      else
9881	emit_insn (set);
9882    }
9883
9884  if (dst != operands[0])
9885    emit_move_insn (operands[0], dst);
9886}
9887
9888/* Expand a copysign operation.  Special case operand 0 being a constant.  */
9889
9890void
9891ix86_expand_copysign (rtx operands[])
9892{
9893  enum machine_mode mode, vmode;
9894  rtx dest, op0, op1, mask, nmask;
9895
9896  dest = operands[0];
9897  op0 = operands[1];
9898  op1 = operands[2];
9899
9900  mode = GET_MODE (dest);
9901  vmode = mode == SFmode ? V4SFmode : V2DFmode;
9902
9903  if (GET_CODE (op0) == CONST_DOUBLE)
9904    {
9905      rtvec v;
9906
9907      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9908	op0 = simplify_unary_operation (ABS, mode, op0, mode);
9909
9910      if (op0 == CONST0_RTX (mode))
9911	op0 = CONST0_RTX (vmode);
9912      else
9913        {
9914	  if (mode == SFmode)
9915	    v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9916                           CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9917	  else
9918	    v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9919          op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9920	}
9921
9922      mask = ix86_build_signbit_mask (mode, 0, 0);
9923
9924      if (mode == SFmode)
9925	emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9926      else
9927	emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9928    }
9929  else
9930    {
9931      nmask = ix86_build_signbit_mask (mode, 0, 1);
9932      mask = ix86_build_signbit_mask (mode, 0, 0);
9933
9934      if (mode == SFmode)
9935	emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9936      else
9937	emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9938    }
9939}
9940
9941/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
9942   be a constant, and so has already been expanded into a vector constant.  */
9943
9944void
9945ix86_split_copysign_const (rtx operands[])
9946{
9947  enum machine_mode mode, vmode;
9948  rtx dest, op0, op1, mask, x;
9949
9950  dest = operands[0];
9951  op0 = operands[1];
9952  op1 = operands[2];
9953  mask = operands[3];
9954
9955  mode = GET_MODE (dest);
9956  vmode = GET_MODE (mask);
9957
9958  dest = simplify_gen_subreg (vmode, dest, mode, 0);
9959  x = gen_rtx_AND (vmode, dest, mask);
9960  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9961
9962  if (op0 != CONST0_RTX (vmode))
9963    {
9964      x = gen_rtx_IOR (vmode, dest, op0);
9965      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9966    }
9967}
9968
9969/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
9970   so we have to do two masks.  */
9971
9972void
9973ix86_split_copysign_var (rtx operands[])
9974{
9975  enum machine_mode mode, vmode;
9976  rtx dest, scratch, op0, op1, mask, nmask, x;
9977
9978  dest = operands[0];
9979  scratch = operands[1];
9980  op0 = operands[2];
9981  op1 = operands[3];
9982  nmask = operands[4];
9983  mask = operands[5];
9984
9985  mode = GET_MODE (dest);
9986  vmode = GET_MODE (mask);
9987
9988  if (rtx_equal_p (op0, op1))
9989    {
9990      /* Shouldn't happen often (it's useless, obviously), but when it does
9991	 we'd generate incorrect code if we continue below.  */
9992      emit_move_insn (dest, op0);
9993      return;
9994    }
9995
9996  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
9997    {
9998      gcc_assert (REGNO (op1) == REGNO (scratch));
9999
10000      x = gen_rtx_AND (vmode, scratch, mask);
10001      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10002
10003      dest = mask;
10004      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10005      x = gen_rtx_NOT (vmode, dest);
10006      x = gen_rtx_AND (vmode, x, op0);
10007      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10008    }
10009  else
10010    {
10011      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
10012	{
10013	  x = gen_rtx_AND (vmode, scratch, mask);
10014	}
10015      else						/* alternative 2,4 */
10016	{
10017          gcc_assert (REGNO (mask) == REGNO (scratch));
10018          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10019	  x = gen_rtx_AND (vmode, scratch, op1);
10020	}
10021      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10022
10023      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
10024	{
10025	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
10026	  x = gen_rtx_AND (vmode, dest, nmask);
10027	}
10028      else						/* alternative 3,4 */
10029	{
10030          gcc_assert (REGNO (nmask) == REGNO (dest));
10031	  dest = nmask;
10032	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10033	  x = gen_rtx_AND (vmode, dest, op0);
10034	}
10035      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10036    }
10037
10038  x = gen_rtx_IOR (vmode, dest, scratch);
10039  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10040}
10041
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes and whether the
   CC mode is at least as constrained as REQ_MODE.  */
10045
10046int
10047ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10048{
10049  rtx set;
10050  enum machine_mode set_mode;
10051
10052  set = PATTERN (insn);
10053  if (GET_CODE (set) == PARALLEL)
10054    set = XVECEXP (set, 0, 0);
10055  gcc_assert (GET_CODE (set) == SET);
10056  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10057
10058  set_mode = GET_MODE (SET_DEST (set));
10059  switch (set_mode)
10060    {
10061    case CCNOmode:
10062      if (req_mode != CCNOmode
10063	  && (req_mode != CCmode
10064	      || XEXP (SET_SRC (set), 1) != const0_rtx))
10065	return 0;
10066      break;
10067    case CCmode:
10068      if (req_mode == CCGCmode)
10069	return 0;
10070      /* FALLTHRU */
10071    case CCGCmode:
10072      if (req_mode == CCGOCmode || req_mode == CCNOmode)
10073	return 0;
10074      /* FALLTHRU */
10075    case CCGOCmode:
10076      if (req_mode == CCZmode)
10077	return 0;
10078      /* FALLTHRU */
10079    case CCZmode:
10080      break;
10081
10082    default:
10083      gcc_unreachable ();
10084    }
10085
10086  return (GET_MODE (SET_SRC (set)) == set_mode);
10087}
10088
10089/* Generate insn patterns to do an integer compare of OPERANDS.  */
10090
10091static rtx
10092ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10093{
10094  enum machine_mode cmpmode;
10095  rtx tmp, flags;
10096
10097  cmpmode = SELECT_CC_MODE (code, op0, op1);
10098  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10099
10100  /* This is very simple, but making the interface the same as in the
10101     FP case makes the rest of the code easier.  */
10102  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10103  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10104
10105  /* Return the test that should be put into the flags user, i.e.
10106     the bcc, scc, or cmov instruction.  */
10107  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10108}
10109
10110/* Figure out whether to use ordered or unordered fp comparisons.
10111   Return the appropriate mode to use.  */
10112
10113enum machine_mode
10114ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10115{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     between all forms of trapping and nontrapping comparisons, we can make
     inequality comparisons trapping again, since that results in better code
     when using FCOM based compares.  */
10121  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10122}
10123
10124enum machine_mode
10125ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10126{
10127  if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10128    return ix86_fp_compare_mode (code);
10129  switch (code)
10130    {
10131      /* Only zero flag is needed.  */
10132    case EQ:			/* ZF=0 */
10133    case NE:			/* ZF!=0 */
10134      return CCZmode;
10135      /* Codes needing carry flag.  */
10136    case GEU:			/* CF=0 */
10137    case GTU:			/* CF=0 & ZF=0 */
10138    case LTU:			/* CF=1 */
10139    case LEU:			/* CF=1 | ZF=1 */
10140      return CCmode;
      /* Codes possibly doable only with the sign flag when
         comparing against zero.  */
10143    case GE:			/* SF=OF   or   SF=0 */
10144    case LT:			/* SF<>OF  or   SF=1 */
10145      if (op1 == const0_rtx)
10146	return CCGOCmode;
10147      else
10148	/* For other cases Carry flag is not required.  */
10149	return CCGCmode;
      /* Codes doable only with the sign flag when comparing
         against zero, but for which we lack a jump instruction,
         so we need to use relational tests against overflow,
         which therefore needs to be zero.  */
10154    case GT:			/* ZF=0 & SF=OF */
10155    case LE:			/* ZF=1 | SF<>OF */
10156      if (op1 == const0_rtx)
10157	return CCNOmode;
10158      else
10159	return CCGCmode;
      /* The strcmp pattern does (use flags) and combine may ask us for
	 the proper mode.  */
10162    case USE:
10163      return CCmode;
10164    default:
10165      gcc_unreachable ();
10166    }
10167}
10168
10169/* Return the fixed registers used for condition codes.  */
10170
10171static bool
10172ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10173{
10174  *p1 = FLAGS_REG;
10175  *p2 = FPSR_REG;
10176  return true;
10177}
10178
10179/* If two condition code modes are compatible, return a condition code
10180   mode which is compatible with both.  Otherwise, return
10181   VOIDmode.  */
10182
10183static enum machine_mode
10184ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10185{
10186  if (m1 == m2)
10187    return m1;
10188
10189  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10190    return VOIDmode;
10191
10192  if ((m1 == CCGCmode && m2 == CCGOCmode)
10193      || (m1 == CCGOCmode && m2 == CCGCmode))
10194    return CCGCmode;
10195
10196  switch (m1)
10197    {
10198    default:
10199      gcc_unreachable ();
10200
10201    case CCmode:
10202    case CCGCmode:
10203    case CCGOCmode:
10204    case CCNOmode:
10205    case CCZmode:
10206      switch (m2)
10207	{
10208	default:
10209	  return VOIDmode;
10210
10211	case CCmode:
10212	case CCGCmode:
10213	case CCGOCmode:
10214	case CCNOmode:
10215	case CCZmode:
10216	  return CCmode;
10217	}
10218
10219    case CCFPmode:
10220    case CCFPUmode:
10221      /* These are only compatible with themselves, which we already
10222	 checked above.  */
10223      return VOIDmode;
10224    }
10225}
10226
10227/* Return true if we should use an FCOMI instruction for this fp comparison.  */
10228
10229int
10230ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10231{
10232  enum rtx_code swapped_code = swap_condition (code);
10233  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10234	  || (ix86_fp_comparison_cost (swapped_code)
10235	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
10236}
10237
10238/* Swap, force into registers, or otherwise massage the two operands
10239   to a fp comparison.  The operands are updated in place; the new
10240   comparison code is returned.  */
10241
10242static enum rtx_code
10243ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10244{
10245  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10246  rtx op0 = *pop0, op1 = *pop1;
10247  enum machine_mode op_mode = GET_MODE (op0);
10248  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10249
10250  /* All of the unordered compare instructions only work on registers.
10251     The same is true of the fcomi compare instructions.  The XFmode
10252     compare instructions require registers except when comparing
10253     against zero or when converting operand 1 from fixed point to
10254     floating point.  */
10255
10256  if (!is_sse
10257      && (fpcmp_mode == CCFPUmode
10258	  || (op_mode == XFmode
10259	      && ! (standard_80387_constant_p (op0) == 1
10260		    || standard_80387_constant_p (op1) == 1)
10261	      && GET_CODE (op1) != FLOAT)
10262	  || ix86_use_fcomi_compare (code)))
10263    {
10264      op0 = force_reg (op_mode, op0);
10265      op1 = force_reg (op_mode, op1);
10266    }
10267  else
10268    {
10269      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
10270	 things around if they appear profitable, otherwise force op0
10271	 into a register.  */
10272
10273      if (standard_80387_constant_p (op0) == 0
10274	  || (GET_CODE (op0) == MEM
10275	      && ! (standard_80387_constant_p (op1) == 0
10276		    || GET_CODE (op1) == MEM)))
10277	{
10278	  rtx tmp;
10279	  tmp = op0, op0 = op1, op1 = tmp;
10280	  code = swap_condition (code);
10281	}
10282
10283      if (GET_CODE (op0) != REG)
10284	op0 = force_reg (op_mode, op0);
10285
10286      if (CONSTANT_P (op1))
10287	{
10288	  int tmp = standard_80387_constant_p (op1);
10289	  if (tmp == 0)
10290	    op1 = validize_mem (force_const_mem (op_mode, op1));
10291	  else if (tmp == 1)
10292	    {
10293	      if (TARGET_CMOVE)
10294		op1 = force_reg (op_mode, op1);
10295	    }
10296	  else
10297	    op1 = force_reg (op_mode, op1);
10298	}
10299    }
10300
10301  /* Try to rearrange the comparison to make it cheaper.  */
10302  if (ix86_fp_comparison_cost (code)
10303      > ix86_fp_comparison_cost (swap_condition (code))
10304      && (GET_CODE (op1) == REG || !no_new_pseudos))
10305    {
10306      rtx tmp;
10307      tmp = op0, op0 = op1, op1 = tmp;
10308      code = swap_condition (code);
10309      if (GET_CODE (op0) != REG)
10310	op0 = force_reg (op_mode, op0);
10311    }
10312
10313  *pop0 = op0;
10314  *pop1 = op1;
10315  return code;
10316}
10317
/* Convert the comparison codes we use to represent an FP comparison into
   the integer code that will result in a proper branch.  Return UNKNOWN
   if no such code is available.  */
10321
10322enum rtx_code
10323ix86_fp_compare_code_to_integer (enum rtx_code code)
10324{
10325  switch (code)
10326    {
10327    case GT:
10328      return GTU;
10329    case GE:
10330      return GEU;
10331    case ORDERED:
10332    case UNORDERED:
10333      return code;
10334      break;
10335    case UNEQ:
10336      return EQ;
10337      break;
10338    case UNLT:
10339      return LTU;
10340      break;
10341    case UNLE:
10342      return LEU;
10343      break;
10344    case LTGT:
10345      return NE;
10346      break;
10347    default:
10348      return UNKNOWN;
10349    }
10350}
10351
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, set its value to UNKNOWN.
   We never require more than two branches.  */
10357
10358void
10359ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10360			  enum rtx_code *first_code,
10361			  enum rtx_code *second_code)
10362{
10363  *first_code = code;
10364  *bypass_code = UNKNOWN;
10365  *second_code = UNKNOWN;
10366
10367  /* The fcomi comparison sets flags as follows:
10368
10369     cmp    ZF PF CF
10370     >      0  0  0
10371     <      0  0  1
10372     =      1  0  0
10373     un     1  1  1 */
10374
10375  switch (code)
10376    {
10377    case GT:			/* GTU - CF=0 & ZF=0 */
10378    case GE:			/* GEU - CF=0 */
10379    case ORDERED:		/* PF=0 */
10380    case UNORDERED:		/* PF=1 */
10381    case UNEQ:			/* EQ - ZF=1 */
10382    case UNLT:			/* LTU - CF=1 */
10383    case UNLE:			/* LEU - CF=1 | ZF=1 */
10384    case LTGT:			/* EQ - ZF=0 */
10385      break;
10386    case LT:			/* LTU - CF=1 - fails on unordered */
10387      *first_code = UNLT;
10388      *bypass_code = UNORDERED;
10389      break;
10390    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
10391      *first_code = UNLE;
10392      *bypass_code = UNORDERED;
10393      break;
10394    case EQ:			/* EQ - ZF=1 - fails on unordered */
10395      *first_code = UNEQ;
10396      *bypass_code = UNORDERED;
10397      break;
10398    case NE:			/* NE - ZF=0 - fails on unordered */
10399      *first_code = LTGT;
10400      *second_code = UNORDERED;
10401      break;
10402    case UNGE:			/* GEU - CF=0 - fails on unordered */
10403      *first_code = GE;
10404      *second_code = UNORDERED;
10405      break;
10406    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
10407      *first_code = GT;
10408      *second_code = UNORDERED;
10409      break;
10410    default:
10411      gcc_unreachable ();
10412    }
10413  if (!TARGET_IEEE_FP)
10414    {
10415      *second_code = UNKNOWN;
10416      *bypass_code = UNKNOWN;
10417    }
10418}
10419
/* Return the cost of a comparison done using fcom + arithmetic operations on AX.
   All of the following functions use the number of instructions as the cost metric.
   In the future this should be tweaked to compute bytes for optimize_size and
   take into account the performance of various instructions on various CPUs.  */
10424static int
10425ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10426{
10427  if (!TARGET_IEEE_FP)
10428    return 4;
10429  /* The cost of code output by ix86_expand_fp_compare.  */
10430  switch (code)
10431    {
10432    case UNLE:
10433    case UNLT:
10434    case LTGT:
10435    case GT:
10436    case GE:
10437    case UNORDERED:
10438    case ORDERED:
10439    case UNEQ:
10440      return 4;
10441      break;
10442    case LT:
10443    case NE:
10444    case EQ:
10445    case UNGE:
10446      return 5;
10447      break;
10448    case LE:
10449    case UNGT:
10450      return 6;
10451      break;
10452    default:
10453      gcc_unreachable ();
10454    }
10455}
10456
10457/* Return cost of comparison done using fcomi operation.
10458   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10459static int
10460ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10461{
10462  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not supported - this
     prevents gcc from using it.  */
10465  if (!TARGET_CMOVE)
10466    return 1024;
10467  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
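  /* fcomi plus one conditional jump costs 2; add one more jump when a
     bypass or second branch is required.  */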
10468  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10469}
10470
10471/* Return cost of comparison done using sahf operation.
10472   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10473static int
10474ix86_fp_comparison_sahf_cost (enum rtx_code code)
10475{
10476  enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not preferred - this
     prevents gcc from using it.  */
10479  if (!TARGET_USE_SAHF && !optimize_size)
10480    return 1024;
10481  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
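  /* fnstsw, sahf and one conditional jump cost 3; add one more jump when
     a bypass or second branch is required.  */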
10482  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10483}
10484
10485/* Compute cost of the comparison done using any method.
10486   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10487static int
10488ix86_fp_comparison_cost (enum rtx_code code)
10489{
10490  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10491  int min;
10492
10493  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10494  sahf_cost = ix86_fp_comparison_sahf_cost (code);
10495
10496  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10497  if (min > sahf_cost)
10498    min = sahf_cost;
10499  if (min > fcomi_cost)
10500    min = fcomi_cost;
10501  return min;
10502}
10503
10504/* Generate insn patterns to do a floating point compare of OPERANDS.  */
10505
10506static rtx
10507ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10508			rtx *second_test, rtx *bypass_test)
10509{
10510  enum machine_mode fpcmp_mode, intcmp_mode;
10511  rtx tmp, tmp2;
10512  int cost = ix86_fp_comparison_cost (code);
10513  enum rtx_code bypass_code, first_code, second_code;
10514
10515  fpcmp_mode = ix86_fp_compare_mode (code);
10516  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10517
10518  if (second_test)
10519    *second_test = NULL_RTX;
10520  if (bypass_test)
10521    *bypass_test = NULL_RTX;
10522
10523  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10524
10525  /* Do fcomi/sahf based test when profitable.  */
10526  if ((bypass_code == UNKNOWN || bypass_test)
10527      && (second_code == UNKNOWN || second_test)
10528      && ix86_fp_comparison_arithmetics_cost (code) > cost)
10529    {
10530      if (TARGET_CMOVE)
10531	{
10532	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10533	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10534			     tmp);
10535	  emit_insn (tmp);
10536	}
10537      else
10538	{
10539	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10540	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10541	  if (!scratch)
10542	    scratch = gen_reg_rtx (HImode);
10543	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10544	  emit_insn (gen_x86_sahf_1 (scratch));
10545	}
10546
10547      /* The FP codes work out to act like unsigned.  */
10548      intcmp_mode = fpcmp_mode;
10549      code = first_code;
10550      if (bypass_code != UNKNOWN)
10551	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10552				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
10553				       const0_rtx);
10554      if (second_code != UNKNOWN)
10555	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10556				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
10557				       const0_rtx);
10558    }
10559  else
10560    {
10561      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
10562      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10563      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10564      if (!scratch)
10565	scratch = gen_reg_rtx (HImode);
10566      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10567
10568      /* In the unordered case, we have to check C2 for NaN's, which
10569	 doesn't happen to work out to anything nice combination-wise.
10570	 So do some bit twiddling on the value we've got in AH to come
10571	 up with an appropriate set of condition codes.  */
10572
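      /* After the fnstsw, AH holds C0 (0x01), C2 (0x04) and C3 (0x40) of
	 the FPU status word, so a mask of 0x45 tests C0|C2|C3.  */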
10573      intcmp_mode = CCNOmode;
10574      switch (code)
10575	{
10576	case GT:
10577	case UNGT:
10578	  if (code == GT || !TARGET_IEEE_FP)
10579	    {
10580	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10581	      code = EQ;
10582	    }
10583	  else
10584	    {
10585	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10586	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10587	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10588	      intcmp_mode = CCmode;
10589	      code = GEU;
10590	    }
10591	  break;
10592	case LT:
10593	case UNLT:
10594	  if (code == LT && TARGET_IEEE_FP)
10595	    {
10596	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10597	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10598	      intcmp_mode = CCmode;
10599	      code = EQ;
10600	    }
10601	  else
10602	    {
10603	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10604	      code = NE;
10605	    }
10606	  break;
10607	case GE:
10608	case UNGE:
10609	  if (code == GE || !TARGET_IEEE_FP)
10610	    {
10611	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10612	      code = EQ;
10613	    }
10614	  else
10615	    {
10616	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10617	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10618					     GEN_INT (0x01)));
10619	      code = NE;
10620	    }
10621	  break;
10622	case LE:
10623	case UNLE:
10624	  if (code == LE && TARGET_IEEE_FP)
10625	    {
10626	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10627	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10628	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10629	      intcmp_mode = CCmode;
10630	      code = LTU;
10631	    }
10632	  else
10633	    {
10634	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10635	      code = NE;
10636	    }
10637	  break;
10638	case EQ:
10639	case UNEQ:
10640	  if (code == EQ && TARGET_IEEE_FP)
10641	    {
10642	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10643	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10644	      intcmp_mode = CCmode;
10645	      code = EQ;
10646	    }
10647	  else
10648	    {
10649	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10650	      code = NE;
10651	      break;
10652	    }
10653	  break;
10654	case NE:
10655	case LTGT:
10656	  if (code == NE && TARGET_IEEE_FP)
10657	    {
10658	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10659	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10660					     GEN_INT (0x40)));
10661	      code = NE;
10662	    }
10663	  else
10664	    {
10665	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10666	      code = EQ;
10667	    }
10668	  break;
10669
10670	case UNORDERED:
10671	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10672	  code = NE;
10673	  break;
10674	case ORDERED:
10675	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10676	  code = EQ;
10677	  break;
10678
10679	default:
10680	  gcc_unreachable ();
10681	}
10682    }
10683
10684  /* Return the test that should be put into the flags user, i.e.
10685     the bcc, scc, or cmov instruction.  */
10686  return gen_rtx_fmt_ee (code, VOIDmode,
10687			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10688			 const0_rtx);
10689}
10690
10691rtx
10692ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10693{
10694  rtx op0, op1, ret;
10695  op0 = ix86_compare_op0;
10696  op1 = ix86_compare_op1;
10697
10698  if (second_test)
10699    *second_test = NULL_RTX;
10700  if (bypass_test)
10701    *bypass_test = NULL_RTX;
10702
10703  if (ix86_compare_emitted)
10704    {
10705      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10706      ix86_compare_emitted = NULL_RTX;
10707    }
10708  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10709    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10710				  second_test, bypass_test);
10711  else
10712    ret = ix86_expand_int_compare (code, op0, op1);
10713
10714  return ret;
10715}
10716
/* Return true if CODE will result in a nontrivial jump sequence.  */
10718bool
10719ix86_fp_jump_nontrivial_p (enum rtx_code code)
10720{
10721  enum rtx_code bypass_code, first_code, second_code;
10722  if (!TARGET_CMOVE)
10723    return true;
10724  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10725  return bypass_code != UNKNOWN || second_code != UNKNOWN;
10726}
10727
10728void
10729ix86_expand_branch (enum rtx_code code, rtx label)
10730{
10731  rtx tmp;
10732
  /* If we have emitted a compare insn, go straight to simple.
     ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non-NULL.  */
10736  if (ix86_compare_emitted)
10737    goto simple;
10738
10739  switch (GET_MODE (ix86_compare_op0))
10740    {
10741    case QImode:
10742    case HImode:
10743    case SImode:
10744      simple:
10745      tmp = ix86_expand_compare (code, NULL, NULL);
10746      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10747				  gen_rtx_LABEL_REF (VOIDmode, label),
10748				  pc_rtx);
10749      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10750      return;
10751
10752    case SFmode:
10753    case DFmode:
10754    case XFmode:
10755      {
10756	rtvec vec;
10757	int use_fcomi;
10758	enum rtx_code bypass_code, first_code, second_code;
10759
10760	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10761					     &ix86_compare_op1);
10762
10763	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10764
	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand the jump early.  Otherwise delay expansion by
	   creating a compound insn so as not to confuse the optimizers.  */
10768	if (bypass_code == UNKNOWN && second_code == UNKNOWN
10769	    && TARGET_CMOVE)
10770	  {
10771	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10772				  gen_rtx_LABEL_REF (VOIDmode, label),
10773				  pc_rtx, NULL_RTX, NULL_RTX);
10774	  }
10775	else
10776	  {
10777	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
10778				  ix86_compare_op0, ix86_compare_op1);
10779	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10780					gen_rtx_LABEL_REF (VOIDmode, label),
10781					pc_rtx);
10782	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10783
10784	    use_fcomi = ix86_use_fcomi_compare (code);
10785	    vec = rtvec_alloc (3 + !use_fcomi);
10786	    RTVEC_ELT (vec, 0) = tmp;
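	    /* Clobber the FP status and flags registers, plus an HImode
	       scratch when fnstsw/sahf will be used instead of fcomi.  */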
10787	    RTVEC_ELT (vec, 1)
10788	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10789	    RTVEC_ELT (vec, 2)
10790	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10791	    if (! use_fcomi)
10792	      RTVEC_ELT (vec, 3)
10793		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10794
10795	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10796	  }
10797	return;
10798      }
10799
10800    case DImode:
10801      if (TARGET_64BIT)
10802	goto simple;
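      /* FALLTHRU */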
10803    case TImode:
      /* Expand a DImode/TImode branch into multiple compare+branch sequences.  */
10805      {
10806	rtx lo[2], hi[2], label2;
10807	enum rtx_code code1, code2, code3;
10808	enum machine_mode submode;
10809
10810	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10811	  {
10812	    tmp = ix86_compare_op0;
10813	    ix86_compare_op0 = ix86_compare_op1;
10814	    ix86_compare_op1 = tmp;
10815	    code = swap_condition (code);
10816	  }
10817	if (GET_MODE (ix86_compare_op0) == DImode)
10818	  {
10819	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10820	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10821	    submode = SImode;
10822	  }
10823	else
10824	  {
10825	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10826	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10827	    submode = DImode;
10828	  }
10829
10830	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10831	   avoid two branches.  This costs one extra insn, so disable when
10832	   optimizing for size.  */
10833
10834	if ((code == EQ || code == NE)
10835	    && (!optimize_size
10836	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
10837	  {
10838	    rtx xor0, xor1;
10839
10840	    xor1 = hi[0];
10841	    if (hi[1] != const0_rtx)
10842	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10843				   NULL_RTX, 0, OPTAB_WIDEN);
10844
10845	    xor0 = lo[0];
10846	    if (lo[1] != const0_rtx)
10847	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10848				   NULL_RTX, 0, OPTAB_WIDEN);
10849
10850	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
10851				NULL_RTX, 0, OPTAB_WIDEN);
10852
10853	    ix86_compare_op0 = tmp;
10854	    ix86_compare_op1 = const0_rtx;
10855	    ix86_expand_branch (code, label);
10856	    return;
10857	  }
10858
	/* Otherwise, if we are doing a less-than or greater-or-equal-than
	   comparison, op1 is a constant, and the low word is zero, then
	   we can just examine the high word.  */
10862
10863	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10864	  switch (code)
10865	    {
10866	    case LT: case LTU: case GE: case GEU:
10867	      ix86_compare_op0 = hi[0];
10868	      ix86_compare_op1 = hi[1];
10869	      ix86_expand_branch (code, label);
10870	      return;
10871	    default:
10872	      break;
10873	    }
10874
10875	/* Otherwise, we need two or three jumps.  */
10876
10877	label2 = gen_label_rtx ();
10878
10879	code1 = code;
10880	code2 = swap_condition (code);
10881	code3 = unsigned_condition (code);
10882
10883	switch (code)
10884	  {
10885	  case LT: case GT: case LTU: case GTU:
10886	    break;
10887
10888	  case LE:   code1 = LT;  code2 = GT;  break;
10889	  case GE:   code1 = GT;  code2 = LT;  break;
10890	  case LEU:  code1 = LTU; code2 = GTU; break;
10891	  case GEU:  code1 = GTU; code2 = LTU; break;
10892
10893	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
10894	  case NE:   code2 = UNKNOWN; break;
10895
10896	  default:
10897	    gcc_unreachable ();
10898	  }
10899
10900	/*
10901	 * a < b =>
10902	 *    if (hi(a) < hi(b)) goto true;
10903	 *    if (hi(a) > hi(b)) goto false;
10904	 *    if (lo(a) < lo(b)) goto true;
10905	 *  false:
10906	 */
10907
10908	ix86_compare_op0 = hi[0];
10909	ix86_compare_op1 = hi[1];
10910
10911	if (code1 != UNKNOWN)
10912	  ix86_expand_branch (code1, label);
10913	if (code2 != UNKNOWN)
10914	  ix86_expand_branch (code2, label2);
10915
10916	ix86_compare_op0 = lo[0];
10917	ix86_compare_op1 = lo[1];
10918	ix86_expand_branch (code3, label);
10919
10920	if (code2 != UNKNOWN)
10921	  emit_label (label2);
10922	return;
10923      }
10924
10925    default:
10926      gcc_unreachable ();
10927    }
10928}
10929
10930/* Split branch based on floating point condition.  */
10931void
10932ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10933		      rtx target1, rtx target2, rtx tmp, rtx pushed)
10934{
10935  rtx second, bypass;
10936  rtx label = NULL_RTX;
10937  rtx condition;
10938  int bypass_probability = -1, second_probability = -1, probability = -1;
10939  rtx i;
10940
10941  if (target2 != pc_rtx)
10942    {
10943      rtx tmp = target2;
10944      code = reverse_condition_maybe_unordered (code);
10945      target2 = target1;
10946      target1 = tmp;
10947    }
10948
10949  condition = ix86_expand_fp_compare (code, op1, op2,
10950				      tmp, &second, &bypass);
10951
10952  /* Remove pushed operand from stack.  */
10953  if (pushed)
10954    ix86_free_from_memory (GET_MODE (pushed));
10955
10956  if (split_branch_probability >= 0)
10957    {
      /* Distribute the probabilities across the jumps.
	 Assume that BYPASS and SECOND always test
	 for UNORDERED.  */
10961      probability = split_branch_probability;
10962
      /* A value of 1 is low enough that the probability does not
	 need to be updated.  Later we may run some experiments and
	 see whether unordered values are more frequent in practice.  */
10966      if (bypass)
10967	bypass_probability = 1;
10968      if (second)
10969	second_probability = 1;
10970    }
10971  if (bypass != NULL_RTX)
10972    {
10973      label = gen_label_rtx ();
10974      i = emit_jump_insn (gen_rtx_SET
10975			  (VOIDmode, pc_rtx,
10976			   gen_rtx_IF_THEN_ELSE (VOIDmode,
10977						 bypass,
10978						 gen_rtx_LABEL_REF (VOIDmode,
10979								    label),
10980						 pc_rtx)));
10981      if (bypass_probability >= 0)
10982	REG_NOTES (i)
10983	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
10984			       GEN_INT (bypass_probability),
10985			       REG_NOTES (i));
10986    }
10987  i = emit_jump_insn (gen_rtx_SET
10988		      (VOIDmode, pc_rtx,
10989		       gen_rtx_IF_THEN_ELSE (VOIDmode,
10990					     condition, target1, target2)));
10991  if (probability >= 0)
10992    REG_NOTES (i)
10993      = gen_rtx_EXPR_LIST (REG_BR_PROB,
10994			   GEN_INT (probability),
10995			   REG_NOTES (i));
10996  if (second != NULL_RTX)
10997    {
10998      i = emit_jump_insn (gen_rtx_SET
10999			  (VOIDmode, pc_rtx,
11000			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11001						 target2)));
11002      if (second_probability >= 0)
11003	REG_NOTES (i)
11004	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
11005			       GEN_INT (second_probability),
11006			       REG_NOTES (i));
11007    }
11008  if (label != NULL_RTX)
11009    emit_label (label);
11010}
11011
11012int
11013ix86_expand_setcc (enum rtx_code code, rtx dest)
11014{
11015  rtx ret, tmp, tmpreg, equiv;
11016  rtx second_test, bypass_test;
11017
11018  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11019    return 0; /* FAIL */
11020
11021  gcc_assert (GET_MODE (dest) == QImode);
11022
11023  ret = ix86_expand_compare (code, &second_test, &bypass_test);
11024  PUT_MODE (ret, QImode);
11025
11026  tmp = dest;
11027  tmpreg = dest;
11028
11029  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11030  if (bypass_test || second_test)
11031    {
11032      rtx test = second_test;
11033      int bypass = 0;
11034      rtx tmp2 = gen_reg_rtx (QImode);
11035      if (bypass_test)
11036	{
11037	  gcc_assert (!second_test);
11038	  test = bypass_test;
11039	  bypass = 1;
11040	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11041	}
11042      PUT_MODE (test, QImode);
11043      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11044
11045      if (bypass)
11046	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11047      else
11048	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11049    }
11050
11051  /* Attach a REG_EQUAL note describing the comparison result.  */
11052  if (ix86_compare_op0 && ix86_compare_op1)
11053    {
11054      equiv = simplify_gen_relational (code, QImode,
11055				       GET_MODE (ix86_compare_op0),
11056				       ix86_compare_op0, ix86_compare_op1);
11057      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11058    }
11059
11060  return 1; /* DONE */
11061}
11062
/* Expand a comparison setting or clearing the carry flag.  Return true when
   successful and set *POP to the comparison operation.  */
11065static bool
11066ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11067{
11068  enum machine_mode mode =
11069    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11070
  /* Do not handle double-word (DImode/TImode) compares, which go through a
     special path.  FP compares are handled below, but only when they can be
     expressed as carry flag comparisons.  */
11073  if (mode == (TARGET_64BIT ? TImode : DImode))
11074    return false;
11075  if (FLOAT_MODE_P (mode))
11076    {
11077      rtx second_test = NULL, bypass_test = NULL;
11078      rtx compare_op, compare_seq;
11079
      /* Shortcut:  the following common codes never translate into carry flag compares.  */
11081      if (code == EQ || code == NE || code == UNEQ || code == LTGT
11082	  || code == ORDERED || code == UNORDERED)
11083	return false;
11084
      /* These comparisons require the zero flag; swap the operands so that they no longer do.  */
11086      if ((code == GT || code == UNLE || code == LE || code == UNGT)
11087	  && !TARGET_IEEE_FP)
11088	{
11089	  rtx tmp = op0;
11090	  op0 = op1;
11091	  op1 = tmp;
11092	  code = swap_condition (code);
11093	}
11094
      /* Try to expand the comparison and verify that we end up with a carry flag
	 based comparison.  This fails to be true only when we decide to expand the
	 comparison using arithmetic, which is not a common scenario.  */
11098      start_sequence ();
11099      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11100					   &second_test, &bypass_test);
11101      compare_seq = get_insns ();
11102      end_sequence ();
11103
11104      if (second_test || bypass_test)
11105	return false;
11106      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11107	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11108        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11109      else
11110	code = GET_CODE (compare_op);
11111      if (code != LTU && code != GEU)
11112	return false;
11113      emit_insn (compare_seq);
11114      *pop = compare_op;
11115      return true;
11116    }
11117  if (!INTEGRAL_MODE_P (mode))
11118    return false;
11119  switch (code)
11120    {
11121    case LTU:
11122    case GEU:
11123      break;
11124
11125    /* Convert a==0 into (unsigned)a<1.  */
11126    case EQ:
11127    case NE:
11128      if (op1 != const0_rtx)
11129	return false;
11130      op1 = const1_rtx;
11131      code = (code == EQ ? LTU : GEU);
11132      break;
11133
    /* Convert a>b into b<a or a>=b+1.  */
11135    case GTU:
11136    case LEU:
11137      if (GET_CODE (op1) == CONST_INT)
11138	{
11139	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We could still swap the operands, but that
	     would force loading the constant into a register.  */
11142	  if (op1 == const0_rtx
11143	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11144	    return false;
11145	  code = (code == GTU ? GEU : LTU);
11146	}
11147      else
11148	{
11149	  rtx tmp = op1;
11150	  op1 = op0;
11151	  op0 = tmp;
11152	  code = (code == GTU ? LTU : GEU);
11153	}
11154      break;
11155
11156    /* Convert a>=0 into (unsigned)a<0x80000000.  */
11157    case LT:
11158    case GE:
11159      if (mode == DImode || op1 != const0_rtx)
11160	return false;
11161      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11162      code = (code == LT ? GEU : LTU);
11163      break;
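    /* Convert a<=-1 into (unsigned)a>=0x80000000.  */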
11164    case LE:
11165    case GT:
11166      if (mode == DImode || op1 != constm1_rtx)
11167	return false;
11168      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11169      code = (code == LE ? GEU : LTU);
11170      break;
11171
11172    default:
11173      return false;
11174    }
  /* Swapping the operands may cause a constant to appear as the first operand.  */
11176  if (!nonimmediate_operand (op0, VOIDmode))
11177    {
11178      if (no_new_pseudos)
11179	return false;
11180      op0 = force_reg (mode, op0);
11181    }
11182  ix86_compare_op0 = op0;
11183  ix86_compare_op1 = op1;
11184  *pop = ix86_expand_compare (code, NULL, NULL);
11185  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11186  return true;
11187}
11188
11189int
11190ix86_expand_int_movcc (rtx operands[])
11191{
11192  enum rtx_code code = GET_CODE (operands[1]), compare_code;
11193  rtx compare_seq, compare_op;
11194  rtx second_test, bypass_test;
11195  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
11197
11198  start_sequence ();
11199  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11200  compare_seq = get_insns ();
11201  end_sequence ();
11202
11203  compare_code = GET_CODE (compare_op);
11204
11205  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11206      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11207    sign_bit_compare_p = true;
11208
11209  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11210     HImode insns, we'd be swallowed in word prefix ops.  */
11211
11212  if ((mode != HImode || TARGET_FAST_PREFIX)
11213      && (mode != (TARGET_64BIT ? TImode : DImode))
11214      && GET_CODE (operands[2]) == CONST_INT
11215      && GET_CODE (operands[3]) == CONST_INT)
11216    {
11217      rtx out = operands[0];
11218      HOST_WIDE_INT ct = INTVAL (operands[2]);
11219      HOST_WIDE_INT cf = INTVAL (operands[3]);
11220      HOST_WIDE_INT diff;
11221
11222      diff = ct - cf;
      /* Sign bit compares are better done using shifts than by using
	 sbb.  */
11225      if (sign_bit_compare_p
11226	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11227					     ix86_compare_op1, &compare_op))
11228	{
11229	  /* Detect overlap between destination and compare sources.  */
11230	  rtx tmp = out;
11231
11232          if (!sign_bit_compare_p)
11233	    {
11234	      bool fpcmp = false;
11235
11236	      compare_code = GET_CODE (compare_op);
11237
11238	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11239		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11240		{
11241		  fpcmp = true;
11242		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
11243		}
11244
	      /* To simplify the rest of the code, restrict to the GEU case.  */
11246	      if (compare_code == LTU)
11247		{
11248		  HOST_WIDE_INT tmp = ct;
11249		  ct = cf;
11250		  cf = tmp;
11251		  compare_code = reverse_condition (compare_code);
11252		  code = reverse_condition (code);
11253		}
11254	      else
11255		{
11256		  if (fpcmp)
11257		    PUT_CODE (compare_op,
11258			      reverse_condition_maybe_unordered
11259			        (GET_CODE (compare_op)));
11260		  else
11261		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11262		}
11263	      diff = ct - cf;
11264
11265	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11266		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
11267		tmp = gen_reg_rtx (mode);
11268
11269	      if (mode == DImode)
11270		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11271	      else
11272		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11273	    }
11274	  else
11275	    {
11276	      if (code == GT || code == GE)
11277		code = reverse_condition (code);
11278	      else
11279		{
11280		  HOST_WIDE_INT tmp = ct;
11281		  ct = cf;
11282		  cf = tmp;
11283		  diff = ct - cf;
11284		}
11285	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11286				     ix86_compare_op1, VOIDmode, 0, -1);
11287	    }
11288
11289	  if (diff == 1)
11290	    {
11291	      /*
11292	       * cmpl op0,op1
11293	       * sbbl dest,dest
11294	       * [addl dest, ct]
11295	       *
11296	       * Size 5 - 8.
11297	       */
11298	      if (ct)
11299		tmp = expand_simple_binop (mode, PLUS,
11300					   tmp, GEN_INT (ct),
11301					   copy_rtx (tmp), 1, OPTAB_DIRECT);
11302	    }
11303	  else if (cf == -1)
11304	    {
11305	      /*
11306	       * cmpl op0,op1
11307	       * sbbl dest,dest
11308	       * orl $ct, dest
11309	       *
11310	       * Size 8.
11311	       */
11312	      tmp = expand_simple_binop (mode, IOR,
11313					 tmp, GEN_INT (ct),
11314					 copy_rtx (tmp), 1, OPTAB_DIRECT);
11315	    }
11316	  else if (diff == -1 && ct)
11317	    {
11318	      /*
11319	       * cmpl op0,op1
11320	       * sbbl dest,dest
11321	       * notl dest
11322	       * [addl dest, cf]
11323	       *
11324	       * Size 8 - 11.
11325	       */
11326	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11327	      if (cf)
11328		tmp = expand_simple_binop (mode, PLUS,
11329					   copy_rtx (tmp), GEN_INT (cf),
11330					   copy_rtx (tmp), 1, OPTAB_DIRECT);
11331	    }
11332	  else
11333	    {
11334	      /*
11335	       * cmpl op0,op1
11336	       * sbbl dest,dest
11337	       * [notl dest]
11338	       * andl cf - ct, dest
11339	       * [addl dest, ct]
11340	       *
11341	       * Size 8 - 11.
11342	       */
11343
11344	      if (cf == 0)
11345		{
11346		  cf = ct;
11347		  ct = 0;
11348		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11349		}
11350
11351	      tmp = expand_simple_binop (mode, AND,
11352					 copy_rtx (tmp),
11353					 gen_int_mode (cf - ct, mode),
11354					 copy_rtx (tmp), 1, OPTAB_DIRECT);
11355	      if (ct)
11356		tmp = expand_simple_binop (mode, PLUS,
11357					   copy_rtx (tmp), GEN_INT (ct),
11358					   copy_rtx (tmp), 1, OPTAB_DIRECT);
11359	    }
11360
11361	  if (!rtx_equal_p (tmp, out))
11362	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11363
11364	  return 1; /* DONE */
11365	}
11366
11367      if (diff < 0)
11368	{
11369	  HOST_WIDE_INT tmp;
11370	  tmp = ct, ct = cf, cf = tmp;
11371	  diff = -diff;
11372	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11373	    {
	      /* We may be reversing an unordered compare to a normal compare,
		 which is not valid in general (we may convert a non-trapping
		 condition into a trapping one); however, on i386 we currently
		 emit all comparisons unordered.  */
11378	      compare_code = reverse_condition_maybe_unordered (compare_code);
11379	      code = reverse_condition_maybe_unordered (code);
11380	    }
11381	  else
11382	    {
11383	      compare_code = reverse_condition (compare_code);
11384	      code = reverse_condition (code);
11385	    }
11386	}
11387
11388      compare_code = UNKNOWN;
11389      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11390	  && GET_CODE (ix86_compare_op1) == CONST_INT)
11391	{
11392	  if (ix86_compare_op1 == const0_rtx
11393	      && (code == LT || code == GE))
11394	    compare_code = code;
11395	  else if (ix86_compare_op1 == constm1_rtx)
11396	    {
11397	      if (code == LE)
11398		compare_code = LT;
11399	      else if (code == GT)
11400		compare_code = GE;
11401	    }
11402	}
11403
11404      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
11405      if (compare_code != UNKNOWN
11406	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11407	  && (cf == -1 || ct == -1))
11408	{
	  /* If the lea code below could be used, only optimize
	     if it results in a 2-insn sequence.  */
11411
11412	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11413		 || diff == 3 || diff == 5 || diff == 9)
11414	      || (compare_code == LT && ct == -1)
11415	      || (compare_code == GE && cf == -1))
11416	    {
11417	      /*
11418	       * notl op1	(if necessary)
11419	       * sarl $31, op1
11420	       * orl cf, op1
11421	       */
11422	      if (ct != -1)
11423		{
11424		  cf = ct;
11425		  ct = -1;
11426		  code = reverse_condition (code);
11427		}
11428
11429	      out = emit_store_flag (out, code, ix86_compare_op0,
11430				     ix86_compare_op1, VOIDmode, 0, -1);
11431
11432	      out = expand_simple_binop (mode, IOR,
11433					 out, GEN_INT (cf),
11434					 out, 1, OPTAB_DIRECT);
11435	      if (out != operands[0])
11436		emit_move_insn (operands[0], out);
11437
11438	      return 1; /* DONE */
11439	    }
11440	}
11441
11442
11443      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11444	   || diff == 3 || diff == 5 || diff == 9)
11445	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11446	  && (mode != DImode
11447	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11448	{
11449	  /*
11450	   * xorl dest,dest
11451	   * cmpl op1,op2
11452	   * setcc dest
11453	   * lea cf(dest*(ct-cf)),dest
11454	   *
11455	   * Size 14.
11456	   *
11457	   * This also catches the degenerate setcc-only case.
11458	   */
11459
11460	  rtx tmp;
11461	  int nops;
11462
11463	  out = emit_store_flag (out, code, ix86_compare_op0,
11464				 ix86_compare_op1, VOIDmode, 0, 1);
11465
11466	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get the arithmetic done in the proper mode to match.  */
11469	  if (diff == 1)
11470	    tmp = copy_rtx (out);
11471	  else
11472	    {
11473	      rtx out1;
11474	      out1 = copy_rtx (out);
11475	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11476	      nops++;
11477	      if (diff & 1)
11478		{
11479		  tmp = gen_rtx_PLUS (mode, tmp, out1);
11480		  nops++;
11481		}
11482	    }
11483	  if (cf != 0)
11484	    {
11485	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11486	      nops++;
11487	    }
11488	  if (!rtx_equal_p (tmp, out))
11489	    {
11490	      if (nops == 1)
11491		out = force_operand (tmp, copy_rtx (out));
11492	      else
11493		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11494	    }
11495	  if (!rtx_equal_p (out, operands[0]))
11496	    emit_move_insn (operands[0], copy_rtx (out));
11497
11498	  return 1; /* DONE */
11499	}
11500
11501      /*
11502       * General case:			Jumpful:
11503       *   xorl dest,dest		cmpl op1, op2
11504       *   cmpl op1, op2		movl ct, dest
11505       *   setcc dest			jcc 1f
11506       *   decl dest			movl cf, dest
11507       *   andl (cf-ct),dest		1:
11508       *   addl ct,dest
11509       *
11510       * Size 20.			Size 14.
11511       *
11512       * This is reasonably steep, but branch mispredict costs are
11513       * high on modern cpus, so consider failing only if optimizing
11514       * for space.
11515       */
11516
11517      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11518	  && BRANCH_COST >= 2)
11519	{
11520	  if (cf == 0)
11521	    {
11522	      cf = ct;
11523	      ct = 0;
11524	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		/* We may be reversing an unordered compare to a normal compare,
		   which is not valid in general (we may convert a non-trapping
		   condition into a trapping one); however, on i386 we
		   currently emit all comparisons unordered.  */
11529		code = reverse_condition_maybe_unordered (code);
11530	      else
11531		{
11532		  code = reverse_condition (code);
11533		  if (compare_code != UNKNOWN)
11534		    compare_code = reverse_condition (compare_code);
11535		}
11536	    }
11537
11538	  if (compare_code != UNKNOWN)
11539	    {
11540	      /* notl op1	(if needed)
11541		 sarl $31, op1
11542		 andl (cf-ct), op1
11543		 addl ct, op1
11544
11545		 For x < 0 (resp. x <= -1) there will be no notl,
11546		 so if possible swap the constants to get rid of the
11547		 complement.
11548		 True/false will be -1/0 while code below (store flag
11549		 followed by decrement) is 0/-1, so the constants need
11550		 to be exchanged once more.  */
11551
11552	      if (compare_code == GE || !cf)
11553		{
11554		  code = reverse_condition (code);
11555		  compare_code = LT;
11556		}
11557	      else
11558		{
11559		  HOST_WIDE_INT tmp = cf;
11560		  cf = ct;
11561		  ct = tmp;
11562		}
11563
11564	      out = emit_store_flag (out, code, ix86_compare_op0,
11565				     ix86_compare_op1, VOIDmode, 0, -1);
11566	    }
11567	  else
11568	    {
11569	      out = emit_store_flag (out, code, ix86_compare_op0,
11570				     ix86_compare_op1, VOIDmode, 0, 1);
11571
11572	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11573					 copy_rtx (out), 1, OPTAB_DIRECT);
11574	    }
11575
11576	  out = expand_simple_binop (mode, AND, copy_rtx (out),
11577				     gen_int_mode (cf - ct, mode),
11578				     copy_rtx (out), 1, OPTAB_DIRECT);
11579	  if (ct)
11580	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11581				       copy_rtx (out), 1, OPTAB_DIRECT);
11582	  if (!rtx_equal_p (out, operands[0]))
11583	    emit_move_insn (operands[0], copy_rtx (out));
11584
11585	  return 1; /* DONE */
11586	}
11587    }
11588
11589  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11590    {
11591      /* Try a few things more with specific constants and a variable.  */
11592
11593      optab op;
11594      rtx var, orig_out, out, tmp;
11595
11596      if (BRANCH_COST <= 2)
11597	return 0; /* FAIL */
11598
      /* If one of the two operands is an interesting constant, load that
	 constant via a recursive call and mask in the variable with a
	 logical operation.  */
11601
11602      if (GET_CODE (operands[2]) == CONST_INT)
11603	{
11604	  var = operands[3];
11605	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11606	    operands[3] = constm1_rtx, op = and_optab;
11607	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11608	    operands[3] = const0_rtx, op = ior_optab;
11609	  else
11610	    return 0; /* FAIL */
11611	}
11612      else if (GET_CODE (operands[3]) == CONST_INT)
11613	{
11614	  var = operands[2];
11615	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11616	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11618	    operands[2] = const0_rtx, op = ior_optab;
11619	  else
11620	    return 0; /* FAIL */
11621	}
11622      else
11623        return 0; /* FAIL */
11624
11625      orig_out = operands[0];
11626      tmp = gen_reg_rtx (mode);
11627      operands[0] = tmp;
11628
11629      /* Recurse to get the constant loaded.  */
11630      if (ix86_expand_int_movcc (operands) == 0)
11631        return 0; /* FAIL */
11632
11633      /* Mask in the interesting variable.  */
11634      out = expand_binop (mode, op, var, tmp, orig_out, 0,
11635			  OPTAB_WIDEN);
11636      if (!rtx_equal_p (out, orig_out))
11637	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11638
11639      return 1; /* DONE */
11640    }
11641
11642  /*
11643   * For comparison with above,
11644   *
11645   * movl cf,dest
11646   * movl ct,tmp
11647   * cmpl op1,op2
11648   * cmovcc tmp,dest
11649   *
11650   * Size 15.
11651   */
11652
11653  if (! nonimmediate_operand (operands[2], mode))
11654    operands[2] = force_reg (mode, operands[2]);
11655  if (! nonimmediate_operand (operands[3], mode))
11656    operands[3] = force_reg (mode, operands[3]);
11657
11658  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11659    {
11660      rtx tmp = gen_reg_rtx (mode);
11661      emit_move_insn (tmp, operands[3]);
11662      operands[3] = tmp;
11663    }
11664  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11665    {
11666      rtx tmp = gen_reg_rtx (mode);
11667      emit_move_insn (tmp, operands[2]);
11668      operands[2] = tmp;
11669    }
11670
11671  if (! register_operand (operands[2], VOIDmode)
11672      && (mode == QImode
11673          || ! register_operand (operands[3], VOIDmode)))
11674    operands[2] = force_reg (mode, operands[2]);
11675
11676  if (mode == QImode
11677      && ! register_operand (operands[3], VOIDmode))
11678    operands[3] = force_reg (mode, operands[3]);
11679
11680  emit_insn (compare_seq);
11681  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11682			  gen_rtx_IF_THEN_ELSE (mode,
11683						compare_op, operands[2],
11684						operands[3])));
11685  if (bypass_test)
11686    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11687			    gen_rtx_IF_THEN_ELSE (mode,
11688				  bypass_test,
11689				  copy_rtx (operands[3]),
11690				  copy_rtx (operands[0]))));
11691  if (second_test)
11692    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11693			    gen_rtx_IF_THEN_ELSE (mode,
11694				  second_test,
11695				  copy_rtx (operands[2]),
11696				  copy_rtx (operands[0]))));
11697
11698  return 1; /* DONE */
11699}
11700
11701/* Swap, force into registers, or otherwise massage the two operands
11702   to an sse comparison with a mask result.  Thus we differ a bit from
11703   ix86_prepare_fp_compare_args which expects to produce a flags result.
11704
11705   The DEST operand exists to help determine whether to commute commutative
11706   operators.  The POP0/POP1 operands are updated in place.  The new
11707   comparison code is returned, or UNKNOWN if not implementable.  */
11708
11709static enum rtx_code
11710ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11711				  rtx *pop0, rtx *pop1)
11712{
11713  rtx tmp;
11714
11715  switch (code)
11716    {
11717    case LTGT:
11718    case UNEQ:
11719      /* We have no LTGT as an operator.  We could implement it with
11720	 NE & ORDERED, but this requires an extra temporary.  It's
11721	 not clear that it's worth it.  */
11722      return UNKNOWN;
11723
11724    case LT:
11725    case LE:
11726    case UNGT:
11727    case UNGE:
11728      /* These are supported directly.  */
11729      break;
11730
11731    case EQ:
11732    case NE:
11733    case UNORDERED:
11734    case ORDERED:
11735      /* For commutative operators, try to canonicalize the destination
11736	 operand to be first in the comparison - this helps reload to
11737	 avoid extra moves.  */
11738      if (!dest || !rtx_equal_p (dest, *pop1))
11739	break;
11740      /* FALLTHRU */
11741
11742    case GE:
11743    case GT:
11744    case UNLE:
11745    case UNLT:
11746      /* These are not supported directly.  Swap the comparison operands
11747	 to transform into something that is supported.  */
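      /* For example, (GE a b) is rewritten as (LE b a), which the
	 supported cases above do handle.  */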
11748      tmp = *pop0;
11749      *pop0 = *pop1;
11750      *pop1 = tmp;
11751      code = swap_condition (code);
11752      break;
11753
11754    default:
11755      gcc_unreachable ();
11756    }
11757
11758  return code;
11759}
11760
11761/* Detect conditional moves that exactly match min/max operational
11762   semantics.  Note that this is IEEE safe, as long as we don't
11763   interchange the operands.
11764
11765   Returns FALSE if this conditional move doesn't match a MIN/MAX,
11766   and TRUE if the operation is successful and instructions are emitted.  */
11767
11768static bool
11769ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11770			   rtx cmp_op1, rtx if_true, rtx if_false)
11771{
11772  enum machine_mode mode;
11773  bool is_min;
11774  rtx tmp;
11775
11776  if (code == LT)
11777    ;
11778  else if (code == UNGE)
11779    {
11780      tmp = if_true;
11781      if_true = if_false;
11782      if_false = tmp;
11783    }
11784  else
11785    return false;
11786
11787  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11788    is_min = true;
11789  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11790    is_min = false;
11791  else
11792    return false;
11793
11794  mode = GET_MODE (dest);
11795
11796  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11797     but MODE may be a vector mode and thus not appropriate.  */
11798  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11799    {
11800      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11801      rtvec v;
11802
11803      if_true = force_reg (mode, if_true);
11804      v = gen_rtvec (2, if_true, if_false);
11805      tmp = gen_rtx_UNSPEC (mode, v, u);
11806    }
11807  else
11808    {
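      /* NaNs and signed zeros can be ignored here (the flag tests above
	 stand in for HONOR_NANS and HONOR_SIGNED_ZEROS), so the plain
	 SMIN/SMAX rtl is safe and expands to the SSE min/max patterns.  */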
11809      code = is_min ? SMIN : SMAX;
11810      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11811    }
11812
11813  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11814  return true;
11815}
11816
11817/* Expand an sse vector comparison.  Return the register with the result.  */
11818
11819static rtx
11820ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11821		     rtx op_true, rtx op_false)
11822{
11823  enum machine_mode mode = GET_MODE (dest);
11824  rtx x;
11825
11826  cmp_op0 = force_reg (mode, cmp_op0);
11827  if (!nonimmediate_operand (cmp_op1, mode))
11828    cmp_op1 = force_reg (mode, cmp_op1);
11829
11830  if (optimize
11831      || reg_overlap_mentioned_p (dest, op_true)
11832      || reg_overlap_mentioned_p (dest, op_false))
11833    dest = gen_reg_rtx (mode);
11834
11835  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11836  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11837
11838  return dest;
11839}
11840
11841/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11842   operations.  This is used for both scalar and vector conditional moves.  */
11843
11844static void
11845ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11846{
11847  enum machine_mode mode = GET_MODE (dest);
11848  rtx t2, t3, x;
11849
11850  if (op_false == CONST0_RTX (mode))
11851    {
11852      op_true = force_reg (mode, op_true);
11853      x = gen_rtx_AND (mode, cmp, op_true);
11854      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11855    }
11856  else if (op_true == CONST0_RTX (mode))
11857    {
11858      op_false = force_reg (mode, op_false);
11859      x = gen_rtx_NOT (mode, cmp);
11860      x = gen_rtx_AND (mode, x, op_false);
11861      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11862    }
11863  else
11864    {
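      /* General case: compute dest = (op_true & cmp) | (op_false & ~cmp)
	 using two temporaries; for SSE float modes this typically expands
	 to an andps/andnps/orps style sequence.  */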
11865      op_true = force_reg (mode, op_true);
11866      op_false = force_reg (mode, op_false);
11867
11868      t2 = gen_reg_rtx (mode);
11869      if (optimize)
11870	t3 = gen_reg_rtx (mode);
11871      else
11872	t3 = dest;
11873
11874      x = gen_rtx_AND (mode, op_true, cmp);
11875      emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11876
11877      x = gen_rtx_NOT (mode, cmp);
11878      x = gen_rtx_AND (mode, x, op_false);
11879      emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11880
11881      x = gen_rtx_IOR (mode, t3, t2);
11882      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11883    }
11884}
11885
11886/* Expand a floating-point conditional move.  Return true if successful.  */
11887
11888int
11889ix86_expand_fp_movcc (rtx operands[])
11890{
11891  enum machine_mode mode = GET_MODE (operands[0]);
11892  enum rtx_code code = GET_CODE (operands[1]);
11893  rtx tmp, compare_op, second_test, bypass_test;
11894
11895  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11896    {
11897      enum machine_mode cmode;
11898
11899      /* Since we've no cmove for sse registers, don't force bad register
11900	 allocation just to gain access to it.  Deny movcc when the
11901	 comparison mode doesn't match the move mode.  */
11902      cmode = GET_MODE (ix86_compare_op0);
11903      if (cmode == VOIDmode)
11904	cmode = GET_MODE (ix86_compare_op1);
11905      if (cmode != mode)
11906	return 0;
11907
11908      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11909					       &ix86_compare_op0,
11910					       &ix86_compare_op1);
11911      if (code == UNKNOWN)
11912	return 0;
11913
11914      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11915				     ix86_compare_op1, operands[2],
11916				     operands[3]))
11917	return 1;
11918
11919      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11920				 ix86_compare_op1, operands[2], operands[3]);
11921      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11922      return 1;
11923    }
11924
11925  /* The floating point conditional move instructions don't directly
11926     support conditions resulting from a signed integer comparison.  */
11927
11928  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11929
11933  if (!fcmov_comparison_operator (compare_op, VOIDmode))
11934    {
11935      gcc_assert (!second_test && !bypass_test);
11936      tmp = gen_reg_rtx (QImode);
11937      ix86_expand_setcc (code, tmp);
11938      code = NE;
11939      ix86_compare_op0 = tmp;
11940      ix86_compare_op1 = const0_rtx;
11941      compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
11942    }
11943  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11944    {
11945      tmp = gen_reg_rtx (mode);
11946      emit_move_insn (tmp, operands[3]);
11947      operands[3] = tmp;
11948    }
11949  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11950    {
11951      tmp = gen_reg_rtx (mode);
11952      emit_move_insn (tmp, operands[2]);
11953      operands[2] = tmp;
11954    }
11955
11956  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11957			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
11958						operands[2], operands[3])));
11959  if (bypass_test)
11960    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11961			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11962						  operands[3], operands[0])));
11963  if (second_test)
11964    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11965			    gen_rtx_IF_THEN_ELSE (mode, second_test,
11966						  operands[2], operands[0])));
11967
11968  return 1;
11969}
11970
11971/* Expand a floating-point vector conditional move; a vcond operation
11972   rather than a movcc operation.  */
11973
11974bool
11975ix86_expand_fp_vcond (rtx operands[])
11976{
11977  enum rtx_code code = GET_CODE (operands[3]);
11978  rtx cmp;
11979
11980  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11981					   &operands[4], &operands[5]);
11982  if (code == UNKNOWN)
11983    return false;
11984
11985  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11986				 operands[5], operands[1], operands[2]))
11987    return true;
11988
11989  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11990			     operands[1], operands[2]);
11991  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11992  return true;
11993}
11994
11995/* Expand a signed integral vector conditional move.  */
11996
11997bool
11998ix86_expand_int_vcond (rtx operands[])
11999{
12000  enum machine_mode mode = GET_MODE (operands[0]);
12001  enum rtx_code code = GET_CODE (operands[3]);
12002  bool negate = false;
12003  rtx x, cop0, cop1;
12004
12005  cop0 = operands[4];
12006  cop1 = operands[5];
12007
12008  /* Canonicalize the comparison to EQ, GT, GTU.  */
12009  switch (code)
12010    {
12011    case EQ:
12012    case GT:
12013    case GTU:
12014      break;
12015
12016    case NE:
12017    case LE:
12018    case LEU:
12019      code = reverse_condition (code);
12020      negate = true;
12021      break;
12022
12023    case GE:
12024    case GEU:
12025      code = reverse_condition (code);
12026      negate = true;
12027      /* FALLTHRU */
12028
12029    case LT:
12030    case LTU:
12031      code = swap_condition (code);
12032      x = cop0, cop0 = cop1, cop1 = x;
12033      break;
12034
12035    default:
12036      gcc_unreachable ();
12037    }
12038
12039  /* Unsigned parallel compare is not supported by the hardware.  Play some
12040     tricks to turn this into a signed comparison against 0.  */
12041  if (code == GTU)
12042    {
12043      cop0 = force_reg (mode, cop0);
12044
12045      switch (mode)
12046	{
12047	case V4SImode:
12048	  {
12049	    rtx t1, t2, mask;
12050
12051	    /* Perform a parallel modulo subtraction.  */
12052	    t1 = gen_reg_rtx (mode);
12053	    emit_insn (gen_subv4si3 (t1, cop0, cop1));
12054
12055	    /* Extract the original sign bit of op0.  */
12056	    mask = GEN_INT (-0x80000000);
12057	    mask = gen_rtx_CONST_VECTOR (mode,
12058			gen_rtvec (4, mask, mask, mask, mask));
12059	    mask = force_reg (mode, mask);
12060	    t2 = gen_reg_rtx (mode);
12061	    emit_insn (gen_andv4si3 (t2, cop0, mask));
12062
12063	    /* XOR it back into the result of the subtraction.  This results
12064	       in the sign bit set iff we saw unsigned underflow.  */
12065	    x = gen_reg_rtx (mode);
12066	    emit_insn (gen_xorv4si3 (x, t1, t2));
12067
12068	    code = GT;
12069	  }
12070	  break;
12071
12072	case V16QImode:
12073	case V8HImode:
12074	  /* Perform a parallel unsigned saturating subtraction.  */
12075	  x = gen_reg_rtx (mode);
12076	  emit_insn (gen_rtx_SET (VOIDmode, x,
12077				  gen_rtx_US_MINUS (mode, cop0, cop1)));
12078
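	  /* The saturating subtraction is zero iff cop0 <= cop1 unsigned,
	     so test the result for equality with zero and invert the sense
	     of the selection below.  */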
12079	  code = EQ;
12080	  negate = !negate;
12081	  break;
12082
12083	default:
12084	  gcc_unreachable ();
12085	}
12086
12087      cop0 = x;
12088      cop1 = CONST0_RTX (mode);
12089    }
12090
12091  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12092			   operands[1+negate], operands[2-negate]);
12093
12094  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12095			 operands[2-negate]);
12096  return true;
12097}
12098
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
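/* For example, "x = y + (a < b)" with unsigned operands can be emitted as

	cmpl	b, a
	adcl	$0, y

   and "x = y - (a < b)" as

	cmpl	b, a
	sbbl	$0, y

   where the compare sets the carry flag exactly when a < b unsigned.  */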
12102int
12103ix86_expand_int_addcc (rtx operands[])
12104{
12105  enum rtx_code code = GET_CODE (operands[1]);
12106  rtx compare_op;
12107  rtx val = const0_rtx;
12108  bool fpcmp = false;
12109  enum machine_mode mode = GET_MODE (operands[0]);
12110
12111  if (operands[3] != const1_rtx
12112      && operands[3] != constm1_rtx)
12113    return 0;
12114  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12115				       ix86_compare_op1, &compare_op))
12116     return 0;
12117  code = GET_CODE (compare_op);
12118
12119  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12120      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12121    {
12122      fpcmp = true;
12123      code = ix86_fp_compare_code_to_integer (code);
12124    }
12125
12126  if (code != LTU)
12127    {
12128      val = constm1_rtx;
12129      if (fpcmp)
12130	PUT_CODE (compare_op,
12131		  reverse_condition_maybe_unordered
12132		    (GET_CODE (compare_op)));
12133      else
12134	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12135    }
12136  PUT_MODE (compare_op, mode);
12137
12138  /* Construct either adc or sbb insn.  */
12139  if ((code == LTU) == (operands[3] == constm1_rtx))
12140    {
12141      switch (GET_MODE (operands[0]))
12142	{
12143	  case QImode:
12144            emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12145	    break;
12146	  case HImode:
12147            emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12148	    break;
12149	  case SImode:
12150            emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12151	    break;
12152	  case DImode:
12153            emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12154	    break;
12155	  default:
12156	    gcc_unreachable ();
12157	}
12158    }
12159  else
12160    {
12161      switch (GET_MODE (operands[0]))
12162	{
12163	  case QImode:
12164            emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12165	    break;
12166	  case HImode:
12167            emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12168	    break;
12169	  case SImode:
12170            emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12171	    break;
12172	  case DImode:
12173            emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12174	    break;
12175	  default:
12176	    gcc_unreachable ();
12177	}
12178    }
12179  return 1; /* DONE */
12180}
12181
12182
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating-point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most three parts are generated.  */
12187
12188static int
12189ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12190{
12191  int size;
12192
12193  if (!TARGET_64BIT)
12194    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12195  else
12196    size = (GET_MODE_SIZE (mode) + 4) / 8;
12197
12198  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
12199  gcc_assert (size >= 2 && size <= 3);
12200
12201  /* Optimize constant pool reference to immediates.  This is used by fp
12202     moves, that force all constants to memory to allow combining.  */
12203  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
12204    {
12205      rtx tmp = maybe_get_pool_constant (operand);
12206      if (tmp)
12207	operand = tmp;
12208    }
12209
12210  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
12211    {
      /* The only non-offsettable memories we handle are pushes.  */
12213      int ok = push_operand (operand, VOIDmode);
12214
12215      gcc_assert (ok);
12216
12217      operand = copy_rtx (operand);
12218      PUT_MODE (operand, Pmode);
12219      parts[0] = parts[1] = parts[2] = operand;
12220      return size;
12221    }
12222
12223  if (GET_CODE (operand) == CONST_VECTOR)
12224    {
12225      enum machine_mode imode = int_mode_for_mode (mode);
12226      /* Caution: if we looked through a constant pool memory above,
12227	 the operand may actually have a different mode now.  That's
12228	 ok, since we want to pun this all the way back to an integer.  */
12229      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12230      gcc_assert (operand != NULL);
12231      mode = imode;
12232    }
12233
12234  if (!TARGET_64BIT)
12235    {
12236      if (mode == DImode)
12237	split_di (&operand, 1, &parts[0], &parts[1]);
12238      else
12239	{
12240	  if (REG_P (operand))
12241	    {
12242	      gcc_assert (reload_completed);
12243	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12244	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12245	      if (size == 3)
12246		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12247	    }
12248	  else if (offsettable_memref_p (operand))
12249	    {
12250	      operand = adjust_address (operand, SImode, 0);
12251	      parts[0] = operand;
12252	      parts[1] = adjust_address (operand, SImode, 4);
12253	      if (size == 3)
12254		parts[2] = adjust_address (operand, SImode, 8);
12255	    }
12256	  else if (GET_CODE (operand) == CONST_DOUBLE)
12257	    {
12258	      REAL_VALUE_TYPE r;
12259	      long l[4];
12260
12261	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12262	      switch (mode)
12263		{
12264		case XFmode:
12265		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12266		  parts[2] = gen_int_mode (l[2], SImode);
12267		  break;
12268		case DFmode:
12269		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12270		  break;
12271		default:
12272		  gcc_unreachable ();
12273		}
12274	      parts[1] = gen_int_mode (l[1], SImode);
12275	      parts[0] = gen_int_mode (l[0], SImode);
12276	    }
12277	  else
12278	    gcc_unreachable ();
12279	}
12280    }
12281  else
12282    {
12283      if (mode == TImode)
12284	split_ti (&operand, 1, &parts[0], &parts[1]);
12285      if (mode == XFmode || mode == TFmode)
12286	{
12287	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12288	  if (REG_P (operand))
12289	    {
12290	      gcc_assert (reload_completed);
12291	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12292	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12293	    }
12294	  else if (offsettable_memref_p (operand))
12295	    {
12296	      operand = adjust_address (operand, DImode, 0);
12297	      parts[0] = operand;
12298	      parts[1] = adjust_address (operand, upper_mode, 8);
12299	    }
12300	  else if (GET_CODE (operand) == CONST_DOUBLE)
12301	    {
12302	      REAL_VALUE_TYPE r;
12303	      long l[4];
12304
12305	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12306	      real_to_target (l, &r, mode);
12307
12308	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
12309	      if (HOST_BITS_PER_WIDE_INT >= 64)
12310	        parts[0]
12311		  = gen_int_mode
12312		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12313		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12314		       DImode);
12315	      else
12316	        parts[0] = immed_double_const (l[0], l[1], DImode);
12317
12318	      if (upper_mode == SImode)
12319	        parts[1] = gen_int_mode (l[2], SImode);
12320	      else if (HOST_BITS_PER_WIDE_INT >= 64)
12321	        parts[1]
12322		  = gen_int_mode
12323		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12324		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12325		       DImode);
12326	      else
12327	        parts[1] = immed_double_const (l[2], l[3], DImode);
12328	    }
12329	  else
12330	    gcc_unreachable ();
12331	}
12332    }
12333
12334  return size;
12335}
12336
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 receive the destination parts and operands 5-7 the
   source parts, in the order in which the parts must be moved.  */
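/* For example, on a 32-bit target a DImode move is split into two SImode
   moves and an XFmode move into three, ordered so that no part of the
   source is overwritten before it has been read.  */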
12341
12342void
12343ix86_split_long_move (rtx operands[])
12344{
12345  rtx part[2][3];
12346  int nparts;
12347  int push = 0;
12348  int collisions = 0;
12349  enum machine_mode mode = GET_MODE (operands[0]);
12350
  /* The DFmode expanders may ask us to move double.
     For a 64-bit target this is a single move.  By hiding that fact
     here we simplify the i386.md splitters.  */
12354  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12355    {
12356      /* Optimize constant pool reference to immediates.  This is used by
12357	 fp moves, that force all constants to memory to allow combining.  */
12358
12359      if (GET_CODE (operands[1]) == MEM
12360	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12361	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12362	operands[1] = get_pool_constant (XEXP (operands[1], 0));
12363      if (push_operand (operands[0], VOIDmode))
12364	{
12365	  operands[0] = copy_rtx (operands[0]);
12366	  PUT_MODE (operands[0], Pmode);
12367	}
12368      else
12369        operands[0] = gen_lowpart (DImode, operands[0]);
12370      operands[1] = gen_lowpart (DImode, operands[1]);
12371      emit_move_insn (operands[0], operands[1]);
12372      return;
12373    }
12374
12375  /* The only non-offsettable memory we handle is push.  */
12376  if (push_operand (operands[0], VOIDmode))
12377    push = 1;
12378  else
12379    gcc_assert (GET_CODE (operands[0]) != MEM
12380		|| offsettable_memref_p (operands[0]));
12381
12382  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12383  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12384
12385  /* When emitting push, take care for source operands on the stack.  */
12386  if (push && GET_CODE (operands[1]) == MEM
12387      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12388    {
12389      if (nparts == 3)
12390	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12391				     XEXP (part[1][2], 0));
12392      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12393				   XEXP (part[1][1], 0));
12394    }
12395
  /* We need to do the copy in the right order in case an address register
     of the source overlaps the destination.  */
12398  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12399    {
12400      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12401	collisions++;
12402      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12403	collisions++;
12404      if (nparts == 3
12405	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12406	collisions++;
12407
12408      /* Collision in the middle part can be handled by reordering.  */
12409      if (collisions == 1 && nparts == 3
12410	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12411	{
12412	  rtx tmp;
12413	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12414	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12415	}
12416
12417      /* If there are more collisions, we can't handle it by reordering.
12418	 Do an lea to the last part and use only one colliding move.  */
12419      else if (collisions > 1)
12420	{
12421	  rtx base;
12422
12423	  collisions = 1;
12424
12425	  base = part[0][nparts - 1];
12426
12427	  /* Handle the case when the last part isn't valid for lea.
12428	     Happens in 64-bit mode storing the 12-byte XFmode.  */
12429	  if (GET_MODE (base) != Pmode)
12430	    base = gen_rtx_REG (Pmode, REGNO (base));
12431
12432	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12433	  part[1][0] = replace_equiv_address (part[1][0], base);
12434	  part[1][1] = replace_equiv_address (part[1][1],
12435				      plus_constant (base, UNITS_PER_WORD));
12436	  if (nparts == 3)
12437	    part[1][2] = replace_equiv_address (part[1][2],
12438				      plus_constant (base, 8));
12439	}
12440    }
12441
12442  if (push)
12443    {
12444      if (!TARGET_64BIT)
12445	{
12446	  if (nparts == 3)
12447	    {
12448	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12449                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12450	      emit_move_insn (part[0][2], part[1][2]);
12451	    }
12452	}
12453      else
12454	{
	  /* In 64-bit mode we don't have a 32-bit push available.  If this
	     is a register, that is OK - we just use the larger counterpart.
	     We also retype memories - these come from the attempt to avoid
	     a REX prefix when moving the second half of a TFmode value.  */
12459	  if (GET_MODE (part[1][1]) == SImode)
12460	    {
12461	      switch (GET_CODE (part[1][1]))
12462		{
12463		case MEM:
12464		  part[1][1] = adjust_address (part[1][1], DImode, 0);
12465		  break;
12466
12467		case REG:
12468		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12469		  break;
12470
12471		default:
12472		  gcc_unreachable ();
12473		}
12474
12475	      if (GET_MODE (part[1][0]) == SImode)
12476		part[1][0] = part[1][1];
12477	    }
12478	}
12479      emit_move_insn (part[0][1], part[1][1]);
12480      emit_move_insn (part[0][0], part[1][0]);
12481      return;
12482    }
12483
12484  /* Choose correct order to not overwrite the source before it is copied.  */
12485  if ((REG_P (part[0][0])
12486       && REG_P (part[1][1])
12487       && (REGNO (part[0][0]) == REGNO (part[1][1])
12488	   || (nparts == 3
12489	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
12490      || (collisions > 0
12491	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12492    {
12493      if (nparts == 3)
12494	{
12495	  operands[2] = part[0][2];
12496	  operands[3] = part[0][1];
12497	  operands[4] = part[0][0];
12498	  operands[5] = part[1][2];
12499	  operands[6] = part[1][1];
12500	  operands[7] = part[1][0];
12501	}
12502      else
12503	{
12504	  operands[2] = part[0][1];
12505	  operands[3] = part[0][0];
12506	  operands[5] = part[1][1];
12507	  operands[6] = part[1][0];
12508	}
12509    }
12510  else
12511    {
12512      if (nparts == 3)
12513	{
12514	  operands[2] = part[0][0];
12515	  operands[3] = part[0][1];
12516	  operands[4] = part[0][2];
12517	  operands[5] = part[1][0];
12518	  operands[6] = part[1][1];
12519	  operands[7] = part[1][2];
12520	}
12521      else
12522	{
12523	  operands[2] = part[0][0];
12524	  operands[3] = part[0][1];
12525	  operands[5] = part[1][0];
12526	  operands[6] = part[1][1];
12527	}
12528    }
12529
12530  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
12531  if (optimize_size)
12532    {
12533      if (GET_CODE (operands[5]) == CONST_INT
12534	  && operands[5] != const0_rtx
12535	  && REG_P (operands[2]))
12536	{
12537	  if (GET_CODE (operands[6]) == CONST_INT
12538	      && INTVAL (operands[6]) == INTVAL (operands[5]))
12539	    operands[6] = operands[2];
12540
12541	  if (nparts == 3
12542	      && GET_CODE (operands[7]) == CONST_INT
12543	      && INTVAL (operands[7]) == INTVAL (operands[5]))
12544	    operands[7] = operands[2];
12545	}
12546
12547      if (nparts == 3
12548	  && GET_CODE (operands[6]) == CONST_INT
12549	  && operands[6] != const0_rtx
12550	  && REG_P (operands[3])
12551	  && GET_CODE (operands[7]) == CONST_INT
12552	  && INTVAL (operands[7]) == INTVAL (operands[6]))
12553	operands[7] = operands[3];
12554    }
12555
12556  emit_move_insn (operands[2], operands[5]);
12557  emit_move_insn (operands[3], operands[6]);
12558  if (nparts == 3)
12559    emit_move_insn (operands[4], operands[7]);
12560
12561  return;
12562}
12563
12564/* Helper function of ix86_split_ashl used to generate an SImode/DImode
12565   left shift by a constant, either using a single shift or
12566   a sequence of add instructions.  */
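/* For example, a left shift by 2 may be emitted as two "add reg, reg"
   instructions when two additions are cheaper than a single
   shift-by-constant for the processor being tuned for.  */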
12567
12568static void
12569ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12570{
12571  if (count == 1)
12572    {
12573      emit_insn ((mode == DImode
12574		  ? gen_addsi3
12575		  : gen_adddi3) (operand, operand, operand));
12576    }
12577  else if (!optimize_size
12578	   && count * ix86_cost->add <= ix86_cost->shift_const)
12579    {
12580      int i;
12581      for (i=0; i<count; i++)
12582	{
12583	  emit_insn ((mode == DImode
12584		      ? gen_addsi3
12585		      : gen_adddi3) (operand, operand, operand));
12586	}
12587    }
12588  else
12589    emit_insn ((mode == DImode
12590		? gen_ashlsi3
12591		: gen_ashldi3) (operand, operand, GEN_INT (count)));
12592}
12593
12594void
12595ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12596{
12597  rtx low[2], high[2];
12598  int count;
12599  const int single_width = mode == DImode ? 32 : 64;
12600
12601  if (GET_CODE (operands[2]) == CONST_INT)
12602    {
12603      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12604      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12605
12606      if (count >= single_width)
12607	{
12608	  emit_move_insn (high[0], low[1]);
12609	  emit_move_insn (low[0], const0_rtx);
12610
12611	  if (count > single_width)
12612	    ix86_expand_ashl_const (high[0], count - single_width, mode);
12613	}
12614      else
12615	{
12616	  if (!rtx_equal_p (operands[0], operands[1]))
12617	    emit_move_insn (operands[0], operands[1]);
12618	  emit_insn ((mode == DImode
12619		     ? gen_x86_shld_1
12620		     : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12621	  ix86_expand_ashl_const (low[0], count, mode);
12622	}
12623      return;
12624    }
12625
12626  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12627
12628  if (operands[1] == const1_rtx)
12629    {
      /* Assuming we've chosen QImode-capable registers, 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
12632      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12633	{
12634	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12635
12636	  ix86_expand_clear (low[0]);
12637	  ix86_expand_clear (high[0]);
12638	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12639
12640	  d = gen_lowpart (QImode, low[0]);
12641	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12642	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
12643	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
12644
12645	  d = gen_lowpart (QImode, high[0]);
12646	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12647	  s = gen_rtx_NE (QImode, flags, const0_rtx);
12648	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
12649	}
12650
12651      /* Otherwise, we can get the same results by manually performing
12652	 a bit extract operation on bit 5/6, and then performing the two
12653	 shifts.  The two methods of getting 0/1 into low/high are exactly
12654	 the same size.  Avoiding the shift in the bit extract case helps
12655	 pentium4 a bit; no one else seems to care much either way.  */
12656      else
12657	{
12658	  rtx x;
12659
12660	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12661	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12662	  else
12663	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12664	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12665
12666	  emit_insn ((mode == DImode
12667		      ? gen_lshrsi3
12668		      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12669	  emit_insn ((mode == DImode
12670		      ? gen_andsi3
12671		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12672	  emit_move_insn (low[0], high[0]);
12673	  emit_insn ((mode == DImode
12674		      ? gen_xorsi3
12675		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12676	}
12677
12678      emit_insn ((mode == DImode
12679		    ? gen_ashlsi3
12680		    : gen_ashldi3) (low[0], low[0], operands[2]));
12681      emit_insn ((mode == DImode
12682		    ? gen_ashlsi3
12683		    : gen_ashldi3) (high[0], high[0], operands[2]));
12684      return;
12685    }
12686
12687  if (operands[1] == constm1_rtx)
12688    {
12689      /* For -1 << N, we can avoid the shld instruction, because we
12690	 know that we're shifting 0...31/63 ones into a -1.  */
12691      emit_move_insn (low[0], constm1_rtx);
12692      if (optimize_size)
12693	emit_move_insn (high[0], low[0]);
12694      else
12695	emit_move_insn (high[0], constm1_rtx);
12696    }
12697  else
12698    {
12699      if (!rtx_equal_p (operands[0], operands[1]))
12700	emit_move_insn (operands[0], operands[1]);
12701
12702      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12703      emit_insn ((mode == DImode
12704		  ? gen_x86_shld_1
12705		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
12706    }
12707
12708  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12709
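  /* The shld/shl pair above only handles shift counts modulo 32 (resp. 64).
     When bit 5 (resp. 6) of the count is set, the adjustment pattern below
     moves LOW into HIGH and clears LOW, using either a cmov pair or a
     conditional jump.  */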
12710  if (TARGET_CMOVE && scratch)
12711    {
12712      ix86_expand_clear (scratch);
12713      emit_insn ((mode == DImode
12714		  ? gen_x86_shift_adj_1
12715		  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12716    }
12717  else
12718    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12719}
12720
12721void
12722ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12723{
12724  rtx low[2], high[2];
12725  int count;
12726  const int single_width = mode == DImode ? 32 : 64;
12727
12728  if (GET_CODE (operands[2]) == CONST_INT)
12729    {
12730      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12731      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12732
12733      if (count == single_width * 2 - 1)
12734	{
12735	  emit_move_insn (high[0], high[1]);
12736	  emit_insn ((mode == DImode
12737		      ? gen_ashrsi3
12738		      : gen_ashrdi3) (high[0], high[0],
12739				      GEN_INT (single_width - 1)));
12740	  emit_move_insn (low[0], high[0]);
12741
12742	}
12743      else if (count >= single_width)
12744	{
12745	  emit_move_insn (low[0], high[1]);
12746	  emit_move_insn (high[0], low[0]);
12747	  emit_insn ((mode == DImode
12748		      ? gen_ashrsi3
12749		      : gen_ashrdi3) (high[0], high[0],
12750				      GEN_INT (single_width - 1)));
12751	  if (count > single_width)
12752	    emit_insn ((mode == DImode
12753			? gen_ashrsi3
12754			: gen_ashrdi3) (low[0], low[0],
12755					GEN_INT (count - single_width)));
12756	}
12757      else
12758	{
12759	  if (!rtx_equal_p (operands[0], operands[1]))
12760	    emit_move_insn (operands[0], operands[1]);
12761	  emit_insn ((mode == DImode
12762		      ? gen_x86_shrd_1
12763		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12764	  emit_insn ((mode == DImode
12765		      ? gen_ashrsi3
12766		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12767	}
12768    }
12769  else
12770    {
12771      if (!rtx_equal_p (operands[0], operands[1]))
12772	emit_move_insn (operands[0], operands[1]);
12773
12774      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12775
12776      emit_insn ((mode == DImode
12777		  ? gen_x86_shrd_1
12778		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12779      emit_insn ((mode == DImode
12780		  ? gen_ashrsi3
12781		  : gen_ashrdi3)  (high[0], high[0], operands[2]));
12782
12783      if (TARGET_CMOVE && scratch)
12784	{
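	  /* When bit 5 (resp. 6) of the count is set, LOW must receive the
	     old HIGH and HIGH must be filled with sign copies; compute the
	     sign mask into SCRATCH and let the adjustment pattern select
	     it.  */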
12785	  emit_move_insn (scratch, high[0]);
12786	  emit_insn ((mode == DImode
12787		      ? gen_ashrsi3
12788		      : gen_ashrdi3) (scratch, scratch,
12789				      GEN_INT (single_width - 1)));
12790	  emit_insn ((mode == DImode
12791		      ? gen_x86_shift_adj_1
12792		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12793					 scratch));
12794	}
12795      else
12796	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12797    }
12798}
12799
12800void
12801ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12802{
12803  rtx low[2], high[2];
12804  int count;
12805  const int single_width = mode == DImode ? 32 : 64;
12806
12807  if (GET_CODE (operands[2]) == CONST_INT)
12808    {
12809      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12810      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12811
12812      if (count >= single_width)
12813	{
12814	  emit_move_insn (low[0], high[1]);
12815	  ix86_expand_clear (high[0]);
12816
12817	  if (count > single_width)
12818	    emit_insn ((mode == DImode
12819			? gen_lshrsi3
12820			: gen_lshrdi3) (low[0], low[0],
12821					GEN_INT (count - single_width)));
12822	}
12823      else
12824	{
12825	  if (!rtx_equal_p (operands[0], operands[1]))
12826	    emit_move_insn (operands[0], operands[1]);
12827	  emit_insn ((mode == DImode
12828		      ? gen_x86_shrd_1
12829		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12830	  emit_insn ((mode == DImode
12831		      ? gen_lshrsi3
12832		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12833	}
12834    }
12835  else
12836    {
12837      if (!rtx_equal_p (operands[0], operands[1]))
12838	emit_move_insn (operands[0], operands[1]);
12839
12840      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12841
12842      emit_insn ((mode == DImode
12843		  ? gen_x86_shrd_1
12844		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12845      emit_insn ((mode == DImode
12846		  ? gen_lshrsi3
12847		  : gen_lshrdi3) (high[0], high[0], operands[2]));
12848
12849      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
12850      if (TARGET_CMOVE && scratch)
12851	{
12852	  ix86_expand_clear (scratch);
12853	  emit_insn ((mode == DImode
12854		      ? gen_x86_shift_adj_1
12855		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12856					       scratch));
12857	}
12858      else
12859	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12860    }
12861}
12862
/* Helper function for the string operations below.  Emit code that tests
   whether the bits of VARIABLE selected by the mask VALUE are zero, and
   return a label that is branched to when they are (i.e. when VARIABLE is
   suitably aligned); the caller places the label after its fix-up code so
   that the fix-up is skipped in the aligned case.  */
12865static rtx
12866ix86_expand_aligntest (rtx variable, int value)
12867{
12868  rtx label = gen_label_rtx ();
12869  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12870  if (GET_MODE (variable) == DImode)
12871    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12872  else
12873    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12874  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12875			   1, label);
12876  return label;
12877}
12878
/* Decrement COUNTREG by VALUE.  */
12880static void
12881ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12882{
12883  if (GET_MODE (countreg) == DImode)
12884    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12885  else
12886    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12887}
12888
12889/* Zero extend possibly SImode EXP to Pmode register.  */
12890rtx
12891ix86_zero_extend_to_Pmode (rtx exp)
12892{
12893  rtx r;
12894  if (GET_MODE (exp) == VOIDmode)
12895    return force_reg (Pmode, exp);
12896  if (GET_MODE (exp) == Pmode)
12897    return copy_to_mode_reg (Pmode, exp);
12898  r = gen_reg_rtx (Pmode);
12899  emit_insn (gen_zero_extendsidi2 (r, exp));
12900  return r;
12901}
12902
12903/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  ix86_expand_clrmem contains similar code.  */
12905int
12906ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12907{
12908  rtx srcreg, destreg, countreg, srcexp, destexp;
12909  enum machine_mode counter_mode;
12910  HOST_WIDE_INT align = 0;
12911  unsigned HOST_WIDE_INT count = 0;
12912
12913  if (GET_CODE (align_exp) == CONST_INT)
12914    align = INTVAL (align_exp);
12915
12916  /* Can't use any of this if the user has appropriated esi or edi.  */
12917  if (global_regs[4] || global_regs[5])
12918    return 0;
12919
12920  /* This simple hack avoids all inlining code and simplifies code below.  */
12921  if (!TARGET_ALIGN_STRINGOPS)
12922    align = 64;
12923
12924  if (GET_CODE (count_exp) == CONST_INT)
12925    {
12926      count = INTVAL (count_exp);
12927      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12928	return 0;
12929    }
12930
12931  /* Figure out proper mode for counter.  For 32bits it is always SImode,
12932     for 64bits use SImode when possible, otherwise DImode.
12933     Set count to number of bytes copied when known at compile time.  */
12934  if (!TARGET_64BIT
12935      || GET_MODE (count_exp) == SImode
12936      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12937    counter_mode = SImode;
12938  else
12939    counter_mode = DImode;
12940
12941  gcc_assert (counter_mode == SImode || counter_mode == DImode);
12942
12943  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12944  if (destreg != XEXP (dst, 0))
12945    dst = replace_equiv_address_nv (dst, destreg);
12946  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12947  if (srcreg != XEXP (src, 0))
12948    src = replace_equiv_address_nv (src, srcreg);
12949
  /* When optimizing for size emit simple rep ; movsb instruction for
     counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
     The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
     count / 4 + (count & 3) moves, while the other sequence is either 4 or
     7 bytes, but we don't know whether the upper 24 (resp. 56) bits of %ecx
     will be known to be zero or not.  The rep; movsb sequence causes higher
     register pressure though, so take that into account.  */
12958
12959  if ((!optimize || optimize_size)
12960      && (count == 0
12961	  || ((count & 0x03)
12962	      && (!optimize_size
12963		  || count > 5 * 4
12964		  || (count & 3) + count / 4 > 6))))
12965    {
12966      emit_insn (gen_cld ());
12967      countreg = ix86_zero_extend_to_Pmode (count_exp);
12968      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12969      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12970      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12971			      destexp, srcexp));
12972    }
12973
12974  /* For constant aligned (or small unaligned) copies use rep movsl
12975     followed by code copying the rest.  For PentiumPro ensure 8 byte
12976     alignment to allow rep movsl acceleration.  */
12977
12978  else if (count != 0
12979	   && (align >= 8
12980	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12981	       || optimize_size || count < (unsigned int) 64))
12982    {
12983      unsigned HOST_WIDE_INT offset = 0;
12984      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12985      rtx srcmem, dstmem;
12986
12987      emit_insn (gen_cld ());
12988      if (count & ~(size - 1))
12989	{
12990	  if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12991	    {
12992	      enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12993
12994	      while (offset < (count & ~(size - 1)))
12995		{
12996		  srcmem = adjust_automodify_address_nv (src, movs_mode,
12997							 srcreg, offset);
12998		  dstmem = adjust_automodify_address_nv (dst, movs_mode,
12999							 destreg, offset);
13000		  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13001		  offset += size;
13002		}
13003	    }
13004	  else
13005	    {
13006	      countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
13007				  & (TARGET_64BIT ? -1 : 0x3fffffff));
13008	      countreg = copy_to_mode_reg (counter_mode, countreg);
13009	      countreg = ix86_zero_extend_to_Pmode (countreg);
13010
13011	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
13012					GEN_INT (size == 4 ? 2 : 3));
13013	      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13014	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13015
13016	      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13017				      countreg, destexp, srcexp));
13018	      offset = count & ~(size - 1);
13019	    }
13020	}
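      /* Copy any remaining tail (at most 7 bytes) with individual
	 SImode/HImode/QImode moves.  */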
13021      if (size == 8 && (count & 0x04))
13022	{
13023	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
13024						 offset);
13025	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
13026						 offset);
13027	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13028	  offset += 4;
13029	}
13030      if (count & 0x02)
13031	{
13032	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
13033						 offset);
13034	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
13035						 offset);
13036	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13037	  offset += 2;
13038	}
13039      if (count & 0x01)
13040	{
13041	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
13042						 offset);
13043	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
13044						 offset);
13045	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13046	}
13047    }
13048  /* The generic code based on the glibc implementation:
13049     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
13050     allowing accelerated copying there)
13051     - copy the data using rep movsl
13052     - copy the rest.  */
13053  else
13054    {
13055      rtx countreg2;
13056      rtx label = NULL;
13057      rtx srcmem, dstmem;
13058      int desired_alignment = (TARGET_PENTIUMPRO
13059			       && (count == 0 || count >= (unsigned int) 260)
13060			       ? 8 : UNITS_PER_WORD);
13061      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
13062      dst = change_address (dst, BLKmode, destreg);
13063      src = change_address (src, BLKmode, srcreg);
13064
      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and results
         in shorter code.

	 Also emit a call when we know that the count is large and the call
	 overhead will not be important.  */
13071      if (!TARGET_INLINE_ALL_STRINGOPS
13072	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13073	return 0;
13074
13075      if (TARGET_SINGLE_STRINGOP)
13076	emit_insn (gen_cld ());
13077
13078      countreg2 = gen_reg_rtx (Pmode);
13079      countreg = copy_to_mode_reg (counter_mode, count_exp);
13080
13081      /* We don't use loops to align destination and to copy parts smaller
13082         than 4 bytes, because gcc is able to optimize such code better (in
13083         the case the destination or the count really is aligned, gcc is often
13084         able to predict the branches) and also it is friendlier to the
13085         hardware branch prediction.
13086
13087         Using loops is beneficial for generic case, because we can
13088         handle small counts using the loops.  Many CPUs (such as Athlon)
13089         have large REP prefix setup costs.
13090
13091         This is quite costly.  Maybe we can revisit this decision later or
13092         add some customizability to this code.  */
13093
13094      if (count == 0 && align < desired_alignment)
13095	{
13096	  label = gen_label_rtx ();
13097	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13098				   LEU, 0, counter_mode, 1, label);
13099	}
13100      if (align <= 1)
13101	{
13102	  rtx label = ix86_expand_aligntest (destreg, 1);
13103	  srcmem = change_address (src, QImode, srcreg);
13104	  dstmem = change_address (dst, QImode, destreg);
13105	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13106	  ix86_adjust_counter (countreg, 1);
13107	  emit_label (label);
13108	  LABEL_NUSES (label) = 1;
13109	}
13110      if (align <= 2)
13111	{
13112	  rtx label = ix86_expand_aligntest (destreg, 2);
13113	  srcmem = change_address (src, HImode, srcreg);
13114	  dstmem = change_address (dst, HImode, destreg);
13115	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13116	  ix86_adjust_counter (countreg, 2);
13117	  emit_label (label);
13118	  LABEL_NUSES (label) = 1;
13119	}
13120      if (align <= 4 && desired_alignment > 4)
13121	{
13122	  rtx label = ix86_expand_aligntest (destreg, 4);
13123	  srcmem = change_address (src, SImode, srcreg);
13124	  dstmem = change_address (dst, SImode, destreg);
13125	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13126	  ix86_adjust_counter (countreg, 4);
13127	  emit_label (label);
13128	  LABEL_NUSES (label) = 1;
13129	}
13130
13131      if (label && desired_alignment > 4 && !TARGET_64BIT)
13132	{
13133	  emit_label (label);
13134	  LABEL_NUSES (label) = 1;
13135	  label = NULL_RTX;
13136	}
13137      if (!TARGET_SINGLE_STRINGOP)
13138	emit_insn (gen_cld ());
13139      if (TARGET_64BIT)
13140	{
13141	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13142				  GEN_INT (3)));
13143	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13144	}
13145      else
13146	{
13147	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13148	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13149	}
13150      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13151      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13152      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13153			      countreg2, destexp, srcexp));
13154
13155      if (label)
13156	{
13157	  emit_label (label);
13158	  LABEL_NUSES (label) = 1;
13159	}
13160      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13161	{
13162	  srcmem = change_address (src, SImode, srcreg);
13163	  dstmem = change_address (dst, SImode, destreg);
13164	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13165	}
13166      if ((align <= 4 || count == 0) && TARGET_64BIT)
13167	{
13168	  rtx label = ix86_expand_aligntest (countreg, 4);
13169	  srcmem = change_address (src, SImode, srcreg);
13170	  dstmem = change_address (dst, SImode, destreg);
13171	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13172	  emit_label (label);
13173	  LABEL_NUSES (label) = 1;
13174	}
13175      if (align > 2 && count != 0 && (count & 2))
13176	{
13177	  srcmem = change_address (src, HImode, srcreg);
13178	  dstmem = change_address (dst, HImode, destreg);
13179	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13180	}
13181      if (align <= 2 || count == 0)
13182	{
13183	  rtx label = ix86_expand_aligntest (countreg, 2);
13184	  srcmem = change_address (src, HImode, srcreg);
13185	  dstmem = change_address (dst, HImode, destreg);
13186	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13187	  emit_label (label);
13188	  LABEL_NUSES (label) = 1;
13189	}
13190      if (align > 1 && count != 0 && (count & 1))
13191	{
13192	  srcmem = change_address (src, QImode, srcreg);
13193	  dstmem = change_address (dst, QImode, destreg);
13194	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13195	}
13196      if (align <= 1 || count == 0)
13197	{
13198	  rtx label = ix86_expand_aligntest (countreg, 1);
13199	  srcmem = change_address (src, QImode, srcreg);
13200	  dstmem = change_address (dst, QImode, destreg);
13201	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13202	  emit_label (label);
13203	  LABEL_NUSES (label) = 1;
13204	}
13205    }
13206
13207  return 1;
13208}
13209
13210/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  ix86_expand_movmem contains similar code.  */
13212int
13213ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
13214{
13215  rtx destreg, zeroreg, countreg, destexp;
13216  enum machine_mode counter_mode;
13217  HOST_WIDE_INT align = 0;
13218  unsigned HOST_WIDE_INT count = 0;
13219
13220  if (GET_CODE (align_exp) == CONST_INT)
13221    align = INTVAL (align_exp);
13222
13223  /* Can't use any of this if the user has appropriated esi.  */
13224  if (global_regs[4])
13225    return 0;
13226
13227  /* This simple hack avoids all inlining code and simplifies code below.  */
13228  if (!TARGET_ALIGN_STRINGOPS)
13229    align = 32;
13230
13231  if (GET_CODE (count_exp) == CONST_INT)
13232    {
13233      count = INTVAL (count_exp);
13234      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13235	return 0;
13236    }
13237  /* Figure out proper mode for counter.  For 32bits it is always SImode,
13238     for 64bits use SImode when possible, otherwise DImode.
13239     Set count to number of bytes copied when known at compile time.  */
13240  if (!TARGET_64BIT
13241      || GET_MODE (count_exp) == SImode
13242      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13243    counter_mode = SImode;
13244  else
13245    counter_mode = DImode;
13246
13247  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13248  if (destreg != XEXP (dst, 0))
13249    dst = replace_equiv_address_nv (dst, destreg);
13250
13251
13252  /* When optimizing for size emit simple rep ; movsb instruction for
13253     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
13254     sequence is 7 bytes long, so if optimizing for size and count is
13255     small enough that some stosl, stosw and stosb instructions without
13256     rep are shorter, fall back into the next if.  */
13257
13258  if ((!optimize || optimize_size)
13259      && (count == 0
13260	  || ((count & 0x03)
13261	      && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13262    {
13263      emit_insn (gen_cld ());
13264
13265      countreg = ix86_zero_extend_to_Pmode (count_exp);
13266      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13267      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13268      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13269    }
13270  else if (count != 0
13271	   && (align >= 8
13272	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13273	       || optimize_size || count < (unsigned int) 64))
13274    {
13275      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13276      unsigned HOST_WIDE_INT offset = 0;
13277
13278      emit_insn (gen_cld ());
13279
13280      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13281      if (count & ~(size - 1))
13282	{
13283	  unsigned HOST_WIDE_INT repcount;
13284	  unsigned int max_nonrep;
13285
13286	  repcount = count >> (size == 4 ? 2 : 3);
13287	  if (!TARGET_64BIT)
13288	    repcount &= 0x3fffffff;
13289
13290	  /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13291	     movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13292	     bytes.  In both cases the latter seems to be faster for small
13293	     values of N.  */
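	  /* For example, with size == 4 up to 7 bare stosl instructions
	     (7 bytes) are no larger than the 7-byte rep form, hence
	     max_nonrep == 7; with size == 8 each stosq carries a REX
	     prefix (2 bytes), so the break-even point drops to 4.  */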
13294	  max_nonrep = size == 4 ? 7 : 4;
13295	  if (!optimize_size)
13296	    switch (ix86_tune)
13297	      {
13298	      case PROCESSOR_PENTIUM4:
13299	      case PROCESSOR_NOCONA:
13300	        max_nonrep = 3;
13301	        break;
13302	      default:
13303	        break;
13304	      }
13305
13306	  if (repcount <= max_nonrep)
13307	    while (repcount-- > 0)
13308	      {
13309		rtx mem = adjust_automodify_address_nv (dst,
13310							GET_MODE (zeroreg),
13311							destreg, offset);
13312		emit_insn (gen_strset (destreg, mem, zeroreg));
13313		offset += size;
13314	      }
13315	  else
13316	    {
13317	      countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13318	      countreg = ix86_zero_extend_to_Pmode (countreg);
13319	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
13320					GEN_INT (size == 4 ? 2 : 3));
13321	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13322	      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13323				       destexp));
13324	      offset = count & ~(size - 1);
13325	    }
13326	}
13327      if (size == 8 && (count & 0x04))
13328	{
13329	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13330						  offset);
13331	  emit_insn (gen_strset (destreg, mem,
13332				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13333	  offset += 4;
13334	}
13335      if (count & 0x02)
13336	{
13337	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13338						  offset);
13339	  emit_insn (gen_strset (destreg, mem,
13340				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13341	  offset += 2;
13342	}
13343      if (count & 0x01)
13344	{
13345	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13346						  offset);
13347	  emit_insn (gen_strset (destreg, mem,
13348				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13349	}
13350    }
13351  else
13352    {
13353      rtx countreg2;
13354      rtx label = NULL;
13355      /* Compute desired alignment of the string operation.  */
13356      int desired_alignment = (TARGET_PENTIUMPRO
13357			       && (count == 0 || count >= (unsigned int) 260)
13358			       ? 8 : UNITS_PER_WORD);
13359
      /* In case we don't know anything about the alignment, default to
         the library version, since it is usually equally fast and results
         in shorter code.

	 Also emit the call when we know that the count is large and call
	 overhead will not be important.  */
13366      if (!TARGET_INLINE_ALL_STRINGOPS
13367	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13368	return 0;
13369
13370      if (TARGET_SINGLE_STRINGOP)
13371	emit_insn (gen_cld ());
13372
13373      countreg2 = gen_reg_rtx (Pmode);
13374      countreg = copy_to_mode_reg (counter_mode, count_exp);
13375      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13376      /* Get rid of MEM_OFFSET, it won't be accurate.  */
13377      dst = change_address (dst, BLKmode, destreg);
13378
13379      if (count == 0 && align < desired_alignment)
13380	{
13381	  label = gen_label_rtx ();
13382	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13383				   LEU, 0, counter_mode, 1, label);
13384	}
13385      if (align <= 1)
13386	{
13387	  rtx label = ix86_expand_aligntest (destreg, 1);
13388	  emit_insn (gen_strset (destreg, dst,
13389				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13390	  ix86_adjust_counter (countreg, 1);
13391	  emit_label (label);
13392	  LABEL_NUSES (label) = 1;
13393	}
13394      if (align <= 2)
13395	{
13396	  rtx label = ix86_expand_aligntest (destreg, 2);
13397	  emit_insn (gen_strset (destreg, dst,
13398				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13399	  ix86_adjust_counter (countreg, 2);
13400	  emit_label (label);
13401	  LABEL_NUSES (label) = 1;
13402	}
13403      if (align <= 4 && desired_alignment > 4)
13404	{
13405	  rtx label = ix86_expand_aligntest (destreg, 4);
13406	  emit_insn (gen_strset (destreg, dst,
13407				 (TARGET_64BIT
13408				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13409				  : zeroreg)));
13410	  ix86_adjust_counter (countreg, 4);
13411	  emit_label (label);
13412	  LABEL_NUSES (label) = 1;
13413	}
13414
13415      if (label && desired_alignment > 4 && !TARGET_64BIT)
13416	{
13417	  emit_label (label);
13418	  LABEL_NUSES (label) = 1;
13419	  label = NULL_RTX;
13420	}
13421
13422      if (!TARGET_SINGLE_STRINGOP)
13423	emit_insn (gen_cld ());
13424      if (TARGET_64BIT)
13425	{
13426	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13427				  GEN_INT (3)));
13428	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13429	}
13430      else
13431	{
13432	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13433	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13434	}
13435      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13436      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13437
13438      if (label)
13439	{
13440	  emit_label (label);
13441	  LABEL_NUSES (label) = 1;
13442	}
13443
13444      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13445	emit_insn (gen_strset (destreg, dst,
13446			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
13447      if (TARGET_64BIT && (align <= 4 || count == 0))
13448	{
13449	  rtx label = ix86_expand_aligntest (countreg, 4);
13450	  emit_insn (gen_strset (destreg, dst,
13451				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13452	  emit_label (label);
13453	  LABEL_NUSES (label) = 1;
13454	}
13455      if (align > 2 && count != 0 && (count & 2))
13456	emit_insn (gen_strset (destreg, dst,
13457			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
13458      if (align <= 2 || count == 0)
13459	{
13460	  rtx label = ix86_expand_aligntest (countreg, 2);
13461	  emit_insn (gen_strset (destreg, dst,
13462				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13463	  emit_label (label);
13464	  LABEL_NUSES (label) = 1;
13465	}
13466      if (align > 1 && count != 0 && (count & 1))
13467	emit_insn (gen_strset (destreg, dst,
13468			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
13469      if (align <= 1 || count == 0)
13470	{
13471	  rtx label = ix86_expand_aligntest (countreg, 1);
13472	  emit_insn (gen_strset (destreg, dst,
13473				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13474	  emit_label (label);
13475	  LABEL_NUSES (label) = 1;
13476	}
13477    }
13478  return 1;
13479}
13480
13481/* Expand strlen.  */
13482int
13483ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13484{
13485  rtx addr, scratch1, scratch2, scratch3, scratch4;
13486
  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */
13489
13490  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13491      && !TARGET_INLINE_ALL_STRINGOPS
13492      && !optimize_size
13493      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13494    return 0;
13495
13496  addr = force_reg (Pmode, XEXP (src, 0));
13497  scratch1 = gen_reg_rtx (Pmode);
13498
13499  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13500      && !optimize_size)
13501    {
      /* It seems that some optimizers do not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction are done here; the length is
         computed just once when these instructions are emitted inside
         output_strlen_unroll().  But since &bar[strlen(bar)] is often
         used, and this way one fewer register is live for the lifetime of
         output_strlen_unroll(), this is better.  */
13509
13510      emit_move_insn (out, addr);
13511
13512      ix86_expand_strlensi_unroll_1 (out, src, align);
13513
13514      /* strlensi_unroll_1 returns the address of the zero at the end of
13515         the string, like memchr(), so compute the length by subtracting
13516         the start address.  */
13517      if (TARGET_64BIT)
13518	emit_insn (gen_subdi3 (out, out, addr));
13519      else
13520	emit_insn (gen_subsi3 (out, out, addr));
13521    }
13522  else
13523    {
13524      rtx unspec;
13525      scratch2 = gen_reg_rtx (Pmode);
13526      scratch3 = gen_reg_rtx (Pmode);
13527      scratch4 = force_reg (Pmode, constm1_rtx);
13528
13529      emit_move_insn (scratch3, addr);
13530      eoschar = force_reg (QImode, eoschar);
13531
13532      emit_insn (gen_cld ());
13533      src = replace_equiv_address_nv (src, scratch3);
13534
13535      /* If .md starts supporting :P, this can be done in .md.  */
13536      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13537						 scratch4), UNSPEC_SCAS);
13538      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
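      /* Assuming the strlenqi_1 pattern leaves the final count register in
	 scratch1: the count starts at -1 (scratch4) and repnz scasb
	 decrements it once per byte scanned, including the matching end
	 character.  After L non-matching bytes the final value is
	 -(L + 2), so L = ~count - 1, which is what the one's complement
	 and the add of -1 below compute.  */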
13539      if (TARGET_64BIT)
13540	{
13541	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13542	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13543	}
13544      else
13545	{
13546	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13547	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13548	}
13549    }
13550  return 1;
13551}
13552
13553/* Expand the appropriate insns for doing strlen if not just doing
13554   repnz; scasb
13555
13556   out = result, initialized with the start address
13557   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
13559	not aligned, otherwise undefined
13560
13561   This is just the body. It needs the initializations mentioned above and
13562   some address computing at the end.  These things are done in i386.md.  */
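/* The generated code first compares up to three bytes one at a time until
   OUT is 4-byte aligned, then loops over the string a word at a time using
   the "does this word contain a zero byte" test below, and finally backs
   OUT up to the exact zero byte without a branch.  */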
13563
13564static void
13565ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13566{
13567  int align;
13568  rtx tmp;
13569  rtx align_2_label = NULL_RTX;
13570  rtx align_3_label = NULL_RTX;
13571  rtx align_4_label = gen_label_rtx ();
13572  rtx end_0_label = gen_label_rtx ();
13573  rtx mem;
13574  rtx tmpreg = gen_reg_rtx (SImode);
13575  rtx scratch = gen_reg_rtx (SImode);
13576  rtx cmp;
13577
13578  align = 0;
13579  if (GET_CODE (align_rtx) == CONST_INT)
13580    align = INTVAL (align_rtx);
13581
13582  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
13583
13584  /* Is there a known alignment and is it less than 4?  */
13585  if (align < 4)
13586    {
13587      rtx scratch1 = gen_reg_rtx (Pmode);
13588      emit_move_insn (scratch1, out);
13589      /* Is there a known alignment and is it not 2? */
13590      if (align != 2)
13591	{
13592	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13593	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13594
13595	  /* Leave just the 3 lower bits.  */
13596	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13597				    NULL_RTX, 0, OPTAB_WIDEN);
13598
13599	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13600				   Pmode, 1, align_4_label);
13601	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13602				   Pmode, 1, align_2_label);
13603	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13604				   Pmode, 1, align_3_label);
13605	}
13606      else
13607        {
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check whether it is aligned to a 4-byte boundary.  */
13610
13611	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13612				    NULL_RTX, 0, OPTAB_WIDEN);
13613
13614	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13615				   Pmode, 1, align_4_label);
13616        }
13617
13618      mem = change_address (src, QImode, out);
13619
13620      /* Now compare the bytes.  */
13621
      /* Compare the first n unaligned bytes one byte at a time.  */
13623      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13624			       QImode, 1, end_0_label);
13625
13626      /* Increment the address.  */
13627      if (TARGET_64BIT)
13628	emit_insn (gen_adddi3 (out, out, const1_rtx));
13629      else
13630	emit_insn (gen_addsi3 (out, out, const1_rtx));
13631
      /* Not needed with an alignment of 2.  */
13633      if (align != 2)
13634	{
13635	  emit_label (align_2_label);
13636
13637	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13638				   end_0_label);
13639
13640	  if (TARGET_64BIT)
13641	    emit_insn (gen_adddi3 (out, out, const1_rtx));
13642	  else
13643	    emit_insn (gen_addsi3 (out, out, const1_rtx));
13644
13645	  emit_label (align_3_label);
13646	}
13647
13648      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13649			       end_0_label);
13650
13651      if (TARGET_64BIT)
13652	emit_insn (gen_adddi3 (out, out, const1_rtx));
13653      else
13654	emit_insn (gen_addsi3 (out, out, const1_rtx));
13655    }
13656
  /* Generate a loop that checks 4 bytes at a time.  Aligning this loop is
     not a good idea: it only makes the program larger without speeding it
     up.  */
13660  emit_label (align_4_label);
13661
13662  mem = change_address (src, SImode, out);
13663  emit_move_insn (scratch, mem);
13664  if (TARGET_64BIT)
13665    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13666  else
13667    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13668
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
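  /* In C terms the test is (x - 0x01010101) & ~x & 0x80808080, which is
     nonzero exactly when some byte of x is zero.  For example, with
     x = 0x41410041 (a zero in byte 1):
	 x - 0x01010101 = 0x403fff40, ~x = 0xbebeffbe,
     and masking with 0x80808080 leaves 0x00008000; with x = 0x41414141
     no high bit survives the masks and the result is 0.  */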
13671
13672  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13673  emit_insn (gen_one_cmplsi2 (scratch, scratch));
13674  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13675  emit_insn (gen_andsi3 (tmpreg, tmpreg,
13676			 gen_int_mode (0x80808080, SImode)));
13677  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13678			   align_4_label);
13679
13680  if (TARGET_CMOVE)
13681    {
13682       rtx reg = gen_reg_rtx (SImode);
13683       rtx reg2 = gen_reg_rtx (Pmode);
13684       emit_move_insn (reg, tmpreg);
13685       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13686
13687       /* If zero is not in the first two bytes, move two bytes forward.  */
13688       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13689       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13690       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13691       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13692			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
13693						     reg,
13694						     tmpreg)));
13695       /* Emit lea manually to avoid clobbering of flags.  */
13696       emit_insn (gen_rtx_SET (SImode, reg2,
13697			       gen_rtx_PLUS (Pmode, out, const2_rtx)));
13698
13699       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13700       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13701       emit_insn (gen_rtx_SET (VOIDmode, out,
13702			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13703						     reg2,
13704						     out)));
13705
13706    }
13707  else
13708    {
13709       rtx end_2_label = gen_label_rtx ();
13710       /* Is zero in the first two bytes? */
13711
13712       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13713       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13714       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13715       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13716                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13717                            pc_rtx);
13718       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13719       JUMP_LABEL (tmp) = end_2_label;
13720
13721       /* Not in the first two.  Move two bytes forward.  */
13722       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13723       if (TARGET_64BIT)
13724	 emit_insn (gen_adddi3 (out, out, const2_rtx));
13725       else
13726	 emit_insn (gen_addsi3 (out, out, const2_rtx));
13727
13728       emit_label (end_2_label);
13729
13730    }
13731
  /* Avoid a branch while fixing up the final byte offset.  */
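  /* At this point OUT is 4 (or 6) bytes past the start of the word that
     contains the zero byte, and bit 7 of the low byte of TMPREG is set
     exactly when the zero is the first byte of the remaining pair.
     Adding the low byte to itself moves that bit into the carry flag, so
     the subtract-with-borrow of 3 below backs OUT up by either 3 or 4
     without a conditional branch.  */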
13733  tmpreg = gen_lowpart (QImode, tmpreg);
13734  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13735  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13736  if (TARGET_64BIT)
13737    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13738  else
13739    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13740
13741  emit_label (end_0_label);
13742}
13743
13744void
13745ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13746		  rtx callarg2 ATTRIBUTE_UNUSED,
13747		  rtx pop, int sibcall)
13748{
13749  rtx use = NULL, call;
13750
13751  if (pop == const0_rtx)
13752    pop = NULL;
13753  gcc_assert (!TARGET_64BIT || !pop);
13754
13755  if (TARGET_MACHO && !TARGET_64BIT)
13756    {
13757#if TARGET_MACHO
13758      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13759	fnaddr = machopic_indirect_call_target (fnaddr);
13760#endif
13761    }
13762  else
13763    {
13764      /* Static functions and indirect calls don't need the pic register.  */
13765      if (! TARGET_64BIT && flag_pic
13766	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13767	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13768	use_reg (&use, pic_offset_table_rtx);
13769    }
13770
13771  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13772    {
13773      rtx al = gen_rtx_REG (QImode, 0);
13774      emit_move_insn (al, callarg2);
13775      use_reg (&use, al);
13776    }
13777
13778  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13779    {
13780      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13781      fnaddr = gen_rtx_MEM (QImode, fnaddr);
13782    }
13783  if (sibcall && TARGET_64BIT
13784      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13785    {
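      /* For a sibling call the jump happens after the epilogue, so the
	 address must live in a register that is neither callee-saved nor
	 used for argument passing; R11 satisfies both requirements.  */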
13786      rtx addr;
13787      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13788      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13789      emit_move_insn (fnaddr, addr);
13790      fnaddr = gen_rtx_MEM (QImode, fnaddr);
13791    }
13792
13793  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13794  if (retval)
13795    call = gen_rtx_SET (VOIDmode, retval, call);
13796  if (pop)
13797    {
13798      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13799      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13800      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13801    }
13802
13803  call = emit_call_insn (call);
13804  if (use)
13805    CALL_INSN_FUNCTION_USAGE (call) = use;
13806}
13807
13808
13809/* Clear stack slot assignments remembered from previous functions.
13810   This is called from INIT_EXPANDERS once before RTL is emitted for each
13811   function.  */
13812
13813static struct machine_function *
13814ix86_init_machine_status (void)
13815{
13816  struct machine_function *f;
13817
13818  f = ggc_alloc_cleared (sizeof (struct machine_function));
13819  f->use_fast_prologue_epilogue_nregs = -1;
13820  f->tls_descriptor_call_expanded_p = 0;
13821
13822  return f;
13823}
13824
13825/* Return a MEM corresponding to a stack slot with mode MODE.
13826   Allocate a new slot if necessary.
13827
13828   The RTL for a function can have several slots available: N is
13829   which slot to use.  */
13830
13831rtx
13832assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13833{
13834  struct stack_local_entry *s;
13835
13836  gcc_assert (n < MAX_386_STACK_LOCALS);
13837
13838  /* Virtual slot is valid only before vregs are instantiated.  */
13839  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13840
13841  for (s = ix86_stack_locals; s; s = s->next)
13842    if (s->mode == mode && s->n == n)
13843      return s->rtl;
13844
13845  s = (struct stack_local_entry *)
13846    ggc_alloc (sizeof (struct stack_local_entry));
13847  s->n = n;
13848  s->mode = mode;
13849  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13850
13851  s->next = ix86_stack_locals;
13852  ix86_stack_locals = s;
13853  return s->rtl;
13854}
13855
13856/* Construct the SYMBOL_REF for the tls_get_addr function.  */
13857
13858static GTY(()) rtx ix86_tls_symbol;
13859rtx
13860ix86_tls_get_addr (void)
13861{
13862
13863  if (!ix86_tls_symbol)
13864    {
13865      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13866					    (TARGET_ANY_GNU_TLS
13867					     && !TARGET_64BIT)
13868					    ? "___tls_get_addr"
13869					    : "__tls_get_addr");
13870    }
13871
13872  return ix86_tls_symbol;
13873}
13874
13875/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
13876
13877static GTY(()) rtx ix86_tls_module_base_symbol;
13878rtx
13879ix86_tls_module_base (void)
13880{
13881
13882  if (!ix86_tls_module_base_symbol)
13883    {
13884      ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13885							"_TLS_MODULE_BASE_");
13886      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13887	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13888    }
13889
13890  return ix86_tls_module_base_symbol;
13891}
13892
13893/* Calculate the length of the memory address in the instruction
13894   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
13895
13896int
13897memory_address_length (rtx addr)
13898{
13899  struct ix86_address parts;
13900  rtx base, index, disp;
13901  int len;
13902  int ok;
13903
13904  if (GET_CODE (addr) == PRE_DEC
13905      || GET_CODE (addr) == POST_INC
13906      || GET_CODE (addr) == PRE_MODIFY
13907      || GET_CODE (addr) == POST_MODIFY)
13908    return 0;
13909
13910  ok = ix86_decompose_address (addr, &parts);
13911  gcc_assert (ok);
13912
13913  if (parts.base && GET_CODE (parts.base) == SUBREG)
13914    parts.base = SUBREG_REG (parts.base);
13915  if (parts.index && GET_CODE (parts.index) == SUBREG)
13916    parts.index = SUBREG_REG (parts.index);
13917
13918  base = parts.base;
13919  index = parts.index;
13920  disp = parts.disp;
13921  len = 0;
13922
13923  /* Rule of thumb:
13924       - esp as the base always wants an index,
13925       - ebp as the base always wants a displacement.  */
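  /* A few examples of what this computes (bytes beyond the modrm byte):
	(%eax)            -> 0
	(%esp)            -> 1	(needs a SIB byte)
	(%ebp)            -> 1	(needs a zero disp8)
	12(%eax)          -> 1	(disp8)
	0x12345678(%eax)  -> 4	(disp32)
	(%eax,%ebx,4)     -> 1	(SIB byte)
	12(%eax,%ebx,4)   -> 2	(SIB + disp8)  */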
13926
13927  /* Register Indirect.  */
13928  if (base && !index && !disp)
13929    {
13930      /* esp (for its index) and ebp (for its displacement) need
13931	 the two-byte modrm form.  */
13932      if (addr == stack_pointer_rtx
13933	  || addr == arg_pointer_rtx
13934	  || addr == frame_pointer_rtx
13935	  || addr == hard_frame_pointer_rtx)
13936	len = 1;
13937    }
13938
13939  /* Direct Addressing.  */
13940  else if (disp && !base && !index)
13941    len = 4;
13942
13943  else
13944    {
13945      /* Find the length of the displacement constant.  */
13946      if (disp)
13947	{
13948	  if (base && satisfies_constraint_K (disp))
13949	    len = 1;
13950	  else
13951	    len = 4;
13952	}
13953      /* ebp always wants a displacement.  */
13954      else if (base == hard_frame_pointer_rtx)
13955        len = 1;
13956
13957      /* An index requires the two-byte modrm form....  */
13958      if (index
13959	  /* ...like esp, which always wants an index.  */
13960	  || base == stack_pointer_rtx
13961	  || base == arg_pointer_rtx
13962	  || base == frame_pointer_rtx)
13963	len += 1;
13964    }
13965
13966  return len;
13967}
13968
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
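/* For example, "addl $4, %eax" can use the sign-extended 8-bit immediate
   form (1 byte of immediate), while "addl $1000, %eax" needs the full
   32-bit immediate (4 bytes).  In MODE_DI the immediate is still at most
   4 bytes, since it is encoded as a 32-bit sign-extended value.  */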
13971int
13972ix86_attr_length_immediate_default (rtx insn, int shortform)
13973{
13974  int len = 0;
13975  int i;
13976  extract_insn_cached (insn);
13977  for (i = recog_data.n_operands - 1; i >= 0; --i)
13978    if (CONSTANT_P (recog_data.operand[i]))
13979      {
13980	gcc_assert (!len);
13981	if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13982	  len = 1;
13983	else
13984	  {
13985	    switch (get_attr_mode (insn))
13986	      {
		case MODE_QI:
		  len += 1;
		  break;
		case MODE_HI:
		  len += 2;
		  break;
		case MODE_SI:
		  len += 4;
		  break;
		/* Immediates for DImode instructions are encoded as
		   32-bit sign-extended values.  */
		case MODE_DI:
		  len += 4;
		  break;
14000		default:
14001		  fatal_insn ("unknown insn mode", insn);
14002	      }
14003	  }
14004      }
14005  return len;
14006}

/* Compute the default value for the "length_address" attribute.  */
14008int
14009ix86_attr_length_address_default (rtx insn)
14010{
14011  int i;
14012
14013  if (get_attr_type (insn) == TYPE_LEA)
14014    {
14015      rtx set = PATTERN (insn);
14016
14017      if (GET_CODE (set) == PARALLEL)
14018	set = XVECEXP (set, 0, 0);
14019
14020      gcc_assert (GET_CODE (set) == SET);
14021
14022      return memory_address_length (SET_SRC (set));
14023    }
14024
14025  extract_insn_cached (insn);
14026  for (i = recog_data.n_operands - 1; i >= 0; --i)
14027    if (GET_CODE (recog_data.operand[i]) == MEM)
14028      {
	return memory_address_length (XEXP (recog_data.operand[i], 0));
14031      }
14032  return 0;
14033}
14034
14035/* Return the maximum number of instructions a cpu can issue.  */
14036
14037static int
14038ix86_issue_rate (void)
14039{
14040  switch (ix86_tune)
14041    {
14042    case PROCESSOR_PENTIUM:
14043    case PROCESSOR_K6:
14044      return 2;
14045
14046    case PROCESSOR_PENTIUMPRO:
14047    case PROCESSOR_PENTIUM4:
14048    case PROCESSOR_ATHLON:
14049    case PROCESSOR_K8:
14050    case PROCESSOR_AMDFAM10:
14051    case PROCESSOR_NOCONA:
14052    case PROCESSOR_GENERIC32:
14053    case PROCESSOR_GENERIC64:
14054      return 3;
14055
14056    case PROCESSOR_CORE2:
14057      return 4;
14058
14059    default:
14060      return 1;
14061    }
14062}
14063
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
   set by DEP_INSN and no other register set by DEP_INSN.  */
14066
14067static int
14068ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14069{
14070  rtx set, set2;
14071
14072  /* Simplify the test for uninteresting insns.  */
14073  if (insn_type != TYPE_SETCC
14074      && insn_type != TYPE_ICMOV
14075      && insn_type != TYPE_FCMOV
14076      && insn_type != TYPE_IBR)
14077    return 0;
14078
14079  if ((set = single_set (dep_insn)) != 0)
14080    {
14081      set = SET_DEST (set);
14082      set2 = NULL_RTX;
14083    }
14084  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
14085	   && XVECLEN (PATTERN (dep_insn), 0) == 2
14086	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
14087	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
14088    {
14089      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
14091    }
14092  else
14093    return 0;
14094
14095  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
14096    return 0;
14097
14098  /* This test is true if the dependent insn reads the flags but
14099     not any other potentially set register.  */
14100  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
14101    return 0;
14102
14103  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
14104    return 0;
14105
14106  return 1;
14107}
14108
14109/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14110   address with operands set by DEP_INSN.  */
14111
14112static int
14113ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14114{
14115  rtx addr;
14116
14117  if (insn_type == TYPE_LEA
14118      && TARGET_PENTIUM)
14119    {
14120      addr = PATTERN (insn);
14121
14122      if (GET_CODE (addr) == PARALLEL)
14123	addr = XVECEXP (addr, 0, 0);
14124
14125      gcc_assert (GET_CODE (addr) == SET);
14126
14127      addr = SET_SRC (addr);
14128    }
14129  else
14130    {
14131      int i;
14132      extract_insn_cached (insn);
14133      for (i = recog_data.n_operands - 1; i >= 0; --i)
14134	if (GET_CODE (recog_data.operand[i]) == MEM)
14135	  {
14136	    addr = XEXP (recog_data.operand[i], 0);
14137	    goto found;
14138	  }
14139      return 0;
14140    found:;
14141    }
14142
14143  return modified_in_p (addr, dep_insn);
14144}
14145
14146static int
14147ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
14148{
14149  enum attr_type insn_type, dep_insn_type;
14150  enum attr_memory memory;
14151  rtx set, set2;
14152  int dep_insn_code_number;
14153
14154  /* Anti and output dependencies have zero cost on all CPUs.  */
14155  if (REG_NOTE_KIND (link) != 0)
14156    return 0;
14157
14158  dep_insn_code_number = recog_memoized (dep_insn);
14159
14160  /* If we can't recognize the insns, we can't really do anything.  */
14161  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
14162    return cost;
14163
14164  insn_type = get_attr_type (insn);
14165  dep_insn_type = get_attr_type (dep_insn);
14166
14167  switch (ix86_tune)
14168    {
14169    case PROCESSOR_PENTIUM:
14170      /* Address Generation Interlock adds a cycle of latency.  */
14171      if (ix86_agi_dependent (insn, dep_insn, insn_type))
14172	cost += 1;
14173
14174      /* ??? Compares pair with jump/setcc.  */
14175      if (ix86_flags_dependent (insn, dep_insn, insn_type))
14176	cost = 0;
14177
14178      /* Floating point stores require value to be ready one cycle earlier.  */
14179      if (insn_type == TYPE_FMOV
14180	  && get_attr_memory (insn) == MEMORY_STORE
14181	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
14182	cost += 1;
14183      break;
14184
14185    case PROCESSOR_PENTIUMPRO:
14186      memory = get_attr_memory (insn);
14187
14188      /* INT->FP conversion is expensive.  */
14189      if (get_attr_fp_int_src (dep_insn))
14190	cost += 5;
14191
14192      /* There is one cycle extra latency between an FP op and a store.  */
14193      if (insn_type == TYPE_FMOV
14194	  && (set = single_set (dep_insn)) != NULL_RTX
14195	  && (set2 = single_set (insn)) != NULL_RTX
14196	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
14197	  && GET_CODE (SET_DEST (set2)) == MEM)
14198	cost += 1;
14199
      /* Model the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 provided the previous instruction is not needed to compute the
	 address.  */
14203      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14204	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
14205	{
	  /* Claim that moves take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
14208	  if (dep_insn_type == TYPE_IMOV
14209	      || dep_insn_type == TYPE_FMOV)
14210	    cost = 1;
14211	  else if (cost > 1)
14212	    cost--;
14213	}
14214      break;
14215
14216    case PROCESSOR_K6:
14217      memory = get_attr_memory (insn);
14218
14219      /* The esp dependency is resolved before the instruction is really
14220         finished.  */
14221      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14222	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14223	return 1;
14224
14225      /* INT->FP conversion is expensive.  */
14226      if (get_attr_fp_int_src (dep_insn))
14227	cost += 5;
14228
      /* Model the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 provided the previous instruction is not needed to compute the
	 address.  */
14232      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14233	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
14234	{
	  /* Claim that moves take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
14237	  if (dep_insn_type == TYPE_IMOV
14238	      || dep_insn_type == TYPE_FMOV)
14239	    cost = 1;
14240	  else if (cost > 2)
14241	    cost -= 2;
14242	  else
14243	    cost = 1;
14244	}
14245      break;
14246
14247    case PROCESSOR_ATHLON:
14248    case PROCESSOR_K8:
14249    case PROCESSOR_AMDFAM10:
14250    case PROCESSOR_GENERIC32:
14251    case PROCESSOR_GENERIC64:
14252      memory = get_attr_memory (insn);
14253
      /* Model the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 provided the previous instruction is not needed to compute the
	 address.  */
14257      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14258	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
14259	{
14260	  enum attr_unit unit = get_attr_unit (insn);
14261	  int loadcost = 3;
14262
14263	  /* Because of the difference between the length of integer and
14264	     floating unit pipeline preparation stages, the memory operands
14265	     for floating point are cheaper.
14266
	     ??? For Athlon the difference is most probably 2.  */
14268	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14269	    loadcost = 3;
14270	  else
14271	    loadcost = TARGET_ATHLON ? 2 : 0;
14272
14273	  if (cost >= loadcost)
14274	    cost -= loadcost;
14275	  else
14276	    cost = 0;
14277	}
14278
14279    default:
14280      break;
14281    }
14282
14283  return cost;
14284}
14285
14286/* How many alternative schedules to try.  This should be as wide as the
14287   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
14289
14290static int
14291ia32_multipass_dfa_lookahead (void)
14292{
14293  if (ix86_tune == PROCESSOR_PENTIUM)
14294    return 2;
14295
14296  if (ix86_tune == PROCESSOR_PENTIUMPRO
14297      || ix86_tune == PROCESSOR_K6)
14298    return 1;
14299
14300  else
14301    return 0;
14302}
14303
14304
14305/* Compute the alignment given to a constant that is being placed in memory.
14306   EXP is the constant and ALIGN is the alignment that the object would
14307   ordinarily have.
14308   The value of this function is used instead of that alignment to align
14309   the object.  */
14310
14311int
14312ix86_constant_alignment (tree exp, int align)
14313{
14314  if (TREE_CODE (exp) == REAL_CST)
14315    {
14316      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14317	return 64;
14318      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14319	return 128;
14320    }
14321  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14322      	   && !TARGET_NO_ALIGN_LONG_STRINGS
14323	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14324    return BITS_PER_WORD;
14325
14326  return align;
14327}
14328
14329/* Compute the alignment for a static variable.
14330   TYPE is the data type, and ALIGN is the alignment that
14331   the object would ordinarily have.  The value of this function is used
14332   instead of that alignment to align the object.  */
14333
14334int
14335ix86_data_alignment (tree type, int align)
14336{
14337  int max_align = optimize_size ? BITS_PER_WORD : 256;
14338
14339  if (AGGREGATE_TYPE_P (type)
14340      && TYPE_SIZE (type)
14341      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14342      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14343	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14344      && align < max_align)
14345    align = max_align;
14346
14347  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
14349  if (TARGET_64BIT)
14350    {
14351      if (AGGREGATE_TYPE_P (type)
14352	   && TYPE_SIZE (type)
14353	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14354	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14355	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14356	return 128;
14357    }
14358
14359  if (TREE_CODE (type) == ARRAY_TYPE)
14360    {
14361      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14362	return 64;
14363      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14364	return 128;
14365    }
14366  else if (TREE_CODE (type) == COMPLEX_TYPE)
14367    {
14368
14369      if (TYPE_MODE (type) == DCmode && align < 64)
14370	return 64;
14371      if (TYPE_MODE (type) == XCmode && align < 128)
14372	return 128;
14373    }
14374  else if ((TREE_CODE (type) == RECORD_TYPE
14375	    || TREE_CODE (type) == UNION_TYPE
14376	    || TREE_CODE (type) == QUAL_UNION_TYPE)
14377	   && TYPE_FIELDS (type))
14378    {
14379      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14380	return 64;
14381      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14382	return 128;
14383    }
14384  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14385	   || TREE_CODE (type) == INTEGER_TYPE)
14386    {
14387      if (TYPE_MODE (type) == DFmode && align < 64)
14388	return 64;
14389      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14390	return 128;
14391    }
14392
14393  return align;
14394}
14395
14396/* Compute the alignment for a local variable.
14397   TYPE is the data type, and ALIGN is the alignment that
14398   the object would ordinarily have.  The value of this macro is used
14399   instead of that alignment to align the object.  */
14400
14401int
14402ix86_local_alignment (tree type, int align)
14403{
14404  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
14406  if (TARGET_64BIT)
14407    {
14408      if (AGGREGATE_TYPE_P (type)
14409	   && TYPE_SIZE (type)
14410	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14411	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14412	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14413	return 128;
14414    }
14415  if (TREE_CODE (type) == ARRAY_TYPE)
14416    {
14417      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14418	return 64;
14419      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14420	return 128;
14421    }
14422  else if (TREE_CODE (type) == COMPLEX_TYPE)
14423    {
14424      if (TYPE_MODE (type) == DCmode && align < 64)
14425	return 64;
14426      if (TYPE_MODE (type) == XCmode && align < 128)
14427	return 128;
14428    }
14429  else if ((TREE_CODE (type) == RECORD_TYPE
14430	    || TREE_CODE (type) == UNION_TYPE
14431	    || TREE_CODE (type) == QUAL_UNION_TYPE)
14432	   && TYPE_FIELDS (type))
14433    {
14434      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14435	return 64;
14436      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14437	return 128;
14438    }
14439  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14440	   || TREE_CODE (type) == INTEGER_TYPE)
14441    {
14442
14443      if (TYPE_MODE (type) == DFmode && align < 64)
14444	return 64;
14445      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14446	return 128;
14447    }
14448  return align;
14449}
14450
14451/* Emit RTL insns to initialize the variable parts of a trampoline.
14452   FNADDR is an RTX for the address of the function's pure code.
14453   CXT is an RTX for the static chain value for the function.  */
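/* For reference, the byte constants emitted below decode as follows.
   32-bit trampoline:
	b9 <cxt32>	movl  $CXT, %ecx
	e9 <rel32>	jmp   FNADDR
   64-bit trampoline:
	41 bb <imm32> (or 49 bb <imm64>)	mov    $FNADDR, %r11
	49 ba <imm64>				movabs $CXT, %r10
	49 ff e3				jmp    *%r11
   %ecx resp. %r10 being the static chain register.  */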
14454void
14455x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14456{
14457  if (!TARGET_64BIT)
14458    {
14459      /* Compute offset from the end of the jmp to the target function.  */
14460      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14461			       plus_constant (tramp, 10),
14462			       NULL_RTX, 1, OPTAB_DIRECT);
14463      emit_move_insn (gen_rtx_MEM (QImode, tramp),
14464		      gen_int_mode (0xb9, QImode));
14465      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14466      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14467		      gen_int_mode (0xe9, QImode));
14468      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14469    }
14470  else
14471    {
14472      int offset = 0;
      /* Try to load the address using a shorter movl instead of movabs.
         We may want to support movq for kernel mode, but the kernel does
         not use trampolines at the moment.  */
14476      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14477	{
14478	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
14479	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14480			  gen_int_mode (0xbb41, HImode));
14481	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14482			  gen_lowpart (SImode, fnaddr));
14483	  offset += 6;
14484	}
14485      else
14486	{
14487	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14488			  gen_int_mode (0xbb49, HImode));
14489	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14490			  fnaddr);
14491	  offset += 10;
14492	}
14493      /* Load static chain using movabs to r10.  */
14494      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14495		      gen_int_mode (0xba49, HImode));
14496      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14497		      cxt);
14498      offset += 10;
      /* Jump to %r11.  */
14500      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14501		      gen_int_mode (0xff49, HImode));
14502      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14503		      gen_int_mode (0xe3, QImode));
14504      offset += 3;
14505      gcc_assert (offset <= TRAMPOLINE_SIZE);
14506    }
14507
14508#ifdef ENABLE_EXECUTE_STACK
14509  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14510		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14511#endif
14512}
14513
14514/* Codes for all the SSE/MMX builtins.  */
14515enum ix86_builtins
14516{
14517  IX86_BUILTIN_ADDPS,
14518  IX86_BUILTIN_ADDSS,
14519  IX86_BUILTIN_DIVPS,
14520  IX86_BUILTIN_DIVSS,
14521  IX86_BUILTIN_MULPS,
14522  IX86_BUILTIN_MULSS,
14523  IX86_BUILTIN_SUBPS,
14524  IX86_BUILTIN_SUBSS,
14525
14526  IX86_BUILTIN_CMPEQPS,
14527  IX86_BUILTIN_CMPLTPS,
14528  IX86_BUILTIN_CMPLEPS,
14529  IX86_BUILTIN_CMPGTPS,
14530  IX86_BUILTIN_CMPGEPS,
14531  IX86_BUILTIN_CMPNEQPS,
14532  IX86_BUILTIN_CMPNLTPS,
14533  IX86_BUILTIN_CMPNLEPS,
14534  IX86_BUILTIN_CMPNGTPS,
14535  IX86_BUILTIN_CMPNGEPS,
14536  IX86_BUILTIN_CMPORDPS,
14537  IX86_BUILTIN_CMPUNORDPS,
14538  IX86_BUILTIN_CMPEQSS,
14539  IX86_BUILTIN_CMPLTSS,
14540  IX86_BUILTIN_CMPLESS,
14541  IX86_BUILTIN_CMPNEQSS,
14542  IX86_BUILTIN_CMPNLTSS,
14543  IX86_BUILTIN_CMPNLESS,
14544  IX86_BUILTIN_CMPNGTSS,
14545  IX86_BUILTIN_CMPNGESS,
14546  IX86_BUILTIN_CMPORDSS,
14547  IX86_BUILTIN_CMPUNORDSS,
14548
14549  IX86_BUILTIN_COMIEQSS,
14550  IX86_BUILTIN_COMILTSS,
14551  IX86_BUILTIN_COMILESS,
14552  IX86_BUILTIN_COMIGTSS,
14553  IX86_BUILTIN_COMIGESS,
14554  IX86_BUILTIN_COMINEQSS,
14555  IX86_BUILTIN_UCOMIEQSS,
14556  IX86_BUILTIN_UCOMILTSS,
14557  IX86_BUILTIN_UCOMILESS,
14558  IX86_BUILTIN_UCOMIGTSS,
14559  IX86_BUILTIN_UCOMIGESS,
14560  IX86_BUILTIN_UCOMINEQSS,
14561
14562  IX86_BUILTIN_CVTPI2PS,
14563  IX86_BUILTIN_CVTPS2PI,
14564  IX86_BUILTIN_CVTSI2SS,
14565  IX86_BUILTIN_CVTSI642SS,
14566  IX86_BUILTIN_CVTSS2SI,
14567  IX86_BUILTIN_CVTSS2SI64,
14568  IX86_BUILTIN_CVTTPS2PI,
14569  IX86_BUILTIN_CVTTSS2SI,
14570  IX86_BUILTIN_CVTTSS2SI64,
14571
14572  IX86_BUILTIN_MAXPS,
14573  IX86_BUILTIN_MAXSS,
14574  IX86_BUILTIN_MINPS,
14575  IX86_BUILTIN_MINSS,
14576
14577  IX86_BUILTIN_LOADUPS,
14578  IX86_BUILTIN_STOREUPS,
14579  IX86_BUILTIN_MOVSS,
14580
14581  IX86_BUILTIN_MOVHLPS,
14582  IX86_BUILTIN_MOVLHPS,
14583  IX86_BUILTIN_LOADHPS,
14584  IX86_BUILTIN_LOADLPS,
14585  IX86_BUILTIN_STOREHPS,
14586  IX86_BUILTIN_STORELPS,
14587
14588  IX86_BUILTIN_MASKMOVQ,
14589  IX86_BUILTIN_MOVMSKPS,
14590  IX86_BUILTIN_PMOVMSKB,
14591
14592  IX86_BUILTIN_MOVNTPS,
14593  IX86_BUILTIN_MOVNTQ,
14594
14595  IX86_BUILTIN_LOADDQU,
14596  IX86_BUILTIN_STOREDQU,
14597
14598  IX86_BUILTIN_PACKSSWB,
14599  IX86_BUILTIN_PACKSSDW,
14600  IX86_BUILTIN_PACKUSWB,
14601
14602  IX86_BUILTIN_PADDB,
14603  IX86_BUILTIN_PADDW,
14604  IX86_BUILTIN_PADDD,
14605  IX86_BUILTIN_PADDQ,
14606  IX86_BUILTIN_PADDSB,
14607  IX86_BUILTIN_PADDSW,
14608  IX86_BUILTIN_PADDUSB,
14609  IX86_BUILTIN_PADDUSW,
14610  IX86_BUILTIN_PSUBB,
14611  IX86_BUILTIN_PSUBW,
14612  IX86_BUILTIN_PSUBD,
14613  IX86_BUILTIN_PSUBQ,
14614  IX86_BUILTIN_PSUBSB,
14615  IX86_BUILTIN_PSUBSW,
14616  IX86_BUILTIN_PSUBUSB,
14617  IX86_BUILTIN_PSUBUSW,
14618
14619  IX86_BUILTIN_PAND,
14620  IX86_BUILTIN_PANDN,
14621  IX86_BUILTIN_POR,
14622  IX86_BUILTIN_PXOR,
14623
14624  IX86_BUILTIN_PAVGB,
14625  IX86_BUILTIN_PAVGW,
14626
14627  IX86_BUILTIN_PCMPEQB,
14628  IX86_BUILTIN_PCMPEQW,
14629  IX86_BUILTIN_PCMPEQD,
14630  IX86_BUILTIN_PCMPGTB,
14631  IX86_BUILTIN_PCMPGTW,
14632  IX86_BUILTIN_PCMPGTD,
14633
14634  IX86_BUILTIN_PMADDWD,
14635
14636  IX86_BUILTIN_PMAXSW,
14637  IX86_BUILTIN_PMAXUB,
14638  IX86_BUILTIN_PMINSW,
14639  IX86_BUILTIN_PMINUB,
14640
14641  IX86_BUILTIN_PMULHUW,
14642  IX86_BUILTIN_PMULHW,
14643  IX86_BUILTIN_PMULLW,
14644
14645  IX86_BUILTIN_PSADBW,
14646  IX86_BUILTIN_PSHUFW,
14647
14648  IX86_BUILTIN_PSLLW,
14649  IX86_BUILTIN_PSLLD,
14650  IX86_BUILTIN_PSLLQ,
14651  IX86_BUILTIN_PSRAW,
14652  IX86_BUILTIN_PSRAD,
14653  IX86_BUILTIN_PSRLW,
14654  IX86_BUILTIN_PSRLD,
14655  IX86_BUILTIN_PSRLQ,
14656  IX86_BUILTIN_PSLLWI,
14657  IX86_BUILTIN_PSLLDI,
14658  IX86_BUILTIN_PSLLQI,
14659  IX86_BUILTIN_PSRAWI,
14660  IX86_BUILTIN_PSRADI,
14661  IX86_BUILTIN_PSRLWI,
14662  IX86_BUILTIN_PSRLDI,
14663  IX86_BUILTIN_PSRLQI,
14664
14665  IX86_BUILTIN_PUNPCKHBW,
14666  IX86_BUILTIN_PUNPCKHWD,
14667  IX86_BUILTIN_PUNPCKHDQ,
14668  IX86_BUILTIN_PUNPCKLBW,
14669  IX86_BUILTIN_PUNPCKLWD,
14670  IX86_BUILTIN_PUNPCKLDQ,
14671
14672  IX86_BUILTIN_SHUFPS,
14673
14674  IX86_BUILTIN_RCPPS,
14675  IX86_BUILTIN_RCPSS,
14676  IX86_BUILTIN_RSQRTPS,
14677  IX86_BUILTIN_RSQRTSS,
14678  IX86_BUILTIN_SQRTPS,
14679  IX86_BUILTIN_SQRTSS,
14680
14681  IX86_BUILTIN_UNPCKHPS,
14682  IX86_BUILTIN_UNPCKLPS,
14683
14684  IX86_BUILTIN_ANDPS,
14685  IX86_BUILTIN_ANDNPS,
14686  IX86_BUILTIN_ORPS,
14687  IX86_BUILTIN_XORPS,
14688
14689  IX86_BUILTIN_EMMS,
14690  IX86_BUILTIN_LDMXCSR,
14691  IX86_BUILTIN_STMXCSR,
14692  IX86_BUILTIN_SFENCE,
14693
14694  /* 3DNow! Original */
14695  IX86_BUILTIN_FEMMS,
14696  IX86_BUILTIN_PAVGUSB,
14697  IX86_BUILTIN_PF2ID,
14698  IX86_BUILTIN_PFACC,
14699  IX86_BUILTIN_PFADD,
14700  IX86_BUILTIN_PFCMPEQ,
14701  IX86_BUILTIN_PFCMPGE,
14702  IX86_BUILTIN_PFCMPGT,
14703  IX86_BUILTIN_PFMAX,
14704  IX86_BUILTIN_PFMIN,
14705  IX86_BUILTIN_PFMUL,
14706  IX86_BUILTIN_PFRCP,
14707  IX86_BUILTIN_PFRCPIT1,
14708  IX86_BUILTIN_PFRCPIT2,
14709  IX86_BUILTIN_PFRSQIT1,
14710  IX86_BUILTIN_PFRSQRT,
14711  IX86_BUILTIN_PFSUB,
14712  IX86_BUILTIN_PFSUBR,
14713  IX86_BUILTIN_PI2FD,
14714  IX86_BUILTIN_PMULHRW,
14715
14716  /* 3DNow! Athlon Extensions */
14717  IX86_BUILTIN_PF2IW,
14718  IX86_BUILTIN_PFNACC,
14719  IX86_BUILTIN_PFPNACC,
14720  IX86_BUILTIN_PI2FW,
14721  IX86_BUILTIN_PSWAPDSI,
14722  IX86_BUILTIN_PSWAPDSF,
14723
14724  /* SSE2 */
14725  IX86_BUILTIN_ADDPD,
14726  IX86_BUILTIN_ADDSD,
14727  IX86_BUILTIN_DIVPD,
14728  IX86_BUILTIN_DIVSD,
14729  IX86_BUILTIN_MULPD,
14730  IX86_BUILTIN_MULSD,
14731  IX86_BUILTIN_SUBPD,
14732  IX86_BUILTIN_SUBSD,
14733
14734  IX86_BUILTIN_CMPEQPD,
14735  IX86_BUILTIN_CMPLTPD,
14736  IX86_BUILTIN_CMPLEPD,
14737  IX86_BUILTIN_CMPGTPD,
14738  IX86_BUILTIN_CMPGEPD,
14739  IX86_BUILTIN_CMPNEQPD,
14740  IX86_BUILTIN_CMPNLTPD,
14741  IX86_BUILTIN_CMPNLEPD,
14742  IX86_BUILTIN_CMPNGTPD,
14743  IX86_BUILTIN_CMPNGEPD,
14744  IX86_BUILTIN_CMPORDPD,
14745  IX86_BUILTIN_CMPUNORDPD,
14746  IX86_BUILTIN_CMPNEPD,
14747  IX86_BUILTIN_CMPEQSD,
14748  IX86_BUILTIN_CMPLTSD,
14749  IX86_BUILTIN_CMPLESD,
14750  IX86_BUILTIN_CMPNEQSD,
14751  IX86_BUILTIN_CMPNLTSD,
14752  IX86_BUILTIN_CMPNLESD,
14753  IX86_BUILTIN_CMPORDSD,
14754  IX86_BUILTIN_CMPUNORDSD,
14755  IX86_BUILTIN_CMPNESD,
14756
14757  IX86_BUILTIN_COMIEQSD,
14758  IX86_BUILTIN_COMILTSD,
14759  IX86_BUILTIN_COMILESD,
14760  IX86_BUILTIN_COMIGTSD,
14761  IX86_BUILTIN_COMIGESD,
14762  IX86_BUILTIN_COMINEQSD,
14763  IX86_BUILTIN_UCOMIEQSD,
14764  IX86_BUILTIN_UCOMILTSD,
14765  IX86_BUILTIN_UCOMILESD,
14766  IX86_BUILTIN_UCOMIGTSD,
14767  IX86_BUILTIN_UCOMIGESD,
14768  IX86_BUILTIN_UCOMINEQSD,
14769
14770  IX86_BUILTIN_MAXPD,
14771  IX86_BUILTIN_MAXSD,
14772  IX86_BUILTIN_MINPD,
14773  IX86_BUILTIN_MINSD,
14774
14775  IX86_BUILTIN_ANDPD,
14776  IX86_BUILTIN_ANDNPD,
14777  IX86_BUILTIN_ORPD,
14778  IX86_BUILTIN_XORPD,
14779
14780  IX86_BUILTIN_SQRTPD,
14781  IX86_BUILTIN_SQRTSD,
14782
14783  IX86_BUILTIN_UNPCKHPD,
14784  IX86_BUILTIN_UNPCKLPD,
14785
14786  IX86_BUILTIN_SHUFPD,
14787
14788  IX86_BUILTIN_LOADUPD,
14789  IX86_BUILTIN_STOREUPD,
14790  IX86_BUILTIN_MOVSD,
14791
14792  IX86_BUILTIN_LOADHPD,
14793  IX86_BUILTIN_LOADLPD,
14794
14795  IX86_BUILTIN_CVTDQ2PD,
14796  IX86_BUILTIN_CVTDQ2PS,
14797
14798  IX86_BUILTIN_CVTPD2DQ,
14799  IX86_BUILTIN_CVTPD2PI,
14800  IX86_BUILTIN_CVTPD2PS,
14801  IX86_BUILTIN_CVTTPD2DQ,
14802  IX86_BUILTIN_CVTTPD2PI,
14803
14804  IX86_BUILTIN_CVTPI2PD,
14805  IX86_BUILTIN_CVTSI2SD,
14806  IX86_BUILTIN_CVTSI642SD,
14807
14808  IX86_BUILTIN_CVTSD2SI,
14809  IX86_BUILTIN_CVTSD2SI64,
14810  IX86_BUILTIN_CVTSD2SS,
14811  IX86_BUILTIN_CVTSS2SD,
14812  IX86_BUILTIN_CVTTSD2SI,
14813  IX86_BUILTIN_CVTTSD2SI64,
14814
14815  IX86_BUILTIN_CVTPS2DQ,
14816  IX86_BUILTIN_CVTPS2PD,
14817  IX86_BUILTIN_CVTTPS2DQ,
14818
14819  IX86_BUILTIN_MOVNTI,
14820  IX86_BUILTIN_MOVNTPD,
14821  IX86_BUILTIN_MOVNTDQ,
14822
14823  /* SSE2 MMX */
14824  IX86_BUILTIN_MASKMOVDQU,
14825  IX86_BUILTIN_MOVMSKPD,
14826  IX86_BUILTIN_PMOVMSKB128,
14827
14828  IX86_BUILTIN_PACKSSWB128,
14829  IX86_BUILTIN_PACKSSDW128,
14830  IX86_BUILTIN_PACKUSWB128,
14831
14832  IX86_BUILTIN_PADDB128,
14833  IX86_BUILTIN_PADDW128,
14834  IX86_BUILTIN_PADDD128,
14835  IX86_BUILTIN_PADDQ128,
14836  IX86_BUILTIN_PADDSB128,
14837  IX86_BUILTIN_PADDSW128,
14838  IX86_BUILTIN_PADDUSB128,
14839  IX86_BUILTIN_PADDUSW128,
14840  IX86_BUILTIN_PSUBB128,
14841  IX86_BUILTIN_PSUBW128,
14842  IX86_BUILTIN_PSUBD128,
14843  IX86_BUILTIN_PSUBQ128,
14844  IX86_BUILTIN_PSUBSB128,
14845  IX86_BUILTIN_PSUBSW128,
14846  IX86_BUILTIN_PSUBUSB128,
14847  IX86_BUILTIN_PSUBUSW128,
14848
14849  IX86_BUILTIN_PAND128,
14850  IX86_BUILTIN_PANDN128,
14851  IX86_BUILTIN_POR128,
14852  IX86_BUILTIN_PXOR128,
14853
14854  IX86_BUILTIN_PAVGB128,
14855  IX86_BUILTIN_PAVGW128,
14856
14857  IX86_BUILTIN_PCMPEQB128,
14858  IX86_BUILTIN_PCMPEQW128,
14859  IX86_BUILTIN_PCMPEQD128,
14860  IX86_BUILTIN_PCMPGTB128,
14861  IX86_BUILTIN_PCMPGTW128,
14862  IX86_BUILTIN_PCMPGTD128,
14863
14864  IX86_BUILTIN_PMADDWD128,
14865
14866  IX86_BUILTIN_PMAXSW128,
14867  IX86_BUILTIN_PMAXUB128,
14868  IX86_BUILTIN_PMINSW128,
14869  IX86_BUILTIN_PMINUB128,
14870
14871  IX86_BUILTIN_PMULUDQ,
14872  IX86_BUILTIN_PMULUDQ128,
14873  IX86_BUILTIN_PMULHUW128,
14874  IX86_BUILTIN_PMULHW128,
14875  IX86_BUILTIN_PMULLW128,
14876
14877  IX86_BUILTIN_PSADBW128,
14878  IX86_BUILTIN_PSHUFHW,
14879  IX86_BUILTIN_PSHUFLW,
14880  IX86_BUILTIN_PSHUFD,
14881
14882  IX86_BUILTIN_PSLLW128,
14883  IX86_BUILTIN_PSLLD128,
14884  IX86_BUILTIN_PSLLQ128,
14885  IX86_BUILTIN_PSRAW128,
14886  IX86_BUILTIN_PSRAD128,
14887  IX86_BUILTIN_PSRLW128,
14888  IX86_BUILTIN_PSRLD128,
14889  IX86_BUILTIN_PSRLQ128,
14890  IX86_BUILTIN_PSLLDQI128,
14891  IX86_BUILTIN_PSLLWI128,
14892  IX86_BUILTIN_PSLLDI128,
14893  IX86_BUILTIN_PSLLQI128,
14894  IX86_BUILTIN_PSRAWI128,
14895  IX86_BUILTIN_PSRADI128,
14896  IX86_BUILTIN_PSRLDQI128,
14897  IX86_BUILTIN_PSRLWI128,
14898  IX86_BUILTIN_PSRLDI128,
14899  IX86_BUILTIN_PSRLQI128,
14900
14901  IX86_BUILTIN_PUNPCKHBW128,
14902  IX86_BUILTIN_PUNPCKHWD128,
14903  IX86_BUILTIN_PUNPCKHDQ128,
14904  IX86_BUILTIN_PUNPCKHQDQ128,
14905  IX86_BUILTIN_PUNPCKLBW128,
14906  IX86_BUILTIN_PUNPCKLWD128,
14907  IX86_BUILTIN_PUNPCKLDQ128,
14908  IX86_BUILTIN_PUNPCKLQDQ128,
14909
14910  IX86_BUILTIN_CLFLUSH,
14911  IX86_BUILTIN_MFENCE,
14912  IX86_BUILTIN_LFENCE,
14913
14914  /* Prescott New Instructions.  */
14915  IX86_BUILTIN_ADDSUBPS,
14916  IX86_BUILTIN_HADDPS,
14917  IX86_BUILTIN_HSUBPS,
14918  IX86_BUILTIN_MOVSHDUP,
14919  IX86_BUILTIN_MOVSLDUP,
14920  IX86_BUILTIN_ADDSUBPD,
14921  IX86_BUILTIN_HADDPD,
14922  IX86_BUILTIN_HSUBPD,
14923  IX86_BUILTIN_LDDQU,
14924
14925  IX86_BUILTIN_MONITOR,
14926  IX86_BUILTIN_MWAIT,
14927
14928  /* SSSE3.  */
14929  IX86_BUILTIN_PHADDW,
14930  IX86_BUILTIN_PHADDD,
14931  IX86_BUILTIN_PHADDSW,
14932  IX86_BUILTIN_PHSUBW,
14933  IX86_BUILTIN_PHSUBD,
14934  IX86_BUILTIN_PHSUBSW,
14935  IX86_BUILTIN_PMADDUBSW,
14936  IX86_BUILTIN_PMULHRSW,
14937  IX86_BUILTIN_PSHUFB,
14938  IX86_BUILTIN_PSIGNB,
14939  IX86_BUILTIN_PSIGNW,
14940  IX86_BUILTIN_PSIGND,
14941  IX86_BUILTIN_PALIGNR,
14942  IX86_BUILTIN_PABSB,
14943  IX86_BUILTIN_PABSW,
14944  IX86_BUILTIN_PABSD,
14945
14946  IX86_BUILTIN_PHADDW128,
14947  IX86_BUILTIN_PHADDD128,
14948  IX86_BUILTIN_PHADDSW128,
14949  IX86_BUILTIN_PHSUBW128,
14950  IX86_BUILTIN_PHSUBD128,
14951  IX86_BUILTIN_PHSUBSW128,
14952  IX86_BUILTIN_PMADDUBSW128,
14953  IX86_BUILTIN_PMULHRSW128,
14954  IX86_BUILTIN_PSHUFB128,
14955  IX86_BUILTIN_PSIGNB128,
14956  IX86_BUILTIN_PSIGNW128,
14957  IX86_BUILTIN_PSIGND128,
14958  IX86_BUILTIN_PALIGNR128,
14959  IX86_BUILTIN_PABSB128,
14960  IX86_BUILTIN_PABSW128,
14961  IX86_BUILTIN_PABSD128,
14962
14963  /* AMDFAM10 - SSE4A New Instructions.  */
14964  IX86_BUILTIN_MOVNTSD,
14965  IX86_BUILTIN_MOVNTSS,
14966  IX86_BUILTIN_EXTRQI,
14967  IX86_BUILTIN_EXTRQ,
14968  IX86_BUILTIN_INSERTQI,
14969  IX86_BUILTIN_INSERTQ,
14970
14971  IX86_BUILTIN_VEC_INIT_V2SI,
14972  IX86_BUILTIN_VEC_INIT_V4HI,
14973  IX86_BUILTIN_VEC_INIT_V8QI,
14974  IX86_BUILTIN_VEC_EXT_V2DF,
14975  IX86_BUILTIN_VEC_EXT_V2DI,
14976  IX86_BUILTIN_VEC_EXT_V4SF,
14977  IX86_BUILTIN_VEC_EXT_V4SI,
14978  IX86_BUILTIN_VEC_EXT_V8HI,
14979  IX86_BUILTIN_VEC_EXT_V16QI,
14980  IX86_BUILTIN_VEC_EXT_V2SI,
14981  IX86_BUILTIN_VEC_EXT_V4HI,
14982  IX86_BUILTIN_VEC_SET_V8HI,
14983  IX86_BUILTIN_VEC_SET_V4HI,
14984
14985  IX86_BUILTIN_MAX
14986};
14987
14988#define def_builtin(MASK, NAME, TYPE, CODE)				\
14989do {									\
14990  if ((MASK) & target_flags						\
14991      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
14992    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
14993				 NULL, NULL_TREE);			\
14994} while (0)
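
/* A sketch of a direct use (the bulk of the builtins below are actually
   registered by looping over the bdesc_* tables and computing the type):

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   The builtin is only registered when the corresponding ISA bit is set in
   target_flags, and 64-bit-only builtins additionally require
   TARGET_64BIT.  */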
14995
14996/* Bits for builtin_description.flag.  */
14997
/* Set when we don't support the comparison natively, and should
   swap the comparison operands in order to support it.  */
15000#define BUILTIN_DESC_SWAP_OPERANDS	1
15001
15002struct builtin_description
15003{
15004  const unsigned int mask;
15005  const enum insn_code icode;
15006  const char *const name;
15007  const enum ix86_builtins code;
15008  const enum rtx_code comparison;
15009  const unsigned int flag;
15010};
15011
15012static const struct builtin_description bdesc_comi[] =
15013{
15014  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
15015  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
15016  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
15017  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
15018  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
15019  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
15020  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
15021  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
15022  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
15023  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
15024  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
15025  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
15026  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
15027  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
15028  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
15029  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
15030  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
15031  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
15032  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
15033  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
15034  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
15035  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
15036  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
15037  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
15038};
15039
15040static const struct builtin_description bdesc_2arg[] =
15041{
15042  /* SSE */
15043  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
15044  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
15045  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
15046  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
15047  { MASK_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
15048  { MASK_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
15049  { MASK_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
15050  { MASK_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
15051
15052  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
15053  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
15054  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
15055  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
15056    BUILTIN_DESC_SWAP_OPERANDS },
15057  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
15058    BUILTIN_DESC_SWAP_OPERANDS },
15059  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
15060  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
15061  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
15062  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
15063  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
15064    BUILTIN_DESC_SWAP_OPERANDS },
15065  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
15066    BUILTIN_DESC_SWAP_OPERANDS },
15067  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
15068  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
15069  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
15070  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
15071  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
15072  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
15073  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
15074  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
15075  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
15076    BUILTIN_DESC_SWAP_OPERANDS },
15077  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
15078    BUILTIN_DESC_SWAP_OPERANDS },
15079  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
15080
15081  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
15082  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
15083  { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
15084  { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
15085
15086  { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
15087  { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
15088  { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
15089  { MASK_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
15090
15091  { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
15092  { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
15093  { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
15094  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
15095  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
15096
15097  /* MMX */
15098  { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
15099  { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
15100  { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
15101  { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
15102  { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
15103  { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
15104  { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
15105  { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
15106
15107  { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
15108  { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
15109  { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
15110  { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
15111  { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
15112  { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
15113  { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
15114  { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
15115
15116  { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
15117  { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
15118  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
15119
15120  { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
15121  { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
15122  { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
15123  { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
15124
15125  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
15126  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
15127
15128  { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
15129  { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
15130  { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
15131  { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
15132  { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
15133  { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
15134
15135  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
15136  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
15137  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
15138  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
15139
15140  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
15141  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
15142  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
15143  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
15144  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
15145  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
15146
15147  /* Special.  */
15148  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
15149  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
15150  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
15151
15152  { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
15153  { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
15154  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
15155
15156  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
15157  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
15158  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
15159  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
15160  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
15161  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
15162
15163  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
15164  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
15165  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
15166  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
15167  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
15168  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
15169
15170  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
15171  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
15172  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
15173  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
15174
15175  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
15176  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
15177
15178  /* SSE2 */
15179  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
15180  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
15181  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
15182  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
15183  { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
15184  { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
15185  { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
15186  { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
15187
15188  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
15189  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
15190  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
15191  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
15192    BUILTIN_DESC_SWAP_OPERANDS },
15193  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
15194    BUILTIN_DESC_SWAP_OPERANDS },
15195  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
15196  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
15197  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
15198  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
15199  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
15200    BUILTIN_DESC_SWAP_OPERANDS },
15201  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
15202    BUILTIN_DESC_SWAP_OPERANDS },
15203  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
15204  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
15205  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
15206  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
15207  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
15208  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
15209  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
15210  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
15211  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
15212
15213  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
15214  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
15215  { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
15216  { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
15217
15218  { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
15219  { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
15220  { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
15221  { MASK_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
15222
15223  { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15224  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15225  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15226
15227  /* SSE2 MMX */
15228  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15229  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15230  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15231  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15232  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15233  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15234  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15235  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15236
  { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15245
15246  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15247  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15248
15249  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15250  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15251  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15252  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15253
15254  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15255  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15256
15257  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15258  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15259  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15260  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15261  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15262  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15263
15264  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15265  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15266  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15267  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15268
15269  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15270  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15271  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15272  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15273  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15274  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15275  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15276  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15277
15278  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15279  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15280  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15281
15282  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15283  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15284
15285  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15286  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15287
15288  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15289  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15290  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15291
15292  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15293  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15294  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15295
15296  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15297  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15298
15299  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15300
15301  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15302  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15303  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15304  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15305
  /* SSE3 */
15307  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15308  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15309  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15310  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15311  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15312  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15313
15314  /* SSSE3 */
15315  { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15316  { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15317  { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15318  { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15319  { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15320  { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15321  { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15322  { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15323  { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15324  { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15325  { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15326  { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15327  { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15328  { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15329  { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15330  { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15331  { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15332  { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15333  { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15334  { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15335  { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15336  { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15337  { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15338  { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15339};
15340
15341static const struct builtin_description bdesc_1arg[] =
15342{
15343  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15344  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15345
15346  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15347  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15348  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15349
15350  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15351  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15352  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15353  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15354  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15355  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15356
15357  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15358  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15359
15360  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15361
15362  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15363  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15364
15365  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15366  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15367  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15368  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15369  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15370
15371  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15372
15373  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15374  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15375  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15376  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15377
15378  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15379  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15380  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15381
15382  /* SSE3 */
15383  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15384  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15385
15386  /* SSSE3 */
15387  { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15388  { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15389  { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15390  { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15391  { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15392  { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15393};
15394
15395static void
15396ix86_init_builtins (void)
15397{
15398  if (TARGET_MMX)
15399    ix86_init_mmx_sse_builtins ();
15400}
15401
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only the MMX builtins
   are defined.  */
15405static void
15406ix86_init_mmx_sse_builtins (void)
15407{
15408  const struct builtin_description * d;
15409  size_t i;
15410
15411  tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
15412  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15413  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15414  tree V2DI_type_node
15415    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15416  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15417  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15418  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15419  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15420  tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
15421  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15422
15423  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node
    = build_pointer_type (build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node
    = build_pointer_type (build_type_variant (float_type_node, 1, 0));
15429  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15430  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15431  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15432
15433  /* Comparisons.  */
15434  tree int_ftype_v4sf_v4sf
15435    = build_function_type_list (integer_type_node,
15436				V4SF_type_node, V4SF_type_node, NULL_TREE);
15437  tree v4si_ftype_v4sf_v4sf
15438    = build_function_type_list (V4SI_type_node,
15439				V4SF_type_node, V4SF_type_node, NULL_TREE);
15440  /* MMX/SSE/integer conversions.  */
15441  tree int_ftype_v4sf
15442    = build_function_type_list (integer_type_node,
15443				V4SF_type_node, NULL_TREE);
15444  tree int64_ftype_v4sf
15445    = build_function_type_list (long_long_integer_type_node,
15446				V4SF_type_node, NULL_TREE);
15447  tree int_ftype_v8qi
15448    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15449  tree v4sf_ftype_v4sf_int
15450    = build_function_type_list (V4SF_type_node,
15451				V4SF_type_node, integer_type_node, NULL_TREE);
15452  tree v4sf_ftype_v4sf_int64
15453    = build_function_type_list (V4SF_type_node,
15454				V4SF_type_node, long_long_integer_type_node,
15455				NULL_TREE);
15456  tree v4sf_ftype_v4sf_v2si
15457    = build_function_type_list (V4SF_type_node,
15458				V4SF_type_node, V2SI_type_node, NULL_TREE);
15459
15460  /* Miscellaneous.  */
15461  tree v8qi_ftype_v4hi_v4hi
15462    = build_function_type_list (V8QI_type_node,
15463				V4HI_type_node, V4HI_type_node, NULL_TREE);
15464  tree v4hi_ftype_v2si_v2si
15465    = build_function_type_list (V4HI_type_node,
15466				V2SI_type_node, V2SI_type_node, NULL_TREE);
15467  tree v4sf_ftype_v4sf_v4sf_int
15468    = build_function_type_list (V4SF_type_node,
15469				V4SF_type_node, V4SF_type_node,
15470				integer_type_node, NULL_TREE);
15471  tree v2si_ftype_v4hi_v4hi
15472    = build_function_type_list (V2SI_type_node,
15473				V4HI_type_node, V4HI_type_node, NULL_TREE);
15474  tree v4hi_ftype_v4hi_int
15475    = build_function_type_list (V4HI_type_node,
15476				V4HI_type_node, integer_type_node, NULL_TREE);
15477  tree v4hi_ftype_v4hi_di
15478    = build_function_type_list (V4HI_type_node,
15479				V4HI_type_node, long_long_unsigned_type_node,
15480				NULL_TREE);
15481  tree v2si_ftype_v2si_di
15482    = build_function_type_list (V2SI_type_node,
15483				V2SI_type_node, long_long_unsigned_type_node,
15484				NULL_TREE);
15485  tree void_ftype_void
15486    = build_function_type (void_type_node, void_list_node);
15487  tree void_ftype_unsigned
15488    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15489  tree void_ftype_unsigned_unsigned
15490    = build_function_type_list (void_type_node, unsigned_type_node,
15491				unsigned_type_node, NULL_TREE);
15492  tree void_ftype_pcvoid_unsigned_unsigned
15493    = build_function_type_list (void_type_node, const_ptr_type_node,
15494				unsigned_type_node, unsigned_type_node,
15495				NULL_TREE);
15496  tree unsigned_ftype_void
15497    = build_function_type (unsigned_type_node, void_list_node);
15498  tree v2si_ftype_v4sf
15499    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15500  /* Loads/stores.  */
15501  tree void_ftype_v8qi_v8qi_pchar
15502    = build_function_type_list (void_type_node,
15503				V8QI_type_node, V8QI_type_node,
15504				pchar_type_node, NULL_TREE);
15505  tree v4sf_ftype_pcfloat
15506    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15507  /* @@@ the type is bogus */
15508  tree v4sf_ftype_v4sf_pv2si
15509    = build_function_type_list (V4SF_type_node,
15510				V4SF_type_node, pv2si_type_node, NULL_TREE);
15511  tree void_ftype_pv2si_v4sf
15512    = build_function_type_list (void_type_node,
15513				pv2si_type_node, V4SF_type_node, NULL_TREE);
15514  tree void_ftype_pfloat_v4sf
15515    = build_function_type_list (void_type_node,
15516				pfloat_type_node, V4SF_type_node, NULL_TREE);
15517  tree void_ftype_pdi_di
15518    = build_function_type_list (void_type_node,
15519				pdi_type_node, long_long_unsigned_type_node,
15520				NULL_TREE);
15521  tree void_ftype_pv2di_v2di
15522    = build_function_type_list (void_type_node,
15523				pv2di_type_node, V2DI_type_node, NULL_TREE);
15524  /* Normal vector unops.  */
15525  tree v4sf_ftype_v4sf
15526    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15527  tree v16qi_ftype_v16qi
15528    = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15529  tree v8hi_ftype_v8hi
15530    = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15531  tree v4si_ftype_v4si
15532    = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15533  tree v8qi_ftype_v8qi
15534    = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15535  tree v4hi_ftype_v4hi
15536    = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15537
15538  /* Normal vector binops.  */
15539  tree v4sf_ftype_v4sf_v4sf
15540    = build_function_type_list (V4SF_type_node,
15541				V4SF_type_node, V4SF_type_node, NULL_TREE);
15542  tree v8qi_ftype_v8qi_v8qi
15543    = build_function_type_list (V8QI_type_node,
15544				V8QI_type_node, V8QI_type_node, NULL_TREE);
15545  tree v4hi_ftype_v4hi_v4hi
15546    = build_function_type_list (V4HI_type_node,
15547				V4HI_type_node, V4HI_type_node, NULL_TREE);
15548  tree v2si_ftype_v2si_v2si
15549    = build_function_type_list (V2SI_type_node,
15550				V2SI_type_node, V2SI_type_node, NULL_TREE);
15551  tree di_ftype_di_di
15552    = build_function_type_list (long_long_unsigned_type_node,
15553				long_long_unsigned_type_node,
15554				long_long_unsigned_type_node, NULL_TREE);
15555
15556  tree di_ftype_di_di_int
15557    = build_function_type_list (long_long_unsigned_type_node,
15558				long_long_unsigned_type_node,
15559				long_long_unsigned_type_node,
15560				integer_type_node, NULL_TREE);
15561
15562  tree v2si_ftype_v2sf
15563    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15564  tree v2sf_ftype_v2si
15565    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15566  tree v2si_ftype_v2si
15567    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15568  tree v2sf_ftype_v2sf
15569    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15570  tree v2sf_ftype_v2sf_v2sf
15571    = build_function_type_list (V2SF_type_node,
15572				V2SF_type_node, V2SF_type_node, NULL_TREE);
15573  tree v2si_ftype_v2sf_v2sf
15574    = build_function_type_list (V2SI_type_node,
15575				V2SF_type_node, V2SF_type_node, NULL_TREE);
15576  tree pint_type_node    = build_pointer_type (integer_type_node);
15577  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node
    = build_pointer_type (build_type_variant (double_type_node, 1, 0));
15580  tree int_ftype_v2df_v2df
15581    = build_function_type_list (integer_type_node,
15582				V2DF_type_node, V2DF_type_node, NULL_TREE);
15583
15584  tree void_ftype_pcvoid
15585    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15586  tree v4sf_ftype_v4si
15587    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15588  tree v4si_ftype_v4sf
15589    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15590  tree v2df_ftype_v4si
15591    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15592  tree v4si_ftype_v2df
15593    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15594  tree v2si_ftype_v2df
15595    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15596  tree v4sf_ftype_v2df
15597    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15598  tree v2df_ftype_v2si
15599    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15600  tree v2df_ftype_v4sf
15601    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15602  tree int_ftype_v2df
15603    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15604  tree int64_ftype_v2df
15605    = build_function_type_list (long_long_integer_type_node,
15606				V2DF_type_node, NULL_TREE);
15607  tree v2df_ftype_v2df_int
15608    = build_function_type_list (V2DF_type_node,
15609				V2DF_type_node, integer_type_node, NULL_TREE);
15610  tree v2df_ftype_v2df_int64
15611    = build_function_type_list (V2DF_type_node,
15612				V2DF_type_node, long_long_integer_type_node,
15613				NULL_TREE);
15614  tree v4sf_ftype_v4sf_v2df
15615    = build_function_type_list (V4SF_type_node,
15616				V4SF_type_node, V2DF_type_node, NULL_TREE);
15617  tree v2df_ftype_v2df_v4sf
15618    = build_function_type_list (V2DF_type_node,
15619				V2DF_type_node, V4SF_type_node, NULL_TREE);
15620  tree v2df_ftype_v2df_v2df_int
15621    = build_function_type_list (V2DF_type_node,
15622				V2DF_type_node, V2DF_type_node,
15623				integer_type_node,
15624				NULL_TREE);
15625  tree v2df_ftype_v2df_pcdouble
15626    = build_function_type_list (V2DF_type_node,
15627				V2DF_type_node, pcdouble_type_node, NULL_TREE);
15628  tree void_ftype_pdouble_v2df
15629    = build_function_type_list (void_type_node,
15630				pdouble_type_node, V2DF_type_node, NULL_TREE);
15631  tree void_ftype_pint_int
15632    = build_function_type_list (void_type_node,
15633				pint_type_node, integer_type_node, NULL_TREE);
15634  tree void_ftype_v16qi_v16qi_pchar
15635    = build_function_type_list (void_type_node,
15636				V16QI_type_node, V16QI_type_node,
15637				pchar_type_node, NULL_TREE);
15638  tree v2df_ftype_pcdouble
15639    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15640  tree v2df_ftype_v2df_v2df
15641    = build_function_type_list (V2DF_type_node,
15642				V2DF_type_node, V2DF_type_node, NULL_TREE);
15643  tree v16qi_ftype_v16qi_v16qi
15644    = build_function_type_list (V16QI_type_node,
15645				V16QI_type_node, V16QI_type_node, NULL_TREE);
15646  tree v8hi_ftype_v8hi_v8hi
15647    = build_function_type_list (V8HI_type_node,
15648				V8HI_type_node, V8HI_type_node, NULL_TREE);
15649  tree v4si_ftype_v4si_v4si
15650    = build_function_type_list (V4SI_type_node,
15651				V4SI_type_node, V4SI_type_node, NULL_TREE);
15652  tree v2di_ftype_v2di_v2di
15653    = build_function_type_list (V2DI_type_node,
15654				V2DI_type_node, V2DI_type_node, NULL_TREE);
15655  tree v2di_ftype_v2df_v2df
15656    = build_function_type_list (V2DI_type_node,
15657				V2DF_type_node, V2DF_type_node, NULL_TREE);
15658  tree v2df_ftype_v2df
15659    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15660  tree v2di_ftype_v2di_int
15661    = build_function_type_list (V2DI_type_node,
15662				V2DI_type_node, integer_type_node, NULL_TREE);
15663  tree v2di_ftype_v2di_v2di_int
15664    = build_function_type_list (V2DI_type_node, V2DI_type_node,
15665				V2DI_type_node, integer_type_node, NULL_TREE);
15666  tree v4si_ftype_v4si_int
15667    = build_function_type_list (V4SI_type_node,
15668				V4SI_type_node, integer_type_node, NULL_TREE);
15669  tree v8hi_ftype_v8hi_int
15670    = build_function_type_list (V8HI_type_node,
15671				V8HI_type_node, integer_type_node, NULL_TREE);
15672  tree v4si_ftype_v8hi_v8hi
15673    = build_function_type_list (V4SI_type_node,
15674				V8HI_type_node, V8HI_type_node, NULL_TREE);
15675  tree di_ftype_v8qi_v8qi
15676    = build_function_type_list (long_long_unsigned_type_node,
15677				V8QI_type_node, V8QI_type_node, NULL_TREE);
15678  tree di_ftype_v2si_v2si
15679    = build_function_type_list (long_long_unsigned_type_node,
15680				V2SI_type_node, V2SI_type_node, NULL_TREE);
15681  tree v2di_ftype_v16qi_v16qi
15682    = build_function_type_list (V2DI_type_node,
15683				V16QI_type_node, V16QI_type_node, NULL_TREE);
15684  tree v2di_ftype_v4si_v4si
15685    = build_function_type_list (V2DI_type_node,
15686				V4SI_type_node, V4SI_type_node, NULL_TREE);
15687  tree int_ftype_v16qi
15688    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15689  tree v16qi_ftype_pcchar
15690    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15691  tree void_ftype_pchar_v16qi
15692    = build_function_type_list (void_type_node,
15693			        pchar_type_node, V16QI_type_node, NULL_TREE);
15694
15695  tree v2di_ftype_v2di_unsigned_unsigned
15696    = build_function_type_list (V2DI_type_node, V2DI_type_node,
15697                                unsigned_type_node, unsigned_type_node,
15698                                NULL_TREE);
15699  tree v2di_ftype_v2di_v2di_unsigned_unsigned
15700    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
15701                                unsigned_type_node, unsigned_type_node,
15702                                NULL_TREE);
15703  tree v2di_ftype_v2di_v16qi
15704    = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
15705                                NULL_TREE);
15706
15707  tree float80_type;
15708  tree float128_type;
15709  tree ftype;
15710
15711  /* The __float80 type.  */
15712  if (TYPE_MODE (long_double_type_node) == XFmode)
15713    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15714					       "__float80");
15715  else
15716    {
      /* long double is not 80-bit extended here, so build a distinct
	 80-bit REAL_TYPE for __float80.  */
15718      float80_type = make_node (REAL_TYPE);
15719      TYPE_PRECISION (float80_type) = 80;
15720      layout_type (float80_type);
15721      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15722    }
15723
15724  if (TARGET_64BIT)
15725    {
15726      float128_type = make_node (REAL_TYPE);
15727      TYPE_PRECISION (float128_type) = 128;
15728      layout_type (float128_type);
15729      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15730    }
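
  /* A rough sketch of what the registrations above provide at the C
     level: "__float80 x;" is accepted on any x86 target, and
     "__float128 q;" additionally when compiling for 64-bit.  */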
15731
15732  /* Add all builtins that are more or less simple operations on two
15733     operands.  */
15734  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15735    {
15736      /* Use one of the operands; the target can have a different mode for
15737	 mask-generating compares.  */
15738      enum machine_mode mode;
15739      tree type;
15740
15741      if (d->name == 0)
15742	continue;
15743      mode = insn_data[d->icode].operand[1].mode;
15744
15745      switch (mode)
15746	{
15747	case V16QImode:
15748	  type = v16qi_ftype_v16qi_v16qi;
15749	  break;
15750	case V8HImode:
15751	  type = v8hi_ftype_v8hi_v8hi;
15752	  break;
15753	case V4SImode:
15754	  type = v4si_ftype_v4si_v4si;
15755	  break;
15756	case V2DImode:
15757	  type = v2di_ftype_v2di_v2di;
15758	  break;
15759	case V2DFmode:
15760	  type = v2df_ftype_v2df_v2df;
15761	  break;
15762	case V4SFmode:
15763	  type = v4sf_ftype_v4sf_v4sf;
15764	  break;
15765	case V8QImode:
15766	  type = v8qi_ftype_v8qi_v8qi;
15767	  break;
15768	case V4HImode:
15769	  type = v4hi_ftype_v4hi_v4hi;
15770	  break;
15771	case V2SImode:
15772	  type = v2si_ftype_v2si_v2si;
15773	  break;
15774	case DImode:
15775	  type = di_ftype_di_di;
15776	  break;
15777
15778	default:
15779	  gcc_unreachable ();
15780	}
15781
15782      /* Override for comparisons.  */
15783      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15784	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15785	type = v4si_ftype_v4sf_v4sf;
15786
15787      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15788	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15789	type = v2di_ftype_v2df_v2df;
15790
15791      def_builtin (d->mask, d->name, type, d->code);
15792    }
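
  /* For instance, the bdesc_2arg entry for "__builtin_ia32_addps" has a
     V4SFmode operand, so the loop above registers it with the prototype
     v4sf_ftype_v4sf_v4sf, i.e. v4sf __builtin_ia32_addps (v4sf, v4sf).  */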
15793
  /* Add all builtins that are more or less simple operations on one operand.  */
15795  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15796    {
15797      enum machine_mode mode;
15798      tree type;
15799
15800      if (d->name == 0)
15801	continue;
15802      mode = insn_data[d->icode].operand[1].mode;
15803
15804      switch (mode)
15805	{
15806	case V16QImode:
15807	  type = v16qi_ftype_v16qi;
15808	  break;
15809	case V8HImode:
15810	  type = v8hi_ftype_v8hi;
15811	  break;
15812	case V4SImode:
15813	  type = v4si_ftype_v4si;
15814	  break;
15815	case V2DFmode:
15816	  type = v2df_ftype_v2df;
15817	  break;
15818	case V4SFmode:
15819	  type = v4sf_ftype_v4sf;
15820	  break;
15821	case V8QImode:
15822	  type = v8qi_ftype_v8qi;
15823	  break;
15824	case V4HImode:
15825	  type = v4hi_ftype_v4hi;
15826	  break;
15827	case V2SImode:
15828	  type = v2si_ftype_v2si;
15829	  break;
15830
15831	default:
	  gcc_unreachable ();
15833	}
15834
15835      def_builtin (d->mask, d->name, type, d->code);
15836    }
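
  /* Likewise for the one-operand table: e.g. "__builtin_ia32_pabsb128"
     takes a V16QImode operand and is therefore registered as
     v16qi_ftype_v16qi.  */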
15837
15838  /* Add the remaining MMX insns with somewhat more complicated types.  */
15839  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15840  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15841  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15842  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15843
15844  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15845  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15846  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15847
15848  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15849  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
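
  /* The MMX shift builtins above also appear in bdesc_2arg with a null
     name (the "Special" block); they are expanded through that table but
     declared here by hand because their second argument is a DImode
     shift count rather than another vector.  */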
15850
15851  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15852  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15853
15854  /* comi/ucomi insns.  */
15855  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15856    if (d->mask == MASK_SSE2)
15857      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15858    else
15859      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
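
  /* E.g. "__builtin_ia32_comieq" ends up with the int (v4sf, v4sf)
     prototype, while the SSE2 "__builtin_ia32_comisdeq" variant gets
     int (v2df, v2df).  */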
15860
15861  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15862  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15863  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15864
15865  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15866  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15867  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15868  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15869  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15870  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15871  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15872  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15873  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15874  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15875  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15876
15877  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15878
15879  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15880  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15881
15882  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15883  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15884  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15885  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15886
15887  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15888  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15889  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15890  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15891
15892  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15893
15894  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15895
15896  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15897  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15898  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15899  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15900  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15901  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15902
15903  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15904
15905  /* Original 3DNow!  */
15906  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15907  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15908  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15909  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15910  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15911  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15912  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15913  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15914  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15915  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15916  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15917  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15918  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15919  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15920  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15921  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15922  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15923  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15924  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15925  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15926
15927  /* 3DNow! extension as used in the Athlon CPU.  */
15928  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15929  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15930  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15931  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15932  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15933  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15934
15935  /* SSE2 */
15936  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15937
15938  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15939  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15940
15941  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15942  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15943
15944  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15945  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15946  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15947  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15948  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15949
15950  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15951  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15952  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15953  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15954
15955  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15956  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15957
15958  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15959
15960  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15961  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15962
15963  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15964  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15965  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15966  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15967  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15968
15969  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15970
15971  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15972  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15973  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15974  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15975
15976  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15977  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15978  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15979
15980  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15981  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15982  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15983  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15984
15985  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15986  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15987  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15988
15989  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15990  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15991
15992  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15993  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15994
15995  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
15996  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
15997  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15998
15999  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
16000  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
16001  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
16002
16003  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
16004  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
16005
16006  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
16007  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
16008  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
16009  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
16010
16011  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
16012  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
16013  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
16014  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
16015
16016  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
16017  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
16018
16019  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
16020
16021  /* Prescott New Instructions.  */
16022  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
16023	       void_ftype_pcvoid_unsigned_unsigned,
16024	       IX86_BUILTIN_MONITOR);
16025  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
16026	       void_ftype_unsigned_unsigned,
16027	       IX86_BUILTIN_MWAIT);
16028  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
16029	       v4sf_ftype_v4sf,
16030	       IX86_BUILTIN_MOVSHDUP);
16031  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
16032	       v4sf_ftype_v4sf,
16033	       IX86_BUILTIN_MOVSLDUP);
16034  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
16035	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
16036
16037  /* SSSE3.  */
16038  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
16039	       v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
16040  def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
16041	       IX86_BUILTIN_PALIGNR);
16042
16043  /* AMDFAM10 SSE4A new built-ins.  */
16044  def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
16045               void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
16046  def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
16047               void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
16048  def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
16049               v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
16050  def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
16051               v2di_ftype_v2di_v16qi,  IX86_BUILTIN_EXTRQ);
16052  def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
16053               v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
16054  def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
16055               v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
16056
16057  /* Access to the vec_init patterns.  */
16058  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
16059				    integer_type_node, NULL_TREE);
16060  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
16061	       ftype, IX86_BUILTIN_VEC_INIT_V2SI);
16062
16063  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
16064				    short_integer_type_node,
16065				    short_integer_type_node,
16066				    short_integer_type_node, NULL_TREE);
16067  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
16068	       ftype, IX86_BUILTIN_VEC_INIT_V4HI);
16069
16070  ftype = build_function_type_list (V8QI_type_node, char_type_node,
16071				    char_type_node, char_type_node,
16072				    char_type_node, char_type_node,
16073				    char_type_node, char_type_node,
16074				    char_type_node, NULL_TREE);
16075  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
16076	       ftype, IX86_BUILTIN_VEC_INIT_V8QI);
16077
16078  /* Access to the vec_extract patterns.  */
16079  ftype = build_function_type_list (double_type_node, V2DF_type_node,
16080				    integer_type_node, NULL_TREE);
16081  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
16082	       ftype, IX86_BUILTIN_VEC_EXT_V2DF);
16083
16084  ftype = build_function_type_list (long_long_integer_type_node,
16085				    V2DI_type_node, integer_type_node,
16086				    NULL_TREE);
16087  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
16088	       ftype, IX86_BUILTIN_VEC_EXT_V2DI);
16089
16090  ftype = build_function_type_list (float_type_node, V4SF_type_node,
16091				    integer_type_node, NULL_TREE);
16092  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
16093	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);
16094
16095  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16096				    integer_type_node, NULL_TREE);
16097  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
16098	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);
16099
16100  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16101				    integer_type_node, NULL_TREE);
16102  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
16103	       ftype, IX86_BUILTIN_VEC_EXT_V8HI);
16104
16105  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
16106				    integer_type_node, NULL_TREE);
16107  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
16108	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);
16109
16110  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
16111				    integer_type_node, NULL_TREE);
16112  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
16113	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);
16114
16115  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
16116				    integer_type_node, NULL_TREE);
16117  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
16118
16119  /* Access to the vec_set patterns.  */
16120  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16121				    intHI_type_node,
16122				    integer_type_node, NULL_TREE);
16123  def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
16124	       ftype, IX86_BUILTIN_VEC_SET_V8HI);
16125
16126  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
16127				    intHI_type_node,
16128				    integer_type_node, NULL_TREE);
16129  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
16130	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
16131}
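
/* Editorial illustration (not part of the original sources): the builtins
   registered above are normally reached through the intrinsics headers
   rather than called directly.  A user-level wrapper might look roughly
   like the following hypothetical sketch -- the real wrapper names and
   argument order live in mmintrin.h and friends, not in this file:

     static __inline __m64
     example_set_pi32 (int i1, int i0)
     {
       return (__m64) __builtin_ia32_vec_init_v2si (i0, i1);
     }

   Everything def_builtin records (mask, name, function type, code) is what
   makes such a call type-check and later expand through
   ix86_expand_builtin below.  */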
16132
16133/* Errors in the source file can cause expand_expr to return const0_rtx
16134   where we expect a vector.  To avoid crashing, use one of the vector
16135   clear instructions.  */
16136static rtx
16137safe_vector_operand (rtx x, enum machine_mode mode)
16138{
16139  if (x == const0_rtx)
16140    x = CONST0_RTX (mode);
16141  return x;
16142}
16143
16144/* Subroutine of ix86_expand_builtin to take care of binop insns.  */
16145
16146static rtx
16147ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
16148{
16149  rtx pat, xops[3];
16150  tree arg0 = TREE_VALUE (arglist);
16151  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16152  rtx op0 = expand_normal (arg0);
16153  rtx op1 = expand_normal (arg1);
16154  enum machine_mode tmode = insn_data[icode].operand[0].mode;
16155  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16156  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16157
16158  if (VECTOR_MODE_P (mode0))
16159    op0 = safe_vector_operand (op0, mode0);
16160  if (VECTOR_MODE_P (mode1))
16161    op1 = safe_vector_operand (op1, mode1);
16162
16163  if (optimize || !target
16164      || GET_MODE (target) != tmode
16165      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16166    target = gen_reg_rtx (tmode);
16167
16168  if (GET_MODE (op1) == SImode && mode1 == TImode)
16169    {
16170      rtx x = gen_reg_rtx (V4SImode);
16171      emit_insn (gen_sse2_loadd (x, op1));
16172      op1 = gen_lowpart (TImode, x);
16173    }
16174
16175  /* The insn must want input operands in the same modes as the
16176     result.  */
16177  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
16178	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
16179
16180  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16181    op0 = copy_to_mode_reg (mode0, op0);
16182  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16183    op1 = copy_to_mode_reg (mode1, op1);
16184
16185  /* ??? Using ix86_fixup_binary_operands is problematic when
16186     we've got mismatched modes.  Fake it.  */
16187
16188  xops[0] = target;
16189  xops[1] = op0;
16190  xops[2] = op1;
16191
16192  if (tmode == mode0 && tmode == mode1)
16193    {
16194      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
16195      op0 = xops[1];
16196      op1 = xops[2];
16197    }
16198  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
16199    {
16200      op0 = force_reg (mode0, op0);
16201      op1 = force_reg (mode1, op1);
16202      target = gen_reg_rtx (tmode);
16203    }
16204
16205  pat = GEN_FCN (icode) (target, op0, op1);
16206  if (! pat)
16207    return 0;
16208  emit_insn (pat);
16209  return target;
16210}
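
/* For illustration: callers pass the insn code of a two-operand pattern
   straight through, e.g. the 3DNow! PFADD case later in this file does

     return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);

   and this routine loads both arguments, fixes up modes and predicates,
   and emits the single generated insn.  */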
16211
16212/* Subroutine of ix86_expand_builtin to take care of stores.  */
16213
16214static rtx
16215ix86_expand_store_builtin (enum insn_code icode, tree arglist)
16216{
16217  rtx pat;
16218  tree arg0 = TREE_VALUE (arglist);
16219  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16220  rtx op0 = expand_normal (arg0);
16221  rtx op1 = expand_normal (arg1);
16222  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
16223  enum machine_mode mode1 = insn_data[icode].operand[1].mode;
16224
16225  if (VECTOR_MODE_P (mode1))
16226    op1 = safe_vector_operand (op1, mode1);
16227
16228  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16229  op1 = copy_to_mode_reg (mode1, op1);
16230
16231  pat = GEN_FCN (icode) (op0, op1);
16232  if (pat)
16233    emit_insn (pat);
16234  return 0;
16235}
16236
16237/* Subroutine of ix86_expand_builtin to take care of unop insns.  */
16238
16239static rtx
16240ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16241			  rtx target, int do_load)
16242{
16243  rtx pat;
16244  tree arg0 = TREE_VALUE (arglist);
16245  rtx op0 = expand_normal (arg0);
16246  enum machine_mode tmode = insn_data[icode].operand[0].mode;
16247  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16248
16249  if (optimize || !target
16250      || GET_MODE (target) != tmode
16251      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16252    target = gen_reg_rtx (tmode);
16253  if (do_load)
16254    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16255  else
16256    {
16257      if (VECTOR_MODE_P (mode0))
16258	op0 = safe_vector_operand (op0, mode0);
16259
16260      if ((optimize && !register_operand (op0, mode0))
16261	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16262	op0 = copy_to_mode_reg (mode0, op0);
16263    }
16264
16265  pat = GEN_FCN (icode) (target, op0);
16266  if (! pat)
16267    return 0;
16268  emit_insn (pat);
16269  return target;
16270}
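
/* For illustration: DO_LOAD distinguishes plain unary operations from the
   unaligned-load builtins.  LOADUPS, for instance, is expanded below as

     ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

   so its single pointer argument is wrapped in a MEM before the move
   pattern is generated, whereas builtins such as PF2ID pass 0 and operate
   on a register operand.  */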
16271
16272/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16273   sqrtss, rsqrtss, rcpss.  */
16274
16275static rtx
16276ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16277{
16278  rtx pat;
16279  tree arg0 = TREE_VALUE (arglist);
16280  rtx op1, op0 = expand_normal (arg0);
16281  enum machine_mode tmode = insn_data[icode].operand[0].mode;
16282  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16283
16284  if (optimize || !target
16285      || GET_MODE (target) != tmode
16286      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16287    target = gen_reg_rtx (tmode);
16288
16289  if (VECTOR_MODE_P (mode0))
16290    op0 = safe_vector_operand (op0, mode0);
16291
16292  if ((optimize && !register_operand (op0, mode0))
16293      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16294    op0 = copy_to_mode_reg (mode0, op0);
16295
16296  op1 = op0;
16297  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16298    op1 = copy_to_mode_reg (mode0, op1);
16299
16300  pat = GEN_FCN (icode) (target, op0, op1);
16301  if (! pat)
16302    return 0;
16303  emit_insn (pat);
16304  return target;
16305}
16306
16307/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
16308
16309static rtx
16310ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16311			 rtx target)
16312{
16313  rtx pat;
16314  tree arg0 = TREE_VALUE (arglist);
16315  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16316  rtx op0 = expand_normal (arg0);
16317  rtx op1 = expand_normal (arg1);
16318  rtx op2;
16319  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16320  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16321  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16322  enum rtx_code comparison = d->comparison;
16323
16324  if (VECTOR_MODE_P (mode0))
16325    op0 = safe_vector_operand (op0, mode0);
16326  if (VECTOR_MODE_P (mode1))
16327    op1 = safe_vector_operand (op1, mode1);
16328
16329  /* Swap operands if we have a comparison that isn't available in
16330     hardware.  */
16331  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16332    {
16333      rtx tmp = gen_reg_rtx (mode1);
16334      emit_move_insn (tmp, op1);
16335      op1 = op0;
16336      op0 = tmp;
16337    }
16338
16339  if (optimize || !target
16340      || GET_MODE (target) != tmode
16341      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16342    target = gen_reg_rtx (tmode);
16343
16344  if ((optimize && !register_operand (op0, mode0))
16345      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16346    op0 = copy_to_mode_reg (mode0, op0);
16347  if ((optimize && !register_operand (op1, mode1))
16348      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16349    op1 = copy_to_mode_reg (mode1, op1);
16350
16351  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16352  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16353  if (! pat)
16354    return 0;
16355  emit_insn (pat);
16356  return target;
16357}
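
/* For illustration: the BUILTIN_DESC_SWAP_OPERANDS path above is how
   comparisons with no direct hardware encoding are handled.  A
   "greater than" builtin, for example, can be listed in bdesc_2arg with
   the LT comparison code plus the swap flag, so that conceptually

     cmpgtps (a, b)   is emitted as   cmpltps (b, a)

   The concrete table entries are assumed here; they live in bdesc_2arg,
   not in this function.  */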
16358
16359/* Subroutine of ix86_expand_builtin to take care of comi insns.  */
16360
16361static rtx
16362ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16363		      rtx target)
16364{
16365  rtx pat;
16366  tree arg0 = TREE_VALUE (arglist);
16367  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16368  rtx op0 = expand_normal (arg0);
16369  rtx op1 = expand_normal (arg1);
16370  rtx op2;
16371  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16372  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16373  enum rtx_code comparison = d->comparison;
16374
16375  if (VECTOR_MODE_P (mode0))
16376    op0 = safe_vector_operand (op0, mode0);
16377  if (VECTOR_MODE_P (mode1))
16378    op1 = safe_vector_operand (op1, mode1);
16379
16380  /* Swap operands if we have a comparison that isn't available in
16381     hardware.  */
16382  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16383    {
16384      rtx tmp = op1;
16385      op1 = op0;
16386      op0 = tmp;
16387    }
16388
16389  target = gen_reg_rtx (SImode);
16390  emit_move_insn (target, const0_rtx);
16391  target = gen_rtx_SUBREG (QImode, target, 0);
16392
16393  if ((optimize && !register_operand (op0, mode0))
16394      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16395    op0 = copy_to_mode_reg (mode0, op0);
16396  if ((optimize && !register_operand (op1, mode1))
16397      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16398    op1 = copy_to_mode_reg (mode1, op1);
16399
16400  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16401  pat = GEN_FCN (d->icode) (op0, op1);
16402  if (! pat)
16403    return 0;
16404  emit_insn (pat);
16405  emit_insn (gen_rtx_SET (VOIDmode,
16406			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16407			  gen_rtx_fmt_ee (comparison, QImode,
16408					  SET_DEST (pat),
16409					  const0_rtx)));
16410
16411  return SUBREG_REG (target);
16412}
16413
16414/* Return the integer constant in ARG.  Constrain it to be in the range
16415   of the subparts of VEC_TYPE; issue an error if not.  */
16416
16417static int
16418get_element_number (tree vec_type, tree arg)
16419{
16420  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16421
16422  if (!host_integerp (arg, 1)
16423      || (elt = tree_low_cst (arg, 1), elt > max))
16424    {
16425      error ("selector must be an integer constant in the range 0..%wi", max);
16426      return 0;
16427    }
16428
16429  return elt;
16430}
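
/* Worked example: for a V4SF argument TYPE_VECTOR_SUBPARTS is 4, so MAX
   is 3.  A call like __builtin_ia32_vec_ext_v4sf (x, 5) therefore reports
   "selector must be an integer constant in the range 0..3" and the
   expansion falls back to element 0.  */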
16431
16432/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
16433   ix86_expand_vector_init.  We DO have language-level syntax for this, in
16434   the form of  (type){ init-list }.  Except that since we can't place emms
16435   instructions from inside the compiler, we can't allow the use of MMX
16436   registers unless the user explicitly asks for it.  So we do *not* define
16437   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
16438   we have builtins invoked by mmintrin.h that give us license to emit
16439   these sorts of instructions.  */
16440
16441static rtx
16442ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16443{
16444  enum machine_mode tmode = TYPE_MODE (type);
16445  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16446  int i, n_elt = GET_MODE_NUNITS (tmode);
16447  rtvec v = rtvec_alloc (n_elt);
16448
16449  gcc_assert (VECTOR_MODE_P (tmode));
16450
16451  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16452    {
16453      rtx x = expand_normal (TREE_VALUE (arglist));
16454      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16455    }
16456
16457  gcc_assert (arglist == NULL);
16458
16459  if (!target || !register_operand (target, tmode))
16460    target = gen_reg_rtx (tmode);
16461
16462  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16463  return target;
16464}
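
/* For illustration: a call such as __builtin_ia32_vec_init_v4hi (a, b, c, d)
   arrives here with TYPE V4HI; the loop above builds a four-element
   PARALLEL of HImode rtxes and hands it to ix86_expand_vector_init, which
   chooses the actual instruction sequence (the strategy lives in that
   routine, not here).  */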
16465
16466/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
16467   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
16468   had a language-level syntax for referencing vector elements.  */
16469
16470static rtx
16471ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16472{
16473  enum machine_mode tmode, mode0;
16474  tree arg0, arg1;
16475  int elt;
16476  rtx op0;
16477
16478  arg0 = TREE_VALUE (arglist);
16479  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16480
16481  op0 = expand_normal (arg0);
16482  elt = get_element_number (TREE_TYPE (arg0), arg1);
16483
16484  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16485  mode0 = TYPE_MODE (TREE_TYPE (arg0));
16486  gcc_assert (VECTOR_MODE_P (mode0));
16487
16488  op0 = force_reg (mode0, op0);
16489
16490  if (optimize || !target || !register_operand (target, tmode))
16491    target = gen_reg_rtx (tmode);
16492
16493  ix86_expand_vector_extract (true, target, op0, elt);
16494
16495  return target;
16496}
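
/* For illustration: __builtin_ia32_vec_ext_v8hi (v, 3) reaches this point
   with TMODE == HImode, MODE0 == V8HImode and ELT == 3 after the range
   check above; ix86_expand_vector_extract then picks the actual extraction
   sequence (an element-extract insn, a shuffle, or a trip through memory --
   the choice is made in that routine).  */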
16497
16498/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
16499   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
16500   a language-level syntax for referencing vector elements.  */
16501
16502static rtx
16503ix86_expand_vec_set_builtin (tree arglist)
16504{
16505  enum machine_mode tmode, mode1;
16506  tree arg0, arg1, arg2;
16507  int elt;
16508  rtx op0, op1, target;
16509
16510  arg0 = TREE_VALUE (arglist);
16511  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16512  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16513
16514  tmode = TYPE_MODE (TREE_TYPE (arg0));
16515  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16516  gcc_assert (VECTOR_MODE_P (tmode));
16517
16518  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16519  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16520  elt = get_element_number (TREE_TYPE (arg0), arg2);
16521
16522  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16523    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16524
16525  op0 = force_reg (tmode, op0);
16526  op1 = force_reg (mode1, op1);
16527
16528  /* OP0 is the source of these builtin functions and shouldn't be
16529     modified.  Create a copy, use it and return it as target.  */
16530  target = gen_reg_rtx (tmode);
16531  emit_move_insn (target, op0);
16532  ix86_expand_vector_set (true, target, op1, elt);
16533
16534  return target;
16535}
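
/* For illustration: __builtin_ia32_vec_set_v8hi (v, x, 2) copies V into a
   fresh pseudo, asks ix86_expand_vector_set to overwrite element 2 with X,
   and returns the copy, leaving the original vector operand untouched as
   the comment above requires.  The specific insn used is decided in
   ix86_expand_vector_set, not here.  */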
16536
16537/* Expand an expression EXP that calls a built-in function,
16538   with result going to TARGET if that's convenient
16539   (and in mode MODE if that's convenient).
16540   SUBTARGET may be used as the target for computing one of EXP's operands.
16541   IGNORE is nonzero if the value is to be ignored.  */
16542
16543static rtx
16544ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16545		     enum machine_mode mode ATTRIBUTE_UNUSED,
16546		     int ignore ATTRIBUTE_UNUSED)
16547{
16548  const struct builtin_description *d;
16549  size_t i;
16550  enum insn_code icode;
16551  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16552  tree arglist = TREE_OPERAND (exp, 1);
16553  tree arg0, arg1, arg2, arg3;
16554  rtx op0, op1, op2, op3, pat;
16555  enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
16556  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16557
16558  switch (fcode)
16559    {
16560    case IX86_BUILTIN_EMMS:
16561      emit_insn (gen_mmx_emms ());
16562      return 0;
16563
16564    case IX86_BUILTIN_SFENCE:
16565      emit_insn (gen_sse_sfence ());
16566      return 0;
16567
16568    case IX86_BUILTIN_MASKMOVQ:
16569    case IX86_BUILTIN_MASKMOVDQU:
16570      icode = (fcode == IX86_BUILTIN_MASKMOVQ
16571	       ? CODE_FOR_mmx_maskmovq
16572	       : CODE_FOR_sse2_maskmovdqu);
16573      /* Note the arg order is different from the operand order.  */
16574      arg1 = TREE_VALUE (arglist);
16575      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16576      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16577      op0 = expand_normal (arg0);
16578      op1 = expand_normal (arg1);
16579      op2 = expand_normal (arg2);
16580      mode0 = insn_data[icode].operand[0].mode;
16581      mode1 = insn_data[icode].operand[1].mode;
16582      mode2 = insn_data[icode].operand[2].mode;
16583
16584      op0 = force_reg (Pmode, op0);
16585      op0 = gen_rtx_MEM (mode1, op0);
16586
16587      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16588	op0 = copy_to_mode_reg (mode0, op0);
16589      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16590	op1 = copy_to_mode_reg (mode1, op1);
16591      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16592	op2 = copy_to_mode_reg (mode2, op2);
16593      pat = GEN_FCN (icode) (op0, op1, op2);
16594      if (! pat)
16595	return 0;
16596      emit_insn (pat);
16597      return 0;
16598
16599    case IX86_BUILTIN_SQRTSS:
16600      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16601    case IX86_BUILTIN_RSQRTSS:
16602      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16603    case IX86_BUILTIN_RCPSS:
16604      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16605
16606    case IX86_BUILTIN_LOADUPS:
16607      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16608
16609    case IX86_BUILTIN_STOREUPS:
16610      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16611
16612    case IX86_BUILTIN_LOADHPS:
16613    case IX86_BUILTIN_LOADLPS:
16614    case IX86_BUILTIN_LOADHPD:
16615    case IX86_BUILTIN_LOADLPD:
16616      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16617	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16618	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16619	       : CODE_FOR_sse2_loadlpd);
16620      arg0 = TREE_VALUE (arglist);
16621      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16622      op0 = expand_normal (arg0);
16623      op1 = expand_normal (arg1);
16624      tmode = insn_data[icode].operand[0].mode;
16625      mode0 = insn_data[icode].operand[1].mode;
16626      mode1 = insn_data[icode].operand[2].mode;
16627
16628      op0 = force_reg (mode0, op0);
16629      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16630      if (optimize || target == 0
16631	  || GET_MODE (target) != tmode
16632	  || !register_operand (target, tmode))
16633	target = gen_reg_rtx (tmode);
16634      pat = GEN_FCN (icode) (target, op0, op1);
16635      if (! pat)
16636	return 0;
16637      emit_insn (pat);
16638      return target;
16639
16640    case IX86_BUILTIN_STOREHPS:
16641    case IX86_BUILTIN_STORELPS:
16642      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16643	       : CODE_FOR_sse_storelps);
16644      arg0 = TREE_VALUE (arglist);
16645      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16646      op0 = expand_normal (arg0);
16647      op1 = expand_normal (arg1);
16648      mode0 = insn_data[icode].operand[0].mode;
16649      mode1 = insn_data[icode].operand[1].mode;
16650
16651      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16652      op1 = force_reg (mode1, op1);
16653
16654      pat = GEN_FCN (icode) (op0, op1);
16655      if (! pat)
16656	return 0;
16657      emit_insn (pat);
16658      return const0_rtx;
16659
16660    case IX86_BUILTIN_MOVNTPS:
16661      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16662    case IX86_BUILTIN_MOVNTQ:
16663      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16664
16665    case IX86_BUILTIN_LDMXCSR:
16666      op0 = expand_normal (TREE_VALUE (arglist));
16667      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16668      emit_move_insn (target, op0);
16669      emit_insn (gen_sse_ldmxcsr (target));
16670      return 0;
16671
16672    case IX86_BUILTIN_STMXCSR:
16673      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16674      emit_insn (gen_sse_stmxcsr (target));
16675      return copy_to_mode_reg (SImode, target);
16676
16677    case IX86_BUILTIN_SHUFPS:
16678    case IX86_BUILTIN_SHUFPD:
16679      icode = (fcode == IX86_BUILTIN_SHUFPS
16680	       ? CODE_FOR_sse_shufps
16681	       : CODE_FOR_sse2_shufpd);
16682      arg0 = TREE_VALUE (arglist);
16683      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16684      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16685      op0 = expand_normal (arg0);
16686      op1 = expand_normal (arg1);
16687      op2 = expand_normal (arg2);
16688      tmode = insn_data[icode].operand[0].mode;
16689      mode0 = insn_data[icode].operand[1].mode;
16690      mode1 = insn_data[icode].operand[2].mode;
16691      mode2 = insn_data[icode].operand[3].mode;
16692
16693      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16694	op0 = copy_to_mode_reg (mode0, op0);
16695      if ((optimize && !register_operand (op1, mode1))
16696	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16697	op1 = copy_to_mode_reg (mode1, op1);
16698      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16699	{
16700	  /* @@@ better error message */
16701	  error ("mask must be an immediate");
16702	  return gen_reg_rtx (tmode);
16703	}
16704      if (optimize || target == 0
16705	  || GET_MODE (target) != tmode
16706	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16707	target = gen_reg_rtx (tmode);
16708      pat = GEN_FCN (icode) (target, op0, op1, op2);
16709      if (! pat)
16710	return 0;
16711      emit_insn (pat);
16712      return target;
16713
16714    case IX86_BUILTIN_PSHUFW:
16715    case IX86_BUILTIN_PSHUFD:
16716    case IX86_BUILTIN_PSHUFHW:
16717    case IX86_BUILTIN_PSHUFLW:
16718      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16719	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16720	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16721	       : CODE_FOR_mmx_pshufw);
16722      arg0 = TREE_VALUE (arglist);
16723      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16724      op0 = expand_normal (arg0);
16725      op1 = expand_normal (arg1);
16726      tmode = insn_data[icode].operand[0].mode;
16727      mode1 = insn_data[icode].operand[1].mode;
16728      mode2 = insn_data[icode].operand[2].mode;
16729
16730      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16731	op0 = copy_to_mode_reg (mode1, op0);
16732      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16733	{
16734	  /* @@@ better error message */
16735	  error ("mask must be an immediate");
16736	  return const0_rtx;
16737	}
16738      if (target == 0
16739	  || GET_MODE (target) != tmode
16740	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16741	target = gen_reg_rtx (tmode);
16742      pat = GEN_FCN (icode) (target, op0, op1);
16743      if (! pat)
16744	return 0;
16745      emit_insn (pat);
16746      return target;
16747
16748    case IX86_BUILTIN_PSLLWI128:
16749      icode = CODE_FOR_ashlv8hi3;
16750      goto do_pshifti;
16751    case IX86_BUILTIN_PSLLDI128:
16752      icode = CODE_FOR_ashlv4si3;
16753      goto do_pshifti;
16754    case IX86_BUILTIN_PSLLQI128:
16755      icode = CODE_FOR_ashlv2di3;
16756      goto do_pshifti;
16757    case IX86_BUILTIN_PSRAWI128:
16758      icode = CODE_FOR_ashrv8hi3;
16759      goto do_pshifti;
16760    case IX86_BUILTIN_PSRADI128:
16761      icode = CODE_FOR_ashrv4si3;
16762      goto do_pshifti;
16763    case IX86_BUILTIN_PSRLWI128:
16764      icode = CODE_FOR_lshrv8hi3;
16765      goto do_pshifti;
16766    case IX86_BUILTIN_PSRLDI128:
16767      icode = CODE_FOR_lshrv4si3;
16768      goto do_pshifti;
16769    case IX86_BUILTIN_PSRLQI128:
16770      icode = CODE_FOR_lshrv2di3;
16771      goto do_pshifti;
16772    do_pshifti:
16773      arg0 = TREE_VALUE (arglist);
16774      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16775      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16776      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16777
16778      if (GET_CODE (op1) != CONST_INT)
16779	{
16780	  error ("shift must be an immediate");
16781	  return const0_rtx;
16782	}
16783      if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
16784	op1 = GEN_INT (255);
16785
16786      tmode = insn_data[icode].operand[0].mode;
16787      mode1 = insn_data[icode].operand[1].mode;
16788      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16789	op0 = copy_to_reg (op0);
16790
16791      target = gen_reg_rtx (tmode);
16792      pat = GEN_FCN (icode) (target, op0, op1);
16793      if (!pat)
16794	return 0;
16795      emit_insn (pat);
16796      return target;
16797
16798    case IX86_BUILTIN_PSLLW128:
16799      icode = CODE_FOR_ashlv8hi3;
16800      goto do_pshift;
16801    case IX86_BUILTIN_PSLLD128:
16802      icode = CODE_FOR_ashlv4si3;
16803      goto do_pshift;
16804    case IX86_BUILTIN_PSLLQ128:
16805      icode = CODE_FOR_ashlv2di3;
16806      goto do_pshift;
16807    case IX86_BUILTIN_PSRAW128:
16808      icode = CODE_FOR_ashrv8hi3;
16809      goto do_pshift;
16810    case IX86_BUILTIN_PSRAD128:
16811      icode = CODE_FOR_ashrv4si3;
16812      goto do_pshift;
16813    case IX86_BUILTIN_PSRLW128:
16814      icode = CODE_FOR_lshrv8hi3;
16815      goto do_pshift;
16816    case IX86_BUILTIN_PSRLD128:
16817      icode = CODE_FOR_lshrv4si3;
16818      goto do_pshift;
16819    case IX86_BUILTIN_PSRLQ128:
16820      icode = CODE_FOR_lshrv2di3;
16821      goto do_pshift;
16822    do_pshift:
16823      arg0 = TREE_VALUE (arglist);
16824      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16825      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16826      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16827
16828      tmode = insn_data[icode].operand[0].mode;
16829      mode1 = insn_data[icode].operand[1].mode;
16830
16831      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16832	op0 = copy_to_reg (op0);
16833
16834      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
16835      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
16836	op1 = copy_to_reg (op1);
16837
16838      target = gen_reg_rtx (tmode);
16839      pat = GEN_FCN (icode) (target, op0, op1);
16840      if (!pat)
16841	return 0;
16842      emit_insn (pat);
16843      return target;
16844
16845    case IX86_BUILTIN_PSLLDQI128:
16846    case IX86_BUILTIN_PSRLDQI128:
16847      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16848	       : CODE_FOR_sse2_lshrti3);
16849      arg0 = TREE_VALUE (arglist);
16850      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16851      op0 = expand_normal (arg0);
16852      op1 = expand_normal (arg1);
16853      tmode = insn_data[icode].operand[0].mode;
16854      mode1 = insn_data[icode].operand[1].mode;
16855      mode2 = insn_data[icode].operand[2].mode;
16856
16857      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16858	{
16859	  op0 = copy_to_reg (op0);
16860	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16861	}
16862      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16863	{
16864	  error ("shift must be an immediate");
16865	  return const0_rtx;
16866	}
16867      target = gen_reg_rtx (V2DImode);
16868      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
16869			     op0, op1);
16870      if (! pat)
16871	return 0;
16872      emit_insn (pat);
16873      return target;
16874
16875    case IX86_BUILTIN_FEMMS:
16876      emit_insn (gen_mmx_femms ());
16877      return NULL_RTX;
16878
16879    case IX86_BUILTIN_PAVGUSB:
16880      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16881
16882    case IX86_BUILTIN_PF2ID:
16883      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16884
16885    case IX86_BUILTIN_PFACC:
16886      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16887
16888    case IX86_BUILTIN_PFADD:
16889     return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16890
16891    case IX86_BUILTIN_PFCMPEQ:
16892      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16893
16894    case IX86_BUILTIN_PFCMPGE:
16895      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16896
16897    case IX86_BUILTIN_PFCMPGT:
16898      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16899
16900    case IX86_BUILTIN_PFMAX:
16901      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16902
16903    case IX86_BUILTIN_PFMIN:
16904      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16905
16906    case IX86_BUILTIN_PFMUL:
16907      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16908
16909    case IX86_BUILTIN_PFRCP:
16910      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16911
16912    case IX86_BUILTIN_PFRCPIT1:
16913      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16914
16915    case IX86_BUILTIN_PFRCPIT2:
16916      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16917
16918    case IX86_BUILTIN_PFRSQIT1:
16919      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16920
16921    case IX86_BUILTIN_PFRSQRT:
16922      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16923
16924    case IX86_BUILTIN_PFSUB:
16925      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16926
16927    case IX86_BUILTIN_PFSUBR:
16928      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16929
16930    case IX86_BUILTIN_PI2FD:
16931      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16932
16933    case IX86_BUILTIN_PMULHRW:
16934      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16935
16936    case IX86_BUILTIN_PF2IW:
16937      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16938
16939    case IX86_BUILTIN_PFNACC:
16940      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16941
16942    case IX86_BUILTIN_PFPNACC:
16943      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16944
16945    case IX86_BUILTIN_PI2FW:
16946      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16947
16948    case IX86_BUILTIN_PSWAPDSI:
16949      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16950
16951    case IX86_BUILTIN_PSWAPDSF:
16952      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16953
16954    case IX86_BUILTIN_SQRTSD:
16955      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16956    case IX86_BUILTIN_LOADUPD:
16957      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16958    case IX86_BUILTIN_STOREUPD:
16959      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16960
16961    case IX86_BUILTIN_MFENCE:
16962	emit_insn (gen_sse2_mfence ());
16963	return 0;
16964    case IX86_BUILTIN_LFENCE:
16965	emit_insn (gen_sse2_lfence ());
16966	return 0;
16967
16968    case IX86_BUILTIN_CLFLUSH:
16969	arg0 = TREE_VALUE (arglist);
16970	op0 = expand_normal (arg0);
16971	icode = CODE_FOR_sse2_clflush;
16972	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16973	    op0 = copy_to_mode_reg (Pmode, op0);
16974
16975	emit_insn (gen_sse2_clflush (op0));
16976	return 0;
16977
16978    case IX86_BUILTIN_MOVNTPD:
16979      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16980    case IX86_BUILTIN_MOVNTDQ:
16981      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16982    case IX86_BUILTIN_MOVNTI:
16983      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16984
16985    case IX86_BUILTIN_LOADDQU:
16986      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16987    case IX86_BUILTIN_STOREDQU:
16988      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16989
16990    case IX86_BUILTIN_MONITOR:
16991      arg0 = TREE_VALUE (arglist);
16992      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16993      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16994      op0 = expand_normal (arg0);
16995      op1 = expand_normal (arg1);
16996      op2 = expand_normal (arg2);
16997      if (!REG_P (op0))
16998	op0 = copy_to_mode_reg (Pmode, op0);
16999      if (!REG_P (op1))
17000	op1 = copy_to_mode_reg (SImode, op1);
17001      if (!REG_P (op2))
17002	op2 = copy_to_mode_reg (SImode, op2);
17003      if (!TARGET_64BIT)
17004	emit_insn (gen_sse3_monitor (op0, op1, op2));
17005      else
17006	emit_insn (gen_sse3_monitor64 (op0, op1, op2));
17007      return 0;
17008
17009    case IX86_BUILTIN_MWAIT:
17010      arg0 = TREE_VALUE (arglist);
17011      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17012      op0 = expand_normal (arg0);
17013      op1 = expand_normal (arg1);
17014      if (!REG_P (op0))
17015	op0 = copy_to_mode_reg (SImode, op0);
17016      if (!REG_P (op1))
17017	op1 = copy_to_mode_reg (SImode, op1);
17018      emit_insn (gen_sse3_mwait (op0, op1));
17019      return 0;
17020
17021    case IX86_BUILTIN_LDDQU:
17022      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
17023				       target, 1);
17024
17025    case IX86_BUILTIN_PALIGNR:
17026    case IX86_BUILTIN_PALIGNR128:
17027      if (fcode == IX86_BUILTIN_PALIGNR)
17028	{
17029	  icode = CODE_FOR_ssse3_palignrdi;
17030	  mode = DImode;
17031	}
17032      else
17033	{
17034	  icode = CODE_FOR_ssse3_palignrti;
17035	  mode = V2DImode;
17036	}
17037      arg0 = TREE_VALUE (arglist);
17038      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17039      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17040      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
17041      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
17042      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
17043      tmode = insn_data[icode].operand[0].mode;
17044      mode1 = insn_data[icode].operand[1].mode;
17045      mode2 = insn_data[icode].operand[2].mode;
17046      mode3 = insn_data[icode].operand[3].mode;
17047
17048      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17049	{
17050	  op0 = copy_to_reg (op0);
17051	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17052	}
17053      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17054	{
17055	  op1 = copy_to_reg (op1);
17056	  op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
17057	}
17058      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17059	{
17060	  error ("shift must be an immediate");
17061	  return const0_rtx;
17062	}
17063      target = gen_reg_rtx (mode);
17064      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
17065			     op0, op1, op2);
17066      if (! pat)
17067	return 0;
17068      emit_insn (pat);
17069      return target;
17070
17071    case IX86_BUILTIN_MOVNTSD:
17072      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
17073
17074    case IX86_BUILTIN_MOVNTSS:
17075      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
17076
17077    case IX86_BUILTIN_INSERTQ:
17078    case IX86_BUILTIN_EXTRQ:
17079      icode = (fcode == IX86_BUILTIN_EXTRQ
17080               ? CODE_FOR_sse4a_extrq
17081               : CODE_FOR_sse4a_insertq);
17082      arg0 = TREE_VALUE (arglist);
17083      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17084      op0 = expand_normal (arg0);
17085      op1 = expand_normal (arg1);
17086      tmode = insn_data[icode].operand[0].mode;
17087      mode1 = insn_data[icode].operand[1].mode;
17088      mode2 = insn_data[icode].operand[2].mode;
17089      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17090        op0 = copy_to_mode_reg (mode1, op0);
17091      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17092        op1 = copy_to_mode_reg (mode2, op1);
17093      if (optimize || target == 0
17094          || GET_MODE (target) != tmode
17095          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17096        target = gen_reg_rtx (tmode);
17097      pat = GEN_FCN (icode) (target, op0, op1);
17098      if (! pat)
17099        return NULL_RTX;
17100      emit_insn (pat);
17101      return target;
17102
17103    case IX86_BUILTIN_EXTRQI:
17104      icode = CODE_FOR_sse4a_extrqi;
17105      arg0 = TREE_VALUE (arglist);
17106      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17107      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17108      op0 = expand_normal (arg0);
17109      op1 = expand_normal (arg1);
17110      op2 = expand_normal (arg2);
17111      tmode = insn_data[icode].operand[0].mode;
17112      mode1 = insn_data[icode].operand[1].mode;
17113      mode2 = insn_data[icode].operand[2].mode;
17114      mode3 = insn_data[icode].operand[3].mode;
17115      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17116        op0 = copy_to_mode_reg (mode1, op0);
17117      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17118        {
17119          error ("index mask must be an immediate");
17120          return gen_reg_rtx (tmode);
17121        }
17122      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17123        {
17124          error ("length mask must be an immediate");
17125          return gen_reg_rtx (tmode);
17126        }
17127      if (optimize || target == 0
17128          || GET_MODE (target) != tmode
17129          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17130        target = gen_reg_rtx (tmode);
17131      pat = GEN_FCN (icode) (target, op0, op1, op2);
17132      if (! pat)
17133        return NULL_RTX;
17134      emit_insn (pat);
17135      return target;
17136
17137    case IX86_BUILTIN_INSERTQI:
17138      icode = CODE_FOR_sse4a_insertqi;
17139      arg0 = TREE_VALUE (arglist);
17140      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17141      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17142      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
17143      op0 = expand_normal (arg0);
17144      op1 = expand_normal (arg1);
17145      op2 = expand_normal (arg2);
17146      op3 = expand_normal (arg3);
17147      tmode = insn_data[icode].operand[0].mode;
17148      mode1 = insn_data[icode].operand[1].mode;
17149      mode2 = insn_data[icode].operand[2].mode;
17150      mode3 = insn_data[icode].operand[3].mode;
17151      mode4 = insn_data[icode].operand[4].mode;
17152
17153      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17154        op0 = copy_to_mode_reg (mode1, op0);
17155
17156      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17157        op1 = copy_to_mode_reg (mode2, op1);
17158
17159      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17160        {
17161          error ("index mask must be an immediate");
17162          return gen_reg_rtx (tmode);
17163        }
17164      if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
17165        {
17166          error ("length mask must be an immediate");
17167          return gen_reg_rtx (tmode);
17168        }
17169      if (optimize || target == 0
17170          || GET_MODE (target) != tmode
17171          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17172        target = gen_reg_rtx (tmode);
17173      pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
17174      if (! pat)
17175        return NULL_RTX;
17176      emit_insn (pat);
17177      return target;
17178
17179    case IX86_BUILTIN_VEC_INIT_V2SI:
17180    case IX86_BUILTIN_VEC_INIT_V4HI:
17181    case IX86_BUILTIN_VEC_INIT_V8QI:
17182      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
17183
17184    case IX86_BUILTIN_VEC_EXT_V2DF:
17185    case IX86_BUILTIN_VEC_EXT_V2DI:
17186    case IX86_BUILTIN_VEC_EXT_V4SF:
17187    case IX86_BUILTIN_VEC_EXT_V4SI:
17188    case IX86_BUILTIN_VEC_EXT_V8HI:
17189    case IX86_BUILTIN_VEC_EXT_V16QI:
17190    case IX86_BUILTIN_VEC_EXT_V2SI:
17191    case IX86_BUILTIN_VEC_EXT_V4HI:
17192      return ix86_expand_vec_ext_builtin (arglist, target);
17193
17194    case IX86_BUILTIN_VEC_SET_V8HI:
17195    case IX86_BUILTIN_VEC_SET_V4HI:
17196      return ix86_expand_vec_set_builtin (arglist);
17197
17198    default:
17199      break;
17200    }
17201
17202  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17203    if (d->code == fcode)
17204      {
17205	/* Compares are treated specially.  */
17206	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17207	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
17208	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
17209	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17210	  return ix86_expand_sse_compare (d, arglist, target);
17211
17212	return ix86_expand_binop_builtin (d->icode, arglist, target);
17213      }
17214
17215  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17216    if (d->code == fcode)
17217      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
17218
17219  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17220    if (d->code == fcode)
17221      return ix86_expand_sse_comi (d, arglist, target);
17222
17223  gcc_unreachable ();
17224}
17225
17226/* Store OPERAND to memory after reload is completed.  This means
17227   that we can't easily use assign_stack_local.  */
17228rtx
17229ix86_force_to_memory (enum machine_mode mode, rtx operand)
17230{
17231  rtx result;
17232
17233  gcc_assert (reload_completed);
17234  if (TARGET_RED_ZONE)
17235    {
17236      result = gen_rtx_MEM (mode,
17237			    gen_rtx_PLUS (Pmode,
17238					  stack_pointer_rtx,
17239					  GEN_INT (-RED_ZONE_SIZE)));
17240      emit_move_insn (result, operand);
17241    }
17242  else if (!TARGET_RED_ZONE && TARGET_64BIT)
17243    {
17244      switch (mode)
17245	{
17246	case HImode:
17247	case SImode:
17248	  operand = gen_lowpart (DImode, operand);
17249	  /* FALLTHRU */
17250	case DImode:
17251	  emit_insn (
17252		      gen_rtx_SET (VOIDmode,
17253				   gen_rtx_MEM (DImode,
17254						gen_rtx_PRE_DEC (DImode,
17255							stack_pointer_rtx)),
17256				   operand));
17257	  break;
17258	default:
17259	  gcc_unreachable ();
17260	}
17261      result = gen_rtx_MEM (mode, stack_pointer_rtx);
17262    }
17263  else
17264    {
17265      switch (mode)
17266	{
17267	case DImode:
17268	  {
17269	    rtx operands[2];
17270	    split_di (&operand, 1, operands, operands + 1);
17271	    emit_insn (
17272			gen_rtx_SET (VOIDmode,
17273				     gen_rtx_MEM (SImode,
17274						  gen_rtx_PRE_DEC (Pmode,
17275							stack_pointer_rtx)),
17276				     operands[1]));
17277	    emit_insn (
17278			gen_rtx_SET (VOIDmode,
17279				     gen_rtx_MEM (SImode,
17280						  gen_rtx_PRE_DEC (Pmode,
17281							stack_pointer_rtx)),
17282				     operands[0]));
17283	  }
17284	  break;
17285	case HImode:
17286	  /* Store HImodes as SImodes.  */
17287	  operand = gen_lowpart (SImode, operand);
17288	  /* FALLTHRU */
17289	case SImode:
17290	  emit_insn (
17291		      gen_rtx_SET (VOIDmode,
17292				   gen_rtx_MEM (GET_MODE (operand),
17293						gen_rtx_PRE_DEC (SImode,
17294							stack_pointer_rtx)),
17295				   operand));
17296	  break;
17297	default:
17298	  gcc_unreachable ();
17299	}
17300      result = gen_rtx_MEM (mode, stack_pointer_rtx);
17301    }
17302  return result;
17303}
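
/* For illustration: with TARGET_RED_ZONE (the usual 64-bit ABI case, where
   the red zone is the 128 bytes below the stack pointer) a DImode operand
   is simply stored to

     (mem:DI (plus:DI (reg sp) (const_int -128)))

   while without a red zone it is pushed and the returned MEM addresses the
   new top of stack.  The -128 figure assumes the x86-64 red-zone size; the
   code above only relies on RED_ZONE_SIZE.  */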
17304
17305/* Free the operand from memory.  */
17306void
17307ix86_free_from_memory (enum machine_mode mode)
17308{
17309  if (!TARGET_RED_ZONE)
17310    {
17311      int size;
17312
17313      if (mode == DImode || TARGET_64BIT)
17314	size = 8;
17315      else
17316	size = 4;
17317      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
17318         to a pop or add instruction if registers are available.  */
17319      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17320			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17321					    GEN_INT (size))));
17322    }
17323}
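
/* For illustration: after ix86_force_to_memory pushed a value on a target
   without a red zone, this emits the matching deallocation, e.g. for an
   SImode slot on a 32-bit target:

     (set (reg sp) (plus (reg sp) (const_int 4)))

   which peephole2 may later turn into a pop or an add, as noted above.  */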
17324
17325/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
17326   QImode must go into class Q_REGS.
17327   Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and movdf
17328   to do mem-to-mem moves through integer regs.  */
17329enum reg_class
17330ix86_preferred_reload_class (rtx x, enum reg_class class)
17331{
17332  enum machine_mode mode = GET_MODE (x);
17333
17334  /* We're only allowed to return a subclass of CLASS.  Many of the
17335     following checks fail for NO_REGS, so eliminate that early.  */
17336  if (class == NO_REGS)
17337    return NO_REGS;
17338
17339  /* All classes can load zeros.  */
17340  if (x == CONST0_RTX (mode))
17341    return class;
17342
17343  /* Force constants into memory if we are loading a (nonzero) constant into
17344     an MMX or SSE register.  This is because there are no MMX/SSE instructions
17345     to load from a constant.  */
17346  if (CONSTANT_P (x)
17347      && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
17348    return NO_REGS;
17349
17350  /* Prefer SSE regs only, if we can use them for math.  */
17351  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
17352    return SSE_CLASS_P (class) ? class : NO_REGS;
17353
17354  /* Floating-point constants need more complex checks.  */
17355  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
17356    {
17357      /* General regs can load everything.  */
17358      if (reg_class_subset_p (class, GENERAL_REGS))
17359        return class;
17360
17361      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
17362	 zero above.  We only want to wind up preferring 80387 registers if
17363	 we plan on doing computation with them.  */
17364      if (TARGET_80387
17365	  && standard_80387_constant_p (x))
17366	{
17367	  /* Limit class to non-sse.  */
17368	  if (class == FLOAT_SSE_REGS)
17369	    return FLOAT_REGS;
17370	  if (class == FP_TOP_SSE_REGS)
17371	    return FP_TOP_REG;
17372	  if (class == FP_SECOND_SSE_REGS)
17373	    return FP_SECOND_REG;
17374	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17375	    return class;
17376	}
17377
17378      return NO_REGS;
17379    }
17380
17381  /* Generally when we see PLUS here, it's the function invariant
17382     (plus soft-fp const_int), which can only be computed into general
17383     regs.  */
17384  if (GET_CODE (x) == PLUS)
17385    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17386
17387  /* QImode constants are easy to load, but non-constant QImode data
17388     must go into Q_REGS.  */
17389  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17390    {
17391      if (reg_class_subset_p (class, Q_REGS))
17392	return class;
17393      if (reg_class_subset_p (Q_REGS, class))
17394	return Q_REGS;
17395      return NO_REGS;
17396    }
17397
17398  return class;
17399}
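
/* For illustration: reloading a nonzero floating-point constant into an
   SSE class returns NO_REGS above (there is no SSE load-immediate), so
   reload must fetch it from the constant pool.  The same constant headed
   for FLOAT_REGS is allowed when standard_80387_constant_p recognizes it,
   e.g. 1.0 loading as fld1 -- that particular mapping is the usual x87
   behavior and is assumed here rather than spelled out in this file.  */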
17400
17401/* Discourage putting floating-point values in SSE registers unless
17402   SSE math is being used, and likewise for the 387 registers.  */
17403enum reg_class
17404ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17405{
17406  enum machine_mode mode = GET_MODE (x);
17407
17408  /* Restrict the output reload class to the register bank that we are doing
17409     math on.  If we would like not to return a subset of CLASS, reject this
17410     alternative: if reload cannot do this, it will still use its choice.  */
17412  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17413    return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17414
17415  if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17416    {
17417      if (class == FP_TOP_SSE_REGS)
17418	return FP_TOP_REG;
17419      else if (class == FP_SECOND_SSE_REGS)
17420	return FP_SECOND_REG;
17421      else
17422	return FLOAT_CLASS_P (class) ? class : NO_REGS;
17423    }
17424
17425  return class;
17426}
17427
17428/* If we are copying between general and FP registers, we need a memory
17429   location. The same is true for SSE and MMX registers.
17430
17431   The macro can't work reliably when one of the CLASSES is a class containing
17432   registers from multiple units (SSE, MMX, integer).  We avoid this by never
17433   combining those units in a single alternative in the machine description.
17434   Ensure that this constraint holds to avoid unexpected surprises.
17435
17436   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17437   enforce these sanity checks.  */
17438
17439int
17440ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17441			      enum machine_mode mode, int strict)
17442{
17443  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17444      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17445      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17446      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17447      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17448      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17449    {
17450      gcc_assert (!strict);
17451      return true;
17452    }
17453
17454  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17455    return true;
17456
  /* ??? This is a lie.  We do have moves between mmx/general and between
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
17460  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17461    return true;
17462
17463  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17464    {
17465      /* SSE1 doesn't have any direct moves from other classes.  */
17466      if (!TARGET_SSE2)
17467	return true;
17468
17469      /* If the target says that inter-unit moves are more expensive
17470	 than moving through memory, then don't generate them.  */
17471      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17472	return true;
17473
17474      /* Between SSE and general, we have moves no larger than word size.  */
17475      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17476	return true;
17477
17478      /* ??? For the cost of one register reformat penalty, we could use
17479	 the same instructions to move SFmode and DFmode data, but the
17480	 relevant move patterns don't support those alternatives.  */
17481      if (mode == SFmode || mode == DFmode)
17482	return true;
17483    }
17484
17485  return false;
17486}
17487
17488/* Return true if the registers in CLASS cannot represent the change from
17489   modes FROM to TO.  */
17490
17491bool
17492ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17493			       enum reg_class class)
17494{
17495  if (from == to)
17496    return false;
17497
17498  /* x87 registers can't do subreg at all, as all values are reformatted
17499     to extended precision.  */
17500  if (MAYBE_FLOAT_CLASS_P (class))
17501    return true;
17502
17503  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17504    {
17505      /* Vector registers do not support QI or HImode loads.  If we don't
17506	 disallow a change to these modes, reload will assume it's ok to
17507	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
17508	 the vec_dupv4hi pattern.  */
17509      if (GET_MODE_SIZE (from) < 4)
17510	return true;
17511
17512      /* Vector registers do not support subreg with nonzero offsets, which
17513	 are otherwise valid for integer registers.  Since we can't see
17514	 whether we have a nonzero offset from here, prohibit all
17515         nonparadoxical subregs changing size.  */
17516      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17517	return true;
17518    }
17519
17520  return false;
17521}
17522
17523/* Return the cost of moving data from a register in class CLASS1 to
17524   one in class CLASS2.
17525
17526   It is not required that the cost always equal 2 when FROM is the same as TO;
17527   on some machines it is expensive to move between registers if they are not
17528   general registers.  */
17529
17530int
17531ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17532			 enum reg_class class2)
17533{
  /* In case we require secondary memory, compute the cost of the store
     followed by the load.  In order to avoid bad register allocation choices,
     we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
17537
17538  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17539    {
17540      int cost = 1;
17541
17542      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17543		   MEMORY_MOVE_COST (mode, class1, 1));
17544      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17545		   MEMORY_MOVE_COST (mode, class2, 1));
17546
      /* In case of copying from a general purpose register we may emit
         multiple stores followed by a single load, causing a memory size
         mismatch stall.  Count this as an arbitrarily high cost of 20.  */
17550      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17551	cost += 20;
17552
17553      /* In the case of FP/MMX moves, the registers actually overlap, and we
17554	 have to switch modes in order to treat them differently.  */
17555      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17556          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17557	cost += 20;
17558
17559      return cost;
17560    }
17561
17562  /* Moves between SSE/MMX and integer unit are expensive.  */
17563  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17564      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17565    return ix86_cost->mmxsse_to_integer;
17566  if (MAYBE_FLOAT_CLASS_P (class1))
17567    return ix86_cost->fp_move;
17568  if (MAYBE_SSE_CLASS_P (class1))
17569    return ix86_cost->sse_move;
17570  if (MAYBE_MMX_CLASS_P (class1))
17571    return ix86_cost->mmx_move;
17572  return 2;
17573}
17574
17575/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
17576
17577bool
17578ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17579{
  /* Only the flags registers can hold CCmode values, and they can hold
     nothing else.  */
17581  if (CC_REGNO_P (regno))
17582    return GET_MODE_CLASS (mode) == MODE_CC;
17583  if (GET_MODE_CLASS (mode) == MODE_CC
17584      || GET_MODE_CLASS (mode) == MODE_RANDOM
17585      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17586    return 0;
17587  if (FP_REGNO_P (regno))
17588    return VALID_FP_MODE_P (mode);
17589  if (SSE_REGNO_P (regno))
17590    {
17591      /* We implement the move patterns for all vector modes into and
17592	 out of SSE registers, even when no operation instructions
17593	 are available.  */
17594      return (VALID_SSE_REG_MODE (mode)
17595	      || VALID_SSE2_REG_MODE (mode)
17596	      || VALID_MMX_REG_MODE (mode)
17597	      || VALID_MMX_REG_MODE_3DNOW (mode));
17598    }
17599  if (MMX_REGNO_P (regno))
17600    {
17601      /* We implement the move patterns for 3DNOW modes even in MMX mode,
17602	 so if the register is available at all, then we can move data of
17603	 the given mode into or out of it.  */
17604      return (VALID_MMX_REG_MODE (mode)
17605	      || VALID_MMX_REG_MODE_3DNOW (mode));
17606    }
17607
17608  if (mode == QImode)
17609    {
      /* Take care with QImode values - they can live in non-QI regs,
         but then they can cause partial register stalls.  */
17612      if (regno < 4 || TARGET_64BIT)
17613	return 1;
17614      if (!TARGET_PARTIAL_REG_STALL)
17615	return 1;
17616      return reload_in_progress || reload_completed;
17617    }
  /* We handle both integers and floats in the general purpose registers.  */
17619  else if (VALID_INT_MODE_P (mode))
17620    return 1;
17621  else if (VALID_FP_MODE_P (mode))
17622    return 1;
17623  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
17624     on to use that value in smaller contexts, this can easily force a
17625     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
17626     supporting DImode, allow it.  */
17627  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17628    return 1;
17629
17630  return 0;
17631}
17632
17633/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
17634   tieable integer mode.  */
17635
17636static bool
17637ix86_tieable_integer_mode_p (enum machine_mode mode)
17638{
17639  switch (mode)
17640    {
17641    case HImode:
17642    case SImode:
17643      return true;
17644
17645    case QImode:
17646      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17647
17648    case DImode:
17649      return TARGET_64BIT;
17650
17651    default:
17652      return false;
17653    }
17654}
17655
17656/* Return true if MODE1 is accessible in a register that can hold MODE2
17657   without copying.  That is, all register classes that can hold MODE2
17658   can also hold MODE1.  */
17659
17660bool
17661ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17662{
17663  if (mode1 == mode2)
17664    return true;
17665
17666  if (ix86_tieable_integer_mode_p (mode1)
17667      && ix86_tieable_integer_mode_p (mode2))
17668    return true;
17669
17670  /* MODE2 being XFmode implies fp stack or general regs, which means we
17671     can tie any smaller floating point modes to it.  Note that we do not
17672     tie this with TFmode.  */
17673  if (mode2 == XFmode)
17674    return mode1 == SFmode || mode1 == DFmode;
17675
17676  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17677     that we can tie it with SFmode.  */
17678  if (mode2 == DFmode)
17679    return mode1 == SFmode;
17680
17681  /* If MODE2 is only appropriate for an SSE register, then tie with
17682     any other mode acceptable to SSE registers.  */
17683  if (GET_MODE_SIZE (mode2) >= 8
17684      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17685    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17686
17687  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17688     with any other mode acceptable to MMX registers.  */
17689  if (GET_MODE_SIZE (mode2) == 8
17690      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17691    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17692
17693  return false;
17694}
17695
17696/* Return the cost of moving data of mode M between a
17697   register and memory.  A value of 2 is the default; this cost is
17698   relative to those in `REGISTER_MOVE_COST'.
17699
17700   If moving between registers and memory is more expensive than
17701   between two registers, you should define this macro to express the
17702   relative cost.
17703
   Also model the increased cost of moving QImode registers in
   non-Q_REGS classes.  */
17707int
17708ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17709{
17710  if (FLOAT_CLASS_P (class))
17711    {
17712      int index;
17713      switch (mode)
17714	{
17715	  case SFmode:
17716	    index = 0;
17717	    break;
17718	  case DFmode:
17719	    index = 1;
17720	    break;
17721	  case XFmode:
17722	    index = 2;
17723	    break;
17724	  default:
17725	    return 100;
17726	}
17727      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17728    }
17729  if (SSE_CLASS_P (class))
17730    {
17731      int index;
17732      switch (GET_MODE_SIZE (mode))
17733	{
17734	  case 4:
17735	    index = 0;
17736	    break;
17737	  case 8:
17738	    index = 1;
17739	    break;
17740	  case 16:
17741	    index = 2;
17742	    break;
17743	  default:
17744	    return 100;
17745	}
17746      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17747    }
17748  if (MMX_CLASS_P (class))
17749    {
17750      int index;
17751      switch (GET_MODE_SIZE (mode))
17752	{
17753	  case 4:
17754	    index = 0;
17755	    break;
17756	  case 8:
17757	    index = 1;
17758	    break;
17759	  default:
17760	    return 100;
17761	}
17762      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17763    }
17764  switch (GET_MODE_SIZE (mode))
17765    {
17766      case 1:
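        /* A byte load into a non-Q register uses movzbl; a byte store from
           a non-Q register must first be moved into a byte-addressable
           register, so charge a small penalty for it.  */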
17767	if (in)
17768	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17769		  : ix86_cost->movzbl_load);
17770	else
17771	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17772		  : ix86_cost->int_store[0] + 4);
17773	break;
17774      case 2:
17775	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17776      default:
17777	/* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
17778	if (mode == TFmode)
17779	  mode = XFmode;
17780	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17781		* (((int) GET_MODE_SIZE (mode)
17782		    + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17783    }
17784}
17785
17786/* Compute a (partial) cost for rtx X.  Return true if the complete
17787   cost has been computed, and false if subexpressions should be
17788   scanned.  In either case, *TOTAL contains the cost result.  */
17789
17790static bool
17791ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17792{
17793  enum machine_mode mode = GET_MODE (x);
17794
17795  switch (code)
17796    {
17797    case CONST_INT:
17798    case CONST:
17799    case LABEL_REF:
17800    case SYMBOL_REF:
17801      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17802	*total = 3;
17803      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17804	*total = 2;
17805      else if (flag_pic && SYMBOLIC_CONST (x)
17806	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
17808		       && (GET_CODE (x) != SYMBOL_REF
17809		           || !SYMBOL_REF_LOCAL_P (x)))))
17810	*total = 1;
17811      else
17812	*total = 0;
17813      return true;
17814
17815    case CONST_DOUBLE:
17816      if (mode == VOIDmode)
17817	*total = 0;
17818      else
17819	switch (standard_80387_constant_p (x))
17820	  {
17821	  case 1: /* 0.0 */
17822	    *total = 1;
17823	    break;
17824	  default: /* Other constants */
17825	    *total = 2;
17826	    break;
17827	  case 0:
17828	  case -1:
17829	    /* Start with (MEM (SYMBOL_REF)), since that's where
17830	       it'll probably end up.  Add a penalty for size.  */
17831	    *total = (COSTS_N_INSNS (1)
17832		      + (flag_pic != 0 && !TARGET_64BIT)
17833		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17834	    break;
17835	  }
17836      return true;
17837
17838    case ZERO_EXTEND:
      /* Zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
17841      if (TARGET_64BIT && mode == DImode
17842	  && GET_MODE (XEXP (x, 0)) == SImode)
17843	*total = 1;
17844      else if (TARGET_ZERO_EXTEND_WITH_AND)
17845	*total = ix86_cost->add;
17846      else
17847	*total = ix86_cost->movzx;
17848      return false;
17849
17850    case SIGN_EXTEND:
17851      *total = ix86_cost->movsx;
17852      return false;
17853
17854    case ASHIFT:
17855      if (GET_CODE (XEXP (x, 1)) == CONST_INT
17856	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17857	{
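          /* A shift left by 1 can be done with an add, and shifts by 2 or 3
             can use lea when that is no more expensive than a constant
             shift.  */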
17858	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17859	  if (value == 1)
17860	    {
17861	      *total = ix86_cost->add;
17862	      return false;
17863	    }
17864	  if ((value == 2 || value == 3)
17865	      && ix86_cost->lea <= ix86_cost->shift_const)
17866	    {
17867	      *total = ix86_cost->lea;
17868	      return false;
17869	    }
17870	}
17871      /* FALLTHRU */
17872
17873    case ROTATE:
17874    case ASHIFTRT:
17875    case LSHIFTRT:
17876    case ROTATERT:
17877      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17878	{
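          /* Without 64-bit support, a DImode shift operates on a register
             pair and costs at least two word-sized shifts; variable counts
             are considerably more expensive.  */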
17879	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17880	    {
17881	      if (INTVAL (XEXP (x, 1)) > 32)
17882		*total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17883	      else
17884		*total = ix86_cost->shift_const * 2;
17885	    }
17886	  else
17887	    {
17888	      if (GET_CODE (XEXP (x, 1)) == AND)
17889		*total = ix86_cost->shift_var * 2;
17890	      else
17891		*total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17892	    }
17893	}
17894      else
17895	{
17896	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17897	    *total = ix86_cost->shift_const;
17898	  else
17899	    *total = ix86_cost->shift_var;
17900	}
17901      return false;
17902
17903    case MULT:
17904      if (FLOAT_MODE_P (mode))
17905	{
17906	  *total = ix86_cost->fmul;
17907	  return false;
17908	}
17909      else
17910	{
17911	  rtx op0 = XEXP (x, 0);
17912	  rtx op1 = XEXP (x, 1);
17913	  int nbits;
17914	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17915	    {
17916	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
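              /* Count the bits set in the constant multiplier; each one
                 contributes ix86_cost->mult_bit to the total below.  */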
17917	      for (nbits = 0; value != 0; value &= value - 1)
17918	        nbits++;
17919	    }
17920	  else
17921	    /* This is arbitrary.  */
17922	    nbits = 7;
17923
17924	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17926	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17927	         == GET_MODE_SIZE (mode))
17928	    {
17929	      int is_mulwiden = 0;
17930	      enum machine_mode inner_mode = GET_MODE (op0);
17931
17932	      if (GET_CODE (op0) == GET_CODE (op1))
17933		is_mulwiden = 1, op1 = XEXP (op1, 0);
17934	      else if (GET_CODE (op1) == CONST_INT)
17935		{
17936		  if (GET_CODE (op0) == SIGN_EXTEND)
17937		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17938			          == INTVAL (op1);
17939		  else
17940		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17941	        }
17942
17943	      if (is_mulwiden)
17944	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17945	    }
17946
17947  	  *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17948		    + nbits * ix86_cost->mult_bit
17949	            + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17950
17951          return true;
17952	}
17953
17954    case DIV:
17955    case UDIV:
17956    case MOD:
17957    case UMOD:
17958      if (FLOAT_MODE_P (mode))
17959	*total = ix86_cost->fdiv;
17960      else
17961	*total = ix86_cost->divide[MODE_INDEX (mode)];
17962      return false;
17963
17964    case PLUS:
17965      if (FLOAT_MODE_P (mode))
17966	*total = ix86_cost->fadd;
17967      else if (GET_MODE_CLASS (mode) == MODE_INT
17968	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17969	{
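          /* The forms recognized below correspond to lea addressing modes
             (base + index * scale + displacement), which can be computed
             with a single lea instruction.  */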
17970	  if (GET_CODE (XEXP (x, 0)) == PLUS
17971	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17972	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17973	      && CONSTANT_P (XEXP (x, 1)))
17974	    {
17975	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17976	      if (val == 2 || val == 4 || val == 8)
17977		{
17978		  *total = ix86_cost->lea;
17979		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17980		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17981				      outer_code);
17982		  *total += rtx_cost (XEXP (x, 1), outer_code);
17983		  return true;
17984		}
17985	    }
17986	  else if (GET_CODE (XEXP (x, 0)) == MULT
17987		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17988	    {
17989	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17990	      if (val == 2 || val == 4 || val == 8)
17991		{
17992		  *total = ix86_cost->lea;
17993		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17994		  *total += rtx_cost (XEXP (x, 1), outer_code);
17995		  return true;
17996		}
17997	    }
17998	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
17999	    {
18000	      *total = ix86_cost->lea;
18001	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
18002	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
18003	      *total += rtx_cost (XEXP (x, 1), outer_code);
18004	      return true;
18005	    }
18006	}
18007      /* FALLTHRU */
18008
18009    case MINUS:
18010      if (FLOAT_MODE_P (mode))
18011	{
18012	  *total = ix86_cost->fadd;
18013	  return false;
18014	}
18015      /* FALLTHRU */
18016
18017    case AND:
18018    case IOR:
18019    case XOR:
18020      if (!TARGET_64BIT && mode == DImode)
18021	{
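          /* Without 64-bit support, a DImode logical operation is split
             into two word-sized operations.  */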
18022	  *total = (ix86_cost->add * 2
18023		    + (rtx_cost (XEXP (x, 0), outer_code)
18024		       << (GET_MODE (XEXP (x, 0)) != DImode))
18025		    + (rtx_cost (XEXP (x, 1), outer_code)
18026	               << (GET_MODE (XEXP (x, 1)) != DImode)));
18027	  return true;
18028	}
18029      /* FALLTHRU */
18030
18031    case NEG:
18032      if (FLOAT_MODE_P (mode))
18033	{
18034	  *total = ix86_cost->fchs;
18035	  return false;
18036	}
18037      /* FALLTHRU */
18038
18039    case NOT:
18040      if (!TARGET_64BIT && mode == DImode)
18041	*total = ix86_cost->add * 2;
18042      else
18043	*total = ix86_cost->add;
18044      return false;
18045
18046    case COMPARE:
18047      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
18048	  && XEXP (XEXP (x, 0), 1) == const1_rtx
18049	  && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
18050	  && XEXP (x, 1) == const0_rtx)
18051	{
18052	  /* This kind of construct is implemented using test[bwl].
18053	     Treat it as if we had an AND.  */
18054	  *total = (ix86_cost->add
18055		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
18056		    + rtx_cost (const1_rtx, outer_code));
18057	  return true;
18058	}
18059      return false;
18060
18061    case FLOAT_EXTEND:
18062      if (!TARGET_SSE_MATH
18063	  || mode == XFmode
18064	  || (mode == DFmode && !TARGET_SSE2))
	/* For standard 80387 constants, raise the cost to prevent
	   compress_float_constant() from generating a load from memory.  */
18067	switch (standard_80387_constant_p (XEXP (x, 0)))
18068	  {
18069	  case -1:
18070	  case 0:
18071	    *total = 0;
18072	    break;
18073	  case 1: /* 0.0 */
18074	    *total = 1;
18075	    break;
18076	  default:
18077	    *total = (x86_ext_80387_constants & TUNEMASK
18078		      || optimize_size
18079		      ? 1 : 0);
18080	  }
18081      return false;
18082
18083    case ABS:
18084      if (FLOAT_MODE_P (mode))
18085	*total = ix86_cost->fabs;
18086      return false;
18087
18088    case SQRT:
18089      if (FLOAT_MODE_P (mode))
18090	*total = ix86_cost->fsqrt;
18091      return false;
18092
18093    case UNSPEC:
18094      if (XINT (x, 1) == UNSPEC_TP)
18095	*total = 0;
18096      return false;
18097
18098    default:
18099      return false;
18100    }
18101}
18102
18103#if TARGET_MACHO
18104
18105static int current_machopic_label_num;
18106
18107/* Given a symbol name and its associated stub, write out the
18108   definition of the stub.  */
18109
18110void
18111machopic_output_stub (FILE *file, const char *symb, const char *stub)
18112{
18113  unsigned int length;
18114  char *binder_name, *symbol_name, lazy_ptr_name[32];
18115  int label = ++current_machopic_label_num;
18116
18117  /* For 64-bit we shouldn't get here.  */
18118  gcc_assert (!TARGET_64BIT);
18119
18120  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
18121  symb = (*targetm.strip_name_encoding) (symb);
18122
18123  length = strlen (stub);
18124  binder_name = alloca (length + 32);
18125  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
18126
18127  length = strlen (symb);
18128  symbol_name = alloca (length + 32);
18129  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
18130
18131  sprintf (lazy_ptr_name, "L%d$lz", label);
18132
18133  if (MACHOPIC_PURE)
18134    switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
18135  else
18136    switch_to_section (darwin_sections[machopic_symbol_stub_section]);
18137
18138  fprintf (file, "%s:\n", stub);
18139  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18140
18141  if (MACHOPIC_PURE)
18142    {
18143      fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
18144      fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
18145      fprintf (file, "\tjmp\t*%%edx\n");
18146    }
18147  else
18148    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
18149
18150  fprintf (file, "%s:\n", binder_name);
18151
18152  if (MACHOPIC_PURE)
18153    {
18154      fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
18155      fprintf (file, "\tpushl\t%%eax\n");
18156    }
18157  else
18158    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
18159
18160  fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
18161
18162  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
18163  fprintf (file, "%s:\n", lazy_ptr_name);
18164  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18165  fprintf (file, "\t.long %s\n", binder_name);
18166}
18167
18168void
18169darwin_x86_file_end (void)
18170{
18171  darwin_file_end ();
18172  ix86_file_end ();
18173}
18174#endif /* TARGET_MACHO */
18175
/* Order the registers for the register allocator.  */
18177
18178void
18179x86_order_regs_for_local_alloc (void)
18180{
18181   int pos = 0;
18182   int i;
18183
18184   /* First allocate the local general purpose registers.  */
18185   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18186     if (GENERAL_REGNO_P (i) && call_used_regs[i])
18187	reg_alloc_order [pos++] = i;
18188
18189   /* Global general purpose registers.  */
18190   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18191     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
18192	reg_alloc_order [pos++] = i;
18193
18194   /* x87 registers come first in case we are doing FP math
18195      using them.  */
18196   if (!TARGET_SSE_MATH)
18197     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18198       reg_alloc_order [pos++] = i;
18199
18200   /* SSE registers.  */
18201   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18202     reg_alloc_order [pos++] = i;
18203   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18204     reg_alloc_order [pos++] = i;
18205
18206   /* x87 registers.  */
18207   if (TARGET_SSE_MATH)
18208     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18209       reg_alloc_order [pos++] = i;
18210
18211   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
18212     reg_alloc_order [pos++] = i;
18213
   /* Initialize the rest of the array, as we do not allocate some
      registers at all.  */
18216   while (pos < FIRST_PSEUDO_REGISTER)
18217     reg_alloc_order [pos++] = 0;
18218}
18219
18220/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
18221   struct attribute_spec.handler.  */
18222static tree
18223ix86_handle_struct_attribute (tree *node, tree name,
18224			      tree args ATTRIBUTE_UNUSED,
18225			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
18226{
18227  tree *type = NULL;
18228  if (DECL_P (*node))
18229    {
18230      if (TREE_CODE (*node) == TYPE_DECL)
18231	type = &TREE_TYPE (*node);
18232    }
18233  else
18234    type = node;
18235
18236  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
18237		 || TREE_CODE (*type) == UNION_TYPE)))
18238    {
18239      warning (OPT_Wattributes, "%qs attribute ignored",
18240	       IDENTIFIER_POINTER (name));
18241      *no_add_attrs = true;
18242    }
18243
18244  else if ((is_attribute_p ("ms_struct", name)
18245	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
18246	   || ((is_attribute_p ("gcc_struct", name)
18247		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
18248    {
18249      warning (OPT_Wattributes, "%qs incompatible attribute ignored",
18250               IDENTIFIER_POINTER (name));
18251      *no_add_attrs = true;
18252    }
18253
18254  return NULL_TREE;
18255}
18256
18257static bool
18258ix86_ms_bitfield_layout_p (tree record_type)
18259{
18260  return (TARGET_MS_BITFIELD_LAYOUT &&
18261	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
18262    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
18263}
18264
18265/* Returns an expression indicating where the this parameter is
18266   located on entry to the FUNCTION.  */
18267
18268static rtx
18269x86_this_parameter (tree function)
18270{
18271  tree type = TREE_TYPE (function);
18272
18273  if (TARGET_64BIT)
18274    {
18275      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
18276      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
18277    }
18278
18279  if (ix86_function_regparm (type, function) > 0)
18280    {
18281      tree parm;
18282
18283      parm = TYPE_ARG_TYPES (type);
18284      /* Figure out whether or not the function has a variable number of
18285	 arguments.  */
18286      for (; parm; parm = TREE_CHAIN (parm))
18287	if (TREE_VALUE (parm) == void_type_node)
18288	  break;
18289      /* If not, the this parameter is in the first argument.  */
18290      if (parm)
18291	{
18292	  int regno = 0;
18293	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
18294	    regno = 2;
18295	  return gen_rtx_REG (SImode, regno);
18296	}
18297    }
18298
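  /* Otherwise THIS is passed on the stack, just past the return address,
     or past both the return address and the hidden aggregate return
     pointer.  */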
18299  if (aggregate_value_p (TREE_TYPE (type), type))
18300    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
18301  else
18302    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
18303}
18304
18305/* Determine whether x86_output_mi_thunk can succeed.  */
18306
18307static bool
18308x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
18309			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
18310			 HOST_WIDE_INT vcall_offset, tree function)
18311{
18312  /* 64-bit can handle anything.  */
18313  if (TARGET_64BIT)
18314    return true;
18315
18316  /* For 32-bit, everything's fine if we have one free register.  */
18317  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
18318    return true;
18319
18320  /* Need a free register for vcall_offset.  */
18321  if (vcall_offset)
18322    return false;
18323
18324  /* Need a free register for GOT references.  */
18325  if (flag_pic && !(*targetm.binds_local_p) (function))
18326    return false;
18327
18328  /* Otherwise ok.  */
18329  return true;
18330}
18331
18332/* Output the assembler code for a thunk function.  THUNK_DECL is the
18333   declaration for the thunk function itself, FUNCTION is the decl for
18334   the target function.  DELTA is an immediate constant offset to be
18335   added to THIS.  If VCALL_OFFSET is nonzero, the word at
18336   *(*this + vcall_offset) should be added to THIS.  */
18337
18338static void
18339x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
18340		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
18341		     HOST_WIDE_INT vcall_offset, tree function)
18342{
18343  rtx xops[3];
18344  rtx this = x86_this_parameter (function);
18345  rtx this_reg, tmp;
18346
18347  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
18348     pull it in now and let DELTA benefit.  */
18349  if (REG_P (this))
18350    this_reg = this;
18351  else if (vcall_offset)
18352    {
18353      /* Put the this parameter into %eax.  */
18354      xops[0] = this;
18355      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
18356      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18357    }
18358  else
18359    this_reg = NULL_RTX;
18360
18361  /* Adjust the this parameter by a fixed constant.  */
18362  if (delta)
18363    {
18364      xops[0] = GEN_INT (delta);
18365      xops[1] = this_reg ? this_reg : this;
18366      if (TARGET_64BIT)
18367	{
18368	  if (!x86_64_general_operand (xops[0], DImode))
18369	    {
18370	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18371	      xops[1] = tmp;
18372	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18373	      xops[0] = tmp;
18374	      xops[1] = this;
18375	    }
18376	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18377	}
18378      else
18379	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18380    }
18381
18382  /* Adjust the this parameter by a value stored in the vtable.  */
18383  if (vcall_offset)
18384    {
18385      if (TARGET_64BIT)
18386	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18387      else
18388	{
18389	  int tmp_regno = 2 /* ECX */;
18390	  if (lookup_attribute ("fastcall",
18391	      TYPE_ATTRIBUTES (TREE_TYPE (function))))
18392	    tmp_regno = 0 /* EAX */;
18393	  tmp = gen_rtx_REG (SImode, tmp_regno);
18394	}
18395
18396      xops[0] = gen_rtx_MEM (Pmode, this_reg);
18397      xops[1] = tmp;
18398      if (TARGET_64BIT)
18399	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18400      else
18401	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18402
18403      /* Adjust the this parameter.  */
18404      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18405      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18406	{
18407	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
18408	  xops[0] = GEN_INT (vcall_offset);
18409	  xops[1] = tmp2;
18410	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18411	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18412	}
18413      xops[1] = this_reg;
18414      if (TARGET_64BIT)
18415	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18416      else
18417	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18418    }
18419
18420  /* If necessary, drop THIS back to its stack slot.  */
18421  if (this_reg && this_reg != this)
18422    {
18423      xops[0] = this_reg;
18424      xops[1] = this;
18425      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18426    }
18427
18428  xops[0] = XEXP (DECL_RTL (function), 0);
18429  if (TARGET_64BIT)
18430    {
18431      if (!flag_pic || (*targetm.binds_local_p) (function))
18432	output_asm_insn ("jmp\t%P0", xops);
18433      else
18434	{
18435	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18436	  tmp = gen_rtx_CONST (Pmode, tmp);
18437	  tmp = gen_rtx_MEM (QImode, tmp);
18438	  xops[0] = tmp;
18439	  output_asm_insn ("jmp\t%A0", xops);
18440	}
18441    }
18442  else
18443    {
18444      if (!flag_pic || (*targetm.binds_local_p) (function))
18445	output_asm_insn ("jmp\t%P0", xops);
18446      else
18447#if TARGET_MACHO
18448	if (TARGET_MACHO)
18449	  {
18450	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
18451	    tmp = (gen_rtx_SYMBOL_REF
18452		   (Pmode,
18453		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18454	    tmp = gen_rtx_MEM (QImode, tmp);
18455	    xops[0] = tmp;
18456	    output_asm_insn ("jmp\t%0", xops);
18457	  }
18458	else
18459#endif /* TARGET_MACHO */
18460	{
18461	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18462	  output_set_got (tmp, NULL_RTX);
18463
18464	  xops[1] = tmp;
18465	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18466	  output_asm_insn ("jmp\t{*}%1", xops);
18467	}
18468    }
18469}
18470
18471static void
18472x86_file_start (void)
18473{
18474  default_file_start ();
18475#if TARGET_MACHO
18476  darwin_file_start ();
18477#endif
18478  if (X86_FILE_START_VERSION_DIRECTIVE)
18479    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18480  if (X86_FILE_START_FLTUSED)
18481    fputs ("\t.global\t__fltused\n", asm_out_file);
18482  if (ix86_asm_dialect == ASM_INTEL)
18483    fputs ("\t.intel_syntax\n", asm_out_file);
18484}
18485
18486int
18487x86_field_alignment (tree field, int computed)
18488{
18489  enum machine_mode mode;
18490  tree type = TREE_TYPE (field);
18491
18492  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18493    return computed;
18494  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18495		    ? get_inner_array_type (type) : type);
18496  if (mode == DFmode || mode == DCmode
18497      || GET_MODE_CLASS (mode) == MODE_INT
18498      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18499    return MIN (32, computed);
18500  return computed;
18501}
18502
18503/* Output assembler code to FILE to increment profiler label # LABELNO
18504   for profiling a function entry.  */
18505void
18506x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18507{
18508  if (TARGET_64BIT)
18509    if (flag_pic)
18510      {
18511#ifndef NO_PROFILE_COUNTERS
18512	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18513#endif
18514	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18515      }
18516    else
18517      {
18518#ifndef NO_PROFILE_COUNTERS
18519	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18520#endif
18521	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18522      }
18523  else if (flag_pic)
18524    {
18525#ifndef NO_PROFILE_COUNTERS
18526      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18527	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18528#endif
18529      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18530    }
18531  else
18532    {
18533#ifndef NO_PROFILE_COUNTERS
18534      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18535	       PROFILE_COUNT_REGISTER);
18536#endif
18537      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18538    }
18539}
18540
18541/* We don't have exact information about the insn sizes, but we may assume
18542   quite safely that we are informed about all 1 byte insns and memory
18543   address sizes.  This is enough to eliminate unnecessary padding in
18544   99% of cases.  */
18545
18546static int
18547min_insn_size (rtx insn)
18548{
18549  int l = 0;
18550
18551  if (!INSN_P (insn) || !active_insn_p (insn))
18552    return 0;
18553
  /* Discard alignments we've emitted, and jump tables.  */
18555  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18556      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18557    return 0;
18558  if (GET_CODE (insn) == JUMP_INSN
18559      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18560	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18561    return 0;
18562
  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
18565  if (GET_CODE (insn) == CALL_INSN
18566      && symbolic_reference_mentioned_p (PATTERN (insn))
18567      && !SIBLING_CALL_P (insn))
18568    return 5;
18569  if (get_attr_length (insn) <= 1)
18570    return 1;
18571
  /* For normal instructions we rely on the sizes of addresses and on
     the presence of a symbol to require 4 bytes of encoding.  This is
     not the case for jumps, where references are PC relative.  */
18575  if (GET_CODE (insn) != JUMP_INSN)
18576    {
18577      l = get_attr_length_address (insn);
18578      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18579	l = 4;
18580    }
18581  if (l)
18582    return 1+l;
18583  else
18584    return 2;
18585}
18586
18587/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18588   window.  */
18589
18590static void
18591ix86_avoid_jump_misspredicts (void)
18592{
18593  rtx insn, start = get_insns ();
18594  int nbytes = 0, njumps = 0;
18595  int isjump = 0;
18596
  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of the instructions in the interval, including INSN but not
     START.  When NBYTES is smaller than 16 bytes, it is possible that
     the end of START and INSN end up in the same 16 byte window.

     The smallest offset at which INSN can start within that window is
     the case where START ends at offset 0; the offset of INSN is then
     NBYTES - sizeof (INSN).  We add a p2align to the 16 byte window
     with maxskip 17 - NBYTES + sizeof (INSN).  */
18607  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18608    {
18609
18610      nbytes += min_insn_size (insn);
18611      if (dump_file)
18612        fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18613		INSN_UID (insn), min_insn_size (insn));
18614      if ((GET_CODE (insn) == JUMP_INSN
18615	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
18616	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18617	  || GET_CODE (insn) == CALL_INSN)
18618	njumps++;
18619      else
18620	continue;
18621
18622      while (njumps > 3)
18623	{
18624	  start = NEXT_INSN (start);
18625	  if ((GET_CODE (start) == JUMP_INSN
18626	       && GET_CODE (PATTERN (start)) != ADDR_VEC
18627	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18628	      || GET_CODE (start) == CALL_INSN)
18629	    njumps--, isjump = 1;
18630	  else
18631	    isjump = 0;
18632	  nbytes -= min_insn_size (start);
18633	}
18634      gcc_assert (njumps >= 0);
18635      if (dump_file)
18636        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18637		INSN_UID (start), INSN_UID (insn), nbytes);
18638
18639      if (njumps == 3 && isjump && nbytes < 16)
18640	{
18641	  int padsize = 15 - nbytes + min_insn_size (insn);
18642
18643	  if (dump_file)
18644	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18645		     INSN_UID (insn), padsize);
18646          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18647	}
18648    }
18649}
18650
/* AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly preceded
   by another jump instruction.  We avoid the penalty by inserting a NOP just
   before the RET instruction in such cases.  */
18655static void
18656ix86_pad_returns (void)
18657{
18658  edge e;
18659  edge_iterator ei;
18660
18661  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18662    {
18663      basic_block bb = e->src;
18664      rtx ret = BB_END (bb);
18665      rtx prev;
18666      bool replace = false;
18667
18668      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18669	  || !maybe_hot_bb_p (bb))
18670	continue;
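      /* Walk back to the previous active insn or code label; a label here
         means the return can also be reached by a jump.  */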
18671      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18672	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18673	  break;
18674      if (prev && GET_CODE (prev) == CODE_LABEL)
18675	{
18676	  edge e;
18677	  edge_iterator ei;
18678
18679	  FOR_EACH_EDGE (e, ei, bb->preds)
18680	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
18681		&& !(e->flags & EDGE_FALLTHRU))
18682	      replace = true;
18683	}
18684      if (!replace)
18685	{
18686	  prev = prev_active_insn (ret);
18687	  if (prev
18688	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18689		  || GET_CODE (prev) == CALL_INSN))
18690	    replace = true;
	  /* Empty functions get a branch mispredict even when the jump
	     destination is not visible to us.  */
18693	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18694	    replace = true;
18695	}
18696      if (replace)
18697	{
18698	  emit_insn_before (gen_return_internal_long (), ret);
18699	  delete_insn (ret);
18700	}
18701    }
18702}
18703
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
18706static void
18707ix86_reorg (void)
18708{
18709  if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18710    ix86_pad_returns ();
18711  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18712    ix86_avoid_jump_misspredicts ();
18713}
18714
/* Return nonzero when a QImode register that must be represented via a REX
   prefix is used.  */
18717bool
18718x86_extended_QIreg_mentioned_p (rtx insn)
18719{
18720  int i;
18721  extract_insn_cached (insn);
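  /* Any QImode register operand other than %al, %bl, %cl or %dl needs a
     REX prefix to access its low byte (spl, bpl, sil, dil, r8b-r15b).  */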
18722  for (i = 0; i < recog_data.n_operands; i++)
18723    if (REG_P (recog_data.operand[i])
18724	&& REGNO (recog_data.operand[i]) >= 4)
18725       return true;
18726  return false;
18727}
18728
/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
18731static int
18732extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18733{
18734   unsigned int regno;
18735   if (!REG_P (*p))
18736     return 0;
18737   regno = REGNO (*p);
18738   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18739}
18740
/* Return true when INSN mentions a register that must be encoded using a
   REX prefix.  */
18743bool
18744x86_extended_reg_mentioned_p (rtx insn)
18745{
18746  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18747}
18748
18749/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
18750   optabs would emit if we didn't have TFmode patterns.  */
18751
18752void
18753x86_emit_floatuns (rtx operands[2])
18754{
18755  rtx neglab, donelab, i0, i1, f0, in, out;
18756  enum machine_mode mode, inmode;
18757
18758  inmode = GET_MODE (operands[1]);
18759  gcc_assert (inmode == SImode || inmode == DImode);
18760
18761  out = operands[0];
18762  in = force_reg (inmode, operands[1]);
18763  mode = GET_MODE (out);
18764  neglab = gen_label_rtx ();
18765  donelab = gen_label_rtx ();
18766  i1 = gen_reg_rtx (Pmode);
18767  f0 = gen_reg_rtx (mode);
18768
18769  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18770
18771  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18772  emit_jump_insn (gen_jump (donelab));
18773  emit_barrier ();
18774
18775  emit_label (neglab);
18776
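  /* The input has its high bit set, so a signed conversion would treat it
     as negative.  Convert (IN >> 1) | (IN & 1) instead and double the
     result; OR-ing in the low bit preserves correct rounding.  */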
18777  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18778  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18779  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18780  expand_float (f0, i0, 0);
18781  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18782
18783  emit_label (donelab);
18784}
18785
18786/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAL.  Return true if successful.  */
18788
18789static bool
18790ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18791				   rtx target, rtx val)
18792{
18793  enum machine_mode smode, wsmode, wvmode;
18794  rtx x;
18795
18796  switch (mode)
18797    {
18798    case V2SImode:
18799    case V2SFmode:
18800      if (!mmx_ok)
18801	return false;
18802      /* FALLTHRU */
18803
18804    case V2DFmode:
18805    case V2DImode:
18806    case V4SFmode:
18807    case V4SImode:
18808      val = force_reg (GET_MODE_INNER (mode), val);
18809      x = gen_rtx_VEC_DUPLICATE (mode, val);
18810      emit_insn (gen_rtx_SET (VOIDmode, target, x));
18811      return true;
18812
18813    case V4HImode:
18814      if (!mmx_ok)
18815	return false;
18816      if (TARGET_SSE || TARGET_3DNOW_A)
18817	{
18818	  val = gen_lowpart (SImode, val);
18819	  x = gen_rtx_TRUNCATE (HImode, val);
18820	  x = gen_rtx_VEC_DUPLICATE (mode, x);
18821	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
18822	  return true;
18823	}
18824      else
18825	{
18826	  smode = HImode;
18827	  wsmode = SImode;
18828	  wvmode = V2SImode;
18829	  goto widen;
18830	}
18831
18832    case V8QImode:
18833      if (!mmx_ok)
18834	return false;
18835      smode = QImode;
18836      wsmode = HImode;
18837      wvmode = V4HImode;
18838      goto widen;
18839    case V8HImode:
18840      if (TARGET_SSE2)
18841	{
18842	  rtx tmp1, tmp2;
18843	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
18844	  tmp1 = gen_reg_rtx (SImode);
18845	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as the low element of a V4SImode vector. */
18847	  tmp2 = gen_reg_rtx (V4SImode);
18848	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18849				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18850				    CONST0_RTX (V4SImode),
18851				    const1_rtx);
18852	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18853	  /* Cast the V4SImode vector back to a V8HImode vector.  */
18854	  tmp1 = gen_reg_rtx (V8HImode);
18855	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18856	  /* Duplicate the low short through the whole low SImode word.  */
18857	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18858	  /* Cast the V8HImode vector back to a V4SImode vector.  */
18859	  tmp2 = gen_reg_rtx (V4SImode);
18860	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18861	  /* Replicate the low element of the V4SImode vector.  */
18862	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V4SImode vector back to V8HImode, and store in target.  */
18864	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18865	  return true;
18866	}
18867      smode = HImode;
18868      wsmode = SImode;
18869      wvmode = V4SImode;
18870      goto widen;
18871    case V16QImode:
18872      if (TARGET_SSE2)
18873	{
18874	  rtx tmp1, tmp2;
18875	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
18876	  tmp1 = gen_reg_rtx (SImode);
18877	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
	  /* Insert the SImode value as the low element of a V4SImode vector. */
18879	  tmp2 = gen_reg_rtx (V4SImode);
18880	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18881				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18882				    CONST0_RTX (V4SImode),
18883				    const1_rtx);
18884	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18885	  /* Cast the V4SImode vector back to a V16QImode vector.  */
18886	  tmp1 = gen_reg_rtx (V16QImode);
18887	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18888	  /* Duplicate the low byte through the whole low SImode word.  */
18889	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18890	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18891	  /* Cast the V16QImode vector back to a V4SImode vector.  */
18892	  tmp2 = gen_reg_rtx (V4SImode);
18893	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18894	  /* Replicate the low element of the V4SImode vector.  */
18895	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
	  /* Cast the V4SImode vector back to V16QImode, and store in target.  */
18897	  emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18898	  return true;
18899	}
18900      smode = QImode;
18901      wsmode = HImode;
18902      wvmode = V8HImode;
18903      goto widen;
18904    widen:
18905      /* Replicate the value once into the next wider mode and recurse.  */
18906      val = convert_modes (wsmode, smode, val, true);
18907      x = expand_simple_binop (wsmode, ASHIFT, val,
18908			       GEN_INT (GET_MODE_BITSIZE (smode)),
18909			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
18910      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18911
18912      x = gen_reg_rtx (wvmode);
18913      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18914	gcc_unreachable ();
18915      emit_move_insn (target, gen_lowpart (mode, x));
18916      return true;
18917
18918    default:
18919      return false;
18920    }
18921}
18922
18923/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18924   whose ONE_VAR element is VAR, and other elements are zero.  Return true
18925   if successful.  */
18926
18927static bool
18928ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18929				     rtx target, rtx var, int one_var)
18930{
18931  enum machine_mode vsimode;
18932  rtx new_target;
18933  rtx x, tmp;
18934
18935  switch (mode)
18936    {
18937    case V2SFmode:
18938    case V2SImode:
18939      if (!mmx_ok)
18940	return false;
18941      /* FALLTHRU */
18942
18943    case V2DFmode:
18944    case V2DImode:
18945      if (one_var != 0)
18946	return false;
18947      var = force_reg (GET_MODE_INNER (mode), var);
18948      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18949      emit_insn (gen_rtx_SET (VOIDmode, target, x));
18950      return true;
18951
18952    case V4SFmode:
18953    case V4SImode:
18954      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18955	new_target = gen_reg_rtx (mode);
18956      else
18957	new_target = target;
18958      var = force_reg (GET_MODE_INNER (mode), var);
18959      x = gen_rtx_VEC_DUPLICATE (mode, var);
18960      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18961      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18962      if (one_var != 0)
18963	{
18964	  /* We need to shuffle the value to the correct position, so
18965	     create a new pseudo to store the intermediate result.  */
18966
18967	  /* With SSE2, we can use the integer shuffle insns.  */
18968	  if (mode != V4SFmode && TARGET_SSE2)
18969	    {
18970	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18971					    GEN_INT (1),
18972					    GEN_INT (one_var == 1 ? 0 : 1),
18973					    GEN_INT (one_var == 2 ? 0 : 1),
18974					    GEN_INT (one_var == 3 ? 0 : 1)));
18975	      if (target != new_target)
18976		emit_move_insn (target, new_target);
18977	      return true;
18978	    }
18979
18980	  /* Otherwise convert the intermediate result to V4SFmode and
18981	     use the SSE1 shuffle instructions.  */
18982	  if (mode != V4SFmode)
18983	    {
18984	      tmp = gen_reg_rtx (V4SFmode);
18985	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18986	    }
18987	  else
18988	    tmp = new_target;
18989
18990	  emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18991				       GEN_INT (1),
18992				       GEN_INT (one_var == 1 ? 0 : 1),
18993				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
18994				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18995
18996	  if (mode != V4SFmode)
18997	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18998	  else if (tmp != target)
18999	    emit_move_insn (target, tmp);
19000	}
19001      else if (target != new_target)
19002	emit_move_insn (target, new_target);
19003      return true;
19004
19005    case V8HImode:
19006    case V16QImode:
19007      vsimode = V4SImode;
19008      goto widen;
19009    case V4HImode:
19010    case V8QImode:
19011      if (!mmx_ok)
19012	return false;
19013      vsimode = V2SImode;
19014      goto widen;
19015    widen:
19016      if (one_var != 0)
19017	return false;
19018
19019      /* Zero extend the variable element to SImode and recurse.  */
19020      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
19021
19022      x = gen_reg_rtx (vsimode);
19023      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
19024						var, one_var))
19025	gcc_unreachable ();
19026
19027      emit_move_insn (target, gen_lowpart (mode, x));
19028      return true;
19029
19030    default:
19031      return false;
19032    }
19033}
19034
19035/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
19036   consisting of the values in VALS.  It is known that all elements
19037   except ONE_VAR are constants.  Return true if successful.  */
19038
19039static bool
19040ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
19041				 rtx target, rtx vals, int one_var)
19042{
19043  rtx var = XVECEXP (vals, 0, one_var);
19044  enum machine_mode wmode;
19045  rtx const_vec, x;
19046
19047  const_vec = copy_rtx (vals);
19048  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
19049  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
19050
19051  switch (mode)
19052    {
19053    case V2DFmode:
19054    case V2DImode:
19055    case V2SFmode:
19056    case V2SImode:
19057      /* For the two element vectors, it's just as easy to use
19058	 the general case.  */
19059      return false;
19060
19061    case V4SFmode:
19062    case V4SImode:
19063    case V8HImode:
19064    case V4HImode:
19065      break;
19066
19067    case V16QImode:
19068      wmode = V8HImode;
19069      goto widen;
19070    case V8QImode:
19071      wmode = V4HImode;
19072      goto widen;
19073    widen:
19074      /* There's no way to set one QImode entry easily.  Combine
19075	 the variable value with its adjacent constant value, and
19076	 promote to an HImode set.  */
19077      x = XVECEXP (vals, 0, one_var ^ 1);
19078      if (one_var & 1)
19079	{
19080	  var = convert_modes (HImode, QImode, var, true);
19081	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
19082				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
19083	  x = GEN_INT (INTVAL (x) & 0xff);
19084	}
19085      else
19086	{
19087	  var = convert_modes (HImode, QImode, var, true);
19088	  x = gen_int_mode (INTVAL (x) << 8, HImode);
19089	}
19090      if (x != const0_rtx)
19091	var = expand_simple_binop (HImode, IOR, var, x, var,
19092				   1, OPTAB_LIB_WIDEN);
19093
19094      x = gen_reg_rtx (wmode);
19095      emit_move_insn (x, gen_lowpart (wmode, const_vec));
19096      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
19097
19098      emit_move_insn (target, gen_lowpart (mode, x));
19099      return true;
19100
19101    default:
19102      return false;
19103    }
19104
19105  emit_move_insn (target, const_vec);
19106  ix86_expand_vector_set (mmx_ok, target, var, one_var);
19107  return true;
19108}
19109
19110/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
19111   all values variable, and none identical.  */
19112
19113static void
19114ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
19115				 rtx target, rtx vals)
19116{
19117  enum machine_mode half_mode = GET_MODE_INNER (mode);
19118  rtx op0 = NULL, op1 = NULL;
19119  bool use_vec_concat = false;
19120
19121  switch (mode)
19122    {
19123    case V2SFmode:
19124    case V2SImode:
19125      if (!mmx_ok && !TARGET_SSE)
19126	break;
19127      /* FALLTHRU */
19128
19129    case V2DFmode:
19130    case V2DImode:
19131      /* For the two element vectors, we always implement VEC_CONCAT.  */
19132      op0 = XVECEXP (vals, 0, 0);
19133      op1 = XVECEXP (vals, 0, 1);
19134      use_vec_concat = true;
19135      break;
19136
19137    case V4SFmode:
19138      half_mode = V2SFmode;
19139      goto half;
19140    case V4SImode:
19141      half_mode = V2SImode;
19142      goto half;
19143    half:
19144      {
19145	rtvec v;
19146
19147	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
19148	   Recurse to load the two halves.  */
19149
19150	op0 = gen_reg_rtx (half_mode);
19151	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
19152	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
19153
19154	op1 = gen_reg_rtx (half_mode);
19155	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
19156	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
19157
19158	use_vec_concat = true;
19159      }
19160      break;
19161
19162    case V8HImode:
19163    case V16QImode:
19164    case V4HImode:
19165    case V8QImode:
19166      break;
19167
19168    default:
19169      gcc_unreachable ();
19170    }
19171
19172  if (use_vec_concat)
19173    {
19174      if (!register_operand (op0, half_mode))
19175	op0 = force_reg (half_mode, op0);
19176      if (!register_operand (op1, half_mode))
19177	op1 = force_reg (half_mode, op1);
19178
19179      emit_insn (gen_rtx_SET (VOIDmode, target,
19180			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
19181    }
19182  else
19183    {
19184      int i, j, n_elts, n_words, n_elt_per_word;
19185      enum machine_mode inner_mode;
19186      rtx words[4], shift;
19187
19188      inner_mode = GET_MODE_INNER (mode);
19189      n_elts = GET_MODE_NUNITS (mode);
19190      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
19191      n_elt_per_word = n_elts / n_words;
19192      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
19193
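      /* Build each word by accumulating its elements from the highest
	 index down to the lowest, so that the lowest-indexed element
	 ends up in the least significant bits, matching the target's
	 little-endian element layout.  */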
19194      for (i = 0; i < n_words; ++i)
19195	{
19196	  rtx word = NULL_RTX;
19197
19198	  for (j = 0; j < n_elt_per_word; ++j)
19199	    {
19200	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
19201	      elt = convert_modes (word_mode, inner_mode, elt, true);
19202
19203	      if (j == 0)
19204		word = elt;
19205	      else
19206		{
19207		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
19208					      word, 1, OPTAB_LIB_WIDEN);
19209		  word = expand_simple_binop (word_mode, IOR, word, elt,
19210					      word, 1, OPTAB_LIB_WIDEN);
19211		}
19212	    }
19213
19214	  words[i] = word;
19215	}
19216
19217      if (n_words == 1)
19218	emit_move_insn (target, gen_lowpart (mode, words[0]));
19219      else if (n_words == 2)
19220	{
19221	  rtx tmp = gen_reg_rtx (mode);
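	  /* The CLOBBER lets the RTL passes treat TMP as fully
	     initialized by the two partial-word stores below.  */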
19222	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
19223	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
19224	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
19225	  emit_move_insn (target, tmp);
19226	}
19227      else if (n_words == 4)
19228	{
19229	  rtx tmp = gen_reg_rtx (V4SImode);
19230	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
19231	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
19232	  emit_move_insn (target, gen_lowpart (mode, tmp));
19233	}
19234      else
19235	gcc_unreachable ();
19236    }
19237}
19238
19239/* Initialize vector TARGET via VALS.  Suppress the use of MMX
19240   instructions unless MMX_OK is true.  */
19241
19242void
19243ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
19244{
19245  enum machine_mode mode = GET_MODE (target);
19246  enum machine_mode inner_mode = GET_MODE_INNER (mode);
19247  int n_elts = GET_MODE_NUNITS (mode);
19248  int n_var = 0, one_var = -1;
19249  bool all_same = true, all_const_zero = true;
19250  int i;
19251  rtx x;
19252
19253  for (i = 0; i < n_elts; ++i)
19254    {
19255      x = XVECEXP (vals, 0, i);
19256      if (!CONSTANT_P (x))
19257	n_var++, one_var = i;
19258      else if (x != CONST0_RTX (inner_mode))
19259	all_const_zero = false;
19260      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
19261	all_same = false;
19262    }
19263
19264  /* Constants are best loaded from the constant pool.  */
19265  if (n_var == 0)
19266    {
19267      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
19268      return;
19269    }
19270
19271  /* If all values are identical, broadcast the value.  */
19272  if (all_same
19273      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
19274					    XVECEXP (vals, 0, 0)))
19275    return;
19276
19277  /* Values where only one field is non-constant are best loaded from
19278     the pool and overwritten via move later.  */
19279  if (n_var == 1)
19280    {
19281      if (all_const_zero
19282	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
19283						  XVECEXP (vals, 0, one_var),
19284						  one_var))
19285	return;
19286
19287      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
19288	return;
19289    }
19290
19291  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
19292}
19293
19294void
19295ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
19296{
19297  enum machine_mode mode = GET_MODE (target);
19298  enum machine_mode inner_mode = GET_MODE_INNER (mode);
19299  bool use_vec_merge = false;
19300  rtx tmp;
19301
19302  switch (mode)
19303    {
19304    case V2SFmode:
19305    case V2SImode:
19306      if (mmx_ok)
19307	{
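	  /* Extract the element we are keeping, then rebuild the
	     vector as a VEC_CONCAT of VAL and that element in the
	     proper order.  */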
19308	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
19309	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
19310	  if (elt == 0)
19311	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
19312	  else
19313	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
19314	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19315	  return;
19316	}
19317      break;
19318
19319    case V2DFmode:
19320    case V2DImode:
19321      {
19322	rtx op0, op1;
19323
19324	/* For the two element vectors, we implement a VEC_CONCAT with
19325	   the extraction of the other element.  */
19326
19327	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
19328	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
19329
19330	if (elt == 0)
19331	  op0 = val, op1 = tmp;
19332	else
19333	  op0 = tmp, op1 = val;
19334
19335	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
19336	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19337      }
19338      return;
19339
19340    case V4SFmode:
19341      switch (elt)
19342	{
19343	case 0:
19344	  use_vec_merge = true;
19345	  break;
19346
19347	case 1:
19348	  /* tmp = target = A B C D */
19349	  tmp = copy_to_reg (target);
19350	  /* target = A A B B */
19351	  emit_insn (gen_sse_unpcklps (target, target, target));
19352	  /* target = X A B B */
19353	  ix86_expand_vector_set (false, target, val, 0);
19354	  /* target = A X C D  */
19355	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
19356				       GEN_INT (1), GEN_INT (0),
19357				       GEN_INT (2+4), GEN_INT (3+4)));
19358	  return;
19359
19360	case 2:
19361	  /* tmp = target = A B C D */
19362	  tmp = copy_to_reg (target);
19363	  /* tmp = X B C D */
19364	  ix86_expand_vector_set (false, tmp, val, 0);
19365	  /* target = A B X D */
19366	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
19367				       GEN_INT (0), GEN_INT (1),
19368				       GEN_INT (0+4), GEN_INT (3+4)));
19369	  return;
19370
19371	case 3:
19372	  /* tmp = target = A B C D */
19373	  tmp = copy_to_reg (target);
19374	  /* tmp = X B C D */
19375	  ix86_expand_vector_set (false, tmp, val, 0);
19376	  /* target = A B C X  */
19377	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
19378				       GEN_INT (0), GEN_INT (1),
19379				       GEN_INT (2+4), GEN_INT (0+4)));
19380	  return;
19381
19382	default:
19383	  gcc_unreachable ();
19384	}
19385      break;
19386
19387    case V4SImode:
19388      /* Element 0 handled by vec_merge below.  */
19389      if (elt == 0)
19390	{
19391	  use_vec_merge = true;
19392	  break;
19393	}
19394
19395      if (TARGET_SSE2)
19396	{
19397	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
19398	     store into element 0, then shuffle them back.  */
19399
19400	  rtx order[4];
19401
19402	  order[0] = GEN_INT (elt);
19403	  order[1] = const1_rtx;
19404	  order[2] = const2_rtx;
19405	  order[3] = GEN_INT (3);
19406	  order[elt] = const0_rtx;
19407
19408	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19409					order[1], order[2], order[3]));
19410
19411	  ix86_expand_vector_set (false, target, val, 0);
19412
19413	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19414					order[1], order[2], order[3]));
19415	}
19416      else
19417	{
19418	  /* For SSE1, we have to reuse the V4SF code.  */
19419	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19420				  gen_lowpart (SFmode, val), elt);
19421	}
19422      return;
19423
19424    case V8HImode:
19425      use_vec_merge = TARGET_SSE2;
19426      break;
19427    case V4HImode:
19428      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19429      break;
19430
19431    case V16QImode:
19432    case V8QImode:
19433    default:
19434      break;
19435    }
19436
19437  if (use_vec_merge)
19438    {
19439      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19440      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19441      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19442    }
19443  else
19444    {
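      /* No direct way to set the element in a register: spill the
	 vector to a stack temporary, store VAL into the selected
	 element in memory, and reload the whole vector.  */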
19445      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19446
19447      emit_move_insn (mem, target);
19448
19449      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19450      emit_move_insn (tmp, val);
19451
19452      emit_move_insn (target, mem);
19453    }
19454}
19455
19456void
19457ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19458{
19459  enum machine_mode mode = GET_MODE (vec);
19460  enum machine_mode inner_mode = GET_MODE_INNER (mode);
19461  bool use_vec_extr = false;
19462  rtx tmp;
19463
19464  switch (mode)
19465    {
19466    case V2SImode:
19467    case V2SFmode:
19468      if (!mmx_ok)
19469	break;
19470      /* FALLTHRU */
19471
19472    case V2DFmode:
19473    case V2DImode:
19474      use_vec_extr = true;
19475      break;
19476
19477    case V4SFmode:
19478      switch (elt)
19479	{
19480	case 0:
19481	  tmp = vec;
19482	  break;
19483
19484	case 1:
19485	case 3:
19486	  tmp = gen_reg_rtx (mode);
19487	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19488				       GEN_INT (elt), GEN_INT (elt),
19489				       GEN_INT (elt+4), GEN_INT (elt+4)));
19490	  break;
19491
19492	case 2:
19493	  tmp = gen_reg_rtx (mode);
19494	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19495	  break;
19496
19497	default:
19498	  gcc_unreachable ();
19499	}
19500      vec = tmp;
19501      use_vec_extr = true;
19502      elt = 0;
19503      break;
19504
19505    case V4SImode:
19506      if (TARGET_SSE2)
19507	{
19508	  switch (elt)
19509	    {
19510	    case 0:
19511	      tmp = vec;
19512	      break;
19513
19514	    case 1:
19515	    case 3:
19516	      tmp = gen_reg_rtx (mode);
19517	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19518					    GEN_INT (elt), GEN_INT (elt),
19519					    GEN_INT (elt), GEN_INT (elt)));
19520	      break;
19521
19522	    case 2:
19523	      tmp = gen_reg_rtx (mode);
19524	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19525	      break;
19526
19527	    default:
19528	      gcc_unreachable ();
19529	    }
19530	  vec = tmp;
19531	  use_vec_extr = true;
19532	  elt = 0;
19533	}
19534      else
19535	{
19536	  /* For SSE1, we have to reuse the V4SF code.  */
19537	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19538				      gen_lowpart (V4SFmode, vec), elt);
19539	  return;
19540	}
19541      break;
19542
19543    case V8HImode:
19544      use_vec_extr = TARGET_SSE2;
19545      break;
19546    case V4HImode:
19547      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19548      break;
19549
19550    case V16QImode:
19551    case V8QImode:
19552      /* ??? Could extract the appropriate HImode element and shift.  */
19553    default:
19554      break;
19555    }
19556
19557  if (use_vec_extr)
19558    {
19559      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19560      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19561
19562      /* Let the rtl optimizers know about the zero extension performed.  */
19563      if (inner_mode == HImode)
19564	{
19565	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19566	  target = gen_lowpart (SImode, target);
19567	}
19568
19569      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19570    }
19571  else
19572    {
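      /* No direct extraction pattern: spill the vector to a stack
	 temporary and load the selected element from memory.  */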
19573      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19574
19575      emit_move_insn (mem, vec);
19576
19577      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19578      emit_move_insn (target, tmp);
19579    }
19580}
19581
19582/* Expand a vector reduction on V4SFmode for SSE1.  FN generates the binary
19583   operation to reduce with; DEST is the destination; IN is the input vector.  */
19584
19585void
19586ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19587{
19588  rtx tmp1, tmp2, tmp3;
19589
19590  tmp1 = gen_reg_rtx (V4SFmode);
19591  tmp2 = gen_reg_rtx (V4SFmode);
19592  tmp3 = gen_reg_rtx (V4SFmode);
19593
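  /* tmp1 = { in[2], in[3], in[2], in[3] };
     tmp2 = { in[2] OP in[0], in[3] OP in[1], ... },
     where OP is the operation generated by FN.  */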
19594  emit_insn (gen_sse_movhlps (tmp1, in, in));
19595  emit_insn (fn (tmp2, tmp1, in));
19596
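  /* tmp3 = { tmp2[1], tmp2[1], tmp2[1], tmp2[1] };
     dest[0] = tmp2[0] OP tmp2[1], the reduction of all four elements.  */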
19597  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19598			       GEN_INT (1), GEN_INT (1),
19599			       GEN_INT (1+4), GEN_INT (1+4)));
19600  emit_insn (fn (dest, tmp2, tmp3));
19601}
19602
19603/* Target hook for scalar_mode_supported_p.  */
19604static bool
19605ix86_scalar_mode_supported_p (enum machine_mode mode)
19606{
19607  if (DECIMAL_FLOAT_MODE_P (mode))
19608    return true;
19609  else
19610    return default_scalar_mode_supported_p (mode);
19611}
19612
19613/* Implements target hook vector_mode_supported_p.  */
19614static bool
19615ix86_vector_mode_supported_p (enum machine_mode mode)
19616{
19617  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19618    return true;
19619  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19620    return true;
19621  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19622    return true;
19623  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19624    return true;
19625  return false;
19626}
19627
19628/* Worker function for TARGET_MD_ASM_CLOBBERS.
19629
19630   Clobber the flags, FP status and direction-flag registers on every
19631   asm to maintain source compatibility with the old cc0-based compiler.  */
19632
19633static tree
19634ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19635		      tree inputs ATTRIBUTE_UNUSED,
19636		      tree clobbers)
19637{
19638  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19639			clobbers);
19640  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19641			clobbers);
19642  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19643			clobbers);
19644  return clobbers;
19645}
19646
19647/* Return true if this goes in large data/bss.  */
19648
19649static bool
19650ix86_in_large_data_p (tree exp)
19651{
19652  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19653    return false;
19654
19655  /* Functions are never large data.  */
19656  if (TREE_CODE (exp) == FUNCTION_DECL)
19657    return false;
19658
19659  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19660    {
19661      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19662      if (strcmp (section, ".ldata") == 0
19663	  || strcmp (section, ".lbss") == 0)
19664	return true;
19665      return false;
19666    }
19667  else
19668    {
19669      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19670
19671      /* If this is an incomplete type with size 0, then we can't put it
19672	 in data because it might be too big when completed.  */
19673      if (!size || size > ix86_section_threshold)
19674	return true;
19675    }
19676
19677  return false;
19678}
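
/* Flag symbols of static or external variables that live in the large
   data/bss sections as needing far (64-bit) addresses.  */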
19679static void
19680ix86_encode_section_info (tree decl, rtx rtl, int first)
19681{
19682  default_encode_section_info (decl, rtl, first);
19683
19684  if (TREE_CODE (decl) == VAR_DECL
19685      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19686      && ix86_in_large_data_p (decl))
19687    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19688}
19689
19690/* Worker function for REVERSE_CONDITION.  */
19691
19692enum rtx_code
19693ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19694{
19695  return (mode != CCFPmode && mode != CCFPUmode
19696	  ? reverse_condition (code)
19697	  : reverse_condition_maybe_unordered (code));
19698}
19699
19700/* Output code to perform an x87 FP register move, from OPERANDS[1]
19701   to OPERANDS[0].  */
19702
19703const char *
19704output_387_reg_move (rtx insn, rtx *operands)
19705{
19706  if (REG_P (operands[1])
19707      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19708    {
19709      if (REGNO (operands[0]) == FIRST_STACK_REG)
19710	return output_387_ffreep (operands, 0);
19711      return "fstp\t%y0";
19712    }
19713  if (STACK_TOP_P (operands[0]))
19714    return "fld%z1\t%y1";
19715  return "fst\t%y0";
19716}
19717
19718/* Output code to perform a conditional jump to LABEL, if C2 flag in
19719   FP status register is set.  */
19720
19721void
19722ix86_emit_fp_unordered_jump (rtx label)
19723{
19724  rtx reg = gen_reg_rtx (HImode);
19725  rtx temp;
19726
19727  emit_insn (gen_x86_fnstsw_1 (reg));
19728
19729  if (TARGET_USE_SAHF)
19730    {
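      /* sahf copies C0/C2/C3 from the FP status word into CF/PF/ZF, so
	 the unordered condition can be tested directly on the flags.  */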
19731      emit_insn (gen_x86_sahf_1 (reg));
19732
19733      temp = gen_rtx_REG (CCmode, FLAGS_REG);
19734      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19735    }
19736  else
19737    {
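      /* C2 is bit 2 of the high byte of the FP status word, so test
	 that byte against the 0x04 mask.  */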
19738      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19739
19740      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19741      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19742    }
19743
19744  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19745			      gen_rtx_LABEL_REF (VOIDmode, label),
19746			      pc_rtx);
19747  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19748  emit_jump_insn (temp);
19749}
19750
19751/* Output code to perform a log1p XFmode calculation.  */
19752
19753void ix86_emit_i387_log1p (rtx op0, rtx op1)
19754{
19755  rtx label1 = gen_label_rtx ();
19756  rtx label2 = gen_label_rtx ();
19757
19758  rtx tmp = gen_reg_rtx (XFmode);
19759  rtx tmp2 = gen_reg_rtx (XFmode);
19760
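  /* fyl2xp1 is only defined for |x| < 1 - sqrt(2)/2 (about 0.29289);
     for larger |x| compute log2 (1 + x) with fyl2x instead.  Both
     paths use fldln2 to scale the base-2 logarithm into a natural
     logarithm.  */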
19761  emit_insn (gen_absxf2 (tmp, op1));
19762  emit_insn (gen_cmpxf (tmp,
19763    CONST_DOUBLE_FROM_REAL_VALUE (
19764       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19765       XFmode)));
19766  emit_jump_insn (gen_bge (label1));
19767
19768  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19769  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19770  emit_jump (label2);
19771
19772  emit_label (label1);
19773  emit_move_insn (tmp, CONST1_RTX (XFmode));
19774  emit_insn (gen_addxf3 (tmp, op1, tmp));
19775  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19776  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19777
19778  emit_label (label2);
19779}
19780
19781/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
19782
19783static void
19784i386_solaris_elf_named_section (const char *name, unsigned int flags,
19785				tree decl)
19786{
19787  /* With Binutils 2.15, the "@unwind" marker must be specified on
19788     every occurrence of the ".eh_frame" section, not just the first
19789     one.  */
19790  if (TARGET_64BIT
19791      && strcmp (name, ".eh_frame") == 0)
19792    {
19793      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19794	       flags & SECTION_WRITE ? "aw" : "a");
19795      return;
19796    }
19797  default_elf_asm_named_section (name, flags, decl);
19798}
19799
19800/* Return the mangling of TYPE if it is an extended fundamental type.  */
19801
19802static const char *
19803ix86_mangle_fundamental_type (tree type)
19804{
19805  switch (TYPE_MODE (type))
19806    {
19807    case TFmode:
19808      /* __float128 is "g".  */
19809      return "g";
19810    case XFmode:
19811      /* "long double" or __float80 is "e".  */
19812      return "e";
19813    default:
19814      return NULL;
19815    }
19816}
19817
19818/* For 32-bit code we can save PIC register setup by using the
19819   __stack_chk_fail_local hidden function instead of calling
19820   __stack_chk_fail directly.  64-bit code doesn't need to set up any
19821   PIC register, so it is better to call __stack_chk_fail directly.  */
19822
19823static tree
19824ix86_stack_protect_fail (void)
19825{
19826  return TARGET_64BIT
19827	 ? default_external_stack_protect_fail ()
19828	 : default_hidden_stack_protect_fail ();
19829}
19830
19831/* Select a format to encode pointers in exception handling data.  CODE
19832   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
19833   true if the symbol may be affected by dynamic relocations.
19834
19835   ??? All x86 object file formats are capable of representing this.
19836   After all, the relocation needed is the same as for the call insn.
19837   Whether or not a particular assembler allows us to enter such, I
19838   guess we'll have to see.  */
19839int
19840asm_preferred_eh_data_format (int code, int global)
19841{
19842  if (flag_pic)
19843    {
19844      int type = DW_EH_PE_sdata8;
19845      if (!TARGET_64BIT
19846	  || ix86_cmodel == CM_SMALL_PIC
19847	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19848	type = DW_EH_PE_sdata4;
19849      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19850    }
19851  if (ix86_cmodel == CM_SMALL
19852      || (ix86_cmodel == CM_MEDIUM && code))
19853    return DW_EH_PE_udata4;
19854  return DW_EH_PE_absptr;
19855}
19856
19857#include "gt-i386.h"
19858