1/* Subroutines used for code generation on IA-32.
2   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING.  If not, write to
19the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA.  */
21
22/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 198344 2009-10-21 19:26:12Z jhb $ */
23
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tree.h"
30#include "tm_p.h"
31#include "regs.h"
32#include "hard-reg-set.h"
33#include "real.h"
34#include "insn-config.h"
35#include "conditions.h"
36#include "output.h"
37#include "insn-codes.h"
38#include "insn-attr.h"
39#include "flags.h"
40#include "except.h"
41#include "function.h"
42#include "recog.h"
43#include "expr.h"
44#include "optabs.h"
45#include "toplev.h"
46#include "basic-block.h"
47#include "ggc.h"
48#include "target.h"
49#include "target-def.h"
50#include "langhooks.h"
51#include "cgraph.h"
52#include "tree-gimple.h"
53#include "dwarf2.h"
54#include "tm-constrs.h"
55
56#ifndef CHECK_STACK_LIMIT
57#define CHECK_STACK_LIMIT (-1)
58#endif
59
60/* Return index of given mode in mult and division cost tables.  */
61#define MODE_INDEX(mode)					\
62  ((mode) == QImode ? 0						\
63   : (mode) == HImode ? 1					\
64   : (mode) == SImode ? 2					\
65   : (mode) == DImode ? 3					\
66   : 4)
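/* As an illustration of how this index is used, a multiply-cost query for an
   SImode operand would typically be written along these lines (rough sketch;
   the real lookups live in ix86_rtx_costs and use the mult_init and mult_bit
   fields of the cost tables below):

     cost = ix86_cost->mult_init[MODE_INDEX (SImode)]
            + nbits * ix86_cost->mult_bit;

   Modes wider than DImode fall into the "other" slot (index 4).  */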
67
68/* Processor costs (relative to an add) */
69/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
70#define COSTS_N_BYTES(N) ((N) * 2)
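/* Under that assumption COSTS_N_INSNS (1) == 4 and COSTS_N_BYTES (2) == 4,
   so the 2-byte add charged below costs exactly one "instruction", which
   keeps this size table on the same scale as the speed-oriented tables
   that follow.  */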
71
72static const
73struct processor_costs size_cost = {	/* costs for tuning for size */
74  COSTS_N_BYTES (2),			/* cost of an add instruction */
75  COSTS_N_BYTES (3),			/* cost of a lea instruction */
76  COSTS_N_BYTES (2),			/* variable shift costs */
77  COSTS_N_BYTES (3),			/* constant shift costs */
78  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
79   COSTS_N_BYTES (3),			/*                               HI */
80   COSTS_N_BYTES (3),			/*                               SI */
81   COSTS_N_BYTES (3),			/*                               DI */
82   COSTS_N_BYTES (5)},			/*                            other */
83  0,					/* cost of multiply per each bit set */
84  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
85   COSTS_N_BYTES (3),			/*                          HI */
86   COSTS_N_BYTES (3),			/*                          SI */
87   COSTS_N_BYTES (3),			/*                          DI */
88   COSTS_N_BYTES (5)},			/*                       other */
89  COSTS_N_BYTES (3),			/* cost of movsx */
90  COSTS_N_BYTES (3),			/* cost of movzx */
91  0,					/* "large" insn */
92  2,					/* MOVE_RATIO */
93  2,					/* cost for loading QImode using movzbl */
94  {2, 2, 2},				/* cost of loading integer registers
95					   in QImode, HImode and SImode.
96					   Relative to reg-reg move (2).  */
97  {2, 2, 2},				/* cost of storing integer registers */
98  2,					/* cost of reg,reg fld/fst */
99  {2, 2, 2},				/* cost of loading fp registers
100					   in SFmode, DFmode and XFmode */
101  {2, 2, 2},				/* cost of storing fp registers
102					   in SFmode, DFmode and XFmode */
103  3,					/* cost of moving MMX register */
104  {3, 3},				/* cost of loading MMX registers
105					   in SImode and DImode */
106  {3, 3},				/* cost of storing MMX registers
107					   in SImode and DImode */
108  3,					/* cost of moving SSE register */
109  {3, 3, 3},				/* cost of loading SSE registers
110					   in SImode, DImode and TImode */
111  {3, 3, 3},				/* cost of storing SSE registers
112					   in SImode, DImode and TImode */
113  3,					/* MMX or SSE register to integer */
114  0,					/* size of prefetch block */
115  0,					/* number of parallel prefetches */
116  2,					/* Branch cost */
117  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
118  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
119  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
120  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
121  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
122  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
123};
124
125/* Processor costs (relative to an add) */
126static const
127struct processor_costs i386_cost = {	/* 386 specific costs */
128  COSTS_N_INSNS (1),			/* cost of an add instruction */
129  COSTS_N_INSNS (1),			/* cost of a lea instruction */
130  COSTS_N_INSNS (3),			/* variable shift costs */
131  COSTS_N_INSNS (2),			/* constant shift costs */
132  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
133   COSTS_N_INSNS (6),			/*                               HI */
134   COSTS_N_INSNS (6),			/*                               SI */
135   COSTS_N_INSNS (6),			/*                               DI */
136   COSTS_N_INSNS (6)},			/*                               other */
137  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
138  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
139   COSTS_N_INSNS (23),			/*                          HI */
140   COSTS_N_INSNS (23),			/*                          SI */
141   COSTS_N_INSNS (23),			/*                          DI */
142   COSTS_N_INSNS (23)},			/*                          other */
143  COSTS_N_INSNS (3),			/* cost of movsx */
144  COSTS_N_INSNS (2),			/* cost of movzx */
145  15,					/* "large" insn */
146  3,					/* MOVE_RATIO */
147  4,					/* cost for loading QImode using movzbl */
148  {2, 4, 2},				/* cost of loading integer registers
149					   in QImode, HImode and SImode.
150					   Relative to reg-reg move (2).  */
151  {2, 4, 2},				/* cost of storing integer registers */
152  2,					/* cost of reg,reg fld/fst */
153  {8, 8, 8},				/* cost of loading fp registers
154					   in SFmode, DFmode and XFmode */
155  {8, 8, 8},				/* cost of storing fp registers
156					   in SFmode, DFmode and XFmode */
157  2,					/* cost of moving MMX register */
158  {4, 8},				/* cost of loading MMX registers
159					   in SImode and DImode */
160  {4, 8},				/* cost of storing MMX registers
161					   in SImode and DImode */
162  2,					/* cost of moving SSE register */
163  {4, 8, 16},				/* cost of loading SSE registers
164					   in SImode, DImode and TImode */
165  {4, 8, 16},				/* cost of storing SSE registers
166					   in SImode, DImode and TImode */
167  3,					/* MMX or SSE register to integer */
168  0,					/* size of prefetch block */
169  0,					/* number of parallel prefetches */
170  1,					/* Branch cost */
171  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
172  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
173  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
174  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
175  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
176  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
177};
178
179static const
180struct processor_costs i486_cost = {	/* 486 specific costs */
181  COSTS_N_INSNS (1),			/* cost of an add instruction */
182  COSTS_N_INSNS (1),			/* cost of a lea instruction */
183  COSTS_N_INSNS (3),			/* variable shift costs */
184  COSTS_N_INSNS (2),			/* constant shift costs */
185  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
186   COSTS_N_INSNS (12),			/*                               HI */
187   COSTS_N_INSNS (12),			/*                               SI */
188   COSTS_N_INSNS (12),			/*                               DI */
189   COSTS_N_INSNS (12)},			/*                               other */
190  1,					/* cost of multiply per each bit set */
191  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
192   COSTS_N_INSNS (40),			/*                          HI */
193   COSTS_N_INSNS (40),			/*                          SI */
194   COSTS_N_INSNS (40),			/*                          DI */
195   COSTS_N_INSNS (40)},			/*                          other */
196  COSTS_N_INSNS (3),			/* cost of movsx */
197  COSTS_N_INSNS (2),			/* cost of movzx */
198  15,					/* "large" insn */
199  3,					/* MOVE_RATIO */
200  4,					/* cost for loading QImode using movzbl */
201  {2, 4, 2},				/* cost of loading integer registers
202					   in QImode, HImode and SImode.
203					   Relative to reg-reg move (2).  */
204  {2, 4, 2},				/* cost of storing integer registers */
205  2,					/* cost of reg,reg fld/fst */
206  {8, 8, 8},				/* cost of loading fp registers
207					   in SFmode, DFmode and XFmode */
208  {8, 8, 8},				/* cost of storing fp registers
209					   in SFmode, DFmode and XFmode */
210  2,					/* cost of moving MMX register */
211  {4, 8},				/* cost of loading MMX registers
212					   in SImode and DImode */
213  {4, 8},				/* cost of storing MMX registers
214					   in SImode and DImode */
215  2,					/* cost of moving SSE register */
216  {4, 8, 16},				/* cost of loading SSE registers
217					   in SImode, DImode and TImode */
218  {4, 8, 16},				/* cost of storing SSE registers
219					   in SImode, DImode and TImode */
220  3,					/* MMX or SSE register to integer */
221  0,					/* size of prefetch block */
222  0,					/* number of parallel prefetches */
223  1,					/* Branch cost */
224  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
225  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
226  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
227  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
228  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
229  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
230};
231
232static const
233struct processor_costs pentium_cost = {
234  COSTS_N_INSNS (1),			/* cost of an add instruction */
235  COSTS_N_INSNS (1),			/* cost of a lea instruction */
236  COSTS_N_INSNS (4),			/* variable shift costs */
237  COSTS_N_INSNS (1),			/* constant shift costs */
238  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
239   COSTS_N_INSNS (11),			/*                               HI */
240   COSTS_N_INSNS (11),			/*                               SI */
241   COSTS_N_INSNS (11),			/*                               DI */
242   COSTS_N_INSNS (11)},			/*                               other */
243  0,					/* cost of multiply per each bit set */
244  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
245   COSTS_N_INSNS (25),			/*                          HI */
246   COSTS_N_INSNS (25),			/*                          SI */
247   COSTS_N_INSNS (25),			/*                          DI */
248   COSTS_N_INSNS (25)},			/*                          other */
249  COSTS_N_INSNS (3),			/* cost of movsx */
250  COSTS_N_INSNS (2),			/* cost of movzx */
251  8,					/* "large" insn */
252  6,					/* MOVE_RATIO */
253  6,					/* cost for loading QImode using movzbl */
254  {2, 4, 2},				/* cost of loading integer registers
255					   in QImode, HImode and SImode.
256					   Relative to reg-reg move (2).  */
257  {2, 4, 2},				/* cost of storing integer registers */
258  2,					/* cost of reg,reg fld/fst */
259  {2, 2, 6},				/* cost of loading fp registers
260					   in SFmode, DFmode and XFmode */
261  {4, 4, 6},				/* cost of storing fp registers
262					   in SFmode, DFmode and XFmode */
263  8,					/* cost of moving MMX register */
264  {8, 8},				/* cost of loading MMX registers
265					   in SImode and DImode */
266  {8, 8},				/* cost of storing MMX registers
267					   in SImode and DImode */
268  2,					/* cost of moving SSE register */
269  {4, 8, 16},				/* cost of loading SSE registers
270					   in SImode, DImode and TImode */
271  {4, 8, 16},				/* cost of storing SSE registers
272					   in SImode, DImode and TImode */
273  3,					/* MMX or SSE register to integer */
274  0,					/* size of prefetch block */
275  0,					/* number of parallel prefetches */
276  2,					/* Branch cost */
277  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
278  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
279  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
280  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
281  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
282  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
283};
284
285static const
286struct processor_costs pentiumpro_cost = {
287  COSTS_N_INSNS (1),			/* cost of an add instruction */
288  COSTS_N_INSNS (1),			/* cost of a lea instruction */
289  COSTS_N_INSNS (1),			/* variable shift costs */
290  COSTS_N_INSNS (1),			/* constant shift costs */
291  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
292   COSTS_N_INSNS (4),			/*                               HI */
293   COSTS_N_INSNS (4),			/*                               SI */
294   COSTS_N_INSNS (4),			/*                               DI */
295   COSTS_N_INSNS (4)},			/*                               other */
296  0,					/* cost of multiply per each bit set */
297  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
298   COSTS_N_INSNS (17),			/*                          HI */
299   COSTS_N_INSNS (17),			/*                          SI */
300   COSTS_N_INSNS (17),			/*                          DI */
301   COSTS_N_INSNS (17)},			/*                          other */
302  COSTS_N_INSNS (1),			/* cost of movsx */
303  COSTS_N_INSNS (1),			/* cost of movzx */
304  8,					/* "large" insn */
305  6,					/* MOVE_RATIO */
306  2,					/* cost for loading QImode using movzbl */
307  {4, 4, 4},				/* cost of loading integer registers
308					   in QImode, HImode and SImode.
309					   Relative to reg-reg move (2).  */
310  {2, 2, 2},				/* cost of storing integer registers */
311  2,					/* cost of reg,reg fld/fst */
312  {2, 2, 6},				/* cost of loading fp registers
313					   in SFmode, DFmode and XFmode */
314  {4, 4, 6},				/* cost of storing fp registers
315					   in SFmode, DFmode and XFmode */
316  2,					/* cost of moving MMX register */
317  {2, 2},				/* cost of loading MMX registers
318					   in SImode and DImode */
319  {2, 2},				/* cost of storing MMX registers
320					   in SImode and DImode */
321  2,					/* cost of moving SSE register */
322  {2, 2, 8},				/* cost of loading SSE registers
323					   in SImode, DImode and TImode */
324  {2, 2, 8},				/* cost of storing SSE registers
325					   in SImode, DImode and TImode */
326  3,					/* MMX or SSE register to integer */
327  32,					/* size of prefetch block */
328  6,					/* number of parallel prefetches */
329  2,					/* Branch cost */
330  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
331  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
332  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
333  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
334  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
335  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
336};
337
338static const
339struct processor_costs k6_cost = {
340  COSTS_N_INSNS (1),			/* cost of an add instruction */
341  COSTS_N_INSNS (2),			/* cost of a lea instruction */
342  COSTS_N_INSNS (1),			/* variable shift costs */
343  COSTS_N_INSNS (1),			/* constant shift costs */
344  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
345   COSTS_N_INSNS (3),			/*                               HI */
346   COSTS_N_INSNS (3),			/*                               SI */
347   COSTS_N_INSNS (3),			/*                               DI */
348   COSTS_N_INSNS (3)},			/*                               other */
349  0,					/* cost of multiply per each bit set */
350  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
351   COSTS_N_INSNS (18),			/*                          HI */
352   COSTS_N_INSNS (18),			/*                          SI */
353   COSTS_N_INSNS (18),			/*                          DI */
354   COSTS_N_INSNS (18)},			/*                          other */
355  COSTS_N_INSNS (2),			/* cost of movsx */
356  COSTS_N_INSNS (2),			/* cost of movzx */
357  8,					/* "large" insn */
358  4,					/* MOVE_RATIO */
359  3,					/* cost for loading QImode using movzbl */
360  {4, 5, 4},				/* cost of loading integer registers
361					   in QImode, HImode and SImode.
362					   Relative to reg-reg move (2).  */
363  {2, 3, 2},				/* cost of storing integer registers */
364  4,					/* cost of reg,reg fld/fst */
365  {6, 6, 6},				/* cost of loading fp registers
366					   in SFmode, DFmode and XFmode */
367  {4, 4, 4},				/* cost of storing fp registers
368					   in SFmode, DFmode and XFmode */
369  2,					/* cost of moving MMX register */
370  {2, 2},				/* cost of loading MMX registers
371					   in SImode and DImode */
372  {2, 2},				/* cost of storing MMX registers
373					   in SImode and DImode */
374  2,					/* cost of moving SSE register */
375  {2, 2, 8},				/* cost of loading SSE registers
376					   in SImode, DImode and TImode */
377  {2, 2, 8},				/* cost of storing SSE registers
378					   in SImode, DImode and TImode */
379  6,					/* MMX or SSE register to integer */
380  32,					/* size of prefetch block */
381  1,					/* number of parallel prefetches */
382  1,					/* Branch cost */
383  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
384  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
385  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
386  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
387  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
388  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
389};
390
391static const
392struct processor_costs athlon_cost = {
393  COSTS_N_INSNS (1),			/* cost of an add instruction */
394  COSTS_N_INSNS (2),			/* cost of a lea instruction */
395  COSTS_N_INSNS (1),			/* variable shift costs */
396  COSTS_N_INSNS (1),			/* constant shift costs */
397  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
398   COSTS_N_INSNS (5),			/*                               HI */
399   COSTS_N_INSNS (5),			/*                               SI */
400   COSTS_N_INSNS (5),			/*                               DI */
401   COSTS_N_INSNS (5)},			/*                               other */
402  0,					/* cost of multiply per each bit set */
403  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
404   COSTS_N_INSNS (26),			/*                          HI */
405   COSTS_N_INSNS (42),			/*                          SI */
406   COSTS_N_INSNS (74),			/*                          DI */
407   COSTS_N_INSNS (74)},			/*                          other */
408  COSTS_N_INSNS (1),			/* cost of movsx */
409  COSTS_N_INSNS (1),			/* cost of movzx */
410  8,					/* "large" insn */
411  9,					/* MOVE_RATIO */
412  4,					/* cost for loading QImode using movzbl */
413  {3, 4, 3},				/* cost of loading integer registers
414					   in QImode, HImode and SImode.
415					   Relative to reg-reg move (2).  */
416  {3, 4, 3},				/* cost of storing integer registers */
417  4,					/* cost of reg,reg fld/fst */
418  {4, 4, 12},				/* cost of loading fp registers
419					   in SFmode, DFmode and XFmode */
420  {6, 6, 8},				/* cost of storing fp registers
421					   in SFmode, DFmode and XFmode */
422  2,					/* cost of moving MMX register */
423  {4, 4},				/* cost of loading MMX registers
424					   in SImode and DImode */
425  {4, 4},				/* cost of storing MMX registers
426					   in SImode and DImode */
427  2,					/* cost of moving SSE register */
428  {4, 4, 6},				/* cost of loading SSE registers
429					   in SImode, DImode and TImode */
430  {4, 4, 5},				/* cost of storing SSE registers
431					   in SImode, DImode and TImode */
432  5,					/* MMX or SSE register to integer */
433  64,					/* size of prefetch block */
434  6,					/* number of parallel prefetches */
435  5,					/* Branch cost */
436  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
437  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
438  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
439  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
440  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
441  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
442};
443
444static const
445struct processor_costs k8_cost = {
446  COSTS_N_INSNS (1),			/* cost of an add instruction */
447  COSTS_N_INSNS (2),			/* cost of a lea instruction */
448  COSTS_N_INSNS (1),			/* variable shift costs */
449  COSTS_N_INSNS (1),			/* constant shift costs */
450  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
451   COSTS_N_INSNS (4),			/*                               HI */
452   COSTS_N_INSNS (3),			/*                               SI */
453   COSTS_N_INSNS (4),			/*                               DI */
454   COSTS_N_INSNS (5)},			/*                               other */
455  0,					/* cost of multiply per each bit set */
456  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
457   COSTS_N_INSNS (26),			/*                          HI */
458   COSTS_N_INSNS (42),			/*                          SI */
459   COSTS_N_INSNS (74),			/*                          DI */
460   COSTS_N_INSNS (74)},			/*                          other */
461  COSTS_N_INSNS (1),			/* cost of movsx */
462  COSTS_N_INSNS (1),			/* cost of movzx */
463  8,					/* "large" insn */
464  9,					/* MOVE_RATIO */
465  4,					/* cost for loading QImode using movzbl */
466  {3, 4, 3},				/* cost of loading integer registers
467					   in QImode, HImode and SImode.
468					   Relative to reg-reg move (2).  */
469  {3, 4, 3},				/* cost of storing integer registers */
470  4,					/* cost of reg,reg fld/fst */
471  {4, 4, 12},				/* cost of loading fp registers
472					   in SFmode, DFmode and XFmode */
473  {6, 6, 8},				/* cost of storing fp registers
474					   in SFmode, DFmode and XFmode */
475  2,					/* cost of moving MMX register */
476  {3, 3},				/* cost of loading MMX registers
477					   in SImode and DImode */
478  {4, 4},				/* cost of storing MMX registers
479					   in SImode and DImode */
480  2,					/* cost of moving SSE register */
481  {4, 3, 6},				/* cost of loading SSE registers
482					   in SImode, DImode and TImode */
483  {4, 4, 5},				/* cost of storing SSE registers
484					   in SImode, DImode and TImode */
485  5,					/* MMX or SSE register to integer */
486  64,					/* size of prefetch block */
487  6,					/* number of parallel prefetches */
488  5,					/* Branch cost */
489  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
490  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
491  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
492  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
493  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
494  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
495};
496
497static const
498struct processor_costs pentium4_cost = {
499  COSTS_N_INSNS (1),			/* cost of an add instruction */
500  COSTS_N_INSNS (3),			/* cost of a lea instruction */
501  COSTS_N_INSNS (4),			/* variable shift costs */
502  COSTS_N_INSNS (4),			/* constant shift costs */
503  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
504   COSTS_N_INSNS (15),			/*                               HI */
505   COSTS_N_INSNS (15),			/*                               SI */
506   COSTS_N_INSNS (15),			/*                               DI */
507   COSTS_N_INSNS (15)},			/*                               other */
508  0,					/* cost of multiply per each bit set */
509  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
510   COSTS_N_INSNS (56),			/*                          HI */
511   COSTS_N_INSNS (56),			/*                          SI */
512   COSTS_N_INSNS (56),			/*                          DI */
513   COSTS_N_INSNS (56)},			/*                          other */
514  COSTS_N_INSNS (1),			/* cost of movsx */
515  COSTS_N_INSNS (1),			/* cost of movzx */
516  16,					/* "large" insn */
517  6,					/* MOVE_RATIO */
518  2,					/* cost for loading QImode using movzbl */
519  {4, 5, 4},				/* cost of loading integer registers
520					   in QImode, HImode and SImode.
521					   Relative to reg-reg move (2).  */
522  {2, 3, 2},				/* cost of storing integer registers */
523  2,					/* cost of reg,reg fld/fst */
524  {2, 2, 6},				/* cost of loading fp registers
525					   in SFmode, DFmode and XFmode */
526  {4, 4, 6},				/* cost of storing fp registers
527					   in SFmode, DFmode and XFmode */
528  2,					/* cost of moving MMX register */
529  {2, 2},				/* cost of loading MMX registers
530					   in SImode and DImode */
531  {2, 2},				/* cost of storing MMX registers
532					   in SImode and DImode */
533  12,					/* cost of moving SSE register */
534  {12, 12, 12},				/* cost of loading SSE registers
535					   in SImode, DImode and TImode */
536  {2, 2, 8},				/* cost of storing SSE registers
537					   in SImode, DImode and TImode */
538  10,					/* MMX or SSE register to integer */
539  64,					/* size of prefetch block */
540  6,					/* number of parallel prefetches */
541  2,					/* Branch cost */
542  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
543  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
544  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
545  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
546  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
547  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
548};
549
550static const
551struct processor_costs nocona_cost = {
552  COSTS_N_INSNS (1),			/* cost of an add instruction */
553  COSTS_N_INSNS (1),			/* cost of a lea instruction */
554  COSTS_N_INSNS (1),			/* variable shift costs */
555  COSTS_N_INSNS (1),			/* constant shift costs */
556  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
557   COSTS_N_INSNS (10),			/*                               HI */
558   COSTS_N_INSNS (10),			/*                               SI */
559   COSTS_N_INSNS (10),			/*                               DI */
560   COSTS_N_INSNS (10)},			/*                               other */
561  0,					/* cost of multiply per each bit set */
562  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
563   COSTS_N_INSNS (66),			/*                          HI */
564   COSTS_N_INSNS (66),			/*                          SI */
565   COSTS_N_INSNS (66),			/*                          DI */
566   COSTS_N_INSNS (66)},			/*                          other */
567  COSTS_N_INSNS (1),			/* cost of movsx */
568  COSTS_N_INSNS (1),			/* cost of movzx */
569  16,					/* "large" insn */
570  17,					/* MOVE_RATIO */
571  4,					/* cost for loading QImode using movzbl */
572  {4, 4, 4},				/* cost of loading integer registers
573					   in QImode, HImode and SImode.
574					   Relative to reg-reg move (2).  */
575  {4, 4, 4},				/* cost of storing integer registers */
576  3,					/* cost of reg,reg fld/fst */
577  {12, 12, 12},				/* cost of loading fp registers
578					   in SFmode, DFmode and XFmode */
579  {4, 4, 4},				/* cost of storing fp registers
580					   in SFmode, DFmode and XFmode */
581  6,					/* cost of moving MMX register */
582  {12, 12},				/* cost of loading MMX registers
583					   in SImode and DImode */
584  {12, 12},				/* cost of storing MMX registers
585					   in SImode and DImode */
586  6,					/* cost of moving SSE register */
587  {12, 12, 12},				/* cost of loading SSE registers
588					   in SImode, DImode and TImode */
589  {12, 12, 12},				/* cost of storing SSE registers
590					   in SImode, DImode and TImode */
591  8,					/* MMX or SSE register to integer */
592  128,					/* size of prefetch block */
593  8,					/* number of parallel prefetches */
594  1,					/* Branch cost */
595  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
596  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
597  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
598  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
599  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
600  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
601};
602
603/* Generic64 should produce code tuned for Nocona and K8.  */
604static const
605struct processor_costs generic64_cost = {
606  COSTS_N_INSNS (1),			/* cost of an add instruction */
607  /* On all chips taken into consideration lea is 2 cycles or more.  With
608     this cost, however, our current implementation of synth_mult results in
609     the use of unnecessary temporary registers, causing regressions on
610     several SPECfp benchmarks.  */
611  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
612  COSTS_N_INSNS (1),			/* variable shift costs */
613  COSTS_N_INSNS (1),			/* constant shift costs */
614  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
615   COSTS_N_INSNS (4),			/*                               HI */
616   COSTS_N_INSNS (3),			/*                               SI */
617   COSTS_N_INSNS (4),			/*                               DI */
618   COSTS_N_INSNS (2)},			/*                               other */
619  0,					/* cost of multiply per each bit set */
620  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
621   COSTS_N_INSNS (26),			/*                          HI */
622   COSTS_N_INSNS (42),			/*                          SI */
623   COSTS_N_INSNS (74),			/*                          DI */
624   COSTS_N_INSNS (74)},			/*                          other */
625  COSTS_N_INSNS (1),			/* cost of movsx */
626  COSTS_N_INSNS (1),			/* cost of movzx */
627  8,					/* "large" insn */
628  17,					/* MOVE_RATIO */
629  4,					/* cost for loading QImode using movzbl */
630  {4, 4, 4},				/* cost of loading integer registers
631					   in QImode, HImode and SImode.
632					   Relative to reg-reg move (2).  */
633  {4, 4, 4},				/* cost of storing integer registers */
634  4,					/* cost of reg,reg fld/fst */
635  {12, 12, 12},				/* cost of loading fp registers
636					   in SFmode, DFmode and XFmode */
637  {6, 6, 8},				/* cost of storing fp registers
638					   in SFmode, DFmode and XFmode */
639  2,					/* cost of moving MMX register */
640  {8, 8},				/* cost of loading MMX registers
641					   in SImode and DImode */
642  {8, 8},				/* cost of storing MMX registers
643					   in SImode and DImode */
644  2,					/* cost of moving SSE register */
645  {8, 8, 8},				/* cost of loading SSE registers
646					   in SImode, DImode and TImode */
647  {8, 8, 8},				/* cost of storing SSE registers
648					   in SImode, DImode and TImode */
649  5,					/* MMX or SSE register to integer */
650  64,					/* size of prefetch block */
651  6,					/* number of parallel prefetches */
652  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
653     value is increased to the perhaps more appropriate value of 5.  */
654  3,					/* Branch cost */
655  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
656  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
657  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
658  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
659  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
660  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
661};
662
663/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8.  */
664static const
665struct processor_costs generic32_cost = {
666  COSTS_N_INSNS (1),			/* cost of an add instruction */
667  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
668  COSTS_N_INSNS (1),			/* variable shift costs */
669  COSTS_N_INSNS (1),			/* constant shift costs */
670  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
671   COSTS_N_INSNS (4),			/*                               HI */
672   COSTS_N_INSNS (3),			/*                               SI */
673   COSTS_N_INSNS (4),			/*                               DI */
674   COSTS_N_INSNS (2)},			/*                               other */
675  0,					/* cost of multiply per each bit set */
676  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
677   COSTS_N_INSNS (26),			/*                          HI */
678   COSTS_N_INSNS (42),			/*                          SI */
679   COSTS_N_INSNS (74),			/*                          DI */
680   COSTS_N_INSNS (74)},			/*                          other */
681  COSTS_N_INSNS (1),			/* cost of movsx */
682  COSTS_N_INSNS (1),			/* cost of movzx */
683  8,					/* "large" insn */
684  17,					/* MOVE_RATIO */
685  4,					/* cost for loading QImode using movzbl */
686  {4, 4, 4},				/* cost of loading integer registers
687					   in QImode, HImode and SImode.
688					   Relative to reg-reg move (2).  */
689  {4, 4, 4},				/* cost of storing integer registers */
690  4,					/* cost of reg,reg fld/fst */
691  {12, 12, 12},				/* cost of loading fp registers
692					   in SFmode, DFmode and XFmode */
693  {6, 6, 8},				/* cost of storing fp registers
694					   in SFmode, DFmode and XFmode */
695  2,					/* cost of moving MMX register */
696  {8, 8},				/* cost of loading MMX registers
697					   in SImode and DImode */
698  {8, 8},				/* cost of storing MMX registers
699					   in SImode and DImode */
700  2,					/* cost of moving SSE register */
701  {8, 8, 8},				/* cost of loading SSE registers
702					   in SImode, DImode and TImode */
703  {8, 8, 8},				/* cost of storing SSE registers
704					   in SImode, DImode and TImode */
705  5,					/* MMX or SSE register to integer */
706  64,					/* size of prefetch block */
707  6,					/* number of parallel prefetches */
708  3,					/* Branch cost */
709  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
710  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
711  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
712  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
713  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
714  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
715};
716
717const struct processor_costs *ix86_cost = &pentium_cost;
718
719/* Processor feature/optimization bitmasks.  */
720#define m_386 (1<<PROCESSOR_I386)
721#define m_486 (1<<PROCESSOR_I486)
722#define m_PENT (1<<PROCESSOR_PENTIUM)
723#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
724#define m_K6  (1<<PROCESSOR_K6)
725#define m_ATHLON  (1<<PROCESSOR_ATHLON)
726#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
727#define m_K8  (1<<PROCESSOR_K8)
728#define m_ATHLON_K8  (m_K8 | m_ATHLON)
729#define m_NOCONA  (1<<PROCESSOR_NOCONA)
730#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
731#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
732#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
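/* Each x86_* mask below is consumed by i386.h, which typically tests it
   against the bit of the processor currently being tuned for, roughly:

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so every "const int" that follows is effectively a per-CPU feature table
   rather than a simple boolean.  */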
733
734/* Generic instruction choice should be common subset of supported CPUs
735   (PPro/PENT4/NOCONA/Athlon/K8).  */
736
737/* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
738   Generic64 seems like a good code size tradeoff.  We can't enable it for 32bit
739   generic because it does not work well with PPro based chips.  */
740const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
741const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
742const int x86_zero_extend_with_and = m_486 | m_PENT;
743const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
744const int x86_double_with_add = ~m_386;
745const int x86_use_bit_test = m_386;
746const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
747const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
748const int x86_3dnow_a = m_ATHLON_K8;
749const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
750/* Branch hints were put in P4 based on simulation results.  But
751   after P4 was made, no performance benefit was observed with
752   branch hints.  They also increase the code size.  As a result,
753   icc never generates branch hints.  */
754const int x86_branch_hints = 0;
755const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
756/* We probably ought to watch for partial register stalls on the Generic32
757   compilation setting as well.  However, in the current implementation the
758   partial register stalls are not eliminated very well - they can
759   be introduced via subregs synthesized by combine and can happen
760   in caller/callee saving sequences.
761   Because this option pays back little on PPro based chips and conflicts
762   with the partial register dependencies used by Athlon/P4 based chips, it is
763   better to leave it off for generic32 for now.  */
764const int x86_partial_reg_stall = m_PPRO;
765const int x86_partial_flag_reg_stall = m_GENERIC;
766const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
767const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
768const int x86_use_mov0 = m_K6;
769const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
770const int x86_read_modify_write = ~m_PENT;
771const int x86_read_modify = ~(m_PENT | m_PPRO);
772const int x86_split_long_moves = m_PPRO;
773const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
774const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
775const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
776const int x86_qimode_math = ~(0);
777const int x86_promote_qi_regs = 0;
778/* On PPro this flag is meant to avoid partial register stalls.  Just like
779   x86_partial_reg_stall, this option might be considered for Generic32
780   if our scheme for avoiding partial stalls were more effective.  */
781const int x86_himode_math = ~(m_PPRO);
782const int x86_promote_hi_regs = m_PPRO;
783const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
784const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
785const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
786const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
787const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
788const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
789const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
790const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
791const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
792const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
793const int x86_shift1 = ~m_486;
794const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
795/* In the Generic model we have a conflict here between PPro/Pentium4 based chips
796   that treat 128bit SSE registers as single units and K8 based chips that
797   divide SSE registers into two 64bit halves.
798   x86_sse_partial_reg_dependency promotes all store destinations to 128bit
799   to allow register renaming on 128bit SSE units, but usually results in one
800   extra microop on 64bit SSE units.  Experimental results show that disabling
801   this option on P4 brings over a 20% SPECfp regression, while enabling it on
802   K8 brings roughly a 2.4% regression that can be partly masked by careful
803   scheduling of moves.  */
804const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
805/* Set for machines where the type and dependencies are resolved on SSE
806   register parts instead of whole registers, so we may maintain just the
807   lower part of scalar values in proper format, leaving the upper part
808   undefined.  */
809const int x86_sse_split_regs = m_ATHLON_K8;
810const int x86_sse_typeless_stores = m_ATHLON_K8;
811const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
812const int x86_use_ffreep = m_ATHLON_K8;
813const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
814const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
815
816/* ??? Allowing interunit moves makes it all too easy for the compiler to put
817   integer data in xmm registers, which results in pretty abysmal code.  */
818const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
819
820const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
821/* Some CPU cores are not able to predict more than 4 branch instructions in
822   the 16 byte window.  */
823const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
824const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
825const int x86_use_bt = m_ATHLON_K8;
826/* Compare and exchange was added for 80486.  */
827const int x86_cmpxchg = ~m_386;
828/* Compare and exchange 8 bytes was added for pentium.  */
829const int x86_cmpxchg8b = ~(m_386 | m_486);
830/* Compare and exchange 16 bytes was added for nocona.  */
831const int x86_cmpxchg16b = m_NOCONA;
832/* Exchange and add was added for 80486.  */
833const int x86_xadd = ~m_386;
834const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
835
836/* In case the average insn count for single function invocation is
837   lower than this constant, emit fast (but longer) prologue and
838   epilogue code.  */
839#define FAST_PROLOGUE_INSN_COUNT 20
840
841/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
842static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
843static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
844static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
845
846/* Array of the smallest class containing reg number REGNO, indexed by
847   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
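/* For example, under this table REGNO_REG_CLASS (1) is DREG (%edx) and
   REGNO_REG_CLASS (7) is NON_Q_REGS (%esp); illustrative only, the table
   itself is authoritative.  */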
848
849enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
850{
851  /* ax, dx, cx, bx */
852  AREG, DREG, CREG, BREG,
853  /* si, di, bp, sp */
854  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
855  /* FP registers */
856  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
857  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
858  /* arg pointer */
859  NON_Q_REGS,
860  /* flags, fpsr, dirflag, frame */
861  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
862  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
863  SSE_REGS, SSE_REGS,
864  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
865  MMX_REGS, MMX_REGS,
866  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
867  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
868  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
869  SSE_REGS, SSE_REGS,
870};
871
872/* The "default" register map used in 32bit mode.  */
873
874int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
875{
876  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
877  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
878  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
879  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
880  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
881  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
882  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
883};
884
885static int const x86_64_int_parameter_registers[6] =
886{
887  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
888  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
889};
890
891static int const x86_64_int_return_registers[4] =
892{
893  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
894};
895
896/* The "default" register map used in 64bit mode.  */
897int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
898{
899  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
900  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
901  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
902  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
903  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
904  8,9,10,11,12,13,14,15,		/* extended integer registers */
905  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
906};
907
908/* Define the register numbers to be used in Dwarf debugging information.
909   The SVR4 reference port C compiler uses the following register numbers
910   in its Dwarf output code:
911	0 for %eax (gcc regno = 0)
912	1 for %ecx (gcc regno = 2)
913	2 for %edx (gcc regno = 1)
914	3 for %ebx (gcc regno = 3)
915	4 for %esp (gcc regno = 7)
916	5 for %ebp (gcc regno = 6)
917	6 for %esi (gcc regno = 4)
918	7 for %edi (gcc regno = 5)
919   The following three DWARF register numbers are never generated by
920   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
921   believes these numbers have these meanings.
922	8  for %eip    (no gcc equivalent)
923	9  for %eflags (gcc regno = 17)
924	10 for %trapno (no gcc equivalent)
925   It is not at all clear how we should number the FP stack registers
926   for the x86 architecture.  If the version of SDB on x86/svr4 were
927   a bit less brain dead with respect to floating-point then we would
928   have a precedent to follow with respect to DWARF register numbers
929   for x86 FP registers, but the SDB on x86/svr4 is so completely
930   broken with respect to FP registers that it is hardly worth thinking
931   of it as something to strive for compatibility with.
932   The version of x86/svr4 SDB I have at the moment does (partially)
933   seem to believe that DWARF register number 11 is associated with
934   the x86 register %st(0), but that's about all.  Higher DWARF
935   register numbers don't seem to be associated with anything in
936   particular, and even for DWARF regno 11, SDB only seems to under-
937   stand that it should say that a variable lives in %st(0) (when
938   asked via an `=' command) if we said it was in DWARF regno 11,
939   but SDB still prints garbage when asked for the value of the
940   variable in question (via a `/' command).
941   (Also note that the labels SDB prints for various FP stack regs
942   when doing an `x' command are all wrong.)
943   Note that these problems generally don't affect the native SVR4
944   C compiler because it doesn't allow the use of -O with -g and
945   because when it is *not* optimizing, it allocates a memory
946   location for each floating-point variable, and the memory
947   location is what gets described in the DWARF AT_location
948   attribute for the variable in question.
949   Regardless of the severe mental illness of the x86/svr4 SDB, we
950   do something sensible here and we use the following DWARF
951   register numbers.  Note that these are all stack-top-relative
952   numbers.
953	11 for %st(0) (gcc regno = 8)
954	12 for %st(1) (gcc regno = 9)
955	13 for %st(2) (gcc regno = 10)
956	14 for %st(3) (gcc regno = 11)
957	15 for %st(4) (gcc regno = 12)
958	16 for %st(5) (gcc regno = 13)
959	17 for %st(6) (gcc regno = 14)
960	18 for %st(7) (gcc regno = 15)
961*/
962int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
963{
964  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
965  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
966  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
967  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
968  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
969  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
970  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
971};
972
973/* Test and compare insns in i386.md store the information needed to
974   generate branch and scc insns here.  */
975
976rtx ix86_compare_op0 = NULL_RTX;
977rtx ix86_compare_op1 = NULL_RTX;
978rtx ix86_compare_emitted = NULL_RTX;
979
980/* Size of the register save area.  */
981#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
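/* In 64bit mode REGPARM_MAX is 6, SSE_REGPARM_MAX is 8 and UNITS_PER_WORD
   is 8 (see i386.h), so this works out to 6*8 + 8*16 = 176 bytes, the size
   of the register save area that the x86-64 psABI prescribes for variadic
   functions.  */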
982
983/* Define the structure for the machine field in struct function.  */
984
985struct stack_local_entry GTY(())
986{
987  unsigned short mode;
988  unsigned short n;
989  rtx rtl;
990  struct stack_local_entry *next;
991};
992
993/* Structure describing stack frame layout.
994   Stack grows downward:
995
996   [arguments]
997					      <- ARG_POINTER
998   saved pc
999
1000   saved frame pointer if frame_pointer_needed
1001					      <- HARD_FRAME_POINTER
1002   [saved regs]
1003
1004   [padding1]          \
1005		        )
1006   [va_arg registers]  (
1007		        > to_allocate	      <- FRAME_POINTER
1008   [frame]	       (
1009		        )
1010   [padding2]	       /
1011  */
1012struct ix86_frame
1013{
1014  int nregs;
1015  int padding1;
1016  int va_arg_size;
1017  HOST_WIDE_INT frame;
1018  int padding2;
1019  int outgoing_arguments_size;
1020  int red_zone_size;
1021
1022  HOST_WIDE_INT to_allocate;
1023  /* The offsets relative to ARG_POINTER.  */
1024  HOST_WIDE_INT frame_pointer_offset;
1025  HOST_WIDE_INT hard_frame_pointer_offset;
1026  HOST_WIDE_INT stack_pointer_offset;
1027
1028  /* When save_regs_using_mov is set, emit prologue using
1029     move instead of push instructions.  */
1030  bool save_regs_using_mov;
1031};
1032
1033/* Code model option.  */
1034enum cmodel ix86_cmodel;
1035/* Asm dialect.  */
1036enum asm_dialect ix86_asm_dialect = ASM_ATT;
1037/* TLS dialects.  */
1038enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1039
1040/* Which unit we are generating floating point math for.  */
1041enum fpmath_unit ix86_fpmath;
1042
1043/* Which cpu are we scheduling for.  */
1044enum processor_type ix86_tune;
1045/* Which instruction set architecture to use.  */
1046enum processor_type ix86_arch;
1047
1048/* True if the SSE prefetch instruction is not a NOP.  */
1049int x86_prefetch_sse;
1050
1051/* ix86_regparm_string as a number */
1052static int ix86_regparm;
1053
1054/* -mstackrealign option */
1055extern int ix86_force_align_arg_pointer;
1056static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1057
1058/* Preferred alignment for stack boundary in bits.  */
1059unsigned int ix86_preferred_stack_boundary;
1060
1061/* Values 1-5: see jump.c */
1062int ix86_branch_cost;
1063
1064/* Variables which are this size or smaller are put in the data/bss
1065   or ldata/lbss sections.  */
1066
1067int ix86_section_threshold = 65536;
1068
1069/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
1070char internal_label_prefix[16];
1071int internal_label_prefix_len;
1072
1073static bool ix86_handle_option (size_t, const char *, int);
1074static void output_pic_addr_const (FILE *, rtx, int);
1075static void put_condition_code (enum rtx_code, enum machine_mode,
1076				int, int, FILE *);
1077static const char *get_some_local_dynamic_name (void);
1078static int get_some_local_dynamic_name_1 (rtx *, void *);
1079static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1080static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1081						   rtx *);
1082static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1083static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1084						   enum machine_mode);
1085static rtx get_thread_pointer (int);
1086static rtx legitimize_tls_address (rtx, enum tls_model, int);
1087static void get_pc_thunk_name (char [32], unsigned int);
1088static rtx gen_push (rtx);
1089static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1090static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1091static struct machine_function * ix86_init_machine_status (void);
1092static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1093static int ix86_nsaved_regs (void);
1094static void ix86_emit_save_regs (void);
1095static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1096static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1097static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1098static HOST_WIDE_INT ix86_GOT_alias_set (void);
1099static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1100static rtx ix86_expand_aligntest (rtx, int);
1101static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1102static int ix86_issue_rate (void);
1103static int ix86_adjust_cost (rtx, rtx, rtx, int);
1104static int ia32_multipass_dfa_lookahead (void);
1105static void ix86_init_mmx_sse_builtins (void);
1106static rtx x86_this_parameter (tree);
1107static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1108				 HOST_WIDE_INT, tree);
1109static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1110static void x86_file_start (void);
1111static void ix86_reorg (void);
1112static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1113static tree ix86_build_builtin_va_list (void);
1114static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1115					 tree, int *, int);
1116static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1117static bool ix86_scalar_mode_supported_p (enum machine_mode);
1118static bool ix86_vector_mode_supported_p (enum machine_mode);
1119
1120static int ix86_address_cost (rtx);
1121static bool ix86_cannot_force_const_mem (rtx);
1122static rtx ix86_delegitimize_address (rtx);
1123
1124static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1125
1126struct builtin_description;
1127static rtx ix86_expand_sse_comi (const struct builtin_description *,
1128				 tree, rtx);
1129static rtx ix86_expand_sse_compare (const struct builtin_description *,
1130				    tree, rtx);
1131static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1132static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1133static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1134static rtx ix86_expand_store_builtin (enum insn_code, tree);
1135static rtx safe_vector_operand (rtx, enum machine_mode);
1136static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1137static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1138static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1139static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1140static int ix86_fp_comparison_cost (enum rtx_code code);
1141static unsigned int ix86_select_alt_pic_regnum (void);
1142static int ix86_save_reg (unsigned int, int);
1143static void ix86_compute_frame_layout (struct ix86_frame *);
1144static int ix86_comp_type_attributes (tree, tree);
1145static int ix86_function_regparm (tree, tree);
1146const struct attribute_spec ix86_attribute_table[];
1147static bool ix86_function_ok_for_sibcall (tree, tree);
1148static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1149static int ix86_value_regno (enum machine_mode, tree, tree);
1150static bool contains_128bit_aligned_vector_p (tree);
1151static rtx ix86_struct_value_rtx (tree, int);
1152static bool ix86_ms_bitfield_layout_p (tree);
1153static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1154static int extended_reg_mentioned_1 (rtx *, void *);
1155static bool ix86_rtx_costs (rtx, int, int, int *);
1156static int min_insn_size (rtx);
1157static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1158static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1159static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1160				    tree, bool);
1161static void ix86_init_builtins (void);
1162static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1163static const char *ix86_mangle_fundamental_type (tree);
1164static tree ix86_stack_protect_fail (void);
1165static rtx ix86_internal_arg_pointer (void);
1166static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1167
1168/* This function is only used on Solaris.  */
1169static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1170  ATTRIBUTE_UNUSED;
1171
1172/* Register class used for passing a given 64bit part of the argument.
1173   These represent classes as documented by the PS ABI, with the exception of
1174   the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1175   uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1176
1177   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1178   whenever possible (the upper half contains only padding).
1179 */
1180enum x86_64_reg_class
1181  {
1182    X86_64_NO_CLASS,
1183    X86_64_INTEGER_CLASS,
1184    X86_64_INTEGERSI_CLASS,
1185    X86_64_SSE_CLASS,
1186    X86_64_SSESF_CLASS,
1187    X86_64_SSEDF_CLASS,
1188    X86_64_SSEUP_CLASS,
1189    X86_64_X87_CLASS,
1190    X86_64_X87UP_CLASS,
1191    X86_64_COMPLEX_X87_CLASS,
1192    X86_64_MEMORY_CLASS
1193  };
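/* To illustrate how these classes combine, a 16-byte aggregate such as

     struct example { double d; int i; };

   passed by value would normally classify as { X86_64_SSEDF_CLASS,
   X86_64_INTEGERSI_CLASS }: the first eightbyte travels in an SSE register
   as DFmode, the second in a general register as SImode.  (Rough example;
   the actual decisions are made by classify_argument further below.)  */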
1194static const char * const x86_64_reg_class_name[] = {
1195  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1196  "sseup", "x87", "x87up", "cplx87", "no"
1197};
1198
1199#define MAX_CLASSES 4
1200
1201/* Table of constants used by fldpi, fldln2, etc....  */
1202static REAL_VALUE_TYPE ext_80387_constants_table [5];
1203static bool ext_80387_constants_init = 0;
1204static void init_ext_80387_constants (void);
1205static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1206static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1207static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1208static section *x86_64_elf_select_section (tree decl, int reloc,
1209					   unsigned HOST_WIDE_INT align)
1210					     ATTRIBUTE_UNUSED;
1211
1212/* Initialize the GCC target structure.  */
1213#undef TARGET_ATTRIBUTE_TABLE
1214#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1215#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1216#  undef TARGET_MERGE_DECL_ATTRIBUTES
1217#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1218#endif
1219
1220#undef TARGET_COMP_TYPE_ATTRIBUTES
1221#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1222
1223#undef TARGET_INIT_BUILTINS
1224#define TARGET_INIT_BUILTINS ix86_init_builtins
1225#undef TARGET_EXPAND_BUILTIN
1226#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1227
1228#undef TARGET_ASM_FUNCTION_EPILOGUE
1229#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1230
1231#undef TARGET_ENCODE_SECTION_INFO
1232#ifndef SUBTARGET_ENCODE_SECTION_INFO
1233#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1234#else
1235#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1236#endif
1237
1238#undef TARGET_ASM_OPEN_PAREN
1239#define TARGET_ASM_OPEN_PAREN ""
1240#undef TARGET_ASM_CLOSE_PAREN
1241#define TARGET_ASM_CLOSE_PAREN ""
1242
1243#undef TARGET_ASM_ALIGNED_HI_OP
1244#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1245#undef TARGET_ASM_ALIGNED_SI_OP
1246#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1247#ifdef ASM_QUAD
1248#undef TARGET_ASM_ALIGNED_DI_OP
1249#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1250#endif
1251
1252#undef TARGET_ASM_UNALIGNED_HI_OP
1253#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1254#undef TARGET_ASM_UNALIGNED_SI_OP
1255#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1256#undef TARGET_ASM_UNALIGNED_DI_OP
1257#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1258
1259#undef TARGET_SCHED_ADJUST_COST
1260#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1261#undef TARGET_SCHED_ISSUE_RATE
1262#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1263#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1264#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1265  ia32_multipass_dfa_lookahead
1266
1267#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1268#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1269
1270#ifdef HAVE_AS_TLS
1271#undef TARGET_HAVE_TLS
1272#define TARGET_HAVE_TLS true
1273#endif
1274#undef TARGET_CANNOT_FORCE_CONST_MEM
1275#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1276#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1277#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1278
1279#undef TARGET_DELEGITIMIZE_ADDRESS
1280#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1281
1282#undef TARGET_MS_BITFIELD_LAYOUT_P
1283#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1284
1285#if TARGET_MACHO
1286#undef TARGET_BINDS_LOCAL_P
1287#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1288#endif
1289
1290#undef TARGET_ASM_OUTPUT_MI_THUNK
1291#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1292#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1293#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1294
1295#undef TARGET_ASM_FILE_START
1296#define TARGET_ASM_FILE_START x86_file_start
1297
1298#undef TARGET_DEFAULT_TARGET_FLAGS
1299#define TARGET_DEFAULT_TARGET_FLAGS	\
1300  (TARGET_DEFAULT			\
1301   | TARGET_64BIT_DEFAULT		\
1302   | TARGET_SUBTARGET_DEFAULT		\
1303   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1304
1305#undef TARGET_HANDLE_OPTION
1306#define TARGET_HANDLE_OPTION ix86_handle_option
1307
1308#undef TARGET_RTX_COSTS
1309#define TARGET_RTX_COSTS ix86_rtx_costs
1310#undef TARGET_ADDRESS_COST
1311#define TARGET_ADDRESS_COST ix86_address_cost
1312
1313#undef TARGET_FIXED_CONDITION_CODE_REGS
1314#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1315#undef TARGET_CC_MODES_COMPATIBLE
1316#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1317
1318#undef TARGET_MACHINE_DEPENDENT_REORG
1319#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1320
1321#undef TARGET_BUILD_BUILTIN_VA_LIST
1322#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1323
1324#undef TARGET_MD_ASM_CLOBBERS
1325#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1326
1327#undef TARGET_PROMOTE_PROTOTYPES
1328#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1329#undef TARGET_STRUCT_VALUE_RTX
1330#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1331#undef TARGET_SETUP_INCOMING_VARARGS
1332#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1333#undef TARGET_MUST_PASS_IN_STACK
1334#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1335#undef TARGET_PASS_BY_REFERENCE
1336#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1337#undef TARGET_INTERNAL_ARG_POINTER
1338#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1339#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1340#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1341
1342#undef TARGET_GIMPLIFY_VA_ARG_EXPR
1343#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1344
1345#undef TARGET_SCALAR_MODE_SUPPORTED_P
1346#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1347
1348#undef TARGET_VECTOR_MODE_SUPPORTED_P
1349#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1350
1351#ifdef HAVE_AS_TLS
1352#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1353#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1354#endif
1355
1356#ifdef SUBTARGET_INSERT_ATTRIBUTES
1357#undef TARGET_INSERT_ATTRIBUTES
1358#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1359#endif
1360
1361#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1362#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1363
1364#undef TARGET_STACK_PROTECT_FAIL
1365#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1366
1367#undef TARGET_FUNCTION_VALUE
1368#define TARGET_FUNCTION_VALUE ix86_function_value
1369
1370struct gcc_target targetm = TARGET_INITIALIZER;
1371
1372
1373/* The svr4 ABI for the i386 says that records and unions are returned
1374   in memory.  */
1375#ifndef DEFAULT_PCC_STRUCT_RETURN
1376#define DEFAULT_PCC_STRUCT_RETURN 1
1377#endif
1378
1379/* Implement TARGET_HANDLE_OPTION.  */
1380
1381static bool
1382ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1383{
1384  switch (code)
1385    {
1386    case OPT_m3dnow:
1387      if (!value)
1388	{
1389	  target_flags &= ~MASK_3DNOW_A;
1390	  target_flags_explicit |= MASK_3DNOW_A;
1391	}
1392      return true;
1393
1394    case OPT_mmmx:
1395      if (!value)
1396	{
1397	  target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1398	  target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1399	}
1400      return true;
1401
1402    case OPT_msse:
1403      if (!value)
1404	{
1405	  target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1406	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1407	}
1408      return true;
1409
1410    case OPT_msse2:
1411      if (!value)
1412	{
1413	  target_flags &= ~MASK_SSE3;
1414	  target_flags_explicit |= MASK_SSE3;
1415	}
1416      return true;
1417
1418    default:
1419      return true;
1420    }
1421}
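/* Illustrative note on the handler above (an observation drawn from the
   masks it clears): disabling a base ISA also disables its extensions and
   marks them as explicitly set, so a later -march cannot silently
   re-enable them.  For example, a command line along the lines of

       gcc -march=nocona -mno-sse ...

   should leave SSE, SSE2 and SSE3 all turned off, because the explicit
   bits recorded here are honoured when -march is processed in
   override_options.  */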
1422
1423/* Sometimes certain combinations of command options do not make
1424   sense on a particular target machine.  You can define a macro
1425   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
1426   defined, is executed once just after all the command options have
1427   been parsed.
1428
1429   Don't use this macro to turn on various extra optimizations for
1430   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
1431
1432void
1433override_options (void)
1434{
1435  int i;
1436  int ix86_tune_defaulted = 0;
1437
1438  /* Comes from final.c -- no real reason to change it.  */
1439#define MAX_CODE_ALIGN 16
1440
1441  static struct ptt
1442    {
1443      const struct processor_costs *cost;	/* Processor costs */
1444      const int target_enable;			/* Target flags to enable.  */
1445      const int target_disable;			/* Target flags to disable.  */
1446      const int align_loop;			/* Default alignments.  */
1447      const int align_loop_max_skip;
1448      const int align_jump;
1449      const int align_jump_max_skip;
1450      const int align_func;
1451    }
1452  const processor_target_table[PROCESSOR_max] =
1453    {
1454      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1455      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1456      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1457      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1458      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1459      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1460      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1461      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1462      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1463      {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1464      {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1465    };
1466
1467  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1468  static struct pta
1469    {
1470      const char *const name;		/* processor name or nickname.  */
1471      const enum processor_type processor;
1472      const enum pta_flags
1473	{
1474	  PTA_SSE = 1,
1475	  PTA_SSE2 = 2,
1476	  PTA_SSE3 = 4,
1477	  PTA_MMX = 8,
1478	  PTA_PREFETCH_SSE = 16,
1479	  PTA_3DNOW = 32,
1480	  PTA_3DNOW_A = 64,
1481	  PTA_64BIT = 128
1482	} flags;
1483    }
1484  const processor_alias_table[] =
1485    {
1486      {"i386", PROCESSOR_I386, 0},
1487      {"i486", PROCESSOR_I486, 0},
1488      {"i586", PROCESSOR_PENTIUM, 0},
1489      {"pentium", PROCESSOR_PENTIUM, 0},
1490      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1491      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1492      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1493      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1494      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1495      {"i686", PROCESSOR_PENTIUMPRO, 0},
1496      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1497      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1498      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1499      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1500      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1501      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1502				       | PTA_MMX | PTA_PREFETCH_SSE},
1503      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1504				        | PTA_MMX | PTA_PREFETCH_SSE},
1505      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1506				        | PTA_MMX | PTA_PREFETCH_SSE},
1507      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1508				        | PTA_MMX | PTA_PREFETCH_SSE},
1509      {"k6", PROCESSOR_K6, PTA_MMX},
1510      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1511      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1512      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1513				   | PTA_3DNOW_A},
1514      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1515					 | PTA_3DNOW | PTA_3DNOW_A},
1516      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1517				    | PTA_3DNOW_A | PTA_SSE},
1518      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1519				      | PTA_3DNOW_A | PTA_SSE},
1520      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1521				      | PTA_3DNOW_A | PTA_SSE},
1522      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1523			       | PTA_SSE | PTA_SSE2 },
1524      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1525				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1526      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1527				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1528      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1529				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1530      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1531				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1532      {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
1533      {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
1534    };
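  /* Example of how the table above is used by the -march loop below:
     -march=athlon-xp selects PROCESSOR_ATHLON and, unless the user gave
     explicit -m(no-)* options, turns on MASK_MMX, MASK_3DNOW, MASK_3DNOW_A
     and MASK_SSE, and sets x86_prefetch_sse.  */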
1535
1536  int const pta_size = ARRAY_SIZE (processor_alias_table);
1537
1538#ifdef SUBTARGET_OVERRIDE_OPTIONS
1539  SUBTARGET_OVERRIDE_OPTIONS;
1540#endif
1541
1542#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1543  SUBSUBTARGET_OVERRIDE_OPTIONS;
1544#endif
1545
1546  /* -fPIC is the default for 64-bit Darwin (Mach-O x86_64).  */
1547  if (TARGET_MACHO && TARGET_64BIT)
1548    flag_pic = 2;
1549
1550  /* Set the default values for switches whose default depends on TARGET_64BIT
1551     in case they weren't overwritten by command line options.  */
1552  if (TARGET_64BIT)
1553    {
1554      /* Mach-O doesn't support omitting the frame pointer for now.  */
1555      if (flag_omit_frame_pointer == 2)
1556	flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1557      if (flag_asynchronous_unwind_tables == 2)
1558	flag_asynchronous_unwind_tables = 1;
1559      if (flag_pcc_struct_return == 2)
1560	flag_pcc_struct_return = 0;
1561    }
1562  else
1563    {
1564      if (flag_omit_frame_pointer == 2)
1565	flag_omit_frame_pointer = 0;
1566      if (flag_asynchronous_unwind_tables == 2)
1567	flag_asynchronous_unwind_tables = 0;
1568      if (flag_pcc_struct_return == 2)
1569	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1570    }
1571
1572  /* Need to check -mtune=generic first.  */
1573  if (ix86_tune_string)
1574    {
1575      if (!strcmp (ix86_tune_string, "generic")
1576	  || !strcmp (ix86_tune_string, "i686")
1577	  /* As special support for cross compilers we read -mtune=native
1578	     as -mtune=generic.  With native compilers we won't see the
1579	     -mtune=native, as it was changed by the driver.  */
1580	  || !strcmp (ix86_tune_string, "native"))
1581	{
1582	  if (TARGET_64BIT)
1583	    ix86_tune_string = "generic64";
1584	  else
1585	    ix86_tune_string = "generic32";
1586	}
1587      else if (!strncmp (ix86_tune_string, "generic", 7))
1588	error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1589    }
1590  else
1591    {
1592      if (ix86_arch_string)
1593	ix86_tune_string = ix86_arch_string;
1594      if (!ix86_tune_string)
1595	{
1596	  ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1597	  ix86_tune_defaulted = 1;
1598	}
1599
1600      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
1601	 need to use a sensible tune option.  */
1602      if (!strcmp (ix86_tune_string, "generic")
1603	  || !strcmp (ix86_tune_string, "x86-64")
1604	  || !strcmp (ix86_tune_string, "i686"))
1605	{
1606	  if (TARGET_64BIT)
1607	    ix86_tune_string = "generic64";
1608	  else
1609	    ix86_tune_string = "generic32";
1610	}
1611    }
1612  if (!strcmp (ix86_tune_string, "x86-64"))
1613    warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated.  Use -mtune=k8 or "
1614	     "-mtune=generic instead as appropriate.");
1615
1616  if (!ix86_arch_string)
1617    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1618  if (!strcmp (ix86_arch_string, "generic"))
1619    error ("generic CPU can be used only for -mtune= switch");
1620  if (!strncmp (ix86_arch_string, "generic", 7))
1621    error ("bad value (%s) for -march= switch", ix86_arch_string);
1622
1623  if (ix86_cmodel_string != 0)
1624    {
1625      if (!strcmp (ix86_cmodel_string, "small"))
1626	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1627      else if (!strcmp (ix86_cmodel_string, "medium"))
1628	ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1629      else if (flag_pic)
1630	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1631      else if (!strcmp (ix86_cmodel_string, "32"))
1632	ix86_cmodel = CM_32;
1633      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1634	ix86_cmodel = CM_KERNEL;
1635      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1636	ix86_cmodel = CM_LARGE;
1637      else
1638	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1639    }
1640  else
1641    {
1642      ix86_cmodel = CM_32;
1643      if (TARGET_64BIT)
1644	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1645    }
1646  if (ix86_asm_string != 0)
1647    {
1648      if (! TARGET_MACHO
1649	  && !strcmp (ix86_asm_string, "intel"))
1650	ix86_asm_dialect = ASM_INTEL;
1651      else if (!strcmp (ix86_asm_string, "att"))
1652	ix86_asm_dialect = ASM_ATT;
1653      else
1654	error ("bad value (%s) for -masm= switch", ix86_asm_string);
1655    }
1656  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1657    error ("code model %qs not supported in the %s bit mode",
1658	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1659  if (ix86_cmodel == CM_LARGE)
1660    sorry ("code model %<large%> not supported yet");
1661  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1662    sorry ("%i-bit mode not compiled in",
1663	   (target_flags & MASK_64BIT) ? 64 : 32);
1664
1665  for (i = 0; i < pta_size; i++)
1666    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1667      {
1668	ix86_arch = processor_alias_table[i].processor;
1669	/* Default cpu tuning to the architecture.  */
1670	ix86_tune = ix86_arch;
1671	if (processor_alias_table[i].flags & PTA_MMX
1672	    && !(target_flags_explicit & MASK_MMX))
1673	  target_flags |= MASK_MMX;
1674	if (processor_alias_table[i].flags & PTA_3DNOW
1675	    && !(target_flags_explicit & MASK_3DNOW))
1676	  target_flags |= MASK_3DNOW;
1677	if (processor_alias_table[i].flags & PTA_3DNOW_A
1678	    && !(target_flags_explicit & MASK_3DNOW_A))
1679	  target_flags |= MASK_3DNOW_A;
1680	if (processor_alias_table[i].flags & PTA_SSE
1681	    && !(target_flags_explicit & MASK_SSE))
1682	  target_flags |= MASK_SSE;
1683	if (processor_alias_table[i].flags & PTA_SSE2
1684	    && !(target_flags_explicit & MASK_SSE2))
1685	  target_flags |= MASK_SSE2;
1686	if (processor_alias_table[i].flags & PTA_SSE3
1687	    && !(target_flags_explicit & MASK_SSE3))
1688	  target_flags |= MASK_SSE3;
1689	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1690	  x86_prefetch_sse = true;
1691	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1692	  error ("CPU you selected does not support x86-64 "
1693		 "instruction set");
1694	break;
1695      }
1696
1697  if (i == pta_size)
1698    error ("bad value (%s) for -march= switch", ix86_arch_string);
1699
1700  for (i = 0; i < pta_size; i++)
1701    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1702      {
1703	ix86_tune = processor_alias_table[i].processor;
1704	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1705	  {
1706	    if (ix86_tune_defaulted)
1707	      {
1708		ix86_tune_string = "x86-64";
1709		for (i = 0; i < pta_size; i++)
1710		  if (! strcmp (ix86_tune_string,
1711				processor_alias_table[i].name))
1712		    break;
1713		ix86_tune = processor_alias_table[i].processor;
1714	      }
1715	    else
1716	      error ("CPU you selected does not support x86-64 "
1717		     "instruction set");
1718	  }
1719        /* Intel CPUs have always interpreted SSE prefetch instructions as
1720	   NOPs; so, we can enable SSE prefetch instructions even when
1721	   -mtune (rather than -march) points us to a processor that has them.
1722	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1723	   higher processors.  */
1724	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1725	  x86_prefetch_sse = true;
1726	break;
1727      }
1728  if (i == pta_size)
1729    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1730
1731  if (optimize_size)
1732    ix86_cost = &size_cost;
1733  else
1734    ix86_cost = processor_target_table[ix86_tune].cost;
1735  target_flags |= processor_target_table[ix86_tune].target_enable;
1736  target_flags &= ~processor_target_table[ix86_tune].target_disable;
1737
1738  /* Arrange to set up i386_stack_locals for all functions.  */
1739  init_machine_status = ix86_init_machine_status;
1740
1741  /* Validate -mregparm= value.  */
1742  if (ix86_regparm_string)
1743    {
1744      i = atoi (ix86_regparm_string);
1745      if (i < 0 || i > REGPARM_MAX)
1746	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1747      else
1748	ix86_regparm = i;
1749    }
1750  else
1751   if (TARGET_64BIT)
1752     ix86_regparm = REGPARM_MAX;
1753
1754  /* If the user has provided any of the -malign-* options,
1755     warn and use that value only if -falign-* is not set.
1756     Remove this code in GCC 3.2 or later.  */
1757  if (ix86_align_loops_string)
1758    {
1759      warning (0, "-malign-loops is obsolete, use -falign-loops");
1760      if (align_loops == 0)
1761	{
1762	  i = atoi (ix86_align_loops_string);
1763	  if (i < 0 || i > MAX_CODE_ALIGN)
1764	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1765	  else
1766	    align_loops = 1 << i;
1767	}
1768    }
1769
1770  if (ix86_align_jumps_string)
1771    {
1772      warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1773      if (align_jumps == 0)
1774	{
1775	  i = atoi (ix86_align_jumps_string);
1776	  if (i < 0 || i > MAX_CODE_ALIGN)
1777	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1778	  else
1779	    align_jumps = 1 << i;
1780	}
1781    }
1782
1783  if (ix86_align_funcs_string)
1784    {
1785      warning (0, "-malign-functions is obsolete, use -falign-functions");
1786      if (align_functions == 0)
1787	{
1788	  i = atoi (ix86_align_funcs_string);
1789	  if (i < 0 || i > MAX_CODE_ALIGN)
1790	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1791	  else
1792	    align_functions = 1 << i;
1793	}
1794    }
1795
1796  /* Default align_* from the processor table.  */
1797  if (align_loops == 0)
1798    {
1799      align_loops = processor_target_table[ix86_tune].align_loop;
1800      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1801    }
1802  if (align_jumps == 0)
1803    {
1804      align_jumps = processor_target_table[ix86_tune].align_jump;
1805      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1806    }
1807  if (align_functions == 0)
1808    {
1809      align_functions = processor_target_table[ix86_tune].align_func;
1810    }
1811
1812  /* Validate -mbranch-cost= value, or provide default.  */
1813  ix86_branch_cost = ix86_cost->branch_cost;
1814  if (ix86_branch_cost_string)
1815    {
1816      i = atoi (ix86_branch_cost_string);
1817      if (i < 0 || i > 5)
1818	error ("-mbranch-cost=%d is not between 0 and 5", i);
1819      else
1820	ix86_branch_cost = i;
1821    }
1822  if (ix86_section_threshold_string)
1823    {
1824      i = atoi (ix86_section_threshold_string);
1825      if (i < 0)
1826	error ("-mlarge-data-threshold=%d is negative", i);
1827      else
1828	ix86_section_threshold = i;
1829    }
1830
1831  if (ix86_tls_dialect_string)
1832    {
1833      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1834	ix86_tls_dialect = TLS_DIALECT_GNU;
1835      else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1836	ix86_tls_dialect = TLS_DIALECT_GNU2;
1837      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1838	ix86_tls_dialect = TLS_DIALECT_SUN;
1839      else
1840	error ("bad value (%s) for -mtls-dialect= switch",
1841	       ix86_tls_dialect_string);
1842    }
1843
1844  /* Keep nonleaf frame pointers.  */
1845  if (flag_omit_frame_pointer)
1846    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1847  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1848    flag_omit_frame_pointer = 1;
1849
1850  /* If we're doing fast math, we don't care about comparison order
1851     wrt NaNs.  This lets us use a shorter comparison sequence.  */
1852  if (flag_finite_math_only)
1853    target_flags &= ~MASK_IEEE_FP;
1854
1855  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1856     since the insns won't need emulation.  */
1857  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1858    target_flags &= ~MASK_NO_FANCY_MATH_387;
1859
1860  /* Likewise, if the target doesn't have a 387, or we've specified
1861     software floating point, don't use 387 inline intrinsics.  */
1862  if (!TARGET_80387)
1863    target_flags |= MASK_NO_FANCY_MATH_387;
1864
1865  /* Turn on SSE2 builtins for -msse3.  */
1866  if (TARGET_SSE3)
1867    target_flags |= MASK_SSE2;
1868
1869  /* Turn on SSE builtins for -msse2.  */
1870  if (TARGET_SSE2)
1871    target_flags |= MASK_SSE;
1872
1873  /* Turn on MMX builtins for -msse.  */
1874  if (TARGET_SSE)
1875    {
1876      target_flags |= MASK_MMX & ~target_flags_explicit;
1877      x86_prefetch_sse = true;
1878    }
1879
1880  /* Turn on MMX builtins for 3Dnow.  */
1881  if (TARGET_3DNOW)
1882    target_flags |= MASK_MMX;
1883
1884  if (TARGET_64BIT)
1885    {
1886      if (TARGET_ALIGN_DOUBLE)
1887	error ("-malign-double makes no sense in the 64bit mode");
1888      if (TARGET_RTD)
1889	error ("-mrtd calling convention not supported in the 64bit mode");
1890
1891      /* Enable by default the SSE and MMX builtins.  Do allow the user to
1892	 explicitly disable any of these.  In particular, disabling SSE and
1893	 MMX for kernel code is extremely useful.  */
1894      target_flags
1895	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1896	    & ~target_flags_explicit);
1897     }
1898  else
1899    {
1900      /* The i386 ABI does not specify a red zone.  It still makes sense to use
1901         one when the programmer takes care to keep the stack from being destroyed.  */
1902      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1903        target_flags |= MASK_NO_RED_ZONE;
1904    }
1905
1906  /* Validate the -mpreferred-stack-boundary= value, or provide the default.
1907     The default of 128 bits is for the Pentium III's SSE __m128.  We can't
1908     lower it for optimize_size, because otherwise object files compiled
1909     with -Os and -On could not be mixed.  */
1910  ix86_preferred_stack_boundary = 128;
1911  if (ix86_preferred_stack_boundary_string)
1912    {
1913      i = atoi (ix86_preferred_stack_boundary_string);
1914      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1915	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1916	       TARGET_64BIT ? 4 : 2);
1917      else
1918	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1919    }
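  /* Worked example of the conversion above: -mpreferred-stack-boundary=4
     gives (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
     default chosen for __m128.  */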
1920
1921  /* Accept -msseregparm only if at least SSE support is enabled.  */
1922  if (TARGET_SSEREGPARM
1923      && ! TARGET_SSE)
1924    error ("-msseregparm used without SSE enabled");
1925
1926  ix86_fpmath = TARGET_FPMATH_DEFAULT;
1927
1928  if (ix86_fpmath_string != 0)
1929    {
1930      if (! strcmp (ix86_fpmath_string, "387"))
1931	ix86_fpmath = FPMATH_387;
1932      else if (! strcmp (ix86_fpmath_string, "sse"))
1933	{
1934	  if (!TARGET_SSE)
1935	    {
1936	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
1937	      ix86_fpmath = FPMATH_387;
1938	    }
1939	  else
1940	    ix86_fpmath = FPMATH_SSE;
1941	}
1942      else if (! strcmp (ix86_fpmath_string, "387,sse")
1943	       || ! strcmp (ix86_fpmath_string, "sse,387"))
1944	{
1945	  if (!TARGET_SSE)
1946	    {
1947	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
1948	      ix86_fpmath = FPMATH_387;
1949	    }
1950	  else if (!TARGET_80387)
1951	    {
1952	      warning (0, "387 instruction set disabled, using SSE arithmetics");
1953	      ix86_fpmath = FPMATH_SSE;
1954	    }
1955	  else
1956	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
1957	}
1958      else
1959	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1960    }
1961
1962  /* If the i387 is disabled, then do not return values in it. */
1963  if (!TARGET_80387)
1964    target_flags &= ~MASK_FLOAT_RETURNS;
1965
1966  if ((x86_accumulate_outgoing_args & TUNEMASK)
1967      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1968      && !optimize_size)
1969    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1970
1971  /* ??? Unwind info is not correct around the CFG unless either a frame
1972     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
1973     unwind info generation to be aware of the CFG and propagating states
1974     around edges.  */
1975  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1976       || flag_exceptions || flag_non_call_exceptions)
1977      && flag_omit_frame_pointer
1978      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1979    {
1980      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1981	warning (0, "unwind tables currently require either a frame pointer "
1982		 "or -maccumulate-outgoing-args for correctness");
1983      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1984    }
1985
1986  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
1987  {
1988    char *p;
1989    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1990    p = strchr (internal_label_prefix, 'X');
1991    internal_label_prefix_len = p - internal_label_prefix;
1992    *p = '\0';
1993  }
1994
1995  /* When the scheduling description is not available, disable the scheduler
1996     pass so it won't slow down the compilation and make x87 code slower.  */
1997  if (!TARGET_SCHEDULE)
1998    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1999}
2000
2001/* Switch to the appropriate section for output of DECL.
2002   DECL is either a `VAR_DECL' node or a constant of some sort.
2003   RELOC indicates whether forming the initial value of DECL requires
2004   link-time relocations.  */
2005
2006static section *
2007x86_64_elf_select_section (tree decl, int reloc,
2008			   unsigned HOST_WIDE_INT align)
2009{
2010  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2011      && ix86_in_large_data_p (decl))
2012    {
2013      const char *sname = NULL;
2014      unsigned int flags = SECTION_WRITE;
2015      switch (categorize_decl_for_section (decl, reloc))
2016	{
2017	case SECCAT_DATA:
2018	  sname = ".ldata";
2019	  break;
2020	case SECCAT_DATA_REL:
2021	  sname = ".ldata.rel";
2022	  break;
2023	case SECCAT_DATA_REL_LOCAL:
2024	  sname = ".ldata.rel.local";
2025	  break;
2026	case SECCAT_DATA_REL_RO:
2027	  sname = ".ldata.rel.ro";
2028	  break;
2029	case SECCAT_DATA_REL_RO_LOCAL:
2030	  sname = ".ldata.rel.ro.local";
2031	  break;
2032	case SECCAT_BSS:
2033	  sname = ".lbss";
2034	  flags |= SECTION_BSS;
2035	  break;
2036	case SECCAT_RODATA:
2037	case SECCAT_RODATA_MERGE_STR:
2038	case SECCAT_RODATA_MERGE_STR_INIT:
2039	case SECCAT_RODATA_MERGE_CONST:
2040	  sname = ".lrodata";
2041	  flags = 0;
2042	  break;
2043	case SECCAT_SRODATA:
2044	case SECCAT_SDATA:
2045	case SECCAT_SBSS:
2046	  gcc_unreachable ();
2047	case SECCAT_TEXT:
2048	case SECCAT_TDATA:
2049	case SECCAT_TBSS:
2050	  /* We don't split these for the medium model.  Place them into
2051	     default sections and hope for the best.  */
2052	  break;
2053	}
2054      if (sname)
2055	{
2056	  /* We might get called with string constants, but get_named_section
2057	     doesn't like them as they are not DECLs.  Also, we need to set
2058	     flags in that case.  */
2059	  if (!DECL_P (decl))
2060	    return get_section (sname, flags, NULL);
2061	  return get_named_section (decl, sname, reloc);
2062	}
2063    }
2064  return default_elf_select_section (decl, reloc, align);
2065}
2066
2067/* Build up a unique section name, expressed as a
2068   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2069   RELOC indicates whether the initial value of EXP requires
2070   link-time relocations.  */
2071
2072static void
2073x86_64_elf_unique_section (tree decl, int reloc)
2074{
2075  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2076      && ix86_in_large_data_p (decl))
2077    {
2078      const char *prefix = NULL;
2079      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
2080      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2081
2082      switch (categorize_decl_for_section (decl, reloc))
2083	{
2084	case SECCAT_DATA:
2085	case SECCAT_DATA_REL:
2086	case SECCAT_DATA_REL_LOCAL:
2087	case SECCAT_DATA_REL_RO:
2088	case SECCAT_DATA_REL_RO_LOCAL:
2089          prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2090	  break;
2091	case SECCAT_BSS:
2092          prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2093	  break;
2094	case SECCAT_RODATA:
2095	case SECCAT_RODATA_MERGE_STR:
2096	case SECCAT_RODATA_MERGE_STR_INIT:
2097	case SECCAT_RODATA_MERGE_CONST:
2098          prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2099	  break;
2100	case SECCAT_SRODATA:
2101	case SECCAT_SDATA:
2102	case SECCAT_SBSS:
2103	  gcc_unreachable ();
2104	case SECCAT_TEXT:
2105	case SECCAT_TDATA:
2106	case SECCAT_TBSS:
2107	  /* We don't split these for the medium model.  Place them into
2108	     default sections and hope for the best.  */
2109	  break;
2110	}
2111      if (prefix)
2112	{
2113	  const char *name;
2114	  size_t nlen, plen;
2115	  char *string;
2116	  plen = strlen (prefix);
2117
2118	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2119	  name = targetm.strip_name_encoding (name);
2120	  nlen = strlen (name);
2121
2122	  string = alloca (nlen + plen + 1);
2123	  memcpy (string, prefix, plen);
2124	  memcpy (string + plen, name, nlen + 1);
2125
2126	  DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2127	  return;
2128	}
2129    }
2130  default_unique_section (decl, reloc);
2131}
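/* Illustrative example (assumed name): a variable `large_table' placed in
   the large data area and categorized as SECCAT_DATA gets the section name
   ".ldata.large_table", or ".gnu.linkonce.ld.large_table" when it is
   one-only and COMDAT groups are unavailable.  */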
2132
2133#ifdef COMMON_ASM_OP
2134/* This says how to output assembler code to declare an
2135   uninitialized external linkage data object.
2136
2137   For medium model x86-64 we need to use the .largecomm directive for
2138   large objects.  */
2139void
2140x86_elf_aligned_common (FILE *file,
2141			const char *name, unsigned HOST_WIDE_INT size,
2142			int align)
2143{
2144  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2145      && size > (unsigned int)ix86_section_threshold)
2146    fprintf (file, ".largecomm\t");
2147  else
2148    fprintf (file, "%s", COMMON_ASM_OP);
2149  assemble_name (file, name);
2150  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2151	   size, align / BITS_PER_UNIT);
2152}
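/* Illustrative output of the function above (assumed symbol name): a
   1048576-byte common object with 32-byte alignment that exceeds
   ix86_section_threshold is announced as

       .largecomm	big_buf,1048576,32

   (note the alignment is converted from bits to bytes), while smaller
   objects keep using COMMON_ASM_OP.  */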
2153
2154/* Utility function for targets to use in implementing
2155   ASM_OUTPUT_ALIGNED_BSS.  */
2156
2157void
2158x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2159			const char *name, unsigned HOST_WIDE_INT size,
2160			int align)
2161{
2162  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2163      && size > (unsigned int)ix86_section_threshold)
2164    switch_to_section (get_named_section (decl, ".lbss", 0));
2165  else
2166    switch_to_section (bss_section);
2167  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2168#ifdef ASM_DECLARE_OBJECT_NAME
2169  last_assemble_variable_decl = decl;
2170  ASM_DECLARE_OBJECT_NAME (file, name, decl);
2171#else
2172  /* Standard thing is just output label for the object.  */
2173  ASM_OUTPUT_LABEL (file, name);
2174#endif /* ASM_DECLARE_OBJECT_NAME */
2175  ASM_OUTPUT_SKIP (file, size ? size : 1);
2176}
2177#endif
2178
2179void
2180optimization_options (int level, int size ATTRIBUTE_UNUSED)
2181{
2182  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
2183     make the register-shortage problem even worse.  */
2184#ifdef INSN_SCHEDULING
2185  if (level > 1)
2186    flag_schedule_insns = 0;
2187#endif
2188
2189  if (TARGET_MACHO)
2190    /* The Darwin libraries never set errno, so we might as well
2191       avoid calling them when that's the only reason we would.  */
2192    flag_errno_math = 0;
2193
2194  /* The default values of these switches depend on TARGET_64BIT, which is
2195     not known at this moment.  Mark these values with 2 and let the user
2196     override them.  In case there is no command line option specifying
2197     them, we will set the defaults in override_options.  */
2198  if (optimize >= 1)
2199    flag_omit_frame_pointer = 2;
2200  flag_pcc_struct_return = 2;
2201  flag_asynchronous_unwind_tables = 2;
2202#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2203  SUBTARGET_OPTIMIZATION_OPTIONS;
2204#endif
2205}
2206
2207/* Table of valid machine attributes.  */
2208const struct attribute_spec ix86_attribute_table[] =
2209{
2210  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2211  /* Stdcall attribute says callee is responsible for popping arguments
2212     if they are not variable.  */
2213  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2214  /* Fastcall attribute says callee is responsible for popping arguments
2215     if they are not variable.  */
2216  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2217  /* Cdecl attribute says the callee is a normal C declaration */
2218  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
2219  /* Regparm attribute specifies how many integer arguments are to be
2220     passed in registers.  */
2221  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
2222  /* Sseregparm attribute says we are using x86_64 calling conventions
2223     for FP arguments.  */
2224  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2225  /* force_align_arg_pointer says this function realigns the stack at entry.  */
2226  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2227    false, true,  true, ix86_handle_cconv_attribute },
2228#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2229  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2230  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2231  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
2232#endif
2233  { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
2234  { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
2235#ifdef SUBTARGET_ATTRIBUTE_TABLE
2236  SUBTARGET_ATTRIBUTE_TABLE,
2237#endif
2238  { NULL,        0, 0, false, false, false, NULL }
2239};
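/* Usage examples for the calling-convention attributes above (illustrative
   declarations only):

       extern int  __attribute__ ((stdcall))     win_cb (int, int);
       extern int  __attribute__ ((fastcall))    fast_fn (int, int);
       extern void __attribute__ ((regparm (3))) reg_fn (int, int, int);

   ix86_handle_cconv_attribute below rejects meaningless combinations such
   as fastcall together with regparm.  */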
2240
2241/* Decide whether we can make a sibling call to a function.  DECL is the
2242   declaration of the function being targeted by the call and EXP is the
2243   CALL_EXPR representing the call.  */
2244
2245static bool
2246ix86_function_ok_for_sibcall (tree decl, tree exp)
2247{
2248  tree func;
2249  rtx a, b;
2250
2251  /* If we are generating position-independent code, we cannot sibcall
2252     optimize any indirect call, or a direct call to a global function,
2253     as the PLT requires %ebx be live.  */
2254  if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2255    return false;
2256
2257  if (decl)
2258    func = decl;
2259  else
2260    {
2261      func = TREE_TYPE (TREE_OPERAND (exp, 0));
2262      if (POINTER_TYPE_P (func))
2263        func = TREE_TYPE (func);
2264    }
2265
2266  /* Check that the return value locations are the same.  For example,
2267     if we are returning floats on the 80387 register stack, we cannot
2268     make a sibcall from a function that doesn't return a float to a
2269     function that does or, conversely, from a function that does return
2270     a float to a function that doesn't; the necessary stack adjustment
2271     would not be executed.  This is also the place we notice
2272     differences in the return value ABI.  Note that it is ok for one
2273     of the functions to have void return type as long as the return
2274     value of the other is passed in a register.  */
2275  a = ix86_function_value (TREE_TYPE (exp), func, false);
2276  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2277			   cfun->decl, false);
2278  if (STACK_REG_P (a) || STACK_REG_P (b))
2279    {
2280      if (!rtx_equal_p (a, b))
2281	return false;
2282    }
2283  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2284    ;
2285  else if (!rtx_equal_p (a, b))
2286    return false;
2287
2288  /* If this call is indirect, we'll need to be able to use a call-clobbered
2289     register for the address of the target function.  Make sure that all
2290     such registers are not used for passing parameters.  */
2291  if (!decl && !TARGET_64BIT)
2292    {
2293      tree type;
2294
2295      /* We're looking at the CALL_EXPR, we need the type of the function.  */
2296      type = TREE_OPERAND (exp, 0);		/* pointer expression */
2297      type = TREE_TYPE (type);			/* pointer type */
2298      type = TREE_TYPE (type);			/* function type */
2299
2300      if (ix86_function_regparm (type, NULL) >= 3)
2301	{
2302	  /* ??? Need to count the actual number of registers to be used,
2303	     not the possible number of registers.  Fix later.  */
2304	  return false;
2305	}
2306    }
2307
2308#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2309  /* Dllimport'd functions are also called indirectly.  */
2310  if (decl && DECL_DLLIMPORT_P (decl)
2311      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2312    return false;
2313#endif
2314
2315  /* If we force-aligned the stack, then sibcalling would unalign the
2316     stack, which may break the called function.  */
2317  if (cfun->machine->force_align_arg_pointer)
2318    return false;
2319
2320  /* Otherwise okay.  That also includes certain types of indirect calls.  */
2321  return true;
2322}
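/* Illustrative case for the return-value check above: a caller that
   returns double (in %st(0)) must not sibcall a function returning int
   (in %eax), since the FP-stack adjustment on return would be skipped.  */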
2323
2324/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2325   calling convention attributes;
2326   arguments as in struct attribute_spec.handler.  */
2327
2328static tree
2329ix86_handle_cconv_attribute (tree *node, tree name,
2330				   tree args,
2331				   int flags ATTRIBUTE_UNUSED,
2332				   bool *no_add_attrs)
2333{
2334  if (TREE_CODE (*node) != FUNCTION_TYPE
2335      && TREE_CODE (*node) != METHOD_TYPE
2336      && TREE_CODE (*node) != FIELD_DECL
2337      && TREE_CODE (*node) != TYPE_DECL)
2338    {
2339      warning (OPT_Wattributes, "%qs attribute only applies to functions",
2340	       IDENTIFIER_POINTER (name));
2341      *no_add_attrs = true;
2342      return NULL_TREE;
2343    }
2344
2345  /* Can combine regparm with all attributes but fastcall.  */
2346  if (is_attribute_p ("regparm", name))
2347    {
2348      tree cst;
2349
2350      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2351        {
2352	  error ("fastcall and regparm attributes are not compatible");
2353	}
2354
2355      cst = TREE_VALUE (args);
2356      if (TREE_CODE (cst) != INTEGER_CST)
2357	{
2358	  warning (OPT_Wattributes,
2359		   "%qs attribute requires an integer constant argument",
2360		   IDENTIFIER_POINTER (name));
2361	  *no_add_attrs = true;
2362	}
2363      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2364	{
2365	  warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2366		   IDENTIFIER_POINTER (name), REGPARM_MAX);
2367	  *no_add_attrs = true;
2368	}
2369
2370      if (!TARGET_64BIT
2371	  && lookup_attribute (ix86_force_align_arg_pointer_string,
2372			       TYPE_ATTRIBUTES (*node))
2373	  && compare_tree_int (cst, REGPARM_MAX-1))
2374	{
2375	  error ("%s functions limited to %d register parameters",
2376		 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2377	}
2378
2379      return NULL_TREE;
2380    }
2381
2382  if (TARGET_64BIT)
2383    {
2384      warning (OPT_Wattributes, "%qs attribute ignored",
2385	       IDENTIFIER_POINTER (name));
2386      *no_add_attrs = true;
2387      return NULL_TREE;
2388    }
2389
2390  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
2391  if (is_attribute_p ("fastcall", name))
2392    {
2393      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2394        {
2395	  error ("fastcall and cdecl attributes are not compatible");
2396	}
2397      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2398        {
2399	  error ("fastcall and stdcall attributes are not compatible");
2400	}
2401      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2402        {
2403	  error ("fastcall and regparm attributes are not compatible");
2404	}
2405    }
2406
2407  /* Can combine stdcall with fastcall (redundant), regparm and
2408     sseregparm.  */
2409  else if (is_attribute_p ("stdcall", name))
2410    {
2411      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2412        {
2413	  error ("stdcall and cdecl attributes are not compatible");
2414	}
2415      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2416        {
2417	  error ("stdcall and fastcall attributes are not compatible");
2418	}
2419    }
2420
2421  /* Can combine cdecl with regparm and sseregparm.  */
2422  else if (is_attribute_p ("cdecl", name))
2423    {
2424      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2425        {
2426	  error ("stdcall and cdecl attributes are not compatible");
2427	}
2428      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2429        {
2430	  error ("fastcall and cdecl attributes are not compatible");
2431	}
2432    }
2433
2434  /* Can combine sseregparm with all attributes.  */
2435
2436  return NULL_TREE;
2437}
2438
2439/* Return 0 if the attributes for two types are incompatible, 1 if they
2440   are compatible, and 2 if they are nearly compatible (which causes a
2441   warning to be generated).  */
2442
2443static int
2444ix86_comp_type_attributes (tree type1, tree type2)
2445{
2446  /* Check for mismatch of non-default calling convention.  */
2447  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2448
2449  if (TREE_CODE (type1) != FUNCTION_TYPE)
2450    return 1;
2451
2452  /* Check for mismatched fastcall/regparm types.  */
2453  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2454       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2455      || (ix86_function_regparm (type1, NULL)
2456	  != ix86_function_regparm (type2, NULL)))
2457    return 0;
2458
2459  /* Check for mismatched sseregparm types.  */
2460  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2461      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2462    return 0;
2463
2464  /* Check for mismatched return types (cdecl vs stdcall).  */
2465  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2466      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2467    return 0;
2468
2469  return 1;
2470}
2471
2472/* Return the regparm value for a function with the indicated TYPE and DECL.
2473   DECL may be NULL when calling function indirectly
2474   or considering a libcall.  */
2475
2476static int
2477ix86_function_regparm (tree type, tree decl)
2478{
2479  tree attr;
2480  int regparm = ix86_regparm;
2481  bool user_convention = false;
2482
2483  if (!TARGET_64BIT)
2484    {
2485      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2486      if (attr)
2487	{
2488	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2489	  user_convention = true;
2490	}
2491
2492      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2493	{
2494	  regparm = 2;
2495	  user_convention = true;
2496	}
2497
2498      /* Use register calling convention for local functions when possible.  */
2499      if (!TARGET_64BIT && !user_convention && decl
2500	  && flag_unit_at_a_time && !profile_flag)
2501	{
2502	  struct cgraph_local_info *i = cgraph_local_info (decl);
2503	  if (i && i->local)
2504	    {
2505	      int local_regparm, globals = 0, regno;
2506
2507	      /* Make sure no regparm register is taken by a global register
2508		 variable.  */
2509	      for (local_regparm = 0; local_regparm < 3; local_regparm++)
2510		if (global_regs[local_regparm])
2511		  break;
2512	      /* We can't use regparm(3) for nested functions as these use the
2513		 static chain pointer in the third argument register.  */
2514	      if (local_regparm == 3
2515		  && decl_function_context (decl)
2516		  && !DECL_NO_STATIC_CHAIN (decl))
2517		local_regparm = 2;
2518	      /* If the function realigns its stack pointer, the
2519		 prologue will clobber %ecx.  If we've already
2520		 generated code for the callee, the callee
2521		 DECL_STRUCT_FUNCTION is gone, so we fall back to
2522		 scanning the attributes for the self-realigning
2523		 property.  */
2524	      if ((DECL_STRUCT_FUNCTION (decl)
2525		   && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2526		  || (!DECL_STRUCT_FUNCTION (decl)
2527		      && lookup_attribute (ix86_force_align_arg_pointer_string,
2528					   TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2529		local_regparm = 2;
2530	      /* Each global register variable increases register pressure,
2531		 so the more global register variables there are, the less the
2532		 regparm optimization is used, unless requested by the user explicitly.  */
2533	      for (regno = 0; regno < 6; regno++)
2534		if (global_regs[regno])
2535		  globals++;
2536	      local_regparm
2537		= globals < local_regparm ? local_regparm - globals : 0;
2538
2539	      if (local_regparm > regparm)
2540		regparm = local_regparm;
2541	    }
2542	}
2543    }
2544  return regparm;
2545}
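/* Illustrative note on the regparm values computed above: on ia32 a
   regparm(3) function receives its first three integer arguments in
   %eax, %edx and %ecx, while fastcall (regparm forced to 2 above) uses
   %ecx and %edx.  */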
2546
2547/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2548   DFmode (2) arguments in SSE registers for a function with the
2549   indicated TYPE and DECL.  DECL may be NULL when calling function
2550   indirectly or considering a libcall.  Otherwise return 0.  */
2551
2552static int
2553ix86_function_sseregparm (tree type, tree decl)
2554{
2555  /* Use SSE registers to pass SFmode and DFmode arguments if requested
2556     by the sseregparm attribute.  */
2557  if (TARGET_SSEREGPARM
2558      || (type
2559	  && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2560    {
2561      if (!TARGET_SSE)
2562	{
2563	  if (decl)
2564	    error ("Calling %qD with attribute sseregparm without "
2565		   "SSE/SSE2 enabled", decl);
2566	  else
2567	    error ("Calling %qT with attribute sseregparm without "
2568		   "SSE/SSE2 enabled", type);
2569	  return 0;
2570	}
2571
2572      return 2;
2573    }
2574
2575  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2576     (and DFmode for SSE2) arguments in SSE registers,
2577     even for 32-bit targets.  */
2578  if (!TARGET_64BIT && decl
2579      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2580    {
2581      struct cgraph_local_info *i = cgraph_local_info (decl);
2582      if (i && i->local)
2583	return TARGET_SSE2 ? 2 : 1;
2584    }
2585
2586  return 0;
2587}
2588
2589/* Return true if EAX is live at the start of the function.  Used by
2590   ix86_expand_prologue to determine if we need special help before
2591   calling allocate_stack_worker.  */
2592
2593static bool
2594ix86_eax_live_at_start_p (void)
2595{
2596  /* Cheat.  Don't bother working forward from ix86_function_regparm
2597     to the function type to whether an actual argument is located in
2598     eax.  Instead just look at cfg info, which is still close enough
2599     to correct at this point.  This gives false positives for broken
2600     functions that might use uninitialized data that happens to be
2601     allocated in eax, but who cares?  */
2602  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2603}
2604
2605/* Value is the number of bytes of arguments automatically
2606   popped when returning from a subroutine call.
2607   FUNDECL is the declaration node of the function (as a tree),
2608   FUNTYPE is the data type of the function (as a tree),
2609   or for a library call it is an identifier node for the subroutine name.
2610   SIZE is the number of bytes of arguments passed on the stack.
2611
2612   On the 80386, the RTD insn may be used to pop them if the number
2613     of args is fixed, but if the number is variable then the caller
2614     must pop them all.  RTD can't be used for library calls now
2615     because the library is compiled with the Unix compiler.
2616   Use of RTD is a selectable option, since it is incompatible with
2617   standard Unix calling sequences.  If the option is not selected,
2618   the caller must always pop the args.
2619
2620   The attribute stdcall is equivalent to RTD on a per module basis.  */
2621
2622int
2623ix86_return_pops_args (tree fundecl, tree funtype, int size)
2624{
2625  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2626
2627  /* Cdecl functions override -mrtd, and never pop the stack.  */
2628  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2629
2630    /* Stdcall and fastcall functions will pop the stack if not
2631       variable args.  */
2632    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2633        || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2634      rtd = 1;
2635
2636    if (rtd
2637        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2638	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2639		== void_type_node)))
2640      return size;
2641  }
2642
2643  /* Lose any fake structure return argument if it is passed on the stack.  */
2644  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2645      && !TARGET_64BIT
2646      && !KEEP_AGGREGATE_RETURN_POINTER)
2647    {
2648      int nregs = ix86_function_regparm (funtype, fundecl);
2649
2650      if (!nregs)
2651	return GET_MODE_SIZE (Pmode);
2652    }
2653
2654  return 0;
2655}
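/* Worked example for the function above: for a prototype such as
   `int __attribute__ ((stdcall)) f (int, int);' the callee pops its
   8 bytes of arguments (a `ret $8'), so ix86_return_pops_args returns 8;
   for plain cdecl functions it returns 0 and the caller pops.  */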
2656
2657/* Argument support functions.  */
2658
2659/* Return true when register REGNO may be used to pass function parameters.  */
2660bool
2661ix86_function_arg_regno_p (int regno)
2662{
2663  int i;
2664  if (!TARGET_64BIT)
2665    {
2666      if (TARGET_MACHO)
2667        return (regno < REGPARM_MAX
2668                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2669      else
2670        return (regno < REGPARM_MAX
2671	        || (TARGET_MMX && MMX_REGNO_P (regno)
2672	  	    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2673	        || (TARGET_SSE && SSE_REGNO_P (regno)
2674		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2675    }
2676
2677  if (TARGET_MACHO)
2678    {
2679      if (SSE_REGNO_P (regno) && TARGET_SSE)
2680        return true;
2681    }
2682  else
2683    {
2684      if (TARGET_SSE && SSE_REGNO_P (regno)
2685          && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2686        return true;
2687    }
2688  /* RAX is used as hidden argument to va_arg functions.  */
2689  if (!regno)
2690    return true;
2691  for (i = 0; i < REGPARM_MAX; i++)
2692    if (regno == x86_64_int_parameter_registers[i])
2693      return true;
2694  return false;
2695}
2696
2697/* Return true if we do not know how to pass TYPE solely in registers.  */
2698
2699static bool
2700ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2701{
2702  if (must_pass_in_stack_var_size_or_pad (mode, type))
2703    return true;
2704
2705  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
2706     The layout_type routine is crafty and tries to trick us into passing
2707     currently unsupported vector types on the stack by using TImode.  */
2708  return (!TARGET_64BIT && mode == TImode
2709	  && type && TREE_CODE (type) != VECTOR_TYPE);
2710}
2711
2712/* Initialize a variable CUM of type CUMULATIVE_ARGS
2713   for a call to a function whose data type is FNTYPE.
2714   For a library call, FNTYPE is 0.  */
2715
2716void
2717init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
2718		      tree fntype,	/* tree ptr for function decl */
2719		      rtx libname,	/* SYMBOL_REF of library name or 0 */
2720		      tree fndecl)
2721{
2722  static CUMULATIVE_ARGS zero_cum;
2723  tree param, next_param;
2724
2725  if (TARGET_DEBUG_ARG)
2726    {
2727      fprintf (stderr, "\ninit_cumulative_args (");
2728      if (fntype)
2729	fprintf (stderr, "fntype code = %s, ret code = %s",
2730		 tree_code_name[(int) TREE_CODE (fntype)],
2731		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2732      else
2733	fprintf (stderr, "no fntype");
2734
2735      if (libname)
2736	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2737    }
2738
2739  *cum = zero_cum;
2740
2741  /* Set up the number of registers to use for passing arguments.  */
2742  cum->nregs = ix86_regparm;
2743  if (TARGET_SSE)
2744    cum->sse_nregs = SSE_REGPARM_MAX;
2745  if (TARGET_MMX)
2746    cum->mmx_nregs = MMX_REGPARM_MAX;
2747  cum->warn_sse = true;
2748  cum->warn_mmx = true;
2749  cum->maybe_vaarg = false;
2750
2751  /* Use the ecx and edx registers if the function has the fastcall attribute;
2752     otherwise look for regparm information.  */
2753  if (fntype && !TARGET_64BIT)
2754    {
2755      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2756	{
2757	  cum->nregs = 2;
2758	  cum->fastcall = 1;
2759	}
2760      else
2761	cum->nregs = ix86_function_regparm (fntype, fndecl);
2762    }
2763
2764  /* Set up the number of SSE registers used for passing SFmode
2765     and DFmode arguments.  Warn for mismatching ABI.  */
2766  cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2767
2768  /* Determine if this function has variable arguments.  This is
2769     indicated by the last argument being 'void_type_node' if there
2770     are no variable arguments.  If there are variable arguments, then
2771     we won't pass anything in registers in 32-bit mode. */
2772
2773  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2774    {
2775      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2776	   param != 0; param = next_param)
2777	{
2778	  next_param = TREE_CHAIN (param);
2779	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2780	    {
2781	      if (!TARGET_64BIT)
2782		{
2783		  cum->nregs = 0;
2784		  cum->sse_nregs = 0;
2785		  cum->mmx_nregs = 0;
2786		  cum->warn_sse = 0;
2787		  cum->warn_mmx = 0;
2788		  cum->fastcall = 0;
2789		  cum->float_in_sse = 0;
2790		}
2791	      cum->maybe_vaarg = true;
2792	    }
2793	}
2794    }
2795  if ((!fntype && !libname)
2796      || (fntype && !TYPE_ARG_TYPES (fntype)))
2797    cum->maybe_vaarg = true;
2798
2799  if (TARGET_DEBUG_ARG)
2800    fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2801
2802  return;
2803}
2804
2805/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
2806   But in the case of vector types, it is some vector mode.
2807
2808   When we have only some of our vector ISA extensions enabled, there
2809   are some modes for which vector_mode_supported_p is false.  For these
2810   modes, the generic vector support in gcc will choose some non-vector mode
2811   in order to implement the type.  By computing the natural mode, we'll
2812   select the proper ABI location for the operand and not depend on whatever
2813   the middle-end decides to do with these vector types.  */
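/* Illustrative sketch (not part of the original sources): with MMX disabled,
   a hypothetical

       typedef int v2si __attribute__ ((vector_size (8)));

   may be laid out by the middle end in a non-vector mode, but
   type_natural_mode still walks the MIN_MODE_VECTOR_INT chain, finds the
   mode with two SImode units, and reports V2SImode, so the ABI code below
   treats it as a vector.  */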
2814
2815static enum machine_mode
2816type_natural_mode (tree type)
2817{
2818  enum machine_mode mode = TYPE_MODE (type);
2819
2820  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2821    {
2822      HOST_WIDE_INT size = int_size_in_bytes (type);
2823      if ((size == 8 || size == 16)
2824	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
2825	  && TYPE_VECTOR_SUBPARTS (type) > 1)
2826	{
2827	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2828
2829	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2830	    mode = MIN_MODE_VECTOR_FLOAT;
2831	  else
2832	    mode = MIN_MODE_VECTOR_INT;
2833
2834	  /* Get the mode which has this inner mode and number of units.  */
2835	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2836	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2837		&& GET_MODE_INNER (mode) == innermode)
2838	      return mode;
2839
2840	  gcc_unreachable ();
2841	}
2842    }
2843
2844  return mode;
2845}
2846
2847/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
2848   this may not agree with the mode that the type system has chosen for the
2849   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
2850   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
2851
2852static rtx
2853gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2854		     unsigned int regno)
2855{
2856  rtx tmp;
2857
2858  if (orig_mode != BLKmode)
2859    tmp = gen_rtx_REG (orig_mode, regno);
2860  else
2861    {
2862      tmp = gen_rtx_REG (mode, regno);
2863      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2864      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2865    }
2866
2867  return tmp;
2868}
2869
2870/* x86-64 register passing implementation.  See the x86-64 ABI for details.  The
2871   goal of this code is to classify each eightbyte of the incoming argument by
2872   register class and assign registers accordingly.  */
2873
2874/* Return the union class of CLASS1 and CLASS2.
2875   See the x86-64 PS ABI for details.  */
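/* A small worked example (a sketch added for illustration): for a
   hypothetical

       union u { int i; float f; };

   the int member classifies as X86_64_INTEGERSI_CLASS and the float member
   as X86_64_SSESF_CLASS; rule #4 below merges them into
   X86_64_INTEGERSI_CLASS, so the union is passed in an integer register.  */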
2876
2877static enum x86_64_reg_class
2878merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2879{
2880  /* Rule #1: If both classes are equal, this is the resulting class.  */
2881  if (class1 == class2)
2882    return class1;
2883
2884  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2885     the other class.  */
2886  if (class1 == X86_64_NO_CLASS)
2887    return class2;
2888  if (class2 == X86_64_NO_CLASS)
2889    return class1;
2890
2891  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
2892  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2893    return X86_64_MEMORY_CLASS;
2894
2895  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
2896  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2897      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2898    return X86_64_INTEGERSI_CLASS;
2899  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2900      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2901    return X86_64_INTEGER_CLASS;
2902
2903  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2904     MEMORY is used.  */
2905  if (class1 == X86_64_X87_CLASS
2906      || class1 == X86_64_X87UP_CLASS
2907      || class1 == X86_64_COMPLEX_X87_CLASS
2908      || class2 == X86_64_X87_CLASS
2909      || class2 == X86_64_X87UP_CLASS
2910      || class2 == X86_64_COMPLEX_X87_CLASS)
2911    return X86_64_MEMORY_CLASS;
2912
2913  /* Rule #6: Otherwise class SSE is used.  */
2914  return X86_64_SSE_CLASS;
2915}
2916
2917/* Classify the argument of type TYPE and mode MODE.
2918   CLASSES will be filled by the register class used to pass each word
2919   of the operand.  The number of words is returned.  In case the parameter
2920   should be passed in memory, 0 is returned. As a special case for zero
2921   sized containers, classes[0] will be NO_CLASS and 1 is returned.
2922
2923   BIT_OFFSET is used internally for handling records; it gives the offset of
2924   the argument within the record, in bits modulo 256, to avoid overflow cases.
2925
2926   See the x86-64 PS ABI for details.
2927*/
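/* Worked example (added as an illustration, not taken from the psABI text):
   for a hypothetical

       struct s { double d; int a; int b; };

   the 16 bytes split into two eightbytes.  The first holds d and classifies
   as X86_64_SSEDF_CLASS; the second holds a and b and classifies as
   X86_64_INTEGER_CLASS.  classify_argument returns 2 with those classes, so
   the struct travels in one SSE register and one integer register.  */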
2928
2929static int
2930classify_argument (enum machine_mode mode, tree type,
2931		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2932{
2933  HOST_WIDE_INT bytes =
2934    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2935  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2936
2937  /* Variable sized entities are always passed/returned in memory.  */
2938  if (bytes < 0)
2939    return 0;
2940
2941  if (mode != VOIDmode
2942      && targetm.calls.must_pass_in_stack (mode, type))
2943    return 0;
2944
2945  if (type && AGGREGATE_TYPE_P (type))
2946    {
2947      int i;
2948      tree field;
2949      enum x86_64_reg_class subclasses[MAX_CLASSES];
2950
2951      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
2952      if (bytes > 16)
2953	return 0;
2954
2955      for (i = 0; i < words; i++)
2956	classes[i] = X86_64_NO_CLASS;
2957
2958      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
2959	 signal the memory class, so handle it as a special case.  */
2960      if (!words)
2961	{
2962	  classes[0] = X86_64_NO_CLASS;
2963	  return 1;
2964	}
2965
2966      /* Classify each field of record and merge classes.  */
2967      switch (TREE_CODE (type))
2968	{
2969	case RECORD_TYPE:
2970	  /* For classes first merge in the field of the subclasses.  */
2971	  if (TYPE_BINFO (type))
2972	    {
2973	      tree binfo, base_binfo;
2974	      int basenum;
2975
2976	      for (binfo = TYPE_BINFO (type), basenum = 0;
2977		   BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2978		{
2979		   int num;
2980		   int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2981		   tree type = BINFO_TYPE (base_binfo);
2982
2983		   num = classify_argument (TYPE_MODE (type),
2984					    type, subclasses,
2985					    (offset + bit_offset) % 256);
2986		   if (!num)
2987		     return 0;
2988		   for (i = 0; i < num; i++)
2989		     {
2990		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
2991		       classes[i + pos] =
2992			 merge_classes (subclasses[i], classes[i + pos]);
2993		     }
2994		}
2995	    }
2996	  /* And now merge the fields of the structure.  */
2997	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2998	    {
2999	      if (TREE_CODE (field) == FIELD_DECL)
3000		{
3001		  int num;
3002
3003		  if (TREE_TYPE (field) == error_mark_node)
3004		    continue;
3005
3006		  /* Bitfields are always classified as integer.  Handle them
3007		     early, since later code would consider them to be
3008		     misaligned integers.  */
3009		  if (DECL_BIT_FIELD (field))
3010		    {
3011		      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3012			   i < ((int_bit_position (field) + (bit_offset % 64))
3013			        + tree_low_cst (DECL_SIZE (field), 0)
3014				+ 63) / 8 / 8; i++)
3015			classes[i] =
3016			  merge_classes (X86_64_INTEGER_CLASS,
3017					 classes[i]);
3018		    }
3019		  else
3020		    {
3021		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3022					       TREE_TYPE (field), subclasses,
3023					       (int_bit_position (field)
3024						+ bit_offset) % 256);
3025		      if (!num)
3026			return 0;
3027		      for (i = 0; i < num; i++)
3028			{
3029			  int pos =
3030			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3031			  classes[i + pos] =
3032			    merge_classes (subclasses[i], classes[i + pos]);
3033			}
3034		    }
3035		}
3036	    }
3037	  break;
3038
3039	case ARRAY_TYPE:
3040	  /* Arrays are handled as small records.  */
3041	  {
3042	    int num;
3043	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3044				     TREE_TYPE (type), subclasses, bit_offset);
3045	    if (!num)
3046	      return 0;
3047
3048	    /* The partial classes are now full classes.  */
3049	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3050	      subclasses[0] = X86_64_SSE_CLASS;
3051	    if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3052	      subclasses[0] = X86_64_INTEGER_CLASS;
3053
3054	    for (i = 0; i < words; i++)
3055	      classes[i] = subclasses[i % num];
3056
3057	    break;
3058	  }
3059	case UNION_TYPE:
3060	case QUAL_UNION_TYPE:
3061	  /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
3063
3064	  /* Unions are not derived.  */
3065	  gcc_assert (!TYPE_BINFO (type)
3066		      || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3067	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3068	    {
3069	      if (TREE_CODE (field) == FIELD_DECL)
3070		{
3071		  int num;
3072
3073		  if (TREE_TYPE (field) == error_mark_node)
3074		    continue;
3075
3076		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3077					   TREE_TYPE (field), subclasses,
3078					   bit_offset);
3079		  if (!num)
3080		    return 0;
3081		  for (i = 0; i < num; i++)
3082		    classes[i] = merge_classes (subclasses[i], classes[i]);
3083		}
3084	    }
3085	  break;
3086
3087	default:
3088	  gcc_unreachable ();
3089	}
3090
3091      /* Final merger cleanup.  */
3092      for (i = 0; i < words; i++)
3093	{
3094	  /* If one class is MEMORY, everything should be passed in
3095	     memory.  */
3096	  if (classes[i] == X86_64_MEMORY_CLASS)
3097	    return 0;
3098
3099	  /* The X86_64_SSEUP_CLASS should always be preceded by
3100	     X86_64_SSE_CLASS.  */
3101	  if (classes[i] == X86_64_SSEUP_CLASS
3102	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3103	    classes[i] = X86_64_SSE_CLASS;
3104
3105	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
3106	  if (classes[i] == X86_64_X87UP_CLASS
3107	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3108	    classes[i] = X86_64_SSE_CLASS;
3109	}
3110      return words;
3111    }
3112
3113  /* Compute alignment needed.  We align all types to natural boundaries with
3114     the exception of XFmode, which is aligned to 64 bits.  */
3115  if (mode != VOIDmode && mode != BLKmode)
3116    {
3117      int mode_alignment = GET_MODE_BITSIZE (mode);
3118
3119      if (mode == XFmode)
3120	mode_alignment = 128;
3121      else if (mode == XCmode)
3122	mode_alignment = 256;
3123      if (COMPLEX_MODE_P (mode))
3124	mode_alignment /= 2;
3125      /* Misaligned fields are always returned in memory.  */
3126      if (bit_offset % mode_alignment)
3127	return 0;
3128    }
3129
3130  /* For V1xx modes, just use the base mode.  */
3131  if (VECTOR_MODE_P (mode)
3132      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3133    mode = GET_MODE_INNER (mode);
3134
3135  /* Classification of atomic types.  */
3136  switch (mode)
3137    {
3138    case SDmode:
3139    case DDmode:
3140      classes[0] = X86_64_SSE_CLASS;
3141      return 1;
3142    case TDmode:
3143      classes[0] = X86_64_SSE_CLASS;
3144      classes[1] = X86_64_SSEUP_CLASS;
3145      return 2;
3146    case DImode:
3147    case SImode:
3148    case HImode:
3149    case QImode:
3150    case CSImode:
3151    case CHImode:
3152    case CQImode:
3153      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3154	classes[0] = X86_64_INTEGERSI_CLASS;
3155      else
3156	classes[0] = X86_64_INTEGER_CLASS;
3157      return 1;
3158    case CDImode:
3159    case TImode:
3160      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3161      return 2;
3162    case CTImode:
3163      return 0;
3164    case SFmode:
3165      if (!(bit_offset % 64))
3166	classes[0] = X86_64_SSESF_CLASS;
3167      else
3168	classes[0] = X86_64_SSE_CLASS;
3169      return 1;
3170    case DFmode:
3171      classes[0] = X86_64_SSEDF_CLASS;
3172      return 1;
3173    case XFmode:
3174      classes[0] = X86_64_X87_CLASS;
3175      classes[1] = X86_64_X87UP_CLASS;
3176      return 2;
3177    case TFmode:
3178      classes[0] = X86_64_SSE_CLASS;
3179      classes[1] = X86_64_SSEUP_CLASS;
3180      return 2;
3181    case SCmode:
3182      classes[0] = X86_64_SSE_CLASS;
3183      return 1;
3184    case DCmode:
3185      classes[0] = X86_64_SSEDF_CLASS;
3186      classes[1] = X86_64_SSEDF_CLASS;
3187      return 2;
3188    case XCmode:
3189      classes[0] = X86_64_COMPLEX_X87_CLASS;
3190      return 1;
3191    case TCmode:
3192      /* This mode is larger than 16 bytes.  */
3193      return 0;
3194    case V4SFmode:
3195    case V4SImode:
3196    case V16QImode:
3197    case V8HImode:
3198    case V2DFmode:
3199    case V2DImode:
3200      classes[0] = X86_64_SSE_CLASS;
3201      classes[1] = X86_64_SSEUP_CLASS;
3202      return 2;
3203    case V2SFmode:
3204    case V2SImode:
3205    case V4HImode:
3206    case V8QImode:
3207      classes[0] = X86_64_SSE_CLASS;
3208      return 1;
3209    case BLKmode:
3210    case VOIDmode:
3211      return 0;
3212    default:
3213      gcc_assert (VECTOR_MODE_P (mode));
3214
3215      if (bytes > 16)
3216	return 0;
3217
3218      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3219
3220      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3221	classes[0] = X86_64_INTEGERSI_CLASS;
3222      else
3223	classes[0] = X86_64_INTEGER_CLASS;
3224      classes[1] = X86_64_INTEGER_CLASS;
3225      return 1 + (bytes > 8);
3226    }
3227}
3228
3229/* Examine the argument and return the number of registers required in each
3230   class.  Return 0 iff the parameter should be passed in memory.  */
3231static int
3232examine_argument (enum machine_mode mode, tree type, int in_return,
3233		  int *int_nregs, int *sse_nregs)
3234{
3235  enum x86_64_reg_class class[MAX_CLASSES];
3236  int n = classify_argument (mode, type, class, 0);
3237
3238  *int_nregs = 0;
3239  *sse_nregs = 0;
3240  if (!n)
3241    return 0;
3242  for (n--; n >= 0; n--)
3243    switch (class[n])
3244      {
3245      case X86_64_INTEGER_CLASS:
3246      case X86_64_INTEGERSI_CLASS:
3247	(*int_nregs)++;
3248	break;
3249      case X86_64_SSE_CLASS:
3250      case X86_64_SSESF_CLASS:
3251      case X86_64_SSEDF_CLASS:
3252	(*sse_nregs)++;
3253	break;
3254      case X86_64_NO_CLASS:
3255      case X86_64_SSEUP_CLASS:
3256	break;
3257      case X86_64_X87_CLASS:
3258      case X86_64_X87UP_CLASS:
3259	if (!in_return)
3260	  return 0;
3261	break;
3262      case X86_64_COMPLEX_X87_CLASS:
3263	return in_return ? 2 : 0;
3264      case X86_64_MEMORY_CLASS:
3265	gcc_unreachable ();
3266      }
3267  return 1;
3268}
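/* For instance (an illustrative note, not original commentary):
   examine_argument (DCmode, NULL_TREE, 0, &i, &s) classifies _Complex double
   as two X86_64_SSEDF_CLASS eightbytes, so it sets *int_nregs = 0 and
   *sse_nregs = 2 and returns nonzero; a type classified into memory would
   instead return 0.  */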
3269
3270/* Construct container for the argument used by GCC interface.  See
3271   FUNCTION_ARG for the detailed description.  */
3272
3273static rtx
3274construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3275		     tree type, int in_return, int nintregs, int nsseregs,
3276		     const int *intreg, int sse_regno)
3277{
3278  /* The following variables hold the static issued_error state.  */
3279  static bool issued_sse_arg_error;
3280  static bool issued_sse_ret_error;
3281  static bool issued_x87_ret_error;
3282
3283  enum machine_mode tmpmode;
3284  int bytes =
3285    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3286  enum x86_64_reg_class class[MAX_CLASSES];
3287  int n;
3288  int i;
3289  int nexps = 0;
3290  int needed_sseregs, needed_intregs;
3291  rtx exp[MAX_CLASSES];
3292  rtx ret;
3293
3294  n = classify_argument (mode, type, class, 0);
3295  if (TARGET_DEBUG_ARG)
3296    {
3297      if (!n)
3298	fprintf (stderr, "Memory class\n");
3299      else
3300	{
3301	  fprintf (stderr, "Classes:");
3302	  for (i = 0; i < n; i++)
3303	    {
3304	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3305	    }
3306	   fprintf (stderr, "\n");
3307	}
3308    }
3309  if (!n)
3310    return NULL;
3311  if (!examine_argument (mode, type, in_return, &needed_intregs,
3312			 &needed_sseregs))
3313    return NULL;
3314  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3315    return NULL;
3316
3317  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
3318     some less clueful developer tries to use floating-point anyway.  */
3319  if (needed_sseregs && !TARGET_SSE)
3320    {
3321      if (in_return)
3322	{
3323	  if (!issued_sse_ret_error)
3324	    {
3325	      error ("SSE register return with SSE disabled");
3326	      issued_sse_ret_error = true;
3327	    }
3328	}
3329      else if (!issued_sse_arg_error)
3330	{
3331	  error ("SSE register argument with SSE disabled");
3332	  issued_sse_arg_error = true;
3333	}
3334      return NULL;
3335    }
3336
3337  /* Likewise, error if the ABI requires us to return values in the
3338     x87 registers and the user specified -mno-80387.  */
3339  if (!TARGET_80387 && in_return)
3340    for (i = 0; i < n; i++)
3341      if (class[i] == X86_64_X87_CLASS
3342	  || class[i] == X86_64_X87UP_CLASS
3343	  || class[i] == X86_64_COMPLEX_X87_CLASS)
3344	{
3345	  if (!issued_x87_ret_error)
3346	    {
3347	      error ("x87 register return with x87 disabled");
3348	      issued_x87_ret_error = true;
3349	    }
3350	  return NULL;
3351	}
3352
3353  /* First construct simple cases.  Avoid SCmode, since we want to use
3354     a single register to pass this type.  */
3355  if (n == 1 && mode != SCmode)
3356    switch (class[0])
3357      {
3358      case X86_64_INTEGER_CLASS:
3359      case X86_64_INTEGERSI_CLASS:
3360	return gen_rtx_REG (mode, intreg[0]);
3361      case X86_64_SSE_CLASS:
3362      case X86_64_SSESF_CLASS:
3363      case X86_64_SSEDF_CLASS:
3364	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3365      case X86_64_X87_CLASS:
3366      case X86_64_COMPLEX_X87_CLASS:
3367	return gen_rtx_REG (mode, FIRST_STACK_REG);
3368      case X86_64_NO_CLASS:
3369	/* Zero sized array, struct or class.  */
3370	return NULL;
3371      default:
3372	gcc_unreachable ();
3373      }
3374  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3375      && mode != BLKmode)
3376    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3377  if (n == 2
3378      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3379    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3380  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3381      && class[1] == X86_64_INTEGER_CLASS
3382      && (mode == CDImode || mode == TImode || mode == TFmode)
3383      && intreg[0] + 1 == intreg[1])
3384    return gen_rtx_REG (mode, intreg[0]);
3385
3386  /* Otherwise figure out the entries of the PARALLEL.  */
3387  for (i = 0; i < n; i++)
3388    {
3389      switch (class[i])
3390        {
3391	  case X86_64_NO_CLASS:
3392	    break;
3393	  case X86_64_INTEGER_CLASS:
3394	  case X86_64_INTEGERSI_CLASS:
3395	    /* Merge TImodes on aligned occasions here too.  */
3396	    if (i * 8 + 8 > bytes)
3397	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3398	    else if (class[i] == X86_64_INTEGERSI_CLASS)
3399	      tmpmode = SImode;
3400	    else
3401	      tmpmode = DImode;
3402	    /* We've requested 24 bytes for which we have no mode.  Use DImode.  */
3403	    if (tmpmode == BLKmode)
3404	      tmpmode = DImode;
3405	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3406					       gen_rtx_REG (tmpmode, *intreg),
3407					       GEN_INT (i*8));
3408	    intreg++;
3409	    break;
3410	  case X86_64_SSESF_CLASS:
3411	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3412					       gen_rtx_REG (SFmode,
3413							    SSE_REGNO (sse_regno)),
3414					       GEN_INT (i*8));
3415	    sse_regno++;
3416	    break;
3417	  case X86_64_SSEDF_CLASS:
3418	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3419					       gen_rtx_REG (DFmode,
3420							    SSE_REGNO (sse_regno)),
3421					       GEN_INT (i*8));
3422	    sse_regno++;
3423	    break;
3424	  case X86_64_SSE_CLASS:
3425	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3426	      tmpmode = TImode;
3427	    else
3428	      tmpmode = DImode;
3429	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3430					       gen_rtx_REG (tmpmode,
3431							    SSE_REGNO (sse_regno)),
3432					       GEN_INT (i*8));
3433	    if (tmpmode == TImode)
3434	      i++;
3435	    sse_regno++;
3436	    break;
3437	  default:
3438	    gcc_unreachable ();
3439	}
3440    }
3441
3442  /* Empty aligned struct, union or class.  */
3443  if (nexps == 0)
3444    return NULL;
3445
3446  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3447  for (i = 0; i < nexps; i++)
3448    XVECEXP (ret, 0, i) = exp [i];
3449  return ret;
3450}
3451
3452/* Update the data in CUM to advance over an argument
3453   of mode MODE and data type TYPE.
3454   (TYPE is null for libcalls where that information may not be available.)  */
3455
3456void
3457function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3458		      tree type, int named)
3459{
3460  int bytes =
3461    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3462  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3463
3464  if (type)
3465    mode = type_natural_mode (type);
3466
3467  if (TARGET_DEBUG_ARG)
3468    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3469	     "mode=%s, named=%d)\n\n",
3470	     words, cum->words, cum->nregs, cum->sse_nregs,
3471	     GET_MODE_NAME (mode), named);
3472
3473  if (TARGET_64BIT)
3474    {
3475      int int_nregs, sse_nregs;
3476      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3477	cum->words += words;
3478      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3479	{
3480	  cum->nregs -= int_nregs;
3481	  cum->sse_nregs -= sse_nregs;
3482	  cum->regno += int_nregs;
3483	  cum->sse_regno += sse_nregs;
3484	}
3485      else
3486	cum->words += words;
3487    }
3488  else
3489    {
3490      switch (mode)
3491	{
3492	default:
3493	  break;
3494
3495	case BLKmode:
3496	  if (bytes < 0)
3497	    break;
3498	  /* FALLTHRU */
3499
3500	case DImode:
3501	case SImode:
3502	case HImode:
3503	case QImode:
3504	  cum->words += words;
3505	  cum->nregs -= words;
3506	  cum->regno += words;
3507
3508	  if (cum->nregs <= 0)
3509	    {
3510	      cum->nregs = 0;
3511	      cum->regno = 0;
3512	    }
3513	  break;
3514
3515	case DFmode:
3516	  if (cum->float_in_sse < 2)
3517	    break;
3518	case SFmode:
3519	  if (cum->float_in_sse < 1)
3520	    break;
3521	  /* FALLTHRU */
3522
3523	case TImode:
3524	case V16QImode:
3525	case V8HImode:
3526	case V4SImode:
3527	case V2DImode:
3528	case V4SFmode:
3529	case V2DFmode:
3530	  if (!type || !AGGREGATE_TYPE_P (type))
3531	    {
3532	      cum->sse_words += words;
3533	      cum->sse_nregs -= 1;
3534	      cum->sse_regno += 1;
3535	      if (cum->sse_nregs <= 0)
3536		{
3537		  cum->sse_nregs = 0;
3538		  cum->sse_regno = 0;
3539		}
3540	    }
3541	  break;
3542
3543	case V8QImode:
3544	case V4HImode:
3545	case V2SImode:
3546	case V2SFmode:
3547	  if (!type || !AGGREGATE_TYPE_P (type))
3548	    {
3549	      cum->mmx_words += words;
3550	      cum->mmx_nregs -= 1;
3551	      cum->mmx_regno += 1;
3552	      if (cum->mmx_nregs <= 0)
3553		{
3554		  cum->mmx_nregs = 0;
3555		  cum->mmx_regno = 0;
3556		}
3557	    }
3558	  break;
3559	}
3560    }
3561}
3562
3563/* Define where to put the arguments to a function.
3564   Value is zero to push the argument on the stack,
3565   or a hard register in which to store the argument.
3566
3567   MODE is the argument's machine mode.
3568   TYPE is the data type of the argument (as a tree).
3569    This is null for libcalls where that information may
3570    not be available.
3571   CUM is a variable of type CUMULATIVE_ARGS which gives info about
3572    the preceding args and about the function being called.
3573   NAMED is nonzero if this argument is a named parameter
3574    (otherwise it is an extra parameter matching an ellipsis).  */
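/* Example of the 32-bit path below (a sketch for illustration): for the first
   SImode argument of a fastcall function, cum->fastcall is set, so regno 0 is
   remapped to 2 (ECX in this backend's register numbering) and the argument
   comes back as a REG for ECX; the second one lands in EDX, and anything
   beyond the register count is pushed on the stack (NULL return).  */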
3575
3576rtx
3577function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3578	      tree type, int named)
3579{
3580  enum machine_mode mode = orig_mode;
3581  rtx ret = NULL_RTX;
3582  int bytes =
3583    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3584  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3585  static bool warnedsse, warnedmmx;
3586
3587  /* To simplify the code below, represent vector types with a vector mode
3588     even if MMX/SSE are not active.  */
3589  if (type && TREE_CODE (type) == VECTOR_TYPE)
3590    mode = type_natural_mode (type);
3591
3592  /* Handle a hidden AL argument containing the number of SSE registers used
3593     by varargs x86-64 functions.  For the i386 ABI just return constm1_rtx to
3594     avoid any AL settings.  */
3595  if (mode == VOIDmode)
3596    {
3597      if (TARGET_64BIT)
3598	return GEN_INT (cum->maybe_vaarg
3599			? (cum->sse_nregs < 0
3600			   ? SSE_REGPARM_MAX
3601			   : cum->sse_regno)
3602			: -1);
3603      else
3604	return constm1_rtx;
3605    }
3606  if (TARGET_64BIT)
3607    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3608			       cum->sse_nregs,
3609			       &x86_64_int_parameter_registers [cum->regno],
3610			       cum->sse_regno);
3611  else
3612    switch (mode)
3613      {
3614	/* For now, pass fp/complex values on the stack.  */
3615      default:
3616	break;
3617
3618      case BLKmode:
3619	if (bytes < 0)
3620	  break;
3621	/* FALLTHRU */
3622      case DImode:
3623      case SImode:
3624      case HImode:
3625      case QImode:
3626	if (words <= cum->nregs)
3627	  {
3628	    int regno = cum->regno;
3629
3630	    /* Fastcall allocates the first two DWORD (SImode) or
3631	       smaller arguments to ECX and EDX.  */
3632	    if (cum->fastcall)
3633	      {
3634	        if (mode == BLKmode || mode == DImode)
3635	          break;
3636
3637	        /* ECX, not EAX, is the first allocated register.  */
3638	        if (regno == 0)
3639		  regno = 2;
3640	      }
3641	    ret = gen_rtx_REG (mode, regno);
3642	  }
3643	break;
3644      case DFmode:
3645	if (cum->float_in_sse < 2)
3646	  break;
3647      case SFmode:
3648	if (cum->float_in_sse < 1)
3649	  break;
3650	/* FALLTHRU */
3651      case TImode:
3652      case V16QImode:
3653      case V8HImode:
3654      case V4SImode:
3655      case V2DImode:
3656      case V4SFmode:
3657      case V2DFmode:
3658	if (!type || !AGGREGATE_TYPE_P (type))
3659	  {
3660	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3661	      {
3662		warnedsse = true;
3663		warning (0, "SSE vector argument without SSE enabled "
3664			 "changes the ABI");
3665	      }
3666	    if (cum->sse_nregs)
3667	      ret = gen_reg_or_parallel (mode, orig_mode,
3668					 cum->sse_regno + FIRST_SSE_REG);
3669	  }
3670	break;
3671      case V8QImode:
3672      case V4HImode:
3673      case V2SImode:
3674      case V2SFmode:
3675	if (!type || !AGGREGATE_TYPE_P (type))
3676	  {
3677	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3678	      {
3679		warnedmmx = true;
3680		warning (0, "MMX vector argument without MMX enabled "
3681			 "changes the ABI");
3682	      }
3683	    if (cum->mmx_nregs)
3684	      ret = gen_reg_or_parallel (mode, orig_mode,
3685					 cum->mmx_regno + FIRST_MMX_REG);
3686	  }
3687	break;
3688      }
3689
3690  if (TARGET_DEBUG_ARG)
3691    {
3692      fprintf (stderr,
3693	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3694	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3695
3696      if (ret)
3697	print_simple_rtl (stderr, ret);
3698      else
3699	fprintf (stderr, ", stack");
3700
3701      fprintf (stderr, " )\n");
3702    }
3703
3704  return ret;
3705}
3706
3707/* A C expression that indicates when an argument must be passed by
3708   reference.  If nonzero for an argument, a copy of that argument is
3709   made in memory and a pointer to the argument is passed instead of
3710   the argument itself.  The pointer is passed in whatever way is
3711   appropriate for passing a pointer to that type.  */
3712
3713static bool
3714ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3715			enum machine_mode mode ATTRIBUTE_UNUSED,
3716			tree type, bool named ATTRIBUTE_UNUSED)
3717{
3718  if (!TARGET_64BIT)
3719    return 0;
3720
3721  if (type && int_size_in_bytes (type) == -1)
3722    {
3723      if (TARGET_DEBUG_ARG)
3724	fprintf (stderr, "function_arg_pass_by_reference\n");
3725      return 1;
3726    }
3727
3728  return 0;
3729}
3730
3731/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
3732   passing ABI.  Only called if TARGET_SSE.  */
3733static bool
3734contains_128bit_aligned_vector_p (tree type)
3735{
3736  enum machine_mode mode = TYPE_MODE (type);
3737  if (SSE_REG_MODE_P (mode)
3738      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3739    return true;
3740  if (TYPE_ALIGN (type) < 128)
3741    return false;
3742
3743  if (AGGREGATE_TYPE_P (type))
3744    {
3745      /* Walk the aggregates recursively.  */
3746      switch (TREE_CODE (type))
3747	{
3748	case RECORD_TYPE:
3749	case UNION_TYPE:
3750	case QUAL_UNION_TYPE:
3751	  {
3752	    tree field;
3753
3754	    if (TYPE_BINFO (type))
3755	      {
3756		tree binfo, base_binfo;
3757		int i;
3758
3759		for (binfo = TYPE_BINFO (type), i = 0;
3760		     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3761		  if (contains_128bit_aligned_vector_p
3762		      (BINFO_TYPE (base_binfo)))
3763		    return true;
3764	      }
3765	    /* And now merge the fields of the structure.  */
3766	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3767	      {
3768		if (TREE_CODE (field) == FIELD_DECL
3769		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3770		  return true;
3771	      }
3772	    break;
3773	  }
3774
3775	case ARRAY_TYPE:
3776	  /* Just for use if some languages pass arrays by value.  */
3777	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3778	    return true;
3779	  break;
3780
3781	default:
3782	  gcc_unreachable ();
3783	}
3784    }
3785  return false;
3786}
3787
3788/* Gives the alignment boundary, in bits, of an argument with the
3789   specified mode and type.  */
3790
3791int
3792ix86_function_arg_boundary (enum machine_mode mode, tree type)
3793{
3794  int align;
3795  if (type)
3796    align = TYPE_ALIGN (type);
3797  else
3798    align = GET_MODE_ALIGNMENT (mode);
3799  if (align < PARM_BOUNDARY)
3800    align = PARM_BOUNDARY;
3801  if (!TARGET_64BIT)
3802    {
3803      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
3804	 make an exception for SSE modes since these require 128bit
3805	 alignment.
3806
3807	 The handling here differs from field_alignment.  ICC aligns MMX
3808	 arguments to 4 byte boundaries, while structure fields are aligned
3809	 to 8 byte boundaries.  */
3810      if (!TARGET_SSE)
3811	align = PARM_BOUNDARY;
3812      else if (!type)
3813	{
3814	  if (!SSE_REG_MODE_P (mode))
3815	    align = PARM_BOUNDARY;
3816	}
3817      else
3818	{
3819	  if (!contains_128bit_aligned_vector_p (type))
3820	    align = PARM_BOUNDARY;
3821	}
3822    }
3823  if (align > 128)
3824    align = 128;
3825  return align;
3826}
3827
3828/* Return true if N is a possible register number of function value.  */
3829bool
3830ix86_function_value_regno_p (int regno)
3831{
3832  if (TARGET_MACHO)
3833    {
3834      if (!TARGET_64BIT)
3835        {
3836          return ((regno) == 0
3837                  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3838                  || ((regno) == FIRST_SSE_REG && TARGET_SSE));
3839        }
3840      return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
3841              || ((regno) == FIRST_SSE_REG && TARGET_SSE)
3842              || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
3843      }
3844  else
3845    {
3846      if (regno == 0
3847          || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3848          || (regno == FIRST_SSE_REG && TARGET_SSE))
3849        return true;
3850
3851      if (!TARGET_64BIT
3852          && (regno == FIRST_MMX_REG && TARGET_MMX))
3853	    return true;
3854
3855      return false;
3856    }
3857}
3858
3859/* Define how to find the value returned by a function.
3860   VALTYPE is the data type of the value (as a tree).
3861   If the precise function being called is known, FUNC is its FUNCTION_DECL;
3862   otherwise, FUNC is 0.  */
3863rtx
3864ix86_function_value (tree valtype, tree fntype_or_decl,
3865		     bool outgoing ATTRIBUTE_UNUSED)
3866{
3867  enum machine_mode natmode = type_natural_mode (valtype);
3868
3869  if (TARGET_64BIT)
3870    {
3871      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3872				     1, REGPARM_MAX, SSE_REGPARM_MAX,
3873				     x86_64_int_return_registers, 0);
3874      /* For zero sized structures, construct_container returns NULL, but we
3875	 need to keep the rest of the compiler happy by returning a meaningful value.  */
3876      if (!ret)
3877	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3878      return ret;
3879    }
3880  else
3881    {
3882      tree fn = NULL_TREE, fntype;
3883      if (fntype_or_decl
3884	  && DECL_P (fntype_or_decl))
3885        fn = fntype_or_decl;
3886      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3887      return gen_rtx_REG (TYPE_MODE (valtype),
3888			  ix86_value_regno (natmode, fn, fntype));
3889    }
3890}
3891
3892/* Return true iff type is returned in memory.  */
3893int
3894ix86_return_in_memory (tree type)
3895{
3896  int needed_intregs, needed_sseregs, size;
3897  enum machine_mode mode = type_natural_mode (type);
3898
3899  if (TARGET_64BIT)
3900    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3901
3902  if (mode == BLKmode)
3903    return 1;
3904
3905  size = int_size_in_bytes (type);
3906
3907  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3908    return 0;
3909
3910  if (VECTOR_MODE_P (mode) || mode == TImode)
3911    {
3912      /* User-created vectors small enough to fit in EAX.  */
3913      if (size < 8)
3914	return 0;
3915
3916      /* MMX/3dNow values are returned in MM0,
3917	 except when it doesn't exist.  */
3918      if (size == 8)
3919	return (TARGET_MMX ? 0 : 1);
3920
3921      /* SSE values are returned in XMM0, except when it doesn't exist.  */
3922      if (size == 16)
3923	return (TARGET_SSE ? 0 : 1);
3924    }
3925
3926  if (mode == XFmode)
3927    return 0;
3928
3929  if (mode == TDmode)
3930    return 1;
3931
3932  if (size > 12)
3933    return 1;
3934  return 0;
3935}
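/* Quick illustration of the 32-bit rules above (added note): a 16-byte
   __m128 value is returned in XMM0 only when TARGET_SSE is set (otherwise in
   memory), an 8-byte MMX vector likewise depends on TARGET_MMX, and a
   BLKmode aggregate or anything larger than 12 bytes goes to memory.  */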
3936
3937/* When returning SSE vector types, we have a choice of either
3938     (1) being ABI incompatible with a -march switch, or
3939     (2) generating an error.
3940   Given no good solution, I think the safest thing is one warning.
3941   The user won't be able to use -Werror, but....
3942
3943   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3944   called in response to actually generating a caller or callee that
3945   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
3946   via aggregate_value_p for general type probing from tree-ssa.  */
3947
3948static rtx
3949ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3950{
3951  static bool warnedsse, warnedmmx;
3952
3953  if (type)
3954    {
3955      /* Look at the return type of the function, not the function type.  */
3956      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3957
3958      if (!TARGET_SSE && !warnedsse)
3959	{
3960	  if (mode == TImode
3961	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3962	    {
3963	      warnedsse = true;
3964	      warning (0, "SSE vector return without SSE enabled "
3965		       "changes the ABI");
3966	    }
3967	}
3968
3969      if (!TARGET_MMX && !warnedmmx)
3970	{
3971	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3972	    {
3973	      warnedmmx = true;
3974	      warning (0, "MMX vector return without MMX enabled "
3975		       "changes the ABI");
3976	    }
3977	}
3978    }
3979
3980  return NULL;
3981}
3982
3983/* Define how to find the value returned by a library function
3984   assuming the value has mode MODE.  */
3985rtx
3986ix86_libcall_value (enum machine_mode mode)
3987{
3988  if (TARGET_64BIT)
3989    {
3990      switch (mode)
3991	{
3992	case SFmode:
3993	case SCmode:
3994	case DFmode:
3995	case DCmode:
3996	case TFmode:
3997	case SDmode:
3998	case DDmode:
3999	case TDmode:
4000	  return gen_rtx_REG (mode, FIRST_SSE_REG);
4001	case XFmode:
4002	case XCmode:
4003	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4004	case TCmode:
4005	  return NULL;
4006	default:
4007	  return gen_rtx_REG (mode, 0);
4008	}
4009    }
4010  else
4011    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4012}
4013
4014/* Given a mode, return the register to use for a return value.  */
4015
4016static int
4017ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4018{
4019  gcc_assert (!TARGET_64BIT);
4020
4021  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4022     we normally prevent this case when mmx is not available.  However
4023     some ABIs may require the result to be returned like DImode.  */
4024  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4025    return TARGET_MMX ? FIRST_MMX_REG : 0;
4026
4027  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
4028     we prevent this case when sse is not available.  However some ABIs
4029     may require the result to be returned like integer TImode.  */
4030  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4031    return TARGET_SSE ? FIRST_SSE_REG : 0;
4032
4033  /* Decimal floating point values can go in %eax, unlike other float modes.  */
4034  if (DECIMAL_FLOAT_MODE_P (mode))
4035    return 0;
4036
4037  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
4038  if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4039    return 0;
4040
4041  /* Floating point return values in %st(0), except for local functions when
4042     SSE math is enabled or for functions with sseregparm attribute.  */
4043  if ((func || fntype)
4044      && (mode == SFmode || mode == DFmode))
4045    {
4046      int sse_level = ix86_function_sseregparm (fntype, func);
4047      if ((sse_level >= 1 && mode == SFmode)
4048	  || (sse_level == 2 && mode == DFmode))
4049        return FIRST_SSE_REG;
4050    }
4051
4052  return FIRST_FLOAT_REG;
4053}
4054
4055/* Create the va_list data type.  */
4056
4057static tree
4058ix86_build_builtin_va_list (void)
4059{
4060  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4061
4062  /* For i386 we use plain pointer to argument area.  */
4063  if (!TARGET_64BIT)
4064    return build_pointer_type (char_type_node);
4065
4066  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4067  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4068
4069  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4070		      unsigned_type_node);
4071  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4072		      unsigned_type_node);
4073  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4074		      ptr_type_node);
4075  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4076		      ptr_type_node);
4077
4078  va_list_gpr_counter_field = f_gpr;
4079  va_list_fpr_counter_field = f_fpr;
4080
4081  DECL_FIELD_CONTEXT (f_gpr) = record;
4082  DECL_FIELD_CONTEXT (f_fpr) = record;
4083  DECL_FIELD_CONTEXT (f_ovf) = record;
4084  DECL_FIELD_CONTEXT (f_sav) = record;
4085
4086  TREE_CHAIN (record) = type_decl;
4087  TYPE_NAME (record) = type_decl;
4088  TYPE_FIELDS (record) = f_gpr;
4089  TREE_CHAIN (f_gpr) = f_fpr;
4090  TREE_CHAIN (f_fpr) = f_ovf;
4091  TREE_CHAIN (f_ovf) = f_sav;
4092
4093  layout_type (record);
4094
4095  /* The correct type is an array type of one element.  */
4096  return build_array_type (record, build_index_type (size_zero_node));
4097}
4098
4099/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
4100
4101static void
4102ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4103			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
4104			     int no_rtl)
4105{
4106  CUMULATIVE_ARGS next_cum;
4107  rtx save_area = NULL_RTX, mem;
4108  rtx label;
4109  rtx label_ref;
4110  rtx tmp_reg;
4111  rtx nsse_reg;
4112  int set;
4113  tree fntype;
4114  int stdarg_p;
4115  int i;
4116
4117  if (!TARGET_64BIT)
4118    return;
4119
4120  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4121    return;
4122
4123  /* Indicate to allocate space on the stack for varargs save area.  */
4124  ix86_save_varrargs_registers = 1;
4125
4126  cfun->stack_alignment_needed = 128;
4127
4128  fntype = TREE_TYPE (current_function_decl);
4129  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4130	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4131		  != void_type_node));
4132
4133  /* For varargs, we do not want to skip the dummy va_dcl argument.
4134     For stdargs, we do want to skip the last named argument.  */
4135  next_cum = *cum;
4136  if (stdarg_p)
4137    function_arg_advance (&next_cum, mode, type, 1);
4138
4139  if (!no_rtl)
4140    save_area = frame_pointer_rtx;
4141
4142  set = get_varargs_alias_set ();
4143
4144  for (i = next_cum.regno;
4145       i < ix86_regparm
4146       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4147       i++)
4148    {
4149      mem = gen_rtx_MEM (Pmode,
4150			 plus_constant (save_area, i * UNITS_PER_WORD));
4151      MEM_NOTRAP_P (mem) = 1;
4152      set_mem_alias_set (mem, set);
4153      emit_move_insn (mem, gen_rtx_REG (Pmode,
4154					x86_64_int_parameter_registers[i]));
4155    }
4156
4157  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4158    {
4159      /* Now emit code to save SSE registers.  The AX parameter contains the
4160	 number of SSE parameter registers used to call this function.  We use the
4161	 sse_prologue_save insn template, which produces a computed jump across the
4162	 SSE saves.  We need some preparation work to get this working.  */
4163
4164      label = gen_label_rtx ();
4165      label_ref = gen_rtx_LABEL_REF (Pmode, label);
4166
4167      /* Compute address to jump to :
4168         label - 5*eax + nnamed_sse_arguments*5  */
4169      tmp_reg = gen_reg_rtx (Pmode);
4170      nsse_reg = gen_reg_rtx (Pmode);
4171      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4172      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4173			      gen_rtx_MULT (Pmode, nsse_reg,
4174					    GEN_INT (4))));
4175      if (next_cum.sse_regno)
4176	emit_move_insn
4177	  (nsse_reg,
4178	   gen_rtx_CONST (DImode,
4179			  gen_rtx_PLUS (DImode,
4180					label_ref,
4181					GEN_INT (next_cum.sse_regno * 4))));
4182      else
4183	emit_move_insn (nsse_reg, label_ref);
4184      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4185
4186      /* Compute the address of the memory block we save into.  We always use a
4187	 pointer pointing 127 bytes after the first byte to store; this is needed to
4188	 keep the instruction size limited to 4 bytes.  */
4189      tmp_reg = gen_reg_rtx (Pmode);
4190      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4191			      plus_constant (save_area,
4192					     8 * REGPARM_MAX + 127)));
4193      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4194      MEM_NOTRAP_P (mem) = 1;
4195      set_mem_alias_set (mem, set);
4196      set_mem_align (mem, BITS_PER_WORD);
4197
4198      /* And finally do the dirty job!  */
4199      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4200					GEN_INT (next_cum.sse_regno), label));
4201    }
4202
4203}
4204
4205/* Implement va_start.  */
4206
4207void
4208ix86_va_start (tree valist, rtx nextarg)
4209{
4210  HOST_WIDE_INT words, n_gpr, n_fpr;
4211  tree f_gpr, f_fpr, f_ovf, f_sav;
4212  tree gpr, fpr, ovf, sav, t;
4213  tree type;
4214
4215  /* Only 64bit target needs something special.  */
4216  if (!TARGET_64BIT)
4217    {
4218      std_expand_builtin_va_start (valist, nextarg);
4219      return;
4220    }
4221
4222  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4223  f_fpr = TREE_CHAIN (f_gpr);
4224  f_ovf = TREE_CHAIN (f_fpr);
4225  f_sav = TREE_CHAIN (f_ovf);
4226
4227  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4228  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4229  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4230  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4231  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4232
4233  /* Count number of gp and fp argument registers used.  */
4234  words = current_function_args_info.words;
4235  n_gpr = current_function_args_info.regno;
4236  n_fpr = current_function_args_info.sse_regno;
4237
4238  if (TARGET_DEBUG_ARG)
4239    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4240	     (int) words, (int) n_gpr, (int) n_fpr);
4241
4242  if (cfun->va_list_gpr_size)
4243    {
4244      type = TREE_TYPE (gpr);
4245      t = build2 (MODIFY_EXPR, type, gpr,
4246		  build_int_cst (type, n_gpr * 8));
4247      TREE_SIDE_EFFECTS (t) = 1;
4248      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4249    }
4250
4251  if (cfun->va_list_fpr_size)
4252    {
4253      type = TREE_TYPE (fpr);
4254      t = build2 (MODIFY_EXPR, type, fpr,
4255		  build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4256      TREE_SIDE_EFFECTS (t) = 1;
4257      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4258    }
4259
4260  /* Find the overflow area.  */
4261  type = TREE_TYPE (ovf);
4262  t = make_tree (type, virtual_incoming_args_rtx);
4263  if (words != 0)
4264    t = build2 (PLUS_EXPR, type, t,
4265	        build_int_cst (type, words * UNITS_PER_WORD));
4266  t = build2 (MODIFY_EXPR, type, ovf, t);
4267  TREE_SIDE_EFFECTS (t) = 1;
4268  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4269
4270  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4271    {
4272      /* Find the register save area.
4273	 The function prologue saves it right above the stack frame.  */
4274      type = TREE_TYPE (sav);
4275      t = make_tree (type, frame_pointer_rtx);
4276      t = build2 (MODIFY_EXPR, type, sav, t);
4277      TREE_SIDE_EFFECTS (t) = 1;
4278      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4279    }
4280}
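/* Example of the arithmetic above (added illustration): a prototype such as
   f (int a, double b, ...) consumes one GP and one SSE register before the
   ellipsis, so va_start stores gp_offset = 1 * 8 = 8 and
   fp_offset = 8 * REGPARM_MAX + 1 * 16 = 64 (assuming REGPARM_MAX == 6 in
   64-bit mode), pointing the first va_arg fetches just past the named
   registers in the save area.  */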
4281
4282/* Implement va_arg.  */
4283
4284tree
4285ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4286{
4287  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4288  tree f_gpr, f_fpr, f_ovf, f_sav;
4289  tree gpr, fpr, ovf, sav, t;
4290  int size, rsize;
4291  tree lab_false, lab_over = NULL_TREE;
4292  tree addr, t2;
4293  rtx container;
4294  int indirect_p = 0;
4295  tree ptrtype;
4296  enum machine_mode nat_mode;
4297
4298  /* Only 64bit target needs something special.  */
4299  if (!TARGET_64BIT)
4300    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4301
4302  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4303  f_fpr = TREE_CHAIN (f_gpr);
4304  f_ovf = TREE_CHAIN (f_fpr);
4305  f_sav = TREE_CHAIN (f_ovf);
4306
4307  valist = build_va_arg_indirect_ref (valist);
4308  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4309  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4310  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4311  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4312
4313  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4314  if (indirect_p)
4315    type = build_pointer_type (type);
4316  size = int_size_in_bytes (type);
4317  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4318
4319  nat_mode = type_natural_mode (type);
4320  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4321				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4322
4323  /* Pull the value out of the saved registers.  */
4324
4325  addr = create_tmp_var (ptr_type_node, "addr");
4326  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4327
4328  if (container)
4329    {
4330      int needed_intregs, needed_sseregs;
4331      bool need_temp;
4332      tree int_addr, sse_addr;
4333
4334      lab_false = create_artificial_label ();
4335      lab_over = create_artificial_label ();
4336
4337      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4338
4339      need_temp = (!REG_P (container)
4340		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
4341		       || TYPE_ALIGN (type) > 128));
4342
4343      /* In case we are passing a structure, verify that it is a consecutive block
4344         in the register save area.  If not, we need to do moves.  */
4345      if (!need_temp && !REG_P (container))
4346	{
4347	  /* Verify that all registers are strictly consecutive.  */
4348	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4349	    {
4350	      int i;
4351
4352	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4353		{
4354		  rtx slot = XVECEXP (container, 0, i);
4355		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4356		      || INTVAL (XEXP (slot, 1)) != i * 16)
4357		    need_temp = 1;
4358		}
4359	    }
4360	  else
4361	    {
4362	      int i;
4363
4364	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4365		{
4366		  rtx slot = XVECEXP (container, 0, i);
4367		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4368		      || INTVAL (XEXP (slot, 1)) != i * 8)
4369		    need_temp = 1;
4370		}
4371	    }
4372	}
4373      if (!need_temp)
4374	{
4375	  int_addr = addr;
4376	  sse_addr = addr;
4377	}
4378      else
4379	{
4380	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
4381	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4382	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4383	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4384	}
4385
4386      /* First ensure that we fit completely in registers.  */
4387      if (needed_intregs)
4388	{
4389	  t = build_int_cst (TREE_TYPE (gpr),
4390			     (REGPARM_MAX - needed_intregs + 1) * 8);
4391	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4392	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4393	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4394	  gimplify_and_add (t, pre_p);
4395	}
4396      if (needed_sseregs)
4397	{
4398	  t = build_int_cst (TREE_TYPE (fpr),
4399			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4400			     + REGPARM_MAX * 8);
4401	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4402	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4403	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4404	  gimplify_and_add (t, pre_p);
4405	}
4406
4407      /* Compute index to start of area used for integer regs.  */
4408      if (needed_intregs)
4409	{
4410	  /* int_addr = gpr + sav; */
4411	  t = fold_convert (ptr_type_node, gpr);
4412	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4413	  t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4414	  gimplify_and_add (t, pre_p);
4415	}
4416      if (needed_sseregs)
4417	{
4418	  /* sse_addr = fpr + sav; */
4419	  t = fold_convert (ptr_type_node, fpr);
4420	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4421	  t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4422	  gimplify_and_add (t, pre_p);
4423	}
4424      if (need_temp)
4425	{
4426	  int i;
4427	  tree temp = create_tmp_var (type, "va_arg_tmp");
4428
4429	  /* addr = &temp; */
4430	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4431	  t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4432	  gimplify_and_add (t, pre_p);
4433
4434	  for (i = 0; i < XVECLEN (container, 0); i++)
4435	    {
4436	      rtx slot = XVECEXP (container, 0, i);
4437	      rtx reg = XEXP (slot, 0);
4438	      enum machine_mode mode = GET_MODE (reg);
4439	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4440	      tree addr_type = build_pointer_type (piece_type);
4441	      tree src_addr, src;
4442	      int src_offset;
4443	      tree dest_addr, dest;
4444
4445	      if (SSE_REGNO_P (REGNO (reg)))
4446		{
4447		  src_addr = sse_addr;
4448		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4449		}
4450	      else
4451		{
4452		  src_addr = int_addr;
4453		  src_offset = REGNO (reg) * 8;
4454		}
4455	      src_addr = fold_convert (addr_type, src_addr);
4456	      src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4457				       size_int (src_offset)));
4458	      src = build_va_arg_indirect_ref (src_addr);
4459
4460	      dest_addr = fold_convert (addr_type, addr);
4461	      dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4462					size_int (INTVAL (XEXP (slot, 1)))));
4463	      dest = build_va_arg_indirect_ref (dest_addr);
4464
4465	      t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4466	      gimplify_and_add (t, pre_p);
4467	    }
4468	}
4469
4470      if (needed_intregs)
4471	{
4472	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4473		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4474	  t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4475	  gimplify_and_add (t, pre_p);
4476	}
4477      if (needed_sseregs)
4478	{
4479	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4480		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4481	  t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4482	  gimplify_and_add (t, pre_p);
4483	}
4484
4485      t = build1 (GOTO_EXPR, void_type_node, lab_over);
4486      gimplify_and_add (t, pre_p);
4487
4488      t = build1 (LABEL_EXPR, void_type_node, lab_false);
4489      append_to_statement_list (t, pre_p);
4490    }
4491
4492  /* ... otherwise out of the overflow area.  */
4493
4494  /* Care for on-stack alignment if needed.  */
4495  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4496      || integer_zerop (TYPE_SIZE (type)))
4497    t = ovf;
4498  else
4499    {
4500      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4501      t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4502		  build_int_cst (TREE_TYPE (ovf), align - 1));
4503      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4504		  build_int_cst (TREE_TYPE (t), -align));
4505    }
4506  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4507
4508  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4509  gimplify_and_add (t2, pre_p);
4510
4511  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4512	      build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4513  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4514  gimplify_and_add (t, pre_p);
4515
4516  if (container)
4517    {
4518      t = build1 (LABEL_EXPR, void_type_node, lab_over);
4519      append_to_statement_list (t, pre_p);
4520    }
4521
4522  ptrtype = build_pointer_type (type);
4523  addr = fold_convert (ptrtype, addr);
4524
4525  if (indirect_p)
4526    addr = build_va_arg_indirect_ref (addr);
4527  return build_va_arg_indirect_ref (addr);
4528}
4529
4530/* Return nonzero if OPNUM's MEM should be matched
4531   in movabs* patterns.  */
4532
4533int
4534ix86_check_movabs (rtx insn, int opnum)
4535{
4536  rtx set, mem;
4537
4538  set = PATTERN (insn);
4539  if (GET_CODE (set) == PARALLEL)
4540    set = XVECEXP (set, 0, 0);
4541  gcc_assert (GET_CODE (set) == SET);
4542  mem = XEXP (set, opnum);
4543  while (GET_CODE (mem) == SUBREG)
4544    mem = SUBREG_REG (mem);
4545  gcc_assert (GET_CODE (mem) == MEM);
4546  return (volatile_ok || !MEM_VOLATILE_P (mem));
4547}
4548
4549/* Initialize the table of extra 80387 mathematical constants.  */
4550
4551static void
4552init_ext_80387_constants (void)
4553{
4554  static const char * cst[5] =
4555  {
4556    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
4557    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
4558    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
4559    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
4560    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
4561  };
4562  int i;
4563
4564  for (i = 0; i < 5; i++)
4565    {
4566      real_from_string (&ext_80387_constants_table[i], cst[i]);
4567      /* Ensure each constant is rounded to XFmode precision.  */
4568      real_convert (&ext_80387_constants_table[i],
4569		    XFmode, &ext_80387_constants_table[i]);
4570    }
4571
4572  ext_80387_constants_init = 1;
4573}
4574
4575/* Return a nonzero code identifying the special 80387 instruction that can
4576   load the constant X (see standard_80387_constant_opcode), or 0 if none.  */
4577
4578int
4579standard_80387_constant_p (rtx x)
4580{
4581  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4582    return -1;
4583
4584  if (x == CONST0_RTX (GET_MODE (x)))
4585    return 1;
4586  if (x == CONST1_RTX (GET_MODE (x)))
4587    return 2;
4588
4589  /* For XFmode constants, try to find a special 80387 instruction when
4590     optimizing for size or on those CPUs that benefit from them.  */
4591  if (GET_MODE (x) == XFmode
4592      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4593    {
4594      REAL_VALUE_TYPE r;
4595      int i;
4596
4597      if (! ext_80387_constants_init)
4598	init_ext_80387_constants ();
4599
4600      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
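      /* Table slots 0..4 correspond to return values 3..7, i.e. fldlg2,
	 fldln2, fldl2e, fldl2t and fldpi; see standard_80387_constant_opcode.  */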
4601      for (i = 0; i < 5; i++)
4602        if (real_identical (&r, &ext_80387_constants_table[i]))
4603	  return i + 3;
4604    }
4605
4606  return 0;
4607}
4608
4609/* Return the opcode of the special instruction to be used to load
4610   the constant X.  */
4611
4612const char *
4613standard_80387_constant_opcode (rtx x)
4614{
4615  switch (standard_80387_constant_p (x))
4616    {
4617    case 1:
4618      return "fldz";
4619    case 2:
4620      return "fld1";
4621    case 3:
4622      return "fldlg2";
4623    case 4:
4624      return "fldln2";
4625    case 5:
4626      return "fldl2e";
4627    case 6:
4628      return "fldl2t";
4629    case 7:
4630      return "fldpi";
4631    default:
4632      gcc_unreachable ();
4633    }
4634}
4635
4636/* Return the CONST_DOUBLE representing the 80387 constant that is
4637   loaded by the specified special instruction.  The argument IDX
4638   matches the return value from standard_80387_constant_p.  */
4639
4640rtx
4641standard_80387_constant_rtx (int idx)
4642{
4643  int i;
4644
4645  if (! ext_80387_constants_init)
4646    init_ext_80387_constants ();
4647
4648  switch (idx)
4649    {
4650    case 3:
4651    case 4:
4652    case 5:
4653    case 6:
4654    case 7:
4655      i = idx - 3;
4656      break;
4657
4658    default:
4659      gcc_unreachable ();
4660    }
4661
4662  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4663				       XFmode);
4664}
4665
4666/* Return 1 if MODE is a valid vector mode for SSE.  */
4667static int
4668standard_sse_mode_p (enum machine_mode mode)
4669{
4670  switch (mode)
4671    {
4672    case V16QImode:
4673    case V8HImode:
4674    case V4SImode:
4675    case V2DImode:
4676    case V4SFmode:
4677    case V2DFmode:
4678      return 1;
4679
4680    default:
4681      return 0;
4682    }
4683}
4684
4685/* Return nonzero if X is an FP constant that we can load into an SSE
4686   register without using memory.  */
4687int
4688standard_sse_constant_p (rtx x)
4689{
4690  enum machine_mode mode = GET_MODE (x);
4691
4692  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4693    return 1;
4694  if (vector_all_ones_operand (x, mode)
4695      && standard_sse_mode_p (mode))
4696    return TARGET_SSE2 ? 2 : -1;
4697
4698  return 0;
4699}
4700
4701/* Return the opcode of the special instruction to be used to load
4702   the constant X.  */
4703
4704const char *
4705standard_sse_constant_opcode (rtx insn, rtx x)
4706{
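  /* Pick the zeroing idiom that matches the insn's SSE mode attribute,
     presumably so the value stays in the same domain (single, double or
     integer) as the rest of the insn.  */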
4707  switch (standard_sse_constant_p (x))
4708    {
4709    case 1:
4710      if (get_attr_mode (insn) == MODE_V4SF)
4711        return "xorps\t%0, %0";
4712      else if (get_attr_mode (insn) == MODE_V2DF)
4713        return "xorpd\t%0, %0";
4714      else
4715        return "pxor\t%0, %0";
4716    case 2:
4717      return "pcmpeqd\t%0, %0";
4718    }
4719  gcc_unreachable ();
4720}
4721
4722/* Returns 1 if OP contains a symbol reference.  */
4723
4724int
4725symbolic_reference_mentioned_p (rtx op)
4726{
4727  const char *fmt;
4728  int i;
4729
4730  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4731    return 1;
4732
4733  fmt = GET_RTX_FORMAT (GET_CODE (op));
4734  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4735    {
4736      if (fmt[i] == 'E')
4737	{
4738	  int j;
4739
4740	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4741	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4742	      return 1;
4743	}
4744
4745      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4746	return 1;
4747    }
4748
4749  return 0;
4750}
4751
4752/* Return 1 if it is appropriate to emit `ret' instructions in the
4753   body of a function.  Do this only if the epilogue is simple, needing a
4754   couple of insns.  Prior to reloading, we can't tell how many registers
4755   must be saved, so return 0 then.  Return 0 if there is no frame
4756   marker to de-allocate.  */
4757
4758int
4759ix86_can_use_return_insn_p (void)
4760{
4761  struct ix86_frame frame;
4762
4763  if (! reload_completed || frame_pointer_needed)
4764    return 0;
4765
4766  /* Don't allow popping 32768 or more bytes of arguments, since that's
4767     all we handle with one instruction.  */
4768  if (current_function_pops_args
4769      && current_function_args_size >= 32768)
4770    return 0;
4771
4772  ix86_compute_frame_layout (&frame);
4773  return frame.to_allocate == 0 && frame.nregs == 0;
4774}
4775
4776/* Value should be nonzero if functions must have frame pointers.
4777   Zero means the frame pointer need not be set up (and parms may
4778   be accessed via the stack pointer) in functions that seem suitable.  */
4779
4780int
4781ix86_frame_pointer_required (void)
4782{
4783  /* If we accessed previous frames, then the generated code expects
4784     to be able to access the saved ebp value in our frame.  */
4785  if (cfun->machine->accesses_prev_frame)
4786    return 1;
4787
4788  /* Several x86 OSes need a frame pointer for other reasons,
4789     usually pertaining to setjmp.  */
4790  if (SUBTARGET_FRAME_POINTER_REQUIRED)
4791    return 1;
4792
4793  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4794     the frame pointer by default.  Turn it back on now if we've not
4795     got a leaf function.  */
4796  if (TARGET_OMIT_LEAF_FRAME_POINTER
4797      && (!current_function_is_leaf
4798	  || ix86_current_function_calls_tls_descriptor))
4799    return 1;
4800
4801  if (current_function_profile)
4802    return 1;
4803
4804  return 0;
4805}
4806
4807/* Record that the current function accesses previous call frames.  */
4808
4809void
4810ix86_setup_frame_addresses (void)
4811{
4812  cfun->machine->accesses_prev_frame = 1;
4813}
4814
4815#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4816# define USE_HIDDEN_LINKONCE 1
4817#else
4818# define USE_HIDDEN_LINKONCE 0
4819#endif
4820
4821static int pic_labels_used;
4822
4823/* Fills in the label name that should be used for a pc thunk for
4824   the given register.  */
4825
4826static void
4827get_pc_thunk_name (char name[32], unsigned int regno)
4828{
4829  gcc_assert (!TARGET_64BIT);
4830
4831  if (USE_HIDDEN_LINKONCE)
4832    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4833  else
4834    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4835}
4836
4837
4838/* At the end of the assembly file, emit the pc thunks used by -fpic code;
4839   each thunk loads its register with the caller's return address and returns.  */
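/* For example, when %ebx is used the thunk emitted below is roughly

       __i686.get_pc_thunk.bx:
	       movl	(%esp), %ebx
	       ret

   so a "call __i686.get_pc_thunk.bx" leaves the address of the instruction
   following the call in %ebx.  (Without USE_HIDDEN_LINKONCE the thunk gets
   an internal "LPR" label instead of the name above.)  */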
4840
4841void
4842ix86_file_end (void)
4843{
4844  rtx xops[2];
4845  int regno;
4846
4847  for (regno = 0; regno < 8; ++regno)
4848    {
4849      char name[32];
4850
4851      if (! ((pic_labels_used >> regno) & 1))
4852	continue;
4853
4854      get_pc_thunk_name (name, regno);
4855
4856#if TARGET_MACHO
4857      if (TARGET_MACHO)
4858	{
4859	  switch_to_section (darwin_sections[text_coal_section]);
4860	  fputs ("\t.weak_definition\t", asm_out_file);
4861	  assemble_name (asm_out_file, name);
4862	  fputs ("\n\t.private_extern\t", asm_out_file);
4863	  assemble_name (asm_out_file, name);
4864	  fputs ("\n", asm_out_file);
4865	  ASM_OUTPUT_LABEL (asm_out_file, name);
4866	}
4867      else
4868#endif
4869      if (USE_HIDDEN_LINKONCE)
4870	{
4871	  tree decl;
4872
4873	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
4874			     error_mark_node);
4875	  TREE_PUBLIC (decl) = 1;
4876	  TREE_STATIC (decl) = 1;
4877	  DECL_ONE_ONLY (decl) = 1;
4878
4879	  (*targetm.asm_out.unique_section) (decl, 0);
4880	  switch_to_section (get_named_section (decl, NULL, 0));
4881
4882	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
4883	  fputs ("\t.hidden\t", asm_out_file);
4884	  assemble_name (asm_out_file, name);
4885	  fputc ('\n', asm_out_file);
4886	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4887	}
4888      else
4889	{
4890	  switch_to_section (text_section);
4891	  ASM_OUTPUT_LABEL (asm_out_file, name);
4892	}
4893
4894      xops[0] = gen_rtx_REG (SImode, regno);
4895      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4896      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4897      output_asm_insn ("ret", xops);
4898    }
4899
4900  if (NEED_INDICATE_EXEC_STACK)
4901    file_end_indicate_exec_stack ();
4902}
4903
4904/* Emit code for the SET_GOT patterns.  */
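/* As an illustration, with flag_pic and without deep branch prediction the
   sequence emitted for DEST = %ebx is roughly

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   while with deep branch prediction it instead calls the matching pc thunk
   emitted by ix86_file_end and then adds $_GLOBAL_OFFSET_TABLE_.  */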
4905
4906const char *
4907output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4908{
4909  rtx xops[3];
4910
4911  xops[0] = dest;
4912  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4913
4914  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4915    {
4916      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4917
4918      if (!flag_pic)
4919	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4920      else
4921	output_asm_insn ("call\t%a2", xops);
4922
4923#if TARGET_MACHO
4924      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
4925         is what will be referenced by the Mach-O PIC subsystem.  */
4926      if (!label)
4927	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4928#endif
4929
4930      (*targetm.asm_out.internal_label) (asm_out_file, "L",
4931				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4932
4933      if (flag_pic)
4934	output_asm_insn ("pop{l}\t%0", xops);
4935    }
4936  else
4937    {
4938      char name[32];
4939      get_pc_thunk_name (name, REGNO (dest));
4940      pic_labels_used |= 1 << REGNO (dest);
4941
4942      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4943      xops[2] = gen_rtx_MEM (QImode, xops[2]);
4944      output_asm_insn ("call\t%X2", xops);
4945      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
4946         is what will be referenced by the Mach-O PIC subsystem.  */
4947#if TARGET_MACHO
4948      if (!label)
4949	ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4950      else
4951        targetm.asm_out.internal_label (asm_out_file, "L",
4952					   CODE_LABEL_NUMBER (label));
4953#endif
4954    }
4955
4956  if (TARGET_MACHO)
4957    return "";
4958
4959  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4960    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4961  else
4962    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4963
4964  return "";
4965}
4966
4967/* Generate a "push" pattern for input ARG.  */
4968
4969static rtx
4970gen_push (rtx arg)
4971{
4972  return gen_rtx_SET (VOIDmode,
4973		      gen_rtx_MEM (Pmode,
4974				   gen_rtx_PRE_DEC (Pmode,
4975						    stack_pointer_rtx)),
4976		      arg);
4977}
4978
4979/* Return the number of an unused call-clobbered register available for
4980   the entire function, or INVALID_REGNUM if there is none.  */
4981
4982static unsigned int
4983ix86_select_alt_pic_regnum (void)
4984{
4985  if (current_function_is_leaf && !current_function_profile
4986      && !ix86_current_function_calls_tls_descriptor)
4987    {
4988      int i;
4989      for (i = 2; i >= 0; --i)
4990        if (!regs_ever_live[i])
4991	  return i;
4992    }
4993
4994  return INVALID_REGNUM;
4995}
4996
4997/* Return 1 if we need to save REGNO.  */
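/* Besides the usual regs_ever_live / call_used test this handles three
   special cases: the PIC register when the function needs a GOT pointer
   (unless an unused call-clobbered register can stand in for it), the EH
   return data registers, and the register holding the realigned argument
   pointer.  */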
4998static int
4999ix86_save_reg (unsigned int regno, int maybe_eh_return)
5000{
5001  if (pic_offset_table_rtx
5002      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5003      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5004	  || current_function_profile
5005	  || current_function_calls_eh_return
5006	  || current_function_uses_const_pool))
5007    {
5008      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5009	return 0;
5010      return 1;
5011    }
5012
5013  if (current_function_calls_eh_return && maybe_eh_return)
5014    {
5015      unsigned i;
5016      for (i = 0; ; i++)
5017	{
5018	  unsigned test = EH_RETURN_DATA_REGNO (i);
5019	  if (test == INVALID_REGNUM)
5020	    break;
5021	  if (test == regno)
5022	    return 1;
5023	}
5024    }
5025
5026  if (cfun->machine->force_align_arg_pointer
5027      && regno == REGNO (cfun->machine->force_align_arg_pointer))
5028    return 1;
5029
5030  return (regs_ever_live[regno]
5031	  && !call_used_regs[regno]
5032	  && !fixed_regs[regno]
5033	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5034}
5035
5036/* Return number of registers to be saved on the stack.  */
5037
5038static int
5039ix86_nsaved_regs (void)
5040{
5041  int nregs = 0;
5042  int regno;
5043
5044  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5045    if (ix86_save_reg (regno, true))
5046      nregs++;
5047  return nregs;
5048}
5049
5050/* Return the offset between two registers, one to be eliminated, and the other
5051   its replacement, at the start of a routine.  */
5052
5053HOST_WIDE_INT
5054ix86_initial_elimination_offset (int from, int to)
5055{
5056  struct ix86_frame frame;
5057  ix86_compute_frame_layout (&frame);
5058
5059  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5060    return frame.hard_frame_pointer_offset;
5061  else if (from == FRAME_POINTER_REGNUM
5062	   && to == HARD_FRAME_POINTER_REGNUM)
5063    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5064  else
5065    {
5066      gcc_assert (to == STACK_POINTER_REGNUM);
5067
5068      if (from == ARG_POINTER_REGNUM)
5069	return frame.stack_pointer_offset;
5070
5071      gcc_assert (from == FRAME_POINTER_REGNUM);
5072      return frame.stack_pointer_offset - frame.frame_pointer_offset;
5073    }
5074}
5075
5076/* Fill the ix86_frame structure FRAME with data about the frame of the current function.  */
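/* Roughly, the layout computed below is, from higher to lower addresses:

	return address
	saved frame pointer		(if frame_pointer_needed)
	saved registers			(frame->nregs words)
	va-arg register save area	(if ix86_save_varrargs_registers)
	padding1			(aligns the start of the local frame)
	local variables			(get_frame_size () bytes)
	outgoing arguments		(if ACCUMULATE_OUTGOING_ARGS)
	padding2			(aligns the stack pointer offset)

   The red zone, when usable, is carved out of the final allocation.  */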
5077
5078static void
5079ix86_compute_frame_layout (struct ix86_frame *frame)
5080{
5081  HOST_WIDE_INT total_size;
5082  unsigned int stack_alignment_needed;
5083  HOST_WIDE_INT offset;
5084  unsigned int preferred_alignment;
5085  HOST_WIDE_INT size = get_frame_size ();
5086
5087  frame->nregs = ix86_nsaved_regs ();
5088  total_size = size;
5089
5090  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5091  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5092
5093  /* During reload iterations the number of registers saved can change.
5094     Recompute the value as needed.  Do not recompute when the number of
5095     registers didn't change, as reload calls this function multiple times
5096     and does not expect the decision to change within a single iteration.  */
5097  if (!optimize_size
5098      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5099    {
5100      int count = frame->nregs;
5101
5102      cfun->machine->use_fast_prologue_epilogue_nregs = count;
5103      /* The fast prologue uses move instead of push to save registers.  This
5104         is significantly longer, but also executes faster as modern hardware
5105         can execute the moves in parallel, but can't do that for push/pop.
5106
5107	 Be careful about choosing which prologue to emit:  when the function
5108	 takes many instructions to execute we may use the slow version, as we
5109	 may when the function is known to be outside a hot spot (known with
5110	 profile feedback only).  Weight the size of the function by the number
5111	 of registers to save, as it is cheap to use one or two push instructions
5112	 but very slow to use many of them.  */
5113      if (count)
5114	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5115      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5116	  || (flag_branch_probabilities
5117	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5118        cfun->machine->use_fast_prologue_epilogue = false;
5119      else
5120        cfun->machine->use_fast_prologue_epilogue
5121	   = !expensive_function_p (count);
5122    }
5123  if (TARGET_PROLOGUE_USING_MOVE
5124      && cfun->machine->use_fast_prologue_epilogue)
5125    frame->save_regs_using_mov = true;
5126  else
5127    frame->save_regs_using_mov = false;
5128
5129
5130  /* Skip return address and saved base pointer.  */
5131  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5132
5133  frame->hard_frame_pointer_offset = offset;
5134
5135  /* Do some sanity checking of stack_alignment_needed and
5136     preferred_alignment, since the i386 port is the only one using these
5137     features, which may break easily.  */
5138
5139  gcc_assert (!size || stack_alignment_needed);
5140  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5141  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5142  gcc_assert (stack_alignment_needed
5143	      <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5144
5145  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5146    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5147
5148  /* Register save area */
5149  offset += frame->nregs * UNITS_PER_WORD;
5150
5151  /* Va-arg area */
5152  if (ix86_save_varrargs_registers)
5153    {
5154      offset += X86_64_VARARGS_SIZE;
5155      frame->va_arg_size = X86_64_VARARGS_SIZE;
5156    }
5157  else
5158    frame->va_arg_size = 0;
5159
5160  /* Align the start of the local frame.  */
5161  frame->padding1 = ((offset + stack_alignment_needed - 1)
5162		     & -stack_alignment_needed) - offset;
5163
5164  offset += frame->padding1;
5165
5166  /* Frame pointer points here.  */
5167  frame->frame_pointer_offset = offset;
5168
5169  offset += size;
5170
5171  /* Add the outgoing arguments area.  It can be skipped if we eliminated
5172     all the function calls as dead code.
5173     Skipping is however impossible when the function calls alloca, as the
5174     alloca expander assumes that the last current_function_outgoing_args_size
5175     bytes of the stack frame are unused.  */
5176  if (ACCUMULATE_OUTGOING_ARGS
5177      && (!current_function_is_leaf || current_function_calls_alloca
5178	  || ix86_current_function_calls_tls_descriptor))
5179    {
5180      offset += current_function_outgoing_args_size;
5181      frame->outgoing_arguments_size = current_function_outgoing_args_size;
5182    }
5183  else
5184    frame->outgoing_arguments_size = 0;
5185
5186  /* Align stack boundary.  Only needed if we're calling another function
5187     or using alloca.  */
5188  if (!current_function_is_leaf || current_function_calls_alloca
5189      || ix86_current_function_calls_tls_descriptor)
5190    frame->padding2 = ((offset + preferred_alignment - 1)
5191		       & -preferred_alignment) - offset;
5192  else
5193    frame->padding2 = 0;
5194
5195  offset += frame->padding2;
5196
5197  /* We've reached end of stack frame.  */
5198  frame->stack_pointer_offset = offset;
5199
5200  /* The size the prologue needs to allocate.  */
5201  frame->to_allocate =
5202    (size + frame->padding1 + frame->padding2
5203     + frame->outgoing_arguments_size + frame->va_arg_size);
5204
5205  if ((!frame->to_allocate && frame->nregs <= 1)
5206      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5207    frame->save_regs_using_mov = false;
5208
5209  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5210      && current_function_is_leaf
5211      && !ix86_current_function_calls_tls_descriptor)
5212    {
5213      frame->red_zone_size = frame->to_allocate;
5214      if (frame->save_regs_using_mov)
5215	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5216      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5217	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5218    }
5219  else
5220    frame->red_zone_size = 0;
5221  frame->to_allocate -= frame->red_zone_size;
5222  frame->stack_pointer_offset -= frame->red_zone_size;
5223#if 0
5224  fprintf (stderr, "nregs: %i\n", frame->nregs);
5225  fprintf (stderr, "size: %i\n", size);
5226  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5227  fprintf (stderr, "padding1: %i\n", frame->padding1);
5228  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5229  fprintf (stderr, "padding2: %i\n", frame->padding2);
5230  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5231  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5232  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5233  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5234	   frame->hard_frame_pointer_offset);
5235  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5236#endif
5237}
5238
5239/* Emit code to save registers in the prologue.  */
5240
5241static void
5242ix86_emit_save_regs (void)
5243{
5244  unsigned int regno;
5245  rtx insn;
5246
5247  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5248    if (ix86_save_reg (regno, true))
5249      {
5250	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5251	RTX_FRAME_RELATED_P (insn) = 1;
5252      }
5253}
5254
5255/* Emit code to save registers using MOV insns.  The first register
5256   is saved at POINTER + OFFSET.  */
5257static void
5258ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5259{
5260  unsigned int regno;
5261  rtx insn;
5262
5263  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5264    if (ix86_save_reg (regno, true))
5265      {
5266	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5267					       Pmode, offset),
5268			       gen_rtx_REG (Pmode, regno));
5269	RTX_FRAME_RELATED_P (insn) = 1;
5270	offset += UNITS_PER_WORD;
5271      }
5272}
5273
5274/* Expand prologue or epilogue stack adjustment.
5275   The pattern exists to put a dependency on all ebp-based memory accesses.
5276   STYLE should be negative if instructions should be marked as frame related,
5277   zero if the %r11 register is live and cannot be freely used, and positive
5278   otherwise.  */
5279
5280static void
5281pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5282{
5283  rtx insn;
5284
5285  if (! TARGET_64BIT)
5286    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5287  else if (x86_64_immediate_operand (offset, DImode))
5288    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5289  else
5290    {
5291      rtx r11;
5292      /* r11 is used by indirect sibcall return as well, set before the
5293	 epilogue and used after the epilogue.  ATM indirect sibcall
5294	 shouldn't be used together with huge frame sizes in one
5295	 function because of the frame_size check in sibcall.c.  */
5296      gcc_assert (style);
5297      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5298      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5299      if (style < 0)
5300	RTX_FRAME_RELATED_P (insn) = 1;
5301      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5302							       offset));
5303    }
5304  if (style < 0)
5305    RTX_FRAME_RELATED_P (insn) = 1;
5306}
5307
5308/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
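/* When stack realignment is requested, the incoming argument pointer is
   copied into a separate hard register (%ecx, see ix86_expand_prologue) so
   that arguments remain addressable after the stack pointer is realigned;
   otherwise the usual virtual incoming-args pointer is returned.  */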
5309
5310static rtx
5311ix86_internal_arg_pointer (void)
5312{
5313  bool has_force_align_arg_pointer =
5314    (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5315			    TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5316  if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5317       && DECL_NAME (current_function_decl)
5318       && MAIN_NAME_P (DECL_NAME (current_function_decl))
5319       && DECL_FILE_SCOPE_P (current_function_decl))
5320      || ix86_force_align_arg_pointer
5321      || has_force_align_arg_pointer)
5322    {
5323      /* Nested functions can't realign the stack due to a register
5324	 conflict.  */
5325      if (DECL_CONTEXT (current_function_decl)
5326	  && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5327	{
5328	  if (ix86_force_align_arg_pointer)
5329	    warning (0, "-mstackrealign ignored for nested functions");
5330	  if (has_force_align_arg_pointer)
5331	    error ("%s not supported for nested functions",
5332		   ix86_force_align_arg_pointer_string);
5333	  return virtual_incoming_args_rtx;
5334	}
5335      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5336      return copy_to_reg (cfun->machine->force_align_arg_pointer);
5337    }
5338  else
5339    return virtual_incoming_args_rtx;
5340}
5341
5342/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5343   This is called from dwarf2out.c to emit call frame instructions
5344   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5345static void
5346ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5347{
5348  rtx unspec = SET_SRC (pattern);
5349  gcc_assert (GET_CODE (unspec) == UNSPEC);
5350
5351  switch (index)
5352    {
5353    case UNSPEC_REG_SAVE:
5354      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5355			      SET_DEST (pattern));
5356      break;
5357    case UNSPEC_DEF_CFA:
5358      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5359			 INTVAL (XVECEXP (unspec, 0, 0)));
5360      break;
5361    default:
5362      gcc_unreachable ();
5363    }
5364}
5365
5366/* Expand the prologue into a bunch of separate insns.  */
5367
5368void
5369ix86_expand_prologue (void)
5370{
5371  rtx insn;
5372  bool pic_reg_used;
5373  struct ix86_frame frame;
5374  HOST_WIDE_INT allocate;
5375
5376  ix86_compute_frame_layout (&frame);
5377
5378  if (cfun->machine->force_align_arg_pointer)
5379    {
5380      rtx x, y;
5381
5382      /* Grab the argument pointer.  */
5383      x = plus_constant (stack_pointer_rtx, 4);
5384      y = cfun->machine->force_align_arg_pointer;
5385      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5386      RTX_FRAME_RELATED_P (insn) = 1;
5387
5388      /* The unwind info consists of two parts: install the fafp as the cfa,
5389	 and record the fafp as the "save register" of the stack pointer.
5390	 The latter is there so that the unwinder can see where it should
5391	 restore the stack pointer across the alignment "and" insn.  */
5392      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5393      x = gen_rtx_SET (VOIDmode, y, x);
5394      RTX_FRAME_RELATED_P (x) = 1;
5395      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5396			  UNSPEC_REG_SAVE);
5397      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5398      RTX_FRAME_RELATED_P (y) = 1;
5399      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5400      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5401      REG_NOTES (insn) = x;
5402
5403      /* Align the stack.  */
5404      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5405			     GEN_INT (-16)));
5406
5407      /* And here we cheat like madmen with the unwind info.  We force the
5408	 cfa register back to sp+4, which is exactly what it was at the
5409	 start of the function.  Re-pushing the return address results in
5410	 the return address being at the same spot relative to the cfa, and
5411	 thus is correct wrt the unwind info.  */
5412      x = cfun->machine->force_align_arg_pointer;
5413      x = gen_frame_mem (Pmode, plus_constant (x, -4));
5414      insn = emit_insn (gen_push (x));
5415      RTX_FRAME_RELATED_P (insn) = 1;
5416
5417      x = GEN_INT (4);
5418      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5419      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5420      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5421      REG_NOTES (insn) = x;
5422    }
5423
5424  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
5425     slower on all targets.  Also sdb doesn't like it.  */
5426
5427  if (frame_pointer_needed)
5428    {
5429      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5430      RTX_FRAME_RELATED_P (insn) = 1;
5431
5432      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5433      RTX_FRAME_RELATED_P (insn) = 1;
5434    }
5435
5436  allocate = frame.to_allocate;
5437
5438  if (!frame.save_regs_using_mov)
5439    ix86_emit_save_regs ();
5440  else
5441    allocate += frame.nregs * UNITS_PER_WORD;
5442
5443  /* When using the red zone we may start saving registers before allocating
5444     the stack frame, saving one cycle of the prologue.  */
5445  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5446    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5447				   : stack_pointer_rtx,
5448				   -frame.nregs * UNITS_PER_WORD);
5449
5450  if (allocate == 0)
5451    ;
5452  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5453    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5454			       GEN_INT (-allocate), -1);
5455  else
5456    {
5457      /* Only valid for Win32.  */
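      /* The requested size is passed to the stack-probing pattern in %eax;
	 if %eax is live at function entry it is saved with a push first and
	 reloaded from the newly allocated frame afterwards.  */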
5458      rtx eax = gen_rtx_REG (SImode, 0);
5459      bool eax_live = ix86_eax_live_at_start_p ();
5460      rtx t;
5461
5462      gcc_assert (!TARGET_64BIT);
5463
5464      if (eax_live)
5465	{
5466	  emit_insn (gen_push (eax));
5467	  allocate -= 4;
5468	}
5469
5470      emit_move_insn (eax, GEN_INT (allocate));
5471
5472      insn = emit_insn (gen_allocate_stack_worker (eax));
5473      RTX_FRAME_RELATED_P (insn) = 1;
5474      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5475      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5476      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5477					    t, REG_NOTES (insn));
5478
5479      if (eax_live)
5480	{
5481	  if (frame_pointer_needed)
5482	    t = plus_constant (hard_frame_pointer_rtx,
5483			       allocate
5484			       - frame.to_allocate
5485			       - frame.nregs * UNITS_PER_WORD);
5486	  else
5487	    t = plus_constant (stack_pointer_rtx, allocate);
5488	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5489	}
5490    }
5491
5492  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5493    {
5494      if (!frame_pointer_needed || !frame.to_allocate)
5495        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5496      else
5497        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5498				       -frame.nregs * UNITS_PER_WORD);
5499    }
5500
5501  pic_reg_used = false;
5502  if (pic_offset_table_rtx
5503      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5504	  || current_function_profile))
5505    {
5506      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5507
5508      if (alt_pic_reg_used != INVALID_REGNUM)
5509	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5510
5511      pic_reg_used = true;
5512    }
5513
5514  if (pic_reg_used)
5515    {
5516      if (TARGET_64BIT)
5517        insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5518      else
5519        insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5520
5521      /* Even with accurate pre-reload life analysis, we can wind up
5522	 deleting all references to the pic register after reload.
5523	 Consider if cross-jumping unifies two sides of a branch
5524	 controlled by a comparison vs the only read from a global.
5525	 In which case, allow the set_got to be deleted, though we're
5526	 too late to do anything about the ebx save in the prologue.  */
5527      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5528    }
5529
5530  /* Prevent function calls from being scheduled before the call to mcount.
5531     In the pic_reg_used case, make sure that the got load isn't deleted.  */
5532  if (current_function_profile)
5533    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5534}
5535
5536/* Emit code to restore saved registers using MOV insns.  First register
5537   is restored from POINTER + OFFSET.  */
5538static void
5539ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5540				  int maybe_eh_return)
5541{
5542  int regno;
5543  rtx base_address = gen_rtx_MEM (Pmode, pointer);
5544
5545  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5546    if (ix86_save_reg (regno, maybe_eh_return))
5547      {
5548	/* Ensure that adjust_address won't be forced to produce a pointer
5549	   outside the range allowed by the x86-64 instruction set.  */
5550	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5551	  {
5552	    rtx r11;
5553
5554	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5555	    emit_move_insn (r11, GEN_INT (offset));
5556	    emit_insn (gen_adddi3 (r11, r11, pointer));
5557	    base_address = gen_rtx_MEM (Pmode, r11);
5558	    offset = 0;
5559	  }
5560	emit_move_insn (gen_rtx_REG (Pmode, regno),
5561			adjust_address (base_address, Pmode, offset));
5562	offset += UNITS_PER_WORD;
5563      }
5564}
5565
5566/* Restore function stack, frame, and registers.  */
5567
5568void
5569ix86_expand_epilogue (int style)
5570{
5571  int regno;
5572  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5573  struct ix86_frame frame;
5574  HOST_WIDE_INT offset;
5575
5576  ix86_compute_frame_layout (&frame);
5577
5578  /* Calculate start of saved registers relative to ebp.  Special care
5579     must be taken for the normal return case of a function using
5580     eh_return: the eax and edx registers are marked as saved, but not
5581     restored along this path.  */
5582  offset = frame.nregs;
5583  if (current_function_calls_eh_return && style != 2)
5584    offset -= 2;
5585  offset *= -UNITS_PER_WORD;
5586
5587  /* If we're only restoring one register and sp is not valid, then
5588     use a move instruction to restore the register, since it's
5589     less work than reloading sp and popping the register.
5590
5591     The default code results in a stack adjustment using an add/lea
5592     instruction, while this code results in a LEAVE instruction (or discrete
5593     equivalent), so it is profitable in some other cases as well, especially
5594     when there are no registers to restore.  We also use this code when
5595     TARGET_USE_LEAVE and there is exactly one register to pop.  This
5596     heuristic may need some tuning in the future.  */
5597  if ((!sp_valid && frame.nregs <= 1)
5598      || (TARGET_EPILOGUE_USING_MOVE
5599	  && cfun->machine->use_fast_prologue_epilogue
5600	  && (frame.nregs > 1 || frame.to_allocate))
5601      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5602      || (frame_pointer_needed && TARGET_USE_LEAVE
5603	  && cfun->machine->use_fast_prologue_epilogue
5604	  && frame.nregs == 1)
5605      || current_function_calls_eh_return)
5606    {
5607      /* Restore registers.  We can use ebp or esp to address the memory
5608	 locations.  If both are available, default to ebp, since offsets
5609	 are known to be small.  The only exception is esp pointing directly to
5610	 the end of the block of saved registers, where we may simplify the
5611	 addressing mode.  */
5612
5613      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5614	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5615					  frame.to_allocate, style == 2);
5616      else
5617	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5618					  offset, style == 2);
5619
5620      /* eh_return epilogues need %ecx added to the stack pointer.  */
5621      if (style == 2)
5622	{
5623	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5624
5625	  if (frame_pointer_needed)
5626	    {
5627	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5628	      tmp = plus_constant (tmp, UNITS_PER_WORD);
5629	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5630
5631	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5632	      emit_move_insn (hard_frame_pointer_rtx, tmp);
5633
5634	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5635					 const0_rtx, style);
5636	    }
5637	  else
5638	    {
5639	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5640	      tmp = plus_constant (tmp, (frame.to_allocate
5641                                         + frame.nregs * UNITS_PER_WORD));
5642	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5643	    }
5644	}
5645      else if (!frame_pointer_needed)
5646	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5647				   GEN_INT (frame.to_allocate
5648					    + frame.nregs * UNITS_PER_WORD),
5649				   style);
5650      /* If not an i386, mov & pop is faster than "leave".  */
5651      else if (TARGET_USE_LEAVE || optimize_size
5652	       || !cfun->machine->use_fast_prologue_epilogue)
5653	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5654      else
5655	{
5656	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5657				     hard_frame_pointer_rtx,
5658				     const0_rtx, style);
5659	  if (TARGET_64BIT)
5660	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5661	  else
5662	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5663	}
5664    }
5665  else
5666    {
5667      /* First step is to deallocate the stack frame so that we can
5668	 pop the registers.  */
5669      if (!sp_valid)
5670	{
5671	  gcc_assert (frame_pointer_needed);
5672	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5673				     hard_frame_pointer_rtx,
5674				     GEN_INT (offset), style);
5675	}
5676      else if (frame.to_allocate)
5677	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5678				   GEN_INT (frame.to_allocate), style);
5679
5680      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5681	if (ix86_save_reg (regno, false))
5682	  {
5683	    if (TARGET_64BIT)
5684	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5685	    else
5686	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5687	  }
5688      if (frame_pointer_needed)
5689	{
5690	  /* Leave results in shorter dependency chains on CPUs that are
5691	     able to grok it fast.  */
5692	  if (TARGET_USE_LEAVE)
5693	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5694	  else if (TARGET_64BIT)
5695	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5696	  else
5697	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5698	}
5699    }
5700
5701  if (cfun->machine->force_align_arg_pointer)
5702    {
5703      emit_insn (gen_addsi3 (stack_pointer_rtx,
5704			     cfun->machine->force_align_arg_pointer,
5705			     GEN_INT (-4)));
5706    }
5707
5708  /* Sibcall epilogues don't want a return instruction.  */
5709  if (style == 0)
5710    return;
5711
5712  if (current_function_pops_args && current_function_args_size)
5713    {
5714      rtx popc = GEN_INT (current_function_pops_args);
5715
5716      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
5717	 return address, do an explicit add, and jump indirectly to the
5718	 caller.  */
5719
5720      if (current_function_pops_args >= 65536)
5721	{
5722	  rtx ecx = gen_rtx_REG (SImode, 2);
5723
5724	  /* There is no "pascal" calling convention in the 64bit ABI.  */
5725	  gcc_assert (!TARGET_64BIT);
5726
5727	  emit_insn (gen_popsi1 (ecx));
5728	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5729	  emit_jump_insn (gen_return_indirect_internal (ecx));
5730	}
5731      else
5732	emit_jump_insn (gen_return_pop_internal (popc));
5733    }
5734  else
5735    emit_jump_insn (gen_return_internal ());
5736}
5737
5738/* Undo modifications made while compiling the function (e.g. restore the PIC register number).  */
5739
5740static void
5741ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5742			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5743{
5744  if (pic_offset_table_rtx)
5745    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5746#if TARGET_MACHO
5747  /* Mach-O doesn't support labels at the end of objects, so if
5748     it looks like we might want one, insert a NOP.  */
5749  {
5750    rtx insn = get_last_insn ();
5751    while (insn
5752	   && NOTE_P (insn)
5753	   && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5754      insn = PREV_INSN (insn);
5755    if (insn
5756	&& (LABEL_P (insn)
5757	    || (NOTE_P (insn)
5758		&& NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5759      fputs ("\tnop\n", file);
5760  }
5761#endif
5762
5763}
5764
5765/* Extract the parts of an RTL expression that is a valid memory address
5766   for an instruction.  Return 0 if the structure of the address is
5767   grossly off.  Return -1 if the address contains an ASHIFT, so it is not
5768   strictly valid, but is still used for computing the length of a lea insn.  */
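/* As an illustration, the address of "movl 12(%eax,%ebx,4), ..." arrives here
   as (plus (plus (mult (reg ebx) (const_int 4)) (reg eax)) (const_int 12))
   and decomposes into base = %eax, index = %ebx, scale = 4, disp = 12 and
   seg = SEG_DEFAULT.  */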
5769
5770int
5771ix86_decompose_address (rtx addr, struct ix86_address *out)
5772{
5773  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5774  rtx base_reg, index_reg;
5775  HOST_WIDE_INT scale = 1;
5776  rtx scale_rtx = NULL_RTX;
5777  int retval = 1;
5778  enum ix86_address_seg seg = SEG_DEFAULT;
5779
5780  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5781    base = addr;
5782  else if (GET_CODE (addr) == PLUS)
5783    {
5784      rtx addends[4], op;
5785      int n = 0, i;
5786
5787      op = addr;
5788      do
5789	{
5790	  if (n >= 4)
5791	    return 0;
5792	  addends[n++] = XEXP (op, 1);
5793	  op = XEXP (op, 0);
5794	}
5795      while (GET_CODE (op) == PLUS);
5796      if (n >= 4)
5797	return 0;
5798      addends[n] = op;
5799
5800      for (i = n; i >= 0; --i)
5801	{
5802	  op = addends[i];
5803	  switch (GET_CODE (op))
5804	    {
5805	    case MULT:
5806	      if (index)
5807		return 0;
5808	      index = XEXP (op, 0);
5809	      scale_rtx = XEXP (op, 1);
5810	      break;
5811
5812	    case UNSPEC:
5813	      if (XINT (op, 1) == UNSPEC_TP
5814	          && TARGET_TLS_DIRECT_SEG_REFS
5815	          && seg == SEG_DEFAULT)
5816		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5817	      else
5818		return 0;
5819	      break;
5820
5821	    case REG:
5822	    case SUBREG:
5823	      if (!base)
5824		base = op;
5825	      else if (!index)
5826		index = op;
5827	      else
5828		return 0;
5829	      break;
5830
5831	    case CONST:
5832	    case CONST_INT:
5833	    case SYMBOL_REF:
5834	    case LABEL_REF:
5835	      if (disp)
5836		return 0;
5837	      disp = op;
5838	      break;
5839
5840	    default:
5841	      return 0;
5842	    }
5843	}
5844    }
5845  else if (GET_CODE (addr) == MULT)
5846    {
5847      index = XEXP (addr, 0);		/* index*scale */
5848      scale_rtx = XEXP (addr, 1);
5849    }
5850  else if (GET_CODE (addr) == ASHIFT)
5851    {
5852      rtx tmp;
5853
5854      /* We're called for lea too, which implements ashift on occasion.  */
5855      index = XEXP (addr, 0);
5856      tmp = XEXP (addr, 1);
5857      if (GET_CODE (tmp) != CONST_INT)
5858	return 0;
5859      scale = INTVAL (tmp);
5860      if ((unsigned HOST_WIDE_INT) scale > 3)
5861	return 0;
5862      scale = 1 << scale;
5863      retval = -1;
5864    }
5865  else
5866    disp = addr;			/* displacement */
5867
5868  /* Extract the integral value of scale.  */
5869  if (scale_rtx)
5870    {
5871      if (GET_CODE (scale_rtx) != CONST_INT)
5872	return 0;
5873      scale = INTVAL (scale_rtx);
5874    }
5875
5876  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5877  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5878
5879  /* Allow the arg pointer and stack pointer as the index if there is no scaling.  */
5880  if (base_reg && index_reg && scale == 1
5881      && (index_reg == arg_pointer_rtx
5882	  || index_reg == frame_pointer_rtx
5883	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5884    {
5885      rtx tmp;
5886      tmp = base, base = index, index = tmp;
5887      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5888    }
5889
5890  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
5891  if ((base_reg == hard_frame_pointer_rtx
5892       || base_reg == frame_pointer_rtx
5893       || base_reg == arg_pointer_rtx) && !disp)
5894    disp = const0_rtx;
5895
5896  /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
5897     Avoid this by transforming it to [%esi+0].  */
5898  if (ix86_tune == PROCESSOR_K6 && !optimize_size
5899      && base_reg && !index_reg && !disp
5900      && REG_P (base_reg)
5901      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5902    disp = const0_rtx;
5903
5904  /* Special case: encode reg+reg instead of reg*2.  */
5905  if (!base && index && scale && scale == 2)
5906    base = index, base_reg = index_reg, scale = 1;
5907
5908  /* Special case: scaling cannot be encoded without base or displacement.  */
5909  if (!base && !disp && index && scale != 1)
5910    disp = const0_rtx;
5911
5912  out->base = base;
5913  out->index = index;
5914  out->disp = disp;
5915  out->scale = scale;
5916  out->seg = seg;
5917
5918  return retval;
5919}
5920
5921/* Return the cost of the memory address X.
5922   For i386, it is better to use a complex address than let gcc copy
5923   the address into a reg and make a new pseudo.  But not if the address
5924   requires two regs - that would mean more pseudos with longer
5925   lifetimes.  */
5926static int
5927ix86_address_cost (rtx x)
5928{
5929  struct ix86_address parts;
5930  int cost = 1;
5931  int ok = ix86_decompose_address (x, &parts);
5932
5933  gcc_assert (ok);
5934
5935  if (parts.base && GET_CODE (parts.base) == SUBREG)
5936    parts.base = SUBREG_REG (parts.base);
5937  if (parts.index && GET_CODE (parts.index) == SUBREG)
5938    parts.index = SUBREG_REG (parts.index);
5939
5940  /* More complex memory references are better.  */
5941  if (parts.disp && parts.disp != const0_rtx)
5942    cost--;
5943  if (parts.seg != SEG_DEFAULT)
5944    cost--;
5945
5946  /* Attempt to minimize number of registers in the address.  */
5947  if ((parts.base
5948       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5949      || (parts.index
5950	  && (!REG_P (parts.index)
5951	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5952    cost++;
5953
5954  if (parts.base
5955      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5956      && parts.index
5957      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5958      && parts.base != parts.index)
5959    cost++;
5960
5961  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5962     since its predecode logic can't detect the length of such instructions
5963     and decoding degenerates to the vector decoder.  Increase the cost of
5964     such addresses here.  The penalty is at least 2 cycles.  It may be
5965     worthwhile to split such addresses or even refuse them altogether.
5966
5967     The following addressing modes are affected:
5968      [base+scale*index]
5969      [scale*index+disp]
5970      [base+index]
5971
5972     The first and last cases may be avoidable by explicitly coding a zero
5973     displacement in the memory address, but I don't have an AMD-K6 machine
5974     handy to check this theory.  */
5975
5976  if (TARGET_K6
5977      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5978	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5979	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5980    cost += 10;
5981
5982  return cost;
5983}
5984
5985/* If X is a machine specific address (i.e. a symbol or label being
5986   referenced as a displacement from the GOT implemented using an
5987   UNSPEC), then return the base term.  Otherwise return X.  */
5988
5989rtx
5990ix86_find_base_term (rtx x)
5991{
5992  rtx term;
5993
5994  if (TARGET_64BIT)
5995    {
5996      if (GET_CODE (x) != CONST)
5997	return x;
5998      term = XEXP (x, 0);
5999      if (GET_CODE (term) == PLUS
6000	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
6001	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6002	term = XEXP (term, 0);
6003      if (GET_CODE (term) != UNSPEC
6004	  || XINT (term, 1) != UNSPEC_GOTPCREL)
6005	return x;
6006
6007      term = XVECEXP (term, 0, 0);
6008
6009      if (GET_CODE (term) != SYMBOL_REF
6010	  && GET_CODE (term) != LABEL_REF)
6011	return x;
6012
6013      return term;
6014    }
6015
6016  term = ix86_delegitimize_address (x);
6017
6018  if (GET_CODE (term) != SYMBOL_REF
6019      && GET_CODE (term) != LABEL_REF)
6020    return x;
6021
6022  return term;
6023}
6024
6025/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6026   this is used to form addresses of local data when -fPIC is in
6027   use.  */
6028
6029static bool
6030darwin_local_data_pic (rtx disp)
6031{
6032  if (GET_CODE (disp) == MINUS)
6033    {
6034      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6035          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6036        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6037          {
6038            const char *sym_name = XSTR (XEXP (disp, 1), 0);
6039            if (! strcmp (sym_name, "<pic base>"))
6040              return true;
6041          }
6042    }
6043
6044  return false;
6045}
6046
6047/* Determine if a given RTX is a valid constant.  We already know this
6048   satisfies CONSTANT_P.  */
6049
6050bool
6051legitimate_constant_p (rtx x)
6052{
6053  switch (GET_CODE (x))
6054    {
6055    case CONST:
6056      x = XEXP (x, 0);
6057
6058      if (GET_CODE (x) == PLUS)
6059	{
6060	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6061	    return false;
6062	  x = XEXP (x, 0);
6063	}
6064
6065      if (TARGET_MACHO && darwin_local_data_pic (x))
6066	return true;
6067
6068      /* Only some unspecs are valid as "constants".  */
6069      if (GET_CODE (x) == UNSPEC)
6070	switch (XINT (x, 1))
6071	  {
6072	  case UNSPEC_GOTOFF:
6073	    return TARGET_64BIT;
6074	  case UNSPEC_TPOFF:
6075	  case UNSPEC_NTPOFF:
6076	    x = XVECEXP (x, 0, 0);
6077	    return (GET_CODE (x) == SYMBOL_REF
6078		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6079	  case UNSPEC_DTPOFF:
6080	    x = XVECEXP (x, 0, 0);
6081	    return (GET_CODE (x) == SYMBOL_REF
6082		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6083	  default:
6084	    return false;
6085	  }
6086
6087      /* We must have drilled down to a symbol.  */
6088      if (GET_CODE (x) == LABEL_REF)
6089	return true;
6090      if (GET_CODE (x) != SYMBOL_REF)
6091	return false;
6092      /* FALLTHRU */
6093
6094    case SYMBOL_REF:
6095      /* TLS symbols are never valid.  */
6096      if (SYMBOL_REF_TLS_MODEL (x))
6097	return false;
6098      break;
6099
6100    case CONST_DOUBLE:
6101      if (GET_MODE (x) == TImode
6102	  && x != CONST0_RTX (TImode)
6103          && !TARGET_64BIT)
6104	return false;
6105      break;
6106
6107    case CONST_VECTOR:
6108      if (x == CONST0_RTX (GET_MODE (x)))
6109	return true;
6110      return false;
6111
6112    default:
6113      break;
6114    }
6115
6116  /* Otherwise we handle everything else in the move patterns.  */
6117  return true;
6118}
6119
6120/* Determine if it's legal to put X into the constant pool.  This
6121   is not possible for the address of thread-local symbols, which
6122   is checked above.  */
6123
6124static bool
6125ix86_cannot_force_const_mem (rtx x)
6126{
6127  /* We can always put integral constants and vectors in memory.  */
6128  switch (GET_CODE (x))
6129    {
6130    case CONST_INT:
6131    case CONST_DOUBLE:
6132    case CONST_VECTOR:
6133      return false;
6134
6135    default:
6136      break;
6137    }
6138  return !legitimate_constant_p (x);
6139}
6140
6141/* Determine if a given RTX is a valid constant address.  */
6142
6143bool
6144constant_address_p (rtx x)
6145{
6146  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6147}
6148
6149/* Nonzero if the constant value X is a legitimate general operand
6150   when generating PIC code.  It is given that flag_pic is on and
6151   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
6152
6153bool
6154legitimate_pic_operand_p (rtx x)
6155{
6156  rtx inner;
6157
6158  switch (GET_CODE (x))
6159    {
6160    case CONST:
6161      inner = XEXP (x, 0);
6162      if (GET_CODE (inner) == PLUS
6163	  && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6164	inner = XEXP (inner, 0);
6165
6166      /* Only some unspecs are valid as "constants".  */
6167      if (GET_CODE (inner) == UNSPEC)
6168	switch (XINT (inner, 1))
6169	  {
6170	  case UNSPEC_GOTOFF:
6171	    return TARGET_64BIT;
6172	  case UNSPEC_TPOFF:
6173	    x = XVECEXP (inner, 0, 0);
6174	    return (GET_CODE (x) == SYMBOL_REF
6175		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6176	  default:
6177	    return false;
6178	  }
6179      /* FALLTHRU */
6180
6181    case SYMBOL_REF:
6182    case LABEL_REF:
6183      return legitimate_pic_address_disp_p (x);
6184
6185    default:
6186      return true;
6187    }
6188}
6189
6190/* Determine if a given CONST RTX is a valid memory displacement
6191   in PIC mode.  */
6192
6193int
6194legitimate_pic_address_disp_p (rtx disp)
6195{
6196  bool saw_plus;
6197
6198  /* In 64bit mode we can allow direct addresses of symbols and labels
6199     when they are not dynamic symbols.  */
6200  if (TARGET_64BIT)
6201    {
6202      rtx op0 = disp, op1;
6203
6204      switch (GET_CODE (disp))
6205	{
6206	case LABEL_REF:
6207	  return true;
6208
6209	case CONST:
6210	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
6211	    break;
6212	  op0 = XEXP (XEXP (disp, 0), 0);
6213	  op1 = XEXP (XEXP (disp, 0), 1);
6214	  if (GET_CODE (op1) != CONST_INT
6215	      || INTVAL (op1) >= 16*1024*1024
6216	      || INTVAL (op1) < -16*1024*1024)
6217            break;
6218	  if (GET_CODE (op0) == LABEL_REF)
6219	    return true;
6220	  if (GET_CODE (op0) != SYMBOL_REF)
6221	    break;
6222	  /* FALLTHRU */
6223
6224	case SYMBOL_REF:
6225	  /* TLS references should always be enclosed in UNSPEC.  */
6226	  if (SYMBOL_REF_TLS_MODEL (op0))
6227	    return false;
6228	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6229	    return true;
6230	  break;
6231
6232	default:
6233	  break;
6234	}
6235    }
6236  if (GET_CODE (disp) != CONST)
6237    return 0;
6238  disp = XEXP (disp, 0);
6239
6240  if (TARGET_64BIT)
6241    {
6242      /* It is unsafe to allow PLUS expressions; that would exceed the allowed
6243         distance of GOT references.  We should not need them anyway.  */
6244      if (GET_CODE (disp) != UNSPEC
6245	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
6246	      && XINT (disp, 1) != UNSPEC_GOTOFF))
6247	return 0;
6248
6249      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6250	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6251	return 0;
6252      return 1;
6253    }
6254
6255  saw_plus = false;
6256  if (GET_CODE (disp) == PLUS)
6257    {
6258      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6259	return 0;
6260      disp = XEXP (disp, 0);
6261      saw_plus = true;
6262    }
6263
6264  if (TARGET_MACHO && darwin_local_data_pic (disp))
6265    return 1;
6266
6267  if (GET_CODE (disp) != UNSPEC)
6268    return 0;
6269
6270  switch (XINT (disp, 1))
6271    {
6272    case UNSPEC_GOT:
6273      if (saw_plus)
6274	return false;
6275      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6276    case UNSPEC_GOTOFF:
6277      /* Refuse GOTOFF in 64bit mode since it is always 64 bits wide when used.
6278	 While the ABI also specifies a 32bit relocation, we don't produce it in
6279	 the small PIC model at all.  */
6280      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6281	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6282	  && !TARGET_64BIT)
6283        return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6284      return false;
6285    case UNSPEC_GOTTPOFF:
6286    case UNSPEC_GOTNTPOFF:
6287    case UNSPEC_INDNTPOFF:
6288      if (saw_plus)
6289	return false;
6290      disp = XVECEXP (disp, 0, 0);
6291      return (GET_CODE (disp) == SYMBOL_REF
6292	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6293    case UNSPEC_NTPOFF:
6294      disp = XVECEXP (disp, 0, 0);
6295      return (GET_CODE (disp) == SYMBOL_REF
6296	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6297    case UNSPEC_DTPOFF:
6298      disp = XVECEXP (disp, 0, 0);
6299      return (GET_CODE (disp) == SYMBOL_REF
6300	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6301    }
6302
6303  return 0;
6304}
6305
6306/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6307   memory address for an instruction.  The MODE argument is the machine mode
6308   for the MEM expression that wants to use this address.
6309
6310   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
6311   convert common non-canonical forms to canonical form so that they will
6312   be recognized.  */
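/* Illustrative sketch (not part of the original source): a canonical
   base + index*scale + disp address such as
     (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 12))
   decomposes into base B, index A, scale 4 and displacement 12, i.e. the
   AT&T operand 12(%B,%A,4); ix86_decompose_address below fills a
   struct ix86_address with exactly these parts.  */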
6313
6314int
6315legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6316{
6317  struct ix86_address parts;
6318  rtx base, index, disp;
6319  HOST_WIDE_INT scale;
6320  const char *reason = NULL;
6321  rtx reason_rtx = NULL_RTX;
6322
6323  if (TARGET_DEBUG_ADDR)
6324    {
6325      fprintf (stderr,
6326	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6327	       GET_MODE_NAME (mode), strict);
6328      debug_rtx (addr);
6329    }
6330
6331  if (ix86_decompose_address (addr, &parts) <= 0)
6332    {
6333      reason = "decomposition failed";
6334      goto report_error;
6335    }
6336
6337  base = parts.base;
6338  index = parts.index;
6339  disp = parts.disp;
6340  scale = parts.scale;
6341
6342  /* Validate base register.
6343
6344     Don't allow SUBREGs that span more than a word here.  It can lead to spill
6345     failures when the base is one word out of a two-word structure, which is
6346     represented internally as a DImode int.  */
6347
6348  if (base)
6349    {
6350      rtx reg;
6351      reason_rtx = base;
6352
6353      if (REG_P (base))
6354  	reg = base;
6355      else if (GET_CODE (base) == SUBREG
6356	       && REG_P (SUBREG_REG (base))
6357	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6358		  <= UNITS_PER_WORD)
6359  	reg = SUBREG_REG (base);
6360      else
6361	{
6362	  reason = "base is not a register";
6363	  goto report_error;
6364	}
6365
6366      if (GET_MODE (base) != Pmode)
6367	{
6368	  reason = "base is not in Pmode";
6369	  goto report_error;
6370	}
6371
6372      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6373	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6374	{
6375	  reason = "base is not valid";
6376	  goto report_error;
6377	}
6378    }
6379
6380  /* Validate index register.
6381
6382     Don't allow SUBREGs that span more than a word here -- same as above.  */
6383
6384  if (index)
6385    {
6386      rtx reg;
6387      reason_rtx = index;
6388
6389      if (REG_P (index))
6390  	reg = index;
6391      else if (GET_CODE (index) == SUBREG
6392	       && REG_P (SUBREG_REG (index))
6393	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6394		  <= UNITS_PER_WORD)
6395  	reg = SUBREG_REG (index);
6396      else
6397	{
6398	  reason = "index is not a register";
6399	  goto report_error;
6400	}
6401
6402      if (GET_MODE (index) != Pmode)
6403	{
6404	  reason = "index is not in Pmode";
6405	  goto report_error;
6406	}
6407
6408      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6409	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6410	{
6411	  reason = "index is not valid";
6412	  goto report_error;
6413	}
6414    }
6415
6416  /* Validate scale factor.  */
6417  if (scale != 1)
6418    {
6419      reason_rtx = GEN_INT (scale);
6420      if (!index)
6421	{
6422	  reason = "scale without index";
6423	  goto report_error;
6424	}
6425
6426      if (scale != 2 && scale != 4 && scale != 8)
6427	{
6428	  reason = "scale is not a valid multiplier";
6429	  goto report_error;
6430	}
6431    }
6432
6433  /* Validate displacement.  */
6434  if (disp)
6435    {
6436      reason_rtx = disp;
6437
6438      if (GET_CODE (disp) == CONST
6439	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6440	switch (XINT (XEXP (disp, 0), 1))
6441	  {
6442	  /* Refuse GOTOFF and GOT in 64bit mode since they are always 64 bits wide
6443	     when used.  While the ABI also specifies 32bit relocations, we don't
6444	     produce them at all and use IP-relative addressing instead.  */
6445	  case UNSPEC_GOT:
6446	  case UNSPEC_GOTOFF:
6447	    gcc_assert (flag_pic);
6448	    if (!TARGET_64BIT)
6449	      goto is_legitimate_pic;
6450	    reason = "64bit address unspec";
6451	    goto report_error;
6452
6453	  case UNSPEC_GOTPCREL:
6454	    gcc_assert (flag_pic);
6455	    goto is_legitimate_pic;
6456
6457	  case UNSPEC_GOTTPOFF:
6458	  case UNSPEC_GOTNTPOFF:
6459	  case UNSPEC_INDNTPOFF:
6460	  case UNSPEC_NTPOFF:
6461	  case UNSPEC_DTPOFF:
6462	    break;
6463
6464	  default:
6465	    reason = "invalid address unspec";
6466	    goto report_error;
6467	  }
6468
6469      else if (SYMBOLIC_CONST (disp)
6470	       && (flag_pic
6471		   || (TARGET_MACHO
6472#if TARGET_MACHO
6473		       && MACHOPIC_INDIRECT
6474		       && !machopic_operand_p (disp)
6475#endif
6476	       )))
6477	{
6478
6479	is_legitimate_pic:
6480	  if (TARGET_64BIT && (index || base))
6481	    {
6482	      /* foo@dtpoff(%rX) is ok.  */
6483	      if (GET_CODE (disp) != CONST
6484		  || GET_CODE (XEXP (disp, 0)) != PLUS
6485		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6486		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6487		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6488		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6489		{
6490		  reason = "non-constant pic memory reference";
6491		  goto report_error;
6492		}
6493	    }
6494	  else if (! legitimate_pic_address_disp_p (disp))
6495	    {
6496	      reason = "displacement is an invalid pic construct";
6497	      goto report_error;
6498	    }
6499
6500          /* This code used to verify that a symbolic pic displacement
6501	     includes the pic_offset_table_rtx register.
6502
6503	     While this is a good idea, unfortunately these constructs may
6504	     be created by the "adds using lea" optimization for incorrect
6505	     code like:
6506
6507	     int a;
6508	     int foo(int i)
6509	       {
6510	         return *(&a+i);
6511	       }
6512
6513	     This code is nonsensical, but results in addressing the
6514	     GOT table with a pic_offset_table_rtx base.  We can't
6515	     just refuse it easily, since it gets matched by the
6516	     "addsi3" pattern, which later gets split to an lea when
6517	     the output register differs from the input.  While this
6518	     could be handled by a separate addsi pattern for this case
6519	     that never results in an lea, disabling this test seems to
6520	     be the easier and correct fix for the crash.  */
6521	}
6522      else if (GET_CODE (disp) != LABEL_REF
6523	       && GET_CODE (disp) != CONST_INT
6524	       && (GET_CODE (disp) != CONST
6525		   || !legitimate_constant_p (disp))
6526	       && (GET_CODE (disp) != SYMBOL_REF
6527		   || !legitimate_constant_p (disp)))
6528	{
6529	  reason = "displacement is not constant";
6530	  goto report_error;
6531	}
6532      else if (TARGET_64BIT
6533	       && !x86_64_immediate_operand (disp, VOIDmode))
6534	{
6535	  reason = "displacement is out of range";
6536	  goto report_error;
6537	}
6538    }
6539
6540  /* Everything looks valid.  */
6541  if (TARGET_DEBUG_ADDR)
6542    fprintf (stderr, "Success.\n");
6543  return TRUE;
6544
6545 report_error:
6546  if (TARGET_DEBUG_ADDR)
6547    {
6548      fprintf (stderr, "Error: %s\n", reason);
6549      debug_rtx (reason_rtx);
6550    }
6551  return FALSE;
6552}
6553
6554/* Return a unique alias set for the GOT.  */
6555
6556static HOST_WIDE_INT
6557ix86_GOT_alias_set (void)
6558{
6559  static HOST_WIDE_INT set = -1;
6560  if (set == -1)
6561    set = new_alias_set ();
6562  return set;
6563}
6564
6565/* Return a legitimate reference for ORIG (an address) using the
6566   register REG.  If REG is 0, a new pseudo is generated.
6567
6568   There are two types of references that must be handled:
6569
6570   1. Global data references must load the address from the GOT, via
6571      the PIC reg.  An insn is emitted to do this load, and the reg is
6572      returned.
6573
6574   2. Static data references, constant pool addresses, and code labels
6575      compute the address as an offset from the GOT, whose base is in
6576      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
6577      differentiate them from global data objects.  The returned
6578      address is the PIC reg + an unspec constant.
6579
6580   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6581   reg also appears in the address.  */
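/* Illustrative sketch (not from the original source): on 32-bit ELF the
   two cases above typically end up as
     movl  sym@GOT(%ebx), %reg     -- global data, loaded through the GOT
     leal  sym@GOTOFF(%ebx), %reg  -- local data, an offset from the GOT base
   where %ebx holds pic_offset_table_rtx.  */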
6582
6583static rtx
6584legitimize_pic_address (rtx orig, rtx reg)
6585{
6586  rtx addr = orig;
6587  rtx new = orig;
6588  rtx base;
6589
6590#if TARGET_MACHO
6591  if (TARGET_MACHO && !TARGET_64BIT)
6592    {
6593      if (reg == 0)
6594	reg = gen_reg_rtx (Pmode);
6595      /* Use the generic Mach-O PIC machinery.  */
6596      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6597    }
6598#endif
6599
6600  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6601    new = addr;
6602  else if (TARGET_64BIT
6603	   && ix86_cmodel != CM_SMALL_PIC
6604	   && local_symbolic_operand (addr, Pmode))
6605    {
6606      rtx tmpreg;
6607      /* This symbol may be referenced via a displacement from the PIC
6608	 base address (@GOTOFF).  */
6609
6610      if (reload_in_progress)
6611	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6612      if (GET_CODE (addr) == CONST)
6613	addr = XEXP (addr, 0);
6614      if (GET_CODE (addr) == PLUS)
6615	{
6616	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6617	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6618	}
6619      else
6620	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6621      new = gen_rtx_CONST (Pmode, new);
6622      if (!reg)
6623        tmpreg = gen_reg_rtx (Pmode);
6624      else
6625	tmpreg = reg;
6626      emit_move_insn (tmpreg, new);
6627
6628      if (reg != 0)
6629	{
6630	  new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6631				     tmpreg, 1, OPTAB_DIRECT);
6632	  new = reg;
6633	}
6634      else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6635    }
6636  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6637    {
6638      /* This symbol may be referenced via a displacement from the PIC
6639	 base address (@GOTOFF).  */
6640
6641      if (reload_in_progress)
6642	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6643      if (GET_CODE (addr) == CONST)
6644	addr = XEXP (addr, 0);
6645      if (GET_CODE (addr) == PLUS)
6646	{
6647	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6648	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6649	}
6650      else
6651	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6652      new = gen_rtx_CONST (Pmode, new);
6653      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6654
6655      if (reg != 0)
6656	{
6657	  emit_move_insn (reg, new);
6658	  new = reg;
6659	}
6660    }
6661  else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6662    {
6663      if (TARGET_64BIT)
6664	{
6665	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6666	  new = gen_rtx_CONST (Pmode, new);
6667	  new = gen_const_mem (Pmode, new);
6668	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6669
6670	  if (reg == 0)
6671	    reg = gen_reg_rtx (Pmode);
6672	  /* Use gen_movsi directly; otherwise the address is loaded
6673	     into a register for CSE.  We don't want to CSE this address;
6674	     instead we CSE addresses loaded from the GOT table, so skip this.  */
6675	  emit_insn (gen_movsi (reg, new));
6676	  new = reg;
6677	}
6678      else
6679	{
6680	  /* This symbol must be referenced via a load from the
6681	     Global Offset Table (@GOT).  */
6682
6683	  if (reload_in_progress)
6684	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6685	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6686	  new = gen_rtx_CONST (Pmode, new);
6687	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6688	  new = gen_const_mem (Pmode, new);
6689	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6690
6691	  if (reg == 0)
6692	    reg = gen_reg_rtx (Pmode);
6693	  emit_move_insn (reg, new);
6694	  new = reg;
6695	}
6696    }
6697  else
6698    {
6699      if (GET_CODE (addr) == CONST_INT
6700	  && !x86_64_immediate_operand (addr, VOIDmode))
6701	{
6702	  if (reg)
6703	    {
6704	      emit_move_insn (reg, addr);
6705	      new = reg;
6706	    }
6707	  else
6708	    new = force_reg (Pmode, addr);
6709	}
6710      else if (GET_CODE (addr) == CONST)
6711	{
6712	  addr = XEXP (addr, 0);
6713
6714	  /* We must match the forms we generated earlier.  Assume the only
6715	     unspecs that can get here are ours.  Not that we could do
6716	     anything with them anyway....  */
6717	  if (GET_CODE (addr) == UNSPEC
6718	      || (GET_CODE (addr) == PLUS
6719		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6720	    return orig;
6721	  gcc_assert (GET_CODE (addr) == PLUS);
6722	}
6723      if (GET_CODE (addr) == PLUS)
6724	{
6725	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6726
6727	  /* Check first to see if this is a constant offset from a @GOTOFF
6728	     symbol reference.  */
6729	  if (local_symbolic_operand (op0, Pmode)
6730	      && GET_CODE (op1) == CONST_INT)
6731	    {
6732	      if (!TARGET_64BIT)
6733		{
6734		  if (reload_in_progress)
6735		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6736		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6737					UNSPEC_GOTOFF);
6738		  new = gen_rtx_PLUS (Pmode, new, op1);
6739		  new = gen_rtx_CONST (Pmode, new);
6740		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6741
6742		  if (reg != 0)
6743		    {
6744		      emit_move_insn (reg, new);
6745		      new = reg;
6746		    }
6747		}
6748	      else
6749		{
6750		  if (INTVAL (op1) < -16*1024*1024
6751		      || INTVAL (op1) >= 16*1024*1024)
6752		    {
6753		      if (!x86_64_immediate_operand (op1, Pmode))
6754			op1 = force_reg (Pmode, op1);
6755		      new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6756		    }
6757		}
6758	    }
6759	  else
6760	    {
6761	      base = legitimize_pic_address (XEXP (addr, 0), reg);
6762	      new  = legitimize_pic_address (XEXP (addr, 1),
6763					     base == reg ? NULL_RTX : reg);
6764
6765	      if (GET_CODE (new) == CONST_INT)
6766		new = plus_constant (base, INTVAL (new));
6767	      else
6768		{
6769		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6770		    {
6771		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6772		      new = XEXP (new, 1);
6773		    }
6774		  new = gen_rtx_PLUS (Pmode, base, new);
6775		}
6776	    }
6777	}
6778    }
6779  return new;
6780}
6781
6782/* Load the thread pointer.  If TO_REG is true, force it into a register.  */
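/* Sketch (assumption, not from the original source): on GNU/Linux the
   thread pointer lives in a segment register (%gs for 32-bit, %fs for
   64-bit), so the UNSPEC_TP built below is eventually emitted as a load
   such as "movl %gs:0, %reg".  */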
6783
6784static rtx
6785get_thread_pointer (int to_reg)
6786{
6787  rtx tp, reg, insn;
6788
6789  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6790  if (!to_reg)
6791    return tp;
6792
6793  reg = gen_reg_rtx (Pmode);
6794  insn = gen_rtx_SET (VOIDmode, reg, tp);
6795  insn = emit_insn (insn);
6796
6797  return reg;
6798}
6799
6800/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
6801   false if we expect this to be used for a memory address and true if
6802   we expect to load the address into a register.  */
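/* Rough correspondence (illustrative, not from the original source):
   global-dynamic and local-dynamic call __tls_get_addr (or use the
   descriptor-style GNU2 sequences), initial-exec loads the offset from
   the GOT (@GOTTPOFF/@INDNTPOFF), and local-exec folds the offset into
   the address directly (@TPOFF/@NTPOFF).  */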
6803
6804static rtx
6805legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6806{
6807  rtx dest, base, off, pic, tp;
6808  int type;
6809
6810  switch (model)
6811    {
6812    case TLS_MODEL_GLOBAL_DYNAMIC:
6813      dest = gen_reg_rtx (Pmode);
6814      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6815
6816      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6817	{
6818	  rtx rax = gen_rtx_REG (Pmode, 0), insns;
6819
6820	  start_sequence ();
6821	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6822	  insns = get_insns ();
6823	  end_sequence ();
6824
6825	  emit_libcall_block (insns, dest, rax, x);
6826	}
6827      else if (TARGET_64BIT && TARGET_GNU2_TLS)
6828	emit_insn (gen_tls_global_dynamic_64 (dest, x));
6829      else
6830	emit_insn (gen_tls_global_dynamic_32 (dest, x));
6831
6832      if (TARGET_GNU2_TLS)
6833	{
6834	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6835
6836	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6837	}
6838      break;
6839
6840    case TLS_MODEL_LOCAL_DYNAMIC:
6841      base = gen_reg_rtx (Pmode);
6842      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6843
6844      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6845	{
6846	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6847
6848	  start_sequence ();
6849	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6850	  insns = get_insns ();
6851	  end_sequence ();
6852
6853	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6854	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6855	  emit_libcall_block (insns, base, rax, note);
6856	}
6857      else if (TARGET_64BIT && TARGET_GNU2_TLS)
6858	emit_insn (gen_tls_local_dynamic_base_64 (base));
6859      else
6860	emit_insn (gen_tls_local_dynamic_base_32 (base));
6861
6862      if (TARGET_GNU2_TLS)
6863	{
6864	  rtx x = ix86_tls_module_base ();
6865
6866	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
6867			       gen_rtx_MINUS (Pmode, x, tp));
6868	}
6869
6870      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6871      off = gen_rtx_CONST (Pmode, off);
6872
6873      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6874
6875      if (TARGET_GNU2_TLS)
6876	{
6877	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6878
6879	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6880	}
6881
6882      break;
6883
6884    case TLS_MODEL_INITIAL_EXEC:
6885      if (TARGET_64BIT)
6886	{
6887	  pic = NULL;
6888	  type = UNSPEC_GOTNTPOFF;
6889	}
6890      else if (flag_pic)
6891	{
6892	  if (reload_in_progress)
6893	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6894	  pic = pic_offset_table_rtx;
6895	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6896	}
6897      else if (!TARGET_ANY_GNU_TLS)
6898	{
6899	  pic = gen_reg_rtx (Pmode);
6900	  emit_insn (gen_set_got (pic));
6901	  type = UNSPEC_GOTTPOFF;
6902	}
6903      else
6904	{
6905	  pic = NULL;
6906	  type = UNSPEC_INDNTPOFF;
6907	}
6908
6909      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6910      off = gen_rtx_CONST (Pmode, off);
6911      if (pic)
6912	off = gen_rtx_PLUS (Pmode, pic, off);
6913      off = gen_const_mem (Pmode, off);
6914      set_mem_alias_set (off, ix86_GOT_alias_set ());
6915
6916      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6917	{
6918          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6919	  off = force_reg (Pmode, off);
6920	  return gen_rtx_PLUS (Pmode, base, off);
6921	}
6922      else
6923	{
6924	  base = get_thread_pointer (true);
6925	  dest = gen_reg_rtx (Pmode);
6926	  emit_insn (gen_subsi3 (dest, base, off));
6927	}
6928      break;
6929
6930    case TLS_MODEL_LOCAL_EXEC:
6931      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6932			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6933			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6934      off = gen_rtx_CONST (Pmode, off);
6935
6936      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6937	{
6938	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6939	  return gen_rtx_PLUS (Pmode, base, off);
6940	}
6941      else
6942	{
6943	  base = get_thread_pointer (true);
6944	  dest = gen_reg_rtx (Pmode);
6945	  emit_insn (gen_subsi3 (dest, base, off));
6946	}
6947      break;
6948
6949    default:
6950      gcc_unreachable ();
6951    }
6952
6953  return dest;
6954}
6955
6956/* Try machine-dependent ways of modifying an illegitimate address
6957   to be legitimate.  If we find one, return the new, valid address.
6958   This macro is used in only one place: `memory_address' in explow.c.
6959
6960   OLDX is the address as it was before break_out_memory_refs was called.
6961   In some cases it is useful to look at this to decide what needs to be done.
6962
6963   MODE and WIN are passed so that this macro can use
6964   GO_IF_LEGITIMATE_ADDRESS.
6965
6966   It is always safe for this macro to do nothing.  It exists to recognize
6967   opportunities to optimize the output.
6968
6969   For the 80386, we handle X+REG by loading X into a register R and
6970   using R+REG.  R will go in a general reg and indexing will be used.
6971   However, if REG is a broken-out memory address or multiplication,
6972   nothing needs to be done because REG can certainly go in a general reg.
6973
6974   When -fpic is used, special handling is needed for symbolic references.
6975   See comments by legitimize_pic_address in i386.c for details.  */
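/* Example (sketch, not from the original source): for SImode,
   (plus (ashift (reg A) (const_int 2)) (reg B)) is first rewritten to
   (plus (mult (reg A) (const_int 4)) (reg B)), which matches the SIB
   scale directly, e.g. (%B,%A,4) in AT&T syntax.  */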
6976
6977rtx
6978legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6979{
6980  int changed = 0;
6981  unsigned log;
6982
6983  if (TARGET_DEBUG_ADDR)
6984    {
6985      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6986	       GET_MODE_NAME (mode));
6987      debug_rtx (x);
6988    }
6989
6990  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6991  if (log)
6992    return legitimize_tls_address (x, log, false);
6993  if (GET_CODE (x) == CONST
6994      && GET_CODE (XEXP (x, 0)) == PLUS
6995      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6996      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6997    {
6998      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6999      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7000    }
7001
7002  if (flag_pic && SYMBOLIC_CONST (x))
7003    return legitimize_pic_address (x, 0);
7004
7005  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7006  if (GET_CODE (x) == ASHIFT
7007      && GET_CODE (XEXP (x, 1)) == CONST_INT
7008      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7009    {
7010      changed = 1;
7011      log = INTVAL (XEXP (x, 1));
7012      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7013			GEN_INT (1 << log));
7014    }
7015
7016  if (GET_CODE (x) == PLUS)
7017    {
7018      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7019
7020      if (GET_CODE (XEXP (x, 0)) == ASHIFT
7021	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7022	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7023	{
7024	  changed = 1;
7025	  log = INTVAL (XEXP (XEXP (x, 0), 1));
7026	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
7027				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7028				      GEN_INT (1 << log));
7029	}
7030
7031      if (GET_CODE (XEXP (x, 1)) == ASHIFT
7032	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7033	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7034	{
7035	  changed = 1;
7036	  log = INTVAL (XEXP (XEXP (x, 1), 1));
7037	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
7038				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7039				      GEN_INT (1 << log));
7040	}
7041
7042      /* Put multiply first if it isn't already.  */
7043      if (GET_CODE (XEXP (x, 1)) == MULT)
7044	{
7045	  rtx tmp = XEXP (x, 0);
7046	  XEXP (x, 0) = XEXP (x, 1);
7047	  XEXP (x, 1) = tmp;
7048	  changed = 1;
7049	}
7050
7051      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7052	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
7053	 created by virtual register instantiation, register elimination, and
7054	 similar optimizations.  */
7055      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7056	{
7057	  changed = 1;
7058	  x = gen_rtx_PLUS (Pmode,
7059			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
7060					  XEXP (XEXP (x, 1), 0)),
7061			    XEXP (XEXP (x, 1), 1));
7062	}
7063
7064      /* Canonicalize
7065	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7066	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
7067      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7068	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7069	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7070	       && CONSTANT_P (XEXP (x, 1)))
7071	{
7072	  rtx constant;
7073	  rtx other = NULL_RTX;
7074
7075	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7076	    {
7077	      constant = XEXP (x, 1);
7078	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7079	    }
7080	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7081	    {
7082	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7083	      other = XEXP (x, 1);
7084	    }
7085	  else
7086	    constant = 0;
7087
7088	  if (constant)
7089	    {
7090	      changed = 1;
7091	      x = gen_rtx_PLUS (Pmode,
7092				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7093					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
7094				plus_constant (other, INTVAL (constant)));
7095	    }
7096	}
7097
7098      if (changed && legitimate_address_p (mode, x, FALSE))
7099	return x;
7100
7101      if (GET_CODE (XEXP (x, 0)) == MULT)
7102	{
7103	  changed = 1;
7104	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7105	}
7106
7107      if (GET_CODE (XEXP (x, 1)) == MULT)
7108	{
7109	  changed = 1;
7110	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7111	}
7112
7113      if (changed
7114	  && GET_CODE (XEXP (x, 1)) == REG
7115	  && GET_CODE (XEXP (x, 0)) == REG)
7116	return x;
7117
7118      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7119	{
7120	  changed = 1;
7121	  x = legitimize_pic_address (x, 0);
7122	}
7123
7124      if (changed && legitimate_address_p (mode, x, FALSE))
7125	return x;
7126
7127      if (GET_CODE (XEXP (x, 0)) == REG)
7128	{
7129	  rtx temp = gen_reg_rtx (Pmode);
7130	  rtx val  = force_operand (XEXP (x, 1), temp);
7131	  if (val != temp)
7132	    emit_move_insn (temp, val);
7133
7134	  XEXP (x, 1) = temp;
7135	  return x;
7136	}
7137
7138      else if (GET_CODE (XEXP (x, 1)) == REG)
7139	{
7140	  rtx temp = gen_reg_rtx (Pmode);
7141	  rtx val  = force_operand (XEXP (x, 0), temp);
7142	  if (val != temp)
7143	    emit_move_insn (temp, val);
7144
7145	  XEXP (x, 0) = temp;
7146	  return x;
7147	}
7148    }
7149
7150  return x;
7151}
7152
7153/* Print an integer constant expression in assembler syntax.  Addition
7154   and subtraction are the only arithmetic that may appear in these
7155   expressions.  FILE is the stdio stream to write to, X is the rtx, and
7156   CODE is the operand print code from the output string.  */
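/* For instance (sketch, not from the original source), the RTX
   (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)) is printed as
   "sym@GOTOFF", and (unspec [(symbol_ref "sym")] UNSPEC_GOT) as
   "sym@GOT"; see the UNSPEC case below.  */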
7157
7158static void
7159output_pic_addr_const (FILE *file, rtx x, int code)
7160{
7161  char buf[256];
7162
7163  switch (GET_CODE (x))
7164    {
7165    case PC:
7166      gcc_assert (flag_pic);
7167      putc ('.', file);
7168      break;
7169
7170    case SYMBOL_REF:
7171      if (! TARGET_MACHO || TARGET_64BIT)
7172	output_addr_const (file, x);
7173      else
7174	{
7175	  const char *name = XSTR (x, 0);
7176
7177	  /* Mark the decl as referenced so that cgraph will output the function.  */
7178	  if (SYMBOL_REF_DECL (x))
7179	    mark_decl_referenced (SYMBOL_REF_DECL (x));
7180
7181#if TARGET_MACHO
7182	  if (MACHOPIC_INDIRECT
7183	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7184	    name = machopic_indirection_name (x, /*stub_p=*/true);
7185#endif
7186	  assemble_name (file, name);
7187	}
7188      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7189	fputs ("@PLT", file);
7190      break;
7191
7192    case LABEL_REF:
7193      x = XEXP (x, 0);
7194      /* FALLTHRU */
7195    case CODE_LABEL:
7196      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7197      assemble_name (asm_out_file, buf);
7198      break;
7199
7200    case CONST_INT:
7201      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7202      break;
7203
7204    case CONST:
7205      /* This used to output parentheses around the expression,
7206	 but that does not work on the 386 (either ATT or BSD assembler).  */
7207      output_pic_addr_const (file, XEXP (x, 0), code);
7208      break;
7209
7210    case CONST_DOUBLE:
7211      if (GET_MODE (x) == VOIDmode)
7212	{
7213	  /* We can use %d if the number is <32 bits and positive.  */
7214	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7215	    fprintf (file, "0x%lx%08lx",
7216		     (unsigned long) CONST_DOUBLE_HIGH (x),
7217		     (unsigned long) CONST_DOUBLE_LOW (x));
7218	  else
7219	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7220	}
7221      else
7222	/* We can't handle floating point constants;
7223	   PRINT_OPERAND must handle them.  */
7224	output_operand_lossage ("floating constant misused");
7225      break;
7226
7227    case PLUS:
7228      /* Some assemblers need integer constants to appear first.  */
7229      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7230	{
7231	  output_pic_addr_const (file, XEXP (x, 0), code);
7232	  putc ('+', file);
7233	  output_pic_addr_const (file, XEXP (x, 1), code);
7234	}
7235      else
7236	{
7237	  gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7238	  output_pic_addr_const (file, XEXP (x, 1), code);
7239	  putc ('+', file);
7240	  output_pic_addr_const (file, XEXP (x, 0), code);
7241	}
7242      break;
7243
7244    case MINUS:
7245      if (!TARGET_MACHO)
7246	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7247      output_pic_addr_const (file, XEXP (x, 0), code);
7248      putc ('-', file);
7249      output_pic_addr_const (file, XEXP (x, 1), code);
7250      if (!TARGET_MACHO)
7251	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7252      break;
7253
7254     case UNSPEC:
7255       gcc_assert (XVECLEN (x, 0) == 1);
7256       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7257       switch (XINT (x, 1))
7258	{
7259	case UNSPEC_GOT:
7260	  fputs ("@GOT", file);
7261	  break;
7262	case UNSPEC_GOTOFF:
7263	  fputs ("@GOTOFF", file);
7264	  break;
7265	case UNSPEC_GOTPCREL:
7266	  fputs ("@GOTPCREL(%rip)", file);
7267	  break;
7268	case UNSPEC_GOTTPOFF:
7269	  /* FIXME: This might be @TPOFF in Sun ld too.  */
7270	  fputs ("@GOTTPOFF", file);
7271	  break;
7272	case UNSPEC_TPOFF:
7273	  fputs ("@TPOFF", file);
7274	  break;
7275	case UNSPEC_NTPOFF:
7276	  if (TARGET_64BIT)
7277	    fputs ("@TPOFF", file);
7278	  else
7279	    fputs ("@NTPOFF", file);
7280	  break;
7281	case UNSPEC_DTPOFF:
7282	  fputs ("@DTPOFF", file);
7283	  break;
7284	case UNSPEC_GOTNTPOFF:
7285	  if (TARGET_64BIT)
7286	    fputs ("@GOTTPOFF(%rip)", file);
7287	  else
7288	    fputs ("@GOTNTPOFF", file);
7289	  break;
7290	case UNSPEC_INDNTPOFF:
7291	  fputs ("@INDNTPOFF", file);
7292	  break;
7293	default:
7294	  output_operand_lossage ("invalid UNSPEC as operand");
7295	  break;
7296	}
7297       break;
7298
7299    default:
7300      output_operand_lossage ("invalid expression as operand");
7301    }
7302}
7303
7304/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7305   We need to emit DTP-relative relocations.  */
7306
7307static void
7308i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7309{
7310  fputs (ASM_LONG, file);
7311  output_addr_const (file, x);
7312  fputs ("@DTPOFF", file);
7313  switch (size)
7314    {
7315    case 4:
7316      break;
7317    case 8:
7318      fputs (", 0", file);
7319      break;
7320    default:
7321      gcc_unreachable ();
7322   }
7323}
7324
7325/* In the name of slightly smaller debug output, and to cater to
7326   general assembler lossage, recognize PIC+GOTOFF and turn it back
7327   into a direct symbol reference.
7328
7329   On Darwin, this is necessary to avoid a crash, because Darwin
7330   has a different PIC label for each routine but the DWARF debugging
7331   information is not associated with any particular routine, so it's
7332   necessary to remove references to the PIC label from RTL stored by
7333   the DWARF output code.  */
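/* Example (sketch, not from the original source): the 32-bit PIC form
   (plus (reg %ebx) (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)))
   is turned back into plain (symbol_ref "sym") when ORIG_X is not a MEM.  */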
7334
7335static rtx
7336ix86_delegitimize_address (rtx orig_x)
7337{
7338  rtx x = orig_x;
7339  /* reg_addend is NULL or a multiple of some register.  */
7340  rtx reg_addend = NULL_RTX;
7341  /* const_addend is NULL or a const_int.  */
7342  rtx const_addend = NULL_RTX;
7343  /* This is the result, or NULL.  */
7344  rtx result = NULL_RTX;
7345
7346  if (GET_CODE (x) == MEM)
7347    x = XEXP (x, 0);
7348
7349  if (TARGET_64BIT)
7350    {
7351      if (GET_CODE (x) != CONST
7352	  || GET_CODE (XEXP (x, 0)) != UNSPEC
7353	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7354	  || GET_CODE (orig_x) != MEM)
7355	return orig_x;
7356      return XVECEXP (XEXP (x, 0), 0, 0);
7357    }
7358
7359  if (GET_CODE (x) != PLUS
7360      || GET_CODE (XEXP (x, 1)) != CONST)
7361    return orig_x;
7362
7363  if (GET_CODE (XEXP (x, 0)) == REG
7364      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7365    /* %ebx + GOT/GOTOFF */
7366    ;
7367  else if (GET_CODE (XEXP (x, 0)) == PLUS)
7368    {
7369      /* %ebx + %reg * scale + GOT/GOTOFF */
7370      reg_addend = XEXP (x, 0);
7371      if (GET_CODE (XEXP (reg_addend, 0)) == REG
7372	  && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7373	reg_addend = XEXP (reg_addend, 1);
7374      else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7375	       && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7376	reg_addend = XEXP (reg_addend, 0);
7377      else
7378	return orig_x;
7379      if (GET_CODE (reg_addend) != REG
7380	  && GET_CODE (reg_addend) != MULT
7381	  && GET_CODE (reg_addend) != ASHIFT)
7382	return orig_x;
7383    }
7384  else
7385    return orig_x;
7386
7387  x = XEXP (XEXP (x, 1), 0);
7388  if (GET_CODE (x) == PLUS
7389      && GET_CODE (XEXP (x, 1)) == CONST_INT)
7390    {
7391      const_addend = XEXP (x, 1);
7392      x = XEXP (x, 0);
7393    }
7394
7395  if (GET_CODE (x) == UNSPEC
7396      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7397	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7398    result = XVECEXP (x, 0, 0);
7399
7400  if (TARGET_MACHO && darwin_local_data_pic (x)
7401      && GET_CODE (orig_x) != MEM)
7402    result = XEXP (x, 0);
7403
7404  if (! result)
7405    return orig_x;
7406
7407  if (const_addend)
7408    result = gen_rtx_PLUS (Pmode, result, const_addend);
7409  if (reg_addend)
7410    result = gen_rtx_PLUS (Pmode, reg_addend, result);
7411  return result;
7412}
7413
7414static void
7415put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7416		    int fp, FILE *file)
7417{
7418  const char *suffix;
7419
7420  if (mode == CCFPmode || mode == CCFPUmode)
7421    {
7422      enum rtx_code second_code, bypass_code;
7423      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7424      gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7425      code = ix86_fp_compare_code_to_integer (code);
7426      mode = CCmode;
7427    }
7428  if (reverse)
7429    code = reverse_condition (code);
7430
7431  switch (code)
7432    {
7433    case EQ:
7434      suffix = "e";
7435      break;
7436    case NE:
7437      suffix = "ne";
7438      break;
7439    case GT:
7440      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7441      suffix = "g";
7442      break;
7443    case GTU:
7444      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7445	 Those same assemblers have the same but opposite lossage on cmov.  */
7446      gcc_assert (mode == CCmode);
7447      suffix = fp ? "nbe" : "a";
7448      break;
7449    case LT:
7450      switch (mode)
7451	{
7452	case CCNOmode:
7453	case CCGOCmode:
7454	  suffix = "s";
7455	  break;
7456
7457	case CCmode:
7458	case CCGCmode:
7459	  suffix = "l";
7460	  break;
7461
7462	default:
7463	  gcc_unreachable ();
7464	}
7465      break;
7466    case LTU:
7467      gcc_assert (mode == CCmode);
7468      suffix = "b";
7469      break;
7470    case GE:
7471      switch (mode)
7472	{
7473	case CCNOmode:
7474	case CCGOCmode:
7475	  suffix = "ns";
7476	  break;
7477
7478	case CCmode:
7479	case CCGCmode:
7480	  suffix = "ge";
7481	  break;
7482
7483	default:
7484	  gcc_unreachable ();
7485	}
7486      break;
7487    case GEU:
7488      /* ??? As above.  */
7489      gcc_assert (mode == CCmode);
7490      suffix = fp ? "nb" : "ae";
7491      break;
7492    case LE:
7493      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7494      suffix = "le";
7495      break;
7496    case LEU:
7497      gcc_assert (mode == CCmode);
7498      suffix = "be";
7499      break;
7500    case UNORDERED:
7501      suffix = fp ? "u" : "p";
7502      break;
7503    case ORDERED:
7504      suffix = fp ? "nu" : "np";
7505      break;
7506    default:
7507      gcc_unreachable ();
7508    }
7509  fputs (suffix, file);
7510}
7511
7512/* Print the name of register X to FILE based on its machine mode and number.
7513   If CODE is 'w', pretend the mode is HImode.
7514   If CODE is 'b', pretend the mode is QImode.
7515   If CODE is 'k', pretend the mode is SImode.
7516   If CODE is 'q', pretend the mode is DImode.
7517   If CODE is 'h', pretend the reg is the 'high' byte register.
7518   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
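/* For example (sketch, not from the original source): given the hard
   register ax, code 'k' prints "%eax", 'w' prints "%ax", 'b' prints
   "%al" and 'h' prints "%ah" in AT&T syntax.  */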
7519
7520void
7521print_reg (rtx x, int code, FILE *file)
7522{
7523  gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7524	      && REGNO (x) != FRAME_POINTER_REGNUM
7525	      && REGNO (x) != FLAGS_REG
7526	      && REGNO (x) != FPSR_REG);
7527
7528  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7529    putc ('%', file);
7530
7531  if (code == 'w' || MMX_REG_P (x))
7532    code = 2;
7533  else if (code == 'b')
7534    code = 1;
7535  else if (code == 'k')
7536    code = 4;
7537  else if (code == 'q')
7538    code = 8;
7539  else if (code == 'y')
7540    code = 3;
7541  else if (code == 'h')
7542    code = 0;
7543  else
7544    code = GET_MODE_SIZE (GET_MODE (x));
7545
7546  /* Irritatingly, AMD extended registers use a different naming convention
7547     from the normal registers.  */
7548  if (REX_INT_REG_P (x))
7549    {
7550      gcc_assert (TARGET_64BIT);
7551      switch (code)
7552	{
7553	  case 0:
7554	    error ("extended registers have no high halves");
7555	    break;
7556	  case 1:
7557	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7558	    break;
7559	  case 2:
7560	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7561	    break;
7562	  case 4:
7563	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7564	    break;
7565	  case 8:
7566	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7567	    break;
7568	  default:
7569	    error ("unsupported operand size for extended register");
7570	    break;
7571	}
7572      return;
7573    }
7574  switch (code)
7575    {
7576    case 3:
7577      if (STACK_TOP_P (x))
7578	{
7579	  fputs ("st(0)", file);
7580	  break;
7581	}
7582      /* FALLTHRU */
7583    case 8:
7584    case 4:
7585    case 12:
7586      if (! ANY_FP_REG_P (x))
7587	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7588      /* FALLTHRU */
7589    case 16:
7590    case 2:
7591    normal:
7592      fputs (hi_reg_name[REGNO (x)], file);
7593      break;
7594    case 1:
7595      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7596	goto normal;
7597      fputs (qi_reg_name[REGNO (x)], file);
7598      break;
7599    case 0:
7600      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7601	goto normal;
7602      fputs (qi_high_reg_name[REGNO (x)], file);
7603      break;
7604    default:
7605      gcc_unreachable ();
7606    }
7607}
7608
7609/* Locate some local-dynamic symbol still in use by this function
7610   so that we can print its name in some tls_local_dynamic_base
7611   pattern.  */
7612
7613static const char *
7614get_some_local_dynamic_name (void)
7615{
7616  rtx insn;
7617
7618  if (cfun->machine->some_ld_name)
7619    return cfun->machine->some_ld_name;
7620
7621  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7622    if (INSN_P (insn)
7623	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7624      return cfun->machine->some_ld_name;
7625
7626  gcc_unreachable ();
7627}
7628
7629static int
7630get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7631{
7632  rtx x = *px;
7633
7634  if (GET_CODE (x) == SYMBOL_REF
7635      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7636    {
7637      cfun->machine->some_ld_name = XSTR (x, 0);
7638      return 1;
7639    }
7640
7641  return 0;
7642}
7643
7644/* Meaning of CODE:
7645   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7646   C -- print opcode suffix for set/cmov insn.
7647   c -- like C, but print reversed condition
7648   F,f -- likewise, but for floating-point.
7649   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7650        otherwise nothing
7651   R -- print the prefix for register names.
7652   z -- print the opcode suffix for the size of the current operand.
7653   * -- print a star (in certain assembler syntax)
7654   A -- print an absolute memory reference.
7655   w -- print the operand as if it's a "word" (HImode) even if it isn't.
7656   s -- print a shift double count, followed by the assembler's argument
7657	delimiter.
7658   b -- print the QImode name of the register for the indicated operand.
7659	%b0 would print %al if operands[0] is reg 0.
7660   w --  likewise, print the HImode name of the register.
7661   k --  likewise, print the SImode name of the register.
7662   q --  likewise, print the DImode name of the register.
7663   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7664   y -- print "st(0)" instead of "st" as a register.
7665   D -- print condition for SSE cmp instruction.
7666   P -- if PIC, print an @PLT suffix.
7667   X -- don't print any sort of PIC '@' suffix for a symbol.
7668   & -- print some in-use local-dynamic symbol name.
7669   H -- print a memory address offset by 8; used for sse high-parts
7670 */
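/* For example (assumption, not from the original source): in a template
   such as "fild%z0\t%0", %z emits the size suffix derived from operand
   0's mode, so an SImode memory operand yields "fildl".  */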
7671
7672void
7673print_operand (FILE *file, rtx x, int code)
7674{
7675  if (code)
7676    {
7677      switch (code)
7678	{
7679	case '*':
7680	  if (ASSEMBLER_DIALECT == ASM_ATT)
7681	    putc ('*', file);
7682	  return;
7683
7684	case '&':
7685	  assemble_name (file, get_some_local_dynamic_name ());
7686	  return;
7687
7688	case 'A':
7689	  switch (ASSEMBLER_DIALECT)
7690	    {
7691	    case ASM_ATT:
7692	      putc ('*', file);
7693	      break;
7694
7695	    case ASM_INTEL:
7696	      /* Intel syntax.  For absolute addresses, registers should not
7697		 be surrounded by brackets.  */
7698	      if (GET_CODE (x) != REG)
7699		{
7700		  putc ('[', file);
7701		  PRINT_OPERAND (file, x, 0);
7702		  putc (']', file);
7703		  return;
7704		}
7705	      break;
7706
7707	    default:
7708	      gcc_unreachable ();
7709	    }
7710
7711	  PRINT_OPERAND (file, x, 0);
7712	  return;
7713
7714
7715	case 'L':
7716	  if (ASSEMBLER_DIALECT == ASM_ATT)
7717	    putc ('l', file);
7718	  return;
7719
7720	case 'W':
7721	  if (ASSEMBLER_DIALECT == ASM_ATT)
7722	    putc ('w', file);
7723	  return;
7724
7725	case 'B':
7726	  if (ASSEMBLER_DIALECT == ASM_ATT)
7727	    putc ('b', file);
7728	  return;
7729
7730	case 'Q':
7731	  if (ASSEMBLER_DIALECT == ASM_ATT)
7732	    putc ('l', file);
7733	  return;
7734
7735	case 'S':
7736	  if (ASSEMBLER_DIALECT == ASM_ATT)
7737	    putc ('s', file);
7738	  return;
7739
7740	case 'T':
7741	  if (ASSEMBLER_DIALECT == ASM_ATT)
7742	    putc ('t', file);
7743	  return;
7744
7745	case 'z':
7746	  /* 387 opcodes don't get size suffixes if the operands are
7747	     registers.  */
7748	  if (STACK_REG_P (x))
7749	    return;
7750
7751	  /* Likewise if using Intel opcodes.  */
7752	  if (ASSEMBLER_DIALECT == ASM_INTEL)
7753	    return;
7754
7755	  /* Derive the opcode size suffix from the size of the operand.  */
7756	  switch (GET_MODE_SIZE (GET_MODE (x)))
7757	    {
7758	    case 2:
7759#ifdef HAVE_GAS_FILDS_FISTS
7760	      putc ('s', file);
7761#endif
7762	      return;
7763
7764	    case 4:
7765	      if (GET_MODE (x) == SFmode)
7766		{
7767		  putc ('s', file);
7768		  return;
7769		}
7770	      else
7771		putc ('l', file);
7772	      return;
7773
7774	    case 12:
7775	    case 16:
7776	      putc ('t', file);
7777	      return;
7778
7779	    case 8:
7780	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7781		{
7782#ifdef GAS_MNEMONICS
7783		  putc ('q', file);
7784#else
7785		  putc ('l', file);
7786		  putc ('l', file);
7787#endif
7788		}
7789	      else
7790	        putc ('l', file);
7791	      return;
7792
7793	    default:
7794	      gcc_unreachable ();
7795	    }
7796
7797	case 'b':
7798	case 'w':
7799	case 'k':
7800	case 'q':
7801	case 'h':
7802	case 'y':
7803	case 'X':
7804	case 'P':
7805	  break;
7806
7807	case 's':
7808	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7809	    {
7810	      PRINT_OPERAND (file, x, 0);
7811	      putc (',', file);
7812	    }
7813	  return;
7814
7815	case 'D':
7816	  /* Little bit of braindamage here.  The SSE compare instructions
7817	     use completely different names for the comparisons than the
7818	     fp conditional moves do.  */
7819	  switch (GET_CODE (x))
7820	    {
7821	    case EQ:
7822	    case UNEQ:
7823	      fputs ("eq", file);
7824	      break;
7825	    case LT:
7826	    case UNLT:
7827	      fputs ("lt", file);
7828	      break;
7829	    case LE:
7830	    case UNLE:
7831	      fputs ("le", file);
7832	      break;
7833	    case UNORDERED:
7834	      fputs ("unord", file);
7835	      break;
7836	    case NE:
7837	    case LTGT:
7838	      fputs ("neq", file);
7839	      break;
7840	    case UNGE:
7841	    case GE:
7842	      fputs ("nlt", file);
7843	      break;
7844	    case UNGT:
7845	    case GT:
7846	      fputs ("nle", file);
7847	      break;
7848	    case ORDERED:
7849	      fputs ("ord", file);
7850	      break;
7851	    default:
7852	      gcc_unreachable ();
7853	    }
7854	  return;
7855	case 'O':
7856#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7857	  if (ASSEMBLER_DIALECT == ASM_ATT)
7858	    {
7859	      switch (GET_MODE (x))
7860		{
7861		case HImode: putc ('w', file); break;
7862		case SImode:
7863		case SFmode: putc ('l', file); break;
7864		case DImode:
7865		case DFmode: putc ('q', file); break;
7866		default: gcc_unreachable ();
7867		}
7868	      putc ('.', file);
7869	    }
7870#endif
7871	  return;
7872	case 'C':
7873	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7874	  return;
7875	case 'F':
7876#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7877	  if (ASSEMBLER_DIALECT == ASM_ATT)
7878	    putc ('.', file);
7879#endif
7880	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7881	  return;
7882
7883	  /* Like above, but reverse condition */
7884	case 'c':
7885	  /* Check to see if argument to %c is really a constant
7886	     and not a condition code which needs to be reversed.  */
7887	  if (!COMPARISON_P (x))
7888	  {
7889	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7890	     return;
7891	  }
7892	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7893	  return;
7894	case 'f':
7895#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7896	  if (ASSEMBLER_DIALECT == ASM_ATT)
7897	    putc ('.', file);
7898#endif
7899	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7900	  return;
7901
7902	case 'H':
7903	  /* It doesn't actually matter what mode we use here, as we're
7904	     only going to use this for printing.  */
7905	  x = adjust_address_nv (x, DImode, 8);
7906	  break;
7907
7908	case '+':
7909	  {
7910	    rtx x;
7911
7912	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7913	      return;
7914
7915	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7916	    if (x)
7917	      {
7918		int pred_val = INTVAL (XEXP (x, 0));
7919
7920		if (pred_val < REG_BR_PROB_BASE * 45 / 100
7921		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
7922		  {
7923		    int taken = pred_val > REG_BR_PROB_BASE / 2;
7924		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
7925
7926		    /* Emit hints only in the case default branch prediction
7927		       heuristics would fail.  */
7928		    if (taken != cputaken)
7929		      {
7930			/* We use 3e (DS) prefix for taken branches and
7931			   2e (CS) prefix for not taken branches.  */
7932			if (taken)
7933			  fputs ("ds ; ", file);
7934			else
7935			  fputs ("cs ; ", file);
7936		      }
7937		  }
7938	      }
7939	    return;
7940	  }
7941	default:
7942	    output_operand_lossage ("invalid operand code '%c'", code);
7943	}
7944    }
7945
7946  if (GET_CODE (x) == REG)
7947    print_reg (x, code, file);
7948
7949  else if (GET_CODE (x) == MEM)
7950    {
7951      /* No `byte ptr' prefix for call instructions.  */
7952      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7953	{
7954	  const char * size;
7955	  switch (GET_MODE_SIZE (GET_MODE (x)))
7956	    {
7957	    case 1: size = "BYTE"; break;
7958	    case 2: size = "WORD"; break;
7959	    case 4: size = "DWORD"; break;
7960	    case 8: size = "QWORD"; break;
7961	    case 12: size = "XWORD"; break;
7962	    case 16: size = "XMMWORD"; break;
7963	    default:
7964	      gcc_unreachable ();
7965	    }
7966
7967	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
7968	  if (code == 'b')
7969	    size = "BYTE";
7970	  else if (code == 'w')
7971	    size = "WORD";
7972	  else if (code == 'k')
7973	    size = "DWORD";
7974
7975	  fputs (size, file);
7976	  fputs (" PTR ", file);
7977	}
7978
7979      x = XEXP (x, 0);
7980      /* Avoid (%rip) for call operands.  */
7981      if (CONSTANT_ADDRESS_P (x) && code == 'P'
7982	       && GET_CODE (x) != CONST_INT)
7983	output_addr_const (file, x);
7984      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7985	output_operand_lossage ("invalid constraints for operand");
7986      else
7987	output_address (x);
7988    }
7989
7990  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7991    {
7992      REAL_VALUE_TYPE r;
7993      long l;
7994
7995      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7996      REAL_VALUE_TO_TARGET_SINGLE (r, l);
7997
7998      if (ASSEMBLER_DIALECT == ASM_ATT)
7999	putc ('$', file);
8000      fprintf (file, "0x%08lx", l);
8001    }
8002
8003  /* These float cases don't actually occur as immediate operands.  */
8004  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8005    {
8006      char dstr[30];
8007
8008      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8009      fprintf (file, "%s", dstr);
8010    }
8011
8012  else if (GET_CODE (x) == CONST_DOUBLE
8013	   && GET_MODE (x) == XFmode)
8014    {
8015      char dstr[30];
8016
8017      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8018      fprintf (file, "%s", dstr);
8019    }
8020
8021  else
8022    {
8023      /* We have patterns that allow zero sets of memory, for instance.
8024	 In 64-bit mode, we should probably support all 8-byte vectors,
8025	 since we can in fact encode that into an immediate.  */
8026      if (GET_CODE (x) == CONST_VECTOR)
8027	{
8028	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8029	  x = const0_rtx;
8030	}
8031
8032      if (code != 'P')
8033	{
8034	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8035	    {
8036	      if (ASSEMBLER_DIALECT == ASM_ATT)
8037		putc ('$', file);
8038	    }
8039	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8040		   || GET_CODE (x) == LABEL_REF)
8041	    {
8042	      if (ASSEMBLER_DIALECT == ASM_ATT)
8043		putc ('$', file);
8044	      else
8045		fputs ("OFFSET FLAT:", file);
8046	    }
8047	}
8048      if (GET_CODE (x) == CONST_INT)
8049	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8050      else if (flag_pic)
8051	output_pic_addr_const (file, x, code);
8052      else
8053	output_addr_const (file, x);
8054    }
8055}
8056
8057/* Print a memory operand whose address is ADDR.  */
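/* Sketch (not from the original source): the same decomposed address is
   printed as "disp(%base,%index,scale)" for AT&T syntax and as
   "[base+disp+index*scale]" for Intel syntax by the two branches below.  */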
8058
8059void
8060print_operand_address (FILE *file, rtx addr)
8061{
8062  struct ix86_address parts;
8063  rtx base, index, disp;
8064  int scale;
8065  int ok = ix86_decompose_address (addr, &parts);
8066
8067  gcc_assert (ok);
8068
8069  base = parts.base;
8070  index = parts.index;
8071  disp = parts.disp;
8072  scale = parts.scale;
8073
8074  switch (parts.seg)
8075    {
8076    case SEG_DEFAULT:
8077      break;
8078    case SEG_FS:
8079    case SEG_GS:
8080      if (USER_LABEL_PREFIX[0] == 0)
8081	putc ('%', file);
8082      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8083      break;
8084    default:
8085      gcc_unreachable ();
8086    }
8087
8088  if (!base && !index)
8089    {
8090      /* A displacement-only address requires special attention.  */
8091
8092      if (GET_CODE (disp) == CONST_INT)
8093	{
8094	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8095	    {
8096	      if (USER_LABEL_PREFIX[0] == 0)
8097		putc ('%', file);
8098	      fputs ("ds:", file);
8099	    }
8100	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8101	}
8102      else if (flag_pic)
8103	output_pic_addr_const (file, disp, 0);
8104      else
8105	output_addr_const (file, disp);
8106
8107      /* Use the one-byte-shorter RIP-relative addressing in 64bit mode.  */
8108      if (TARGET_64BIT)
8109	{
8110	  if (GET_CODE (disp) == CONST
8111	      && GET_CODE (XEXP (disp, 0)) == PLUS
8112	      && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8113	    disp = XEXP (XEXP (disp, 0), 0);
8114	  if (GET_CODE (disp) == LABEL_REF
8115	      || (GET_CODE (disp) == SYMBOL_REF
8116		  && SYMBOL_REF_TLS_MODEL (disp) == 0))
8117	    fputs ("(%rip)", file);
8118	}
8119    }
8120  else
8121    {
8122      if (ASSEMBLER_DIALECT == ASM_ATT)
8123	{
8124	  if (disp)
8125	    {
8126	      if (flag_pic)
8127		output_pic_addr_const (file, disp, 0);
8128	      else if (GET_CODE (disp) == LABEL_REF)
8129		output_asm_label (disp);
8130	      else
8131		output_addr_const (file, disp);
8132	    }
8133
8134	  putc ('(', file);
8135	  if (base)
8136	    print_reg (base, 0, file);
8137	  if (index)
8138	    {
8139	      putc (',', file);
8140	      print_reg (index, 0, file);
8141	      if (scale != 1)
8142		fprintf (file, ",%d", scale);
8143	    }
8144	  putc (')', file);
8145	}
8146      else
8147	{
8148	  rtx offset = NULL_RTX;
8149
8150	  if (disp)
8151	    {
8152	      /* Pull out the offset of a symbol; print any symbol itself.  */
8153	      if (GET_CODE (disp) == CONST
8154		  && GET_CODE (XEXP (disp, 0)) == PLUS
8155		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8156		{
8157		  offset = XEXP (XEXP (disp, 0), 1);
8158		  disp = gen_rtx_CONST (VOIDmode,
8159					XEXP (XEXP (disp, 0), 0));
8160		}
8161
8162	      if (flag_pic)
8163		output_pic_addr_const (file, disp, 0);
8164	      else if (GET_CODE (disp) == LABEL_REF)
8165		output_asm_label (disp);
8166	      else if (GET_CODE (disp) == CONST_INT)
8167		offset = disp;
8168	      else
8169		output_addr_const (file, disp);
8170	    }
8171
8172	  putc ('[', file);
8173	  if (base)
8174	    {
8175	      print_reg (base, 0, file);
8176	      if (offset)
8177		{
8178		  if (INTVAL (offset) >= 0)
8179		    putc ('+', file);
8180		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8181		}
8182	    }
8183	  else if (offset)
8184	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8185	  else
8186	    putc ('0', file);
8187
8188	  if (index)
8189	    {
8190	      putc ('+', file);
8191	      print_reg (index, 0, file);
8192	      if (scale != 1)
8193		fprintf (file, "*%d", scale);
8194	    }
8195	  putc (']', file);
8196	}
8197    }
8198}
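
/* Illustrative sketch (added commentary, not part of the original code;
   the registers and constants are made up for the example).  For an
   address decomposed into base %ebx, index %esi, scale 4 and displacement
   12, the routine above emits roughly

       AT&T syntax:    12(%ebx,%esi,4)
       Intel syntax:   [ebx+12+esi*4]

   while a displacement-only symbolic address in 64-bit mode is printed as
   "sym(%rip)" to get the shorter RIP-relative form.  */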
8199
8200bool
8201output_addr_const_extra (FILE *file, rtx x)
8202{
8203  rtx op;
8204
8205  if (GET_CODE (x) != UNSPEC)
8206    return false;
8207
8208  op = XVECEXP (x, 0, 0);
8209  switch (XINT (x, 1))
8210    {
8211    case UNSPEC_GOTTPOFF:
8212      output_addr_const (file, op);
8213      /* FIXME: This might be @TPOFF in Sun ld.  */
8214      fputs ("@GOTTPOFF", file);
8215      break;
8216    case UNSPEC_TPOFF:
8217      output_addr_const (file, op);
8218      fputs ("@TPOFF", file);
8219      break;
8220    case UNSPEC_NTPOFF:
8221      output_addr_const (file, op);
8222      if (TARGET_64BIT)
8223	fputs ("@TPOFF", file);
8224      else
8225	fputs ("@NTPOFF", file);
8226      break;
8227    case UNSPEC_DTPOFF:
8228      output_addr_const (file, op);
8229      fputs ("@DTPOFF", file);
8230      break;
8231    case UNSPEC_GOTNTPOFF:
8232      output_addr_const (file, op);
8233      if (TARGET_64BIT)
8234	fputs ("@GOTTPOFF(%rip)", file);
8235      else
8236	fputs ("@GOTNTPOFF", file);
8237      break;
8238    case UNSPEC_INDNTPOFF:
8239      output_addr_const (file, op);
8240      fputs ("@INDNTPOFF", file);
8241      break;
8242
8243    default:
8244      return false;
8245    }
8246
8247  return true;
8248}
8249
8250/* Split one or more DImode RTL references into pairs of SImode
8251   references.  The RTL can be REG, offsettable MEM, integer constant, or
8252   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
8253   split and "num" is its length.  lo_half and hi_half are output arrays
8254   that parallel "operands".  */
8255
8256void
8257split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8258{
8259  while (num--)
8260    {
8261      rtx op = operands[num];
8262
8263      /* simplify_subreg refuses to split volatile memory addresses,
8264         but we still have to handle them.  */
8265      if (GET_CODE (op) == MEM)
8266	{
8267	  lo_half[num] = adjust_address (op, SImode, 0);
8268	  hi_half[num] = adjust_address (op, SImode, 4);
8269	}
8270      else
8271	{
8272	  lo_half[num] = simplify_gen_subreg (SImode, op,
8273					      GET_MODE (op) == VOIDmode
8274					      ? DImode : GET_MODE (op), 0);
8275	  hi_half[num] = simplify_gen_subreg (SImode, op,
8276					      GET_MODE (op) == VOIDmode
8277					      ? DImode : GET_MODE (op), 4);
8278	}
8279    }
8280}
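
/* Illustrative sketch (added commentary): a DImode MEM at address X is
   split above into SImode references at X (low word) and X + 4 (high
   word), matching the little-endian layout; registers and constants are
   split with simplify_gen_subreg at byte offsets 0 and 4 instead.  */
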
8281/* Split one or more TImode RTL references into pairs of DImode
8282   references.  The RTL can be REG, offsettable MEM, integer constant, or
8283   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
8284   split and "num" is its length.  lo_half and hi_half are output arrays
8285   that parallel "operands".  */
8286
8287void
8288split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8289{
8290  while (num--)
8291    {
8292      rtx op = operands[num];
8293
8294      /* simplify_subreg refuses to split volatile memory addresses, but we
8295         still have to handle them.  */
8296      if (GET_CODE (op) == MEM)
8297	{
8298	  lo_half[num] = adjust_address (op, DImode, 0);
8299	  hi_half[num] = adjust_address (op, DImode, 8);
8300	}
8301      else
8302	{
8303	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8304	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8305	}
8306    }
8307}
8308
8309/* Output code to perform a 387 binary operation in INSN, one of PLUS,
8310   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
8311   is the expression of the binary operation.  The output may either be
8312   emitted here, or returned to the caller, like all output_* functions.
8313
8314   There is no guarantee that the operands are the same mode, as they
8315   might be within FLOAT or FLOAT_EXTEND expressions.  */
8316
8317#ifndef SYSV386_COMPAT
8318/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
8319   wants to fix the assemblers because that causes incompatibility
8320   with gcc.  No-one wants to fix gcc because that causes
8321   incompatibility with assemblers...  You can use the option of
8322   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
8323#define SYSV386_COMPAT 1
8324#endif
8325
8326const char *
8327output_387_binary_op (rtx insn, rtx *operands)
8328{
8329  static char buf[30];
8330  const char *p;
8331  const char *ssep;
8332  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8333
8334#ifdef ENABLE_CHECKING
8335  /* Even if we do not want to check the inputs, this documents the input
8336     constraints, which helps in understanding the following code.  */
8337  if (STACK_REG_P (operands[0])
8338      && ((REG_P (operands[1])
8339	   && REGNO (operands[0]) == REGNO (operands[1])
8340	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8341	  || (REG_P (operands[2])
8342	      && REGNO (operands[0]) == REGNO (operands[2])
8343	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8344      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8345    ; /* ok */
8346  else
8347    gcc_assert (is_sse);
8348#endif
8349
8350  switch (GET_CODE (operands[3]))
8351    {
8352    case PLUS:
8353      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8354	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8355	p = "fiadd";
8356      else
8357	p = "fadd";
8358      ssep = "add";
8359      break;
8360
8361    case MINUS:
8362      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8363	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8364	p = "fisub";
8365      else
8366	p = "fsub";
8367      ssep = "sub";
8368      break;
8369
8370    case MULT:
8371      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8372	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8373	p = "fimul";
8374      else
8375	p = "fmul";
8376      ssep = "mul";
8377      break;
8378
8379    case DIV:
8380      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8381	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8382	p = "fidiv";
8383      else
8384	p = "fdiv";
8385      ssep = "div";
8386      break;
8387
8388    default:
8389      gcc_unreachable ();
8390    }
8391
8392  if (is_sse)
8393   {
8394      strcpy (buf, ssep);
8395      if (GET_MODE (operands[0]) == SFmode)
8396	strcat (buf, "ss\t{%2, %0|%0, %2}");
8397      else
8398	strcat (buf, "sd\t{%2, %0|%0, %2}");
8399      return buf;
8400   }
8401  strcpy (buf, p);
8402
8403  switch (GET_CODE (operands[3]))
8404    {
8405    case MULT:
8406    case PLUS:
8407      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8408	{
8409	  rtx temp = operands[2];
8410	  operands[2] = operands[1];
8411	  operands[1] = temp;
8412	}
8413
8414      /* We know operands[0] == operands[1].  */
8415
8416      if (GET_CODE (operands[2]) == MEM)
8417	{
8418	  p = "%z2\t%2";
8419	  break;
8420	}
8421
8422      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8423	{
8424	  if (STACK_TOP_P (operands[0]))
8425	    /* How is it that we are storing to a dead operand[2]?
8426	       Well, presumably operands[1] is dead too.  We can't
8427	       store the result to st(0) as st(0) gets popped on this
8428	       instruction.  Instead store to operands[2] (which I
8429	       think has to be st(1)).  st(1) will be popped later.
8430	       gcc <= 2.8.1 didn't have this check and generated
8431	       assembly code that the Unixware assembler rejected.  */
8432	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8433	  else
8434	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8435	  break;
8436	}
8437
8438      if (STACK_TOP_P (operands[0]))
8439	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8440      else
8441	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8442      break;
8443
8444    case MINUS:
8445    case DIV:
8446      if (GET_CODE (operands[1]) == MEM)
8447	{
8448	  p = "r%z1\t%1";
8449	  break;
8450	}
8451
8452      if (GET_CODE (operands[2]) == MEM)
8453	{
8454	  p = "%z2\t%2";
8455	  break;
8456	}
8457
8458      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8459	{
8460#if SYSV386_COMPAT
8461	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8462	     derived assemblers, confusingly reverse the direction of
8463	     the operation for fsub{r} and fdiv{r} when the
8464	     destination register is not st(0).  The Intel assembler
8465	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
8466	     figure out what the hardware really does.  */
8467	  if (STACK_TOP_P (operands[0]))
8468	    p = "{p\t%0, %2|rp\t%2, %0}";
8469	  else
8470	    p = "{rp\t%2, %0|p\t%0, %2}";
8471#else
8472	  if (STACK_TOP_P (operands[0]))
8473	    /* As above for fmul/fadd, we can't store to st(0).  */
8474	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8475	  else
8476	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8477#endif
8478	  break;
8479	}
8480
8481      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8482	{
8483#if SYSV386_COMPAT
8484	  if (STACK_TOP_P (operands[0]))
8485	    p = "{rp\t%0, %1|p\t%1, %0}";
8486	  else
8487	    p = "{p\t%1, %0|rp\t%0, %1}";
8488#else
8489	  if (STACK_TOP_P (operands[0]))
8490	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
8491	  else
8492	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
8493#endif
8494	  break;
8495	}
8496
8497      if (STACK_TOP_P (operands[0]))
8498	{
8499	  if (STACK_TOP_P (operands[1]))
8500	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8501	  else
8502	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
8503	  break;
8504	}
8505      else if (STACK_TOP_P (operands[1]))
8506	{
8507#if SYSV386_COMPAT
8508	  p = "{\t%1, %0|r\t%0, %1}";
8509#else
8510	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
8511#endif
8512	}
8513      else
8514	{
8515#if SYSV386_COMPAT
8516	  p = "{r\t%2, %0|\t%0, %2}";
8517#else
8518	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8519#endif
8520	}
8521      break;
8522
8523    default:
8524      gcc_unreachable ();
8525    }
8526
8527  strcat (buf, p);
8528  return buf;
8529}
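
/* Illustrative examples (added commentary) of templates chosen above: an
   SSE SFmode addition yields "addss\t{%2, %0|%0, %2}", an x87 addition
   with a memory second operand yields "fadd%z2\t%2", and the "p"
   suffixed variants are picked when a dying stack register must be
   popped.  */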
8530
8531/* Return needed mode for entity in optimize_mode_switching pass.  */
8532
8533int
8534ix86_mode_needed (int entity, rtx insn)
8535{
8536  enum attr_i387_cw mode;
8537
8538  /* The mode UNINITIALIZED is used to store the control word after a
8539     function call or ASM pattern.  The mode ANY specifies that the function
8540     has no requirements on the control word and makes no changes in the
8541     bits we are interested in.  */
8542
8543  if (CALL_P (insn)
8544      || (NONJUMP_INSN_P (insn)
8545	  && (asm_noperands (PATTERN (insn)) >= 0
8546	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8547    return I387_CW_UNINITIALIZED;
8548
8549  if (recog_memoized (insn) < 0)
8550    return I387_CW_ANY;
8551
8552  mode = get_attr_i387_cw (insn);
8553
8554  switch (entity)
8555    {
8556    case I387_TRUNC:
8557      if (mode == I387_CW_TRUNC)
8558	return mode;
8559      break;
8560
8561    case I387_FLOOR:
8562      if (mode == I387_CW_FLOOR)
8563	return mode;
8564      break;
8565
8566    case I387_CEIL:
8567      if (mode == I387_CW_CEIL)
8568	return mode;
8569      break;
8570
8571    case I387_MASK_PM:
8572      if (mode == I387_CW_MASK_PM)
8573	return mode;
8574      break;
8575
8576    default:
8577      gcc_unreachable ();
8578    }
8579
8580  return I387_CW_ANY;
8581}
8582
8583/* Output code to initialize control word copies used by trunc?f?i and
8584   rounding patterns.  The current control word is saved, and a copy
8585   modified according to MODE is stored in the stack slot for MODE.  */
8586
8587void
8588emit_i387_cw_initialization (int mode)
8589{
8590  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8591  rtx new_mode;
8592
8593  int slot;
8594
8595  rtx reg = gen_reg_rtx (HImode);
8596
8597  emit_insn (gen_x86_fnstcw_1 (stored_mode));
8598  emit_move_insn (reg, stored_mode);
8599
8600  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8601    {
8602      switch (mode)
8603	{
8604	case I387_CW_TRUNC:
8605	  /* round toward zero (truncate) */
8606	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8607	  slot = SLOT_CW_TRUNC;
8608	  break;
8609
8610	case I387_CW_FLOOR:
8611	  /* round down toward -oo */
8612	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8613	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8614	  slot = SLOT_CW_FLOOR;
8615	  break;
8616
8617	case I387_CW_CEIL:
8618	  /* round up toward +oo */
8619	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8620	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8621	  slot = SLOT_CW_CEIL;
8622	  break;
8623
8624	case I387_CW_MASK_PM:
8625	  /* mask precision exception for nearbyint() */
8626	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8627	  slot = SLOT_CW_MASK_PM;
8628	  break;
8629
8630	default:
8631	  gcc_unreachable ();
8632	}
8633    }
8634  else
8635    {
8636      switch (mode)
8637	{
8638	case I387_CW_TRUNC:
8639	  /* round toward zero (truncate) */
8640	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8641	  slot = SLOT_CW_TRUNC;
8642	  break;
8643
8644	case I387_CW_FLOOR:
8645	  /* round down toward -oo */
8646	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8647	  slot = SLOT_CW_FLOOR;
8648	  break;
8649
8650	case I387_CW_CEIL:
8651	  /* round up toward +oo */
8652	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8653	  slot = SLOT_CW_CEIL;
8654	  break;
8655
8656	case I387_CW_MASK_PM:
8657	  /* mask precision exception for nearbyint() */
8658	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8659	  slot = SLOT_CW_MASK_PM;
8660	  break;
8661
8662	default:
8663	  gcc_unreachable ();
8664	}
8665    }
8666
8667  gcc_assert (slot < MAX_386_STACK_LOCALS);
8668
8669  new_mode = assign_386_stack_local (HImode, slot);
8670  emit_move_insn (new_mode, reg);
8671}
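
/* Added note on the control word bits used above: bits 10-11 (mask
   0x0c00) form the x87 rounding control field -- 0x0000 round to
   nearest, 0x0400 round down, 0x0800 round up, 0x0c00 truncate -- and
   bit 5 (0x0020) is the precision exception mask set for nearbyint().  */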
8672
8673/* Output code for INSN to convert a float to a signed int.  OPERANDS
8674   are the insn operands.  The output may be [HSD]Imode and the input
8675   operand may be [SDX]Fmode.  */
8676
8677const char *
8678output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8679{
8680  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8681  int dimode_p = GET_MODE (operands[0]) == DImode;
8682  int round_mode = get_attr_i387_cw (insn);
8683
8684  /* Jump through a hoop or two for DImode, since the hardware has no
8685     non-popping instruction.  We used to do this a different way, but
8686     that was somewhat fragile and broke with post-reload splitters.  */
8687  if ((dimode_p || fisttp) && !stack_top_dies)
8688    output_asm_insn ("fld\t%y1", operands);
8689
8690  gcc_assert (STACK_TOP_P (operands[1]));
8691  gcc_assert (GET_CODE (operands[0]) == MEM);
8692
8693  if (fisttp)
8694      output_asm_insn ("fisttp%z0\t%0", operands);
8695  else
8696    {
8697      if (round_mode != I387_CW_ANY)
8698	output_asm_insn ("fldcw\t%3", operands);
8699      if (stack_top_dies || dimode_p)
8700	output_asm_insn ("fistp%z0\t%0", operands);
8701      else
8702	output_asm_insn ("fist%z0\t%0", operands);
8703      if (round_mode != I387_CW_ANY)
8704	output_asm_insn ("fldcw\t%2", operands);
8705    }
8706
8707  return "";
8708}
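
/* Illustrative sketch (added commentary): with no fisttp and a specific
   rounding mode required, the code emitted above is roughly

       fldcw	%3		-- load the truncating control word
       fistp%z0	%0		-- convert and store, popping st(0)
       fldcw	%2		-- restore the saved control word

   preceded by "fld %y1" when the value has to survive the popping
   store.  */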
8709
8710/* Output code for x87 ffreep insn.  The OPNO argument, which may only
8711   have the values zero or one, indicates the ffreep insn's operand
8712   from the OPERANDS array.  */
8713
8714static const char *
8715output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8716{
8717  if (TARGET_USE_FFREEP)
8718#if HAVE_AS_IX86_FFREEP
8719    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8720#else
8721    switch (REGNO (operands[opno]))
8722      {
8723      case FIRST_STACK_REG + 0: return ".word\t0xc0df";
8724      case FIRST_STACK_REG + 1: return ".word\t0xc1df";
8725      case FIRST_STACK_REG + 2: return ".word\t0xc2df";
8726      case FIRST_STACK_REG + 3: return ".word\t0xc3df";
8727      case FIRST_STACK_REG + 4: return ".word\t0xc4df";
8728      case FIRST_STACK_REG + 5: return ".word\t0xc5df";
8729      case FIRST_STACK_REG + 6: return ".word\t0xc6df";
8730      case FIRST_STACK_REG + 7: return ".word\t0xc7df";
8731      }
8732#endif
8733
8734  return opno ? "fstp\t%y1" : "fstp\t%y0";
8735}
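
/* Added note: the .word values above are the little-endian encoding of
   "ffreep %st(i)" (opcode bytes 0xdf 0xc0+i) for assemblers that do not
   know the mnemonic; otherwise a plain fstp is used to pop the
   register.  */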
8736
8737
8738/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
8739   should be used.  UNORDERED_P is true when fucom should be used.  */
8740
8741const char *
8742output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8743{
8744  int stack_top_dies;
8745  rtx cmp_op0, cmp_op1;
8746  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8747
8748  if (eflags_p)
8749    {
8750      cmp_op0 = operands[0];
8751      cmp_op1 = operands[1];
8752    }
8753  else
8754    {
8755      cmp_op0 = operands[1];
8756      cmp_op1 = operands[2];
8757    }
8758
8759  if (is_sse)
8760    {
8761      if (GET_MODE (operands[0]) == SFmode)
8762	if (unordered_p)
8763	  return "ucomiss\t{%1, %0|%0, %1}";
8764	else
8765	  return "comiss\t{%1, %0|%0, %1}";
8766      else
8767	if (unordered_p)
8768	  return "ucomisd\t{%1, %0|%0, %1}";
8769	else
8770	  return "comisd\t{%1, %0|%0, %1}";
8771    }
8772
8773  gcc_assert (STACK_TOP_P (cmp_op0));
8774
8775  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8776
8777  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8778    {
8779      if (stack_top_dies)
8780	{
8781	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8782	  return output_387_ffreep (operands, 1);
8783	}
8784      else
8785	return "ftst\n\tfnstsw\t%0";
8786    }
8787
8788  if (STACK_REG_P (cmp_op1)
8789      && stack_top_dies
8790      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8791      && REGNO (cmp_op1) != FIRST_STACK_REG)
8792    {
8793      /* If the top of the 387 stack dies, and the other operand is
8794	 also a stack register that dies, then this must be a
8795	 `fcompp' float compare.  */
8796
8797      if (eflags_p)
8798	{
8799	  /* There is no double popping fcomi variant.  Fortunately,
8800	     eflags is immune from the fstp's cc clobbering.  */
8801	  if (unordered_p)
8802	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8803	  else
8804	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8805	  return output_387_ffreep (operands, 0);
8806	}
8807      else
8808	{
8809	  if (unordered_p)
8810	    return "fucompp\n\tfnstsw\t%0";
8811	  else
8812	    return "fcompp\n\tfnstsw\t%0";
8813	}
8814    }
8815  else
8816    {
8817      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
8818
8819      static const char * const alt[16] =
8820      {
8821	"fcom%z2\t%y2\n\tfnstsw\t%0",
8822	"fcomp%z2\t%y2\n\tfnstsw\t%0",
8823	"fucom%z2\t%y2\n\tfnstsw\t%0",
8824	"fucomp%z2\t%y2\n\tfnstsw\t%0",
8825
8826	"ficom%z2\t%y2\n\tfnstsw\t%0",
8827	"ficomp%z2\t%y2\n\tfnstsw\t%0",
8828	NULL,
8829	NULL,
8830
8831	"fcomi\t{%y1, %0|%0, %y1}",
8832	"fcomip\t{%y1, %0|%0, %y1}",
8833	"fucomi\t{%y1, %0|%0, %y1}",
8834	"fucomip\t{%y1, %0|%0, %y1}",
8835
8836	NULL,
8837	NULL,
8838	NULL,
8839	NULL
8840      };
8841
8842      int mask;
8843      const char *ret;
8844
8845      mask  = eflags_p << 3;
8846      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8847      mask |= unordered_p << 1;
8848      mask |= stack_top_dies;
8849
8850      gcc_assert (mask < 16);
8851      ret = alt[mask];
8852      gcc_assert (ret);
8853
8854      return ret;
8855    }
8856}
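
/* Illustrative example (added commentary) of the mask encoding above: an
   fcomi-style compare (eflags_p = 1) of two FP operands with unordered_p
   set and the stack top surviving gives mask 8 + 2 = 10, selecting
   "fucomi\t{%y1, %0|%0, %y1}".  */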
8857
8858void
8859ix86_output_addr_vec_elt (FILE *file, int value)
8860{
8861  const char *directive = ASM_LONG;
8862
8863#ifdef ASM_QUAD
8864  if (TARGET_64BIT)
8865    directive = ASM_QUAD;
8866#else
8867  gcc_assert (!TARGET_64BIT);
8868#endif
8869
8870  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8871}
8872
8873void
8874ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8875{
8876  if (TARGET_64BIT)
8877    fprintf (file, "%s%s%d-%s%d\n",
8878	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
8879  else if (HAVE_AS_GOTOFF_IN_DATA)
8880    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8881#if TARGET_MACHO
8882  else if (TARGET_MACHO)
8883    {
8884      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8885      machopic_output_function_base_name (file);
8886      fprintf(file, "\n");
8887    }
8888#endif
8889  else
8890    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8891		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8892}
8893
8894/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8895   for the target.  */
8896
8897void
8898ix86_expand_clear (rtx dest)
8899{
8900  rtx tmp;
8901
8902  /* We play register width games, which are only valid after reload.  */
8903  gcc_assert (reload_completed);
8904
8905  /* Avoid HImode and its attendant prefix byte.  */
8906  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8907    dest = gen_rtx_REG (SImode, REGNO (dest));
8908
8909  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8910
8911  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
8912  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8913    {
8914      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8915      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8916    }
8917
8918  emit_insn (tmp);
8919}
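
/* Illustrative sketch (added commentary): clearing %eax normally expands
   to "xorl %eax, %eax" wrapped in a parallel with a flags clobber; only
   when TARGET_USE_MOV0 is set and we are not optimizing for size is the
   plain "movl $0, %eax" form kept.  */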
8920
8921/* X is an unchanging MEM.  If it is a constant pool reference, return
8922   the constant pool rtx, else NULL.  */
8923
8924rtx
8925maybe_get_pool_constant (rtx x)
8926{
8927  x = ix86_delegitimize_address (XEXP (x, 0));
8928
8929  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8930    return get_pool_constant (x);
8931
8932  return NULL_RTX;
8933}
8934
8935void
8936ix86_expand_move (enum machine_mode mode, rtx operands[])
8937{
8938  int strict = (reload_in_progress || reload_completed);
8939  rtx op0, op1;
8940  enum tls_model model;
8941
8942  op0 = operands[0];
8943  op1 = operands[1];
8944
8945  if (GET_CODE (op1) == SYMBOL_REF)
8946    {
8947      model = SYMBOL_REF_TLS_MODEL (op1);
8948      if (model)
8949	{
8950	  op1 = legitimize_tls_address (op1, model, true);
8951	  op1 = force_operand (op1, op0);
8952	  if (op1 == op0)
8953	    return;
8954	}
8955    }
8956  else if (GET_CODE (op1) == CONST
8957	   && GET_CODE (XEXP (op1, 0)) == PLUS
8958	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8959    {
8960      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8961      if (model)
8962	{
8963	  rtx addend = XEXP (XEXP (op1, 0), 1);
8964	  op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8965	  op1 = force_operand (op1, NULL);
8966	  op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8967				     op0, 1, OPTAB_DIRECT);
8968	  if (op1 == op0)
8969	    return;
8970	}
8971    }
8972
8973  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8974    {
8975      if (TARGET_MACHO && !TARGET_64BIT)
8976	{
8977#if TARGET_MACHO
8978	  if (MACHOPIC_PURE)
8979	    {
8980	      rtx temp = ((reload_in_progress
8981			   || ((op0 && GET_CODE (op0) == REG)
8982			       && mode == Pmode))
8983			  ? op0 : gen_reg_rtx (Pmode));
8984	      op1 = machopic_indirect_data_reference (op1, temp);
8985	      op1 = machopic_legitimize_pic_address (op1, mode,
8986						     temp == op1 ? 0 : temp);
8987	    }
8988	  else if (MACHOPIC_INDIRECT)
8989	    op1 = machopic_indirect_data_reference (op1, 0);
8990	  if (op0 == op1)
8991	    return;
8992#endif
8993	}
8994      else
8995	{
8996	  if (GET_CODE (op0) == MEM)
8997	    op1 = force_reg (Pmode, op1);
8998	  else
8999	    op1 = legitimize_address (op1, op1, Pmode);
9000	}
9001    }
9002  else
9003    {
9004      if (GET_CODE (op0) == MEM
9005	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9006	      || !push_operand (op0, mode))
9007	  && GET_CODE (op1) == MEM)
9008	op1 = force_reg (mode, op1);
9009
9010      if (push_operand (op0, mode)
9011	  && ! general_no_elim_operand (op1, mode))
9012	op1 = copy_to_mode_reg (mode, op1);
9013
9014      /* Force large constants in 64-bit compilation into a register
9015	 to get them CSEed.  */
9016      if (TARGET_64BIT && mode == DImode
9017	  && immediate_operand (op1, mode)
9018	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
9019	  && !register_operand (op0, mode)
9020	  && optimize && !reload_completed && !reload_in_progress)
9021	op1 = copy_to_mode_reg (mode, op1);
9022
9023      if (FLOAT_MODE_P (mode))
9024	{
9025	  /* If we are loading a floating point constant to a register,
9026	     force the value to memory now, since we'll get better code
9027	     out the back end.  */
9028
9029	  if (strict)
9030	    ;
9031	  else if (GET_CODE (op1) == CONST_DOUBLE)
9032	    {
9033	      op1 = validize_mem (force_const_mem (mode, op1));
9034	      if (!register_operand (op0, mode))
9035		{
9036		  rtx temp = gen_reg_rtx (mode);
9037		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9038		  emit_move_insn (op0, temp);
9039		  return;
9040		}
9041	    }
9042	}
9043    }
9044
9045  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9046}
9047
9048void
9049ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9050{
9051  rtx op0 = operands[0], op1 = operands[1];
9052
9053  /* Force constants other than zero into memory.  We do not know how
9054     the instructions used to build constants modify the upper 64 bits
9055     of the register; once we have that information we may be able
9056     to handle some of them more efficiently.  */
9057  if ((reload_in_progress | reload_completed) == 0
9058      && register_operand (op0, mode)
9059      && CONSTANT_P (op1)
9060      && standard_sse_constant_p (op1) <= 0)
9061    op1 = validize_mem (force_const_mem (mode, op1));
9062
9063  /* Make operand1 a register if it isn't already.  */
9064  if (!no_new_pseudos
9065      && !register_operand (op0, mode)
9066      && !register_operand (op1, mode))
9067    {
9068      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9069      return;
9070    }
9071
9072  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9073}
9074
9075/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
9076   straight to ix86_expand_vector_move.  */
9077
9078void
9079ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9080{
9081  rtx op0, op1, m;
9082
9083  op0 = operands[0];
9084  op1 = operands[1];
9085
9086  if (MEM_P (op1))
9087    {
9088      /* If we're optimizing for size, movups is the smallest.  */
9089      if (optimize_size)
9090	{
9091	  op0 = gen_lowpart (V4SFmode, op0);
9092	  op1 = gen_lowpart (V4SFmode, op1);
9093	  emit_insn (gen_sse_movups (op0, op1));
9094	  return;
9095	}
9096
9097      /* ??? If we have typed data, then it would appear that using
9098	 movdqu is the only way to get unaligned data loaded with
9099	 integer type.  */
9100      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9101	{
9102	  op0 = gen_lowpart (V16QImode, op0);
9103	  op1 = gen_lowpart (V16QImode, op1);
9104	  emit_insn (gen_sse2_movdqu (op0, op1));
9105	  return;
9106	}
9107
9108      if (TARGET_SSE2 && mode == V2DFmode)
9109	{
9110	  rtx zero;
9111
9112	  /* When SSE registers are split into halves, we can avoid
9113	     writing to the top half twice.  */
9114	  if (TARGET_SSE_SPLIT_REGS)
9115	    {
9116	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9117	      zero = op0;
9118	    }
9119	  else
9120	    {
9121	      /* ??? Not sure about the best option for the Intel chips.
9122		 The following would seem to satisfy; the register is
9123		 entirely cleared, breaking the dependency chain.  We
9124		 then store to the upper half, with a dependency depth
9125		 of one.  A rumor has it that Intel recommends two movsd
9126		 followed by an unpacklpd, but this is unconfirmed.  And
9127		 given that the dependency depth of the unpacklpd would
9128		 still be one, I'm not sure why this would be better.  */
9129	      zero = CONST0_RTX (V2DFmode);
9130	    }
9131
9132	  m = adjust_address (op1, DFmode, 0);
9133	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
9134	  m = adjust_address (op1, DFmode, 8);
9135	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
9136	}
9137      else
9138	{
9139	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9140	    emit_move_insn (op0, CONST0_RTX (mode));
9141	  else
9142	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9143
9144	  if (mode != V4SFmode)
9145	    op0 = gen_lowpart (V4SFmode, op0);
9146	  m = adjust_address (op1, V2SFmode, 0);
9147	  emit_insn (gen_sse_loadlps (op0, op0, m));
9148	  m = adjust_address (op1, V2SFmode, 8);
9149	  emit_insn (gen_sse_loadhps (op0, op0, m));
9150	}
9151    }
9152  else if (MEM_P (op0))
9153    {
9154      /* If we're optimizing for size, movups is the smallest.  */
9155      if (optimize_size)
9156	{
9157	  op0 = gen_lowpart (V4SFmode, op0);
9158	  op1 = gen_lowpart (V4SFmode, op1);
9159	  emit_insn (gen_sse_movups (op0, op1));
9160	  return;
9161	}
9162
9163      /* ??? Similar to above, only less clear because of quote
9164	 typeless stores unquote.  */
9165      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9166	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9167        {
9168	  op0 = gen_lowpart (V16QImode, op0);
9169	  op1 = gen_lowpart (V16QImode, op1);
9170	  emit_insn (gen_sse2_movdqu (op0, op1));
9171	  return;
9172	}
9173
9174      if (TARGET_SSE2 && mode == V2DFmode)
9175	{
9176	  m = adjust_address (op0, DFmode, 0);
9177	  emit_insn (gen_sse2_storelpd (m, op1));
9178	  m = adjust_address (op0, DFmode, 8);
9179	  emit_insn (gen_sse2_storehpd (m, op1));
9180	}
9181      else
9182	{
9183	  if (mode != V4SFmode)
9184	    op1 = gen_lowpart (V4SFmode, op1);
9185	  m = adjust_address (op0, V2SFmode, 0);
9186	  emit_insn (gen_sse_storelps (m, op1));
9187	  m = adjust_address (op0, V2SFmode, 8);
9188	  emit_insn (gen_sse_storehps (m, op1));
9189	}
9190    }
9191  else
9192    gcc_unreachable ();
9193}
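
/* Illustrative sketch (added commentary): an unaligned V2DF load is
   expanded above into a low-half load (sse2_loadlpd) followed by a
   high-half load (sse2_loadhpd), i.e. roughly a movlpd/movhpd pair,
   unless we optimize for size, where a single movups of the whole
   vector is used instead.  */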
9194
9195/* Expand a push in MODE.  This is some mode for which we do not support
9196   proper push instructions, at least from the registers that we expect
9197   the value to live in.  */
9198
9199void
9200ix86_expand_push (enum machine_mode mode, rtx x)
9201{
9202  rtx tmp;
9203
9204  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9205			     GEN_INT (-GET_MODE_SIZE (mode)),
9206			     stack_pointer_rtx, 1, OPTAB_DIRECT);
9207  if (tmp != stack_pointer_rtx)
9208    emit_move_insn (stack_pointer_rtx, tmp);
9209
9210  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9211  emit_move_insn (tmp, x);
9212}
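
/* Illustrative sketch (added commentary; the register and size are made
   up): pushing a 16-byte value this way becomes an explicit stack
   adjustment followed by an ordinary store, roughly

       sub	$16, %esp
       movaps	%xmm0, (%esp)	-- or whatever store fits the mode

   rather than a real push instruction.  */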
9213
9214/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
9215   destination to use for the operation.  If different from the true
9216   destination in operands[0], a copy operation will be required.  */
9217
9218rtx
9219ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9220			    rtx operands[])
9221{
9222  int matching_memory;
9223  rtx src1, src2, dst;
9224
9225  dst = operands[0];
9226  src1 = operands[1];
9227  src2 = operands[2];
9228
9229  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9230  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9231      && (rtx_equal_p (dst, src2)
9232	  || immediate_operand (src1, mode)))
9233    {
9234      rtx temp = src1;
9235      src1 = src2;
9236      src2 = temp;
9237    }
9238
9239  /* If the destination is memory, and we do not have matching source
9240     operands, do things in registers.  */
9241  matching_memory = 0;
9242  if (GET_CODE (dst) == MEM)
9243    {
9244      if (rtx_equal_p (dst, src1))
9245	matching_memory = 1;
9246      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9247	       && rtx_equal_p (dst, src2))
9248	matching_memory = 2;
9249      else
9250	dst = gen_reg_rtx (mode);
9251    }
9252
9253  /* Both source operands cannot be in memory.  */
9254  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9255    {
9256      if (matching_memory != 2)
9257	src2 = force_reg (mode, src2);
9258      else
9259	src1 = force_reg (mode, src1);
9260    }
9261
9262  /* If the operation is not commutative, source 1 cannot be a constant
9263     or non-matching memory.  */
9264  if ((CONSTANT_P (src1)
9265       || (!matching_memory && GET_CODE (src1) == MEM))
9266      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9267    src1 = force_reg (mode, src1);
9268
9269  src1 = operands[1] = src1;
9270  src2 = operands[2] = src2;
9271  return dst;
9272}
9273
9274/* Similarly, but assume that the destination has already been
9275   set up properly.  */
9276
9277void
9278ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9279				    enum machine_mode mode, rtx operands[])
9280{
9281  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9282  gcc_assert (dst == operands[0]);
9283}
9284
9285/* Attempt to expand a binary operator.  Make the expansion closer to the
9286   actual machine than just general_operand, which would allow 3 separate
9287   memory references (one output, two inputs) in a single insn.  */
9288
9289void
9290ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9291			     rtx operands[])
9292{
9293  rtx src1, src2, dst, op, clob;
9294
9295  dst = ix86_fixup_binary_operands (code, mode, operands);
9296  src1 = operands[1];
9297  src2 = operands[2];
9298
9299 /* Emit the instruction.  */
9300
9301  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9302  if (reload_in_progress)
9303    {
9304      /* Reload doesn't know about the flags register, and doesn't know that
9305         it doesn't want to clobber it.  We can only do this with PLUS.  */
9306      gcc_assert (code == PLUS);
9307      emit_insn (op);
9308    }
9309  else
9310    {
9311      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9312      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9313    }
9314
9315  /* Fix up the destination if needed.  */
9316  if (dst != operands[0])
9317    emit_move_insn (operands[0], dst);
9318}
9319
9320/* Return TRUE or FALSE depending on whether the binary operator meets the
9321   appropriate constraints.  */
9322
9323int
9324ix86_binary_operator_ok (enum rtx_code code,
9325			 enum machine_mode mode ATTRIBUTE_UNUSED,
9326			 rtx operands[3])
9327{
9328  /* Both source operands cannot be in memory.  */
9329  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9330    return 0;
9331  /* If the operation is not commutative, source 1 cannot be a constant.  */
9332  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9333    return 0;
9334  /* If the destination is memory, we must have a matching source operand.  */
9335  if (GET_CODE (operands[0]) == MEM
9336      && ! (rtx_equal_p (operands[0], operands[1])
9337	    || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9338		&& rtx_equal_p (operands[0], operands[2]))))
9339    return 0;
9340  /* If the operation is not commutative and source 1 is memory, we must
9341     have a matching destination.  */
9342  if (GET_CODE (operands[1]) == MEM
9343      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9344      && ! rtx_equal_p (operands[0], operands[1]))
9345    return 0;
9346  return 1;
9347}
9348
9349/* Attempt to expand a unary operator.  Make the expansion closer to the
9350   actual machine than just general_operand, which would allow 2 separate
9351   memory references (one output, one input) in a single insn.  */
9352
9353void
9354ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9355			    rtx operands[])
9356{
9357  int matching_memory;
9358  rtx src, dst, op, clob;
9359
9360  dst = operands[0];
9361  src = operands[1];
9362
9363  /* If the destination is memory, and we do not have matching source
9364     operands, do things in registers.  */
9365  matching_memory = 0;
9366  if (MEM_P (dst))
9367    {
9368      if (rtx_equal_p (dst, src))
9369	matching_memory = 1;
9370      else
9371	dst = gen_reg_rtx (mode);
9372    }
9373
9374  /* When source operand is memory, destination must match.  */
9375  if (MEM_P (src) && !matching_memory)
9376    src = force_reg (mode, src);
9377
9378  /* Emit the instruction.  */
9379
9380  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9381  if (reload_in_progress || code == NOT)
9382    {
9383      /* Reload doesn't know about the flags register, and doesn't know that
9384         it doesn't want to clobber it.  */
9385      gcc_assert (code == NOT);
9386      emit_insn (op);
9387    }
9388  else
9389    {
9390      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9391      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9392    }
9393
9394  /* Fix up the destination if needed.  */
9395  if (dst != operands[0])
9396    emit_move_insn (operands[0], dst);
9397}
9398
9399/* Return TRUE or FALSE depending on whether the unary operator meets the
9400   appropriate constraints.  */
9401
9402int
9403ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9404			enum machine_mode mode ATTRIBUTE_UNUSED,
9405			rtx operands[2] ATTRIBUTE_UNUSED)
9406{
9407  /* If one of operands is memory, source and destination must match.  */
9408  if ((GET_CODE (operands[0]) == MEM
9409       || GET_CODE (operands[1]) == MEM)
9410      && ! rtx_equal_p (operands[0], operands[1]))
9411    return FALSE;
9412  return TRUE;
9413}
9414
9415/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9416   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
9417   true, then replicate the mask for all elements of the vector register.
9418   If INVERT is true, then create a mask excluding the sign bit.  */
9419
9420rtx
9421ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9422{
9423  enum machine_mode vec_mode;
9424  HOST_WIDE_INT hi, lo;
9425  int shift = 63;
9426  rtvec v;
9427  rtx mask;
9428
9429  /* Find the sign bit, sign extended to 2*HWI.  */
9430  if (mode == SFmode)
9431    lo = 0x80000000, hi = lo < 0;
9432  else if (HOST_BITS_PER_WIDE_INT >= 64)
9433    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9434  else
9435    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9436
9437  if (invert)
9438    lo = ~lo, hi = ~hi;
9439
9440  /* Force this value into the low part of a fp vector constant.  */
9441  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9442  mask = gen_lowpart (mode, mask);
9443
9444  if (mode == SFmode)
9445    {
9446      if (vect)
9447	v = gen_rtvec (4, mask, mask, mask, mask);
9448      else
9449	v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9450		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9451      vec_mode = V4SFmode;
9452    }
9453  else
9454    {
9455      if (vect)
9456	v = gen_rtvec (2, mask, mask);
9457      else
9458	v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9459      vec_mode = V2DFmode;
9460    }
9461
9462  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9463}
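
/* Illustrative values (added commentary): for DFmode the mask element is
   0x8000000000000000 (0x7fffffffffffffff when INVERT), for SFmode it is
   0x80000000 (0x7fffffff when INVERT); it fills the whole vector when
   VECT is true and only the low element otherwise.  */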
9464
9465/* Generate code for floating point ABS or NEG.  */
9466
9467void
9468ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9469				rtx operands[])
9470{
9471  rtx mask, set, use, clob, dst, src;
9472  bool matching_memory;
9473  bool use_sse = false;
9474  bool vector_mode = VECTOR_MODE_P (mode);
9475  enum machine_mode elt_mode = mode;
9476
9477  if (vector_mode)
9478    {
9479      elt_mode = GET_MODE_INNER (mode);
9480      use_sse = true;
9481    }
9482  else if (TARGET_SSE_MATH)
9483    use_sse = SSE_FLOAT_MODE_P (mode);
9484
9485  /* NEG and ABS performed with SSE use bitwise mask operations.
9486     Create the appropriate mask now.  */
9487  if (use_sse)
9488    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9489  else
9490    mask = NULL_RTX;
9491
9492  dst = operands[0];
9493  src = operands[1];
9494
9495  /* If the destination is memory, and we don't have matching source
9496     operands or we're using the x87, do things in registers.  */
9497  matching_memory = false;
9498  if (MEM_P (dst))
9499    {
9500      if (use_sse && rtx_equal_p (dst, src))
9501	matching_memory = true;
9502      else
9503	dst = gen_reg_rtx (mode);
9504    }
9505  if (MEM_P (src) && !matching_memory)
9506    src = force_reg (mode, src);
9507
9508  if (vector_mode)
9509    {
9510      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9511      set = gen_rtx_SET (VOIDmode, dst, set);
9512      emit_insn (set);
9513    }
9514  else
9515    {
9516      set = gen_rtx_fmt_e (code, mode, src);
9517      set = gen_rtx_SET (VOIDmode, dst, set);
9518      if (mask)
9519        {
9520          use = gen_rtx_USE (VOIDmode, mask);
9521          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9522          emit_insn (gen_rtx_PARALLEL (VOIDmode,
9523				       gen_rtvec (3, set, use, clob)));
9524        }
9525      else
9526	emit_insn (set);
9527    }
9528
9529  if (dst != operands[0])
9530    emit_move_insn (operands[0], dst);
9531}
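
/* Illustrative sketch (added commentary): with SSE, NEG becomes an XOR
   with the sign-bit mask and ABS an AND with the inverted mask, so e.g.
   negating a V4SF value is a single xorps against a vector of
   0x80000000 constants.  */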
9532
9533/* Expand a copysign operation.  Special case operand 0 being a constant.  */
9534
9535void
9536ix86_expand_copysign (rtx operands[])
9537{
9538  enum machine_mode mode, vmode;
9539  rtx dest, op0, op1, mask, nmask;
9540
9541  dest = operands[0];
9542  op0 = operands[1];
9543  op1 = operands[2];
9544
9545  mode = GET_MODE (dest);
9546  vmode = mode == SFmode ? V4SFmode : V2DFmode;
9547
9548  if (GET_CODE (op0) == CONST_DOUBLE)
9549    {
9550      rtvec v;
9551
9552      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9553	op0 = simplify_unary_operation (ABS, mode, op0, mode);
9554
9555      if (op0 == CONST0_RTX (mode))
9556	op0 = CONST0_RTX (vmode);
9557      else
9558        {
9559	  if (mode == SFmode)
9560	    v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9561                           CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9562	  else
9563	    v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9564          op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9565	}
9566
9567      mask = ix86_build_signbit_mask (mode, 0, 0);
9568
9569      if (mode == SFmode)
9570	emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9571      else
9572	emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9573    }
9574  else
9575    {
9576      nmask = ix86_build_signbit_mask (mode, 0, 1);
9577      mask = ix86_build_signbit_mask (mode, 0, 0);
9578
9579      if (mode == SFmode)
9580	emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9581      else
9582	emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9583    }
9584}
9585
9586/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
9587   be a constant, and so has already been expanded into a vector constant.  */
9588
9589void
9590ix86_split_copysign_const (rtx operands[])
9591{
9592  enum machine_mode mode, vmode;
9593  rtx dest, op0, op1, mask, x;
9594
9595  dest = operands[0];
9596  op0 = operands[1];
9597  op1 = operands[2];
9598  mask = operands[3];
9599
9600  mode = GET_MODE (dest);
9601  vmode = GET_MODE (mask);
9602
9603  dest = simplify_gen_subreg (vmode, dest, mode, 0);
9604  x = gen_rtx_AND (vmode, dest, mask);
9605  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9606
9607  if (op0 != CONST0_RTX (vmode))
9608    {
9609      x = gen_rtx_IOR (vmode, dest, op0);
9610      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9611    }
9612}
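
/* Illustrative sketch (added commentary, assuming the insn pattern ties
   the destination to OP1): the split above computes
   dest = (op1 AND sign-bit mask) IOR |op0|, i.e. the magnitude of the
   constant OP0 with the sign taken from OP1.  */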
9613
9614/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
9615   so we have to do two masks.  */
9616
9617void
9618ix86_split_copysign_var (rtx operands[])
9619{
9620  enum machine_mode mode, vmode;
9621  rtx dest, scratch, op0, op1, mask, nmask, x;
9622
9623  dest = operands[0];
9624  scratch = operands[1];
9625  op0 = operands[2];
9626  op1 = operands[3];
9627  nmask = operands[4];
9628  mask = operands[5];
9629
9630  mode = GET_MODE (dest);
9631  vmode = GET_MODE (mask);
9632
9633  if (rtx_equal_p (op0, op1))
9634    {
9635      /* Shouldn't happen often (it's useless, obviously), but when it does
9636	 we'd generate incorrect code if we continue below.  */
9637      emit_move_insn (dest, op0);
9638      return;
9639    }
9640
9641  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
9642    {
9643      gcc_assert (REGNO (op1) == REGNO (scratch));
9644
9645      x = gen_rtx_AND (vmode, scratch, mask);
9646      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9647
9648      dest = mask;
9649      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9650      x = gen_rtx_NOT (vmode, dest);
9651      x = gen_rtx_AND (vmode, x, op0);
9652      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9653    }
9654  else
9655    {
9656      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
9657	{
9658	  x = gen_rtx_AND (vmode, scratch, mask);
9659	}
9660      else						/* alternative 2,4 */
9661	{
9662          gcc_assert (REGNO (mask) == REGNO (scratch));
9663          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9664	  x = gen_rtx_AND (vmode, scratch, op1);
9665	}
9666      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9667
9668      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
9669	{
9670	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
9671	  x = gen_rtx_AND (vmode, dest, nmask);
9672	}
9673      else						/* alternative 3,4 */
9674	{
9675          gcc_assert (REGNO (nmask) == REGNO (dest));
9676	  dest = nmask;
9677	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9678	  x = gen_rtx_AND (vmode, dest, op0);
9679	}
9680      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9681    }
9682
9683  x = gen_rtx_IOR (vmode, dest, scratch);
9684  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9685}
9686
9687/* Return TRUE or FALSE depending on whether the first SET in INSN
9688   has source and destination with matching CC modes, and that the
9689   CC mode is at least as constrained as REQ_MODE.  */
9690
9691int
9692ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9693{
9694  rtx set;
9695  enum machine_mode set_mode;
9696
9697  set = PATTERN (insn);
9698  if (GET_CODE (set) == PARALLEL)
9699    set = XVECEXP (set, 0, 0);
9700  gcc_assert (GET_CODE (set) == SET);
9701  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9702
9703  set_mode = GET_MODE (SET_DEST (set));
9704  switch (set_mode)
9705    {
9706    case CCNOmode:
9707      if (req_mode != CCNOmode
9708	  && (req_mode != CCmode
9709	      || XEXP (SET_SRC (set), 1) != const0_rtx))
9710	return 0;
9711      break;
9712    case CCmode:
9713      if (req_mode == CCGCmode)
9714	return 0;
9715      /* FALLTHRU */
9716    case CCGCmode:
9717      if (req_mode == CCGOCmode || req_mode == CCNOmode)
9718	return 0;
9719      /* FALLTHRU */
9720    case CCGOCmode:
9721      if (req_mode == CCZmode)
9722	return 0;
9723      /* FALLTHRU */
9724    case CCZmode:
9725      break;
9726
9727    default:
9728      gcc_unreachable ();
9729    }
9730
9731  return (GET_MODE (SET_SRC (set)) == set_mode);
9732}
9733
9734/* Generate insn patterns to do an integer compare of OPERANDS.  */
9735
9736static rtx
9737ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9738{
9739  enum machine_mode cmpmode;
9740  rtx tmp, flags;
9741
9742  cmpmode = SELECT_CC_MODE (code, op0, op1);
9743  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9744
9745  /* This is very simple, but making the interface the same as in the
9746     FP case makes the rest of the code easier.  */
9747  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9748  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9749
9750  /* Return the test that should be put into the flags user, i.e.
9751     the bcc, scc, or cmov instruction.  */
9752  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9753}
9754
9755/* Figure out whether to use ordered or unordered fp comparisons.
9756   Return the appropriate mode to use.  */
9757
9758enum machine_mode
9759ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9760{
9761  /* ??? In order to make all comparisons reversible, we do all comparisons
9762     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
9763     between trapping and nontrapping forms of all comparisons, we can make
9764     inequality comparisons trapping again, since that results in better code
9765     when using FCOM based compares.  */
9766  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9767}
9768
9769enum machine_mode
9770ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9771{
9772  if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9773    return ix86_fp_compare_mode (code);
9774  switch (code)
9775    {
9776      /* Only zero flag is needed.  */
9777    case EQ:			/* ZF=0 */
9778    case NE:			/* ZF!=0 */
9779      return CCZmode;
9780      /* Codes needing carry flag.  */
9781    case GEU:			/* CF=0 */
9782    case GTU:			/* CF=0 & ZF=0 */
9783    case LTU:			/* CF=1 */
9784    case LEU:			/* CF=1 | ZF=1 */
9785      return CCmode;
9786      /* Codes possibly doable only with sign flag when
9787         comparing against zero.  */
9788    case GE:			/* SF=OF   or   SF=0 */
9789    case LT:			/* SF<>OF  or   SF=1 */
9790      if (op1 == const0_rtx)
9791	return CCGOCmode;
9792      else
9793	/* For other cases Carry flag is not required.  */
9794	return CCGCmode;
9795      /* Codes doable only with the sign flag when comparing
9796         against zero, but we lack a jump instruction for it,
9797         so we need to use relational tests against overflow,
9798         which thus needs to be zero.  */
9799    case GT:			/* ZF=0 & SF=OF */
9800    case LE:			/* ZF=1 | SF<>OF */
9801      if (op1 == const0_rtx)
9802	return CCNOmode;
9803      else
9804	return CCGCmode;
9805      /* The strcmp pattern does (use flags), and combine may ask us for
9806	 the proper mode.  */
9807    case USE:
9808      return CCmode;
9809    default:
9810      gcc_unreachable ();
9811    }
9812}
9813
9814/* Return the fixed registers used for condition codes.  */
9815
9816static bool
9817ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9818{
9819  *p1 = FLAGS_REG;
9820  *p2 = FPSR_REG;
9821  return true;
9822}
9823
9824/* If two condition code modes are compatible, return a condition code
9825   mode which is compatible with both.  Otherwise, return
9826   VOIDmode.  */
9827
9828static enum machine_mode
9829ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9830{
9831  if (m1 == m2)
9832    return m1;
9833
9834  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9835    return VOIDmode;
9836
9837  if ((m1 == CCGCmode && m2 == CCGOCmode)
9838      || (m1 == CCGOCmode && m2 == CCGCmode))
9839    return CCGCmode;
9840
9841  switch (m1)
9842    {
9843    default:
9844      gcc_unreachable ();
9845
9846    case CCmode:
9847    case CCGCmode:
9848    case CCGOCmode:
9849    case CCNOmode:
9850    case CCZmode:
9851      switch (m2)
9852	{
9853	default:
9854	  return VOIDmode;
9855
9856	case CCmode:
9857	case CCGCmode:
9858	case CCGOCmode:
9859	case CCNOmode:
9860	case CCZmode:
9861	  return CCmode;
9862	}
9863
9864    case CCFPmode:
9865    case CCFPUmode:
9866      /* These are only compatible with themselves, which we already
9867	 checked above.  */
9868      return VOIDmode;
9869    }
9870}
9871
9872/* Return true if we should use an FCOMI instruction for this fp comparison.  */
9873
9874int
9875ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9876{
9877  enum rtx_code swapped_code = swap_condition (code);
9878  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9879	  || (ix86_fp_comparison_cost (swapped_code)
9880	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
9881}
9882
9883/* Swap, force into registers, or otherwise massage the two operands
9884   to a fp comparison.  The operands are updated in place; the new
9885   comparison code is returned.  */
9886
9887static enum rtx_code
9888ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9889{
9890  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9891  rtx op0 = *pop0, op1 = *pop1;
9892  enum machine_mode op_mode = GET_MODE (op0);
9893  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9894
9895  /* All of the unordered compare instructions only work on registers.
9896     The same is true of the fcomi compare instructions.  The XFmode
9897     compare instructions require registers except when comparing
9898     against zero or when converting operand 1 from fixed point to
9899     floating point.  */
9900
9901  if (!is_sse
9902      && (fpcmp_mode == CCFPUmode
9903	  || (op_mode == XFmode
9904	      && ! (standard_80387_constant_p (op0) == 1
9905		    || standard_80387_constant_p (op1) == 1)
9906	      && GET_CODE (op1) != FLOAT)
9907	  || ix86_use_fcomi_compare (code)))
9908    {
9909      op0 = force_reg (op_mode, op0);
9910      op1 = force_reg (op_mode, op1);
9911    }
9912  else
9913    {
9914      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
9915	 things around if they appear profitable, otherwise force op0
9916	 into a register.  */
9917
9918      if (standard_80387_constant_p (op0) == 0
9919	  || (GET_CODE (op0) == MEM
9920	      && ! (standard_80387_constant_p (op1) == 0
9921		    || GET_CODE (op1) == MEM)))
9922	{
9923	  rtx tmp;
9924	  tmp = op0, op0 = op1, op1 = tmp;
9925	  code = swap_condition (code);
9926	}
9927
9928      if (GET_CODE (op0) != REG)
9929	op0 = force_reg (op_mode, op0);
9930
9931      if (CONSTANT_P (op1))
9932	{
9933	  int tmp = standard_80387_constant_p (op1);
9934	  if (tmp == 0)
9935	    op1 = validize_mem (force_const_mem (op_mode, op1));
9936	  else if (tmp == 1)
9937	    {
9938	      if (TARGET_CMOVE)
9939		op1 = force_reg (op_mode, op1);
9940	    }
9941	  else
9942	    op1 = force_reg (op_mode, op1);
9943	}
9944    }
9945
9946  /* Try to rearrange the comparison to make it cheaper.  */
9947  if (ix86_fp_comparison_cost (code)
9948      > ix86_fp_comparison_cost (swap_condition (code))
9949      && (GET_CODE (op1) == REG || !no_new_pseudos))
9950    {
9951      rtx tmp;
9952      tmp = op0, op0 = op1, op1 = tmp;
9953      code = swap_condition (code);
9954      if (GET_CODE (op0) != REG)
9955	op0 = force_reg (op_mode, op0);
9956    }
9957
9958  *pop0 = op0;
9959  *pop1 = op1;
9960  return code;
9961}
9962
9963/* Convert comparison codes we use to represent FP comparison to integer
9964   code that will result in proper branch.  Return UNKNOWN if no such code
9965   is available.  */
9966
9967enum rtx_code
9968ix86_fp_compare_code_to_integer (enum rtx_code code)
9969{
9970  switch (code)
9971    {
9972    case GT:
9973      return GTU;
9974    case GE:
9975      return GEU;
9976    case ORDERED:
9977    case UNORDERED:
9978      return code;
9979      break;
9980    case UNEQ:
9981      return EQ;
9982      break;
9983    case UNLT:
9984      return LTU;
9985      break;
9986    case UNLE:
9987      return LEU;
9988      break;
9989    case LTGT:
9990      return NE;
9991      break;
9992    default:
9993      return UNKNOWN;
9994    }
9995}
9996
9997/* Split comparison code CODE into comparisons we can do using branch
9998   instructions.  BYPASS_CODE is the comparison code for a branch that
9999   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
10000   is not required, its value is set to UNKNOWN.
10001   We never require more than two branches.  */
10002
10003void
10004ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10005			  enum rtx_code *first_code,
10006			  enum rtx_code *second_code)
10007{
10008  *first_code = code;
10009  *bypass_code = UNKNOWN;
10010  *second_code = UNKNOWN;
10011
10012  /* The fcomi comparison sets flags as follows:
10013
10014     cmp    ZF PF CF
10015     >      0  0  0
10016     <      0  0  1
10017     =      1  0  0
10018     un     1  1  1 */
10019
10020  switch (code)
10021    {
10022    case GT:			/* GTU - CF=0 & ZF=0 */
10023    case GE:			/* GEU - CF=0 */
10024    case ORDERED:		/* PF=0 */
10025    case UNORDERED:		/* PF=1 */
10026    case UNEQ:			/* EQ - ZF=1 */
10027    case UNLT:			/* LTU - CF=1 */
10028    case UNLE:			/* LEU - CF=1 | ZF=1 */
10029    case LTGT:			/* EQ - ZF=0 */
10030      break;
10031    case LT:			/* LTU - CF=1 - fails on unordered */
10032      *first_code = UNLT;
10033      *bypass_code = UNORDERED;
10034      break;
10035    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
10036      *first_code = UNLE;
10037      *bypass_code = UNORDERED;
10038      break;
10039    case EQ:			/* EQ - ZF=1 - fails on unordered */
10040      *first_code = UNEQ;
10041      *bypass_code = UNORDERED;
10042      break;
10043    case NE:			/* NE - ZF=0 - fails on unordered */
10044      *first_code = LTGT;
10045      *second_code = UNORDERED;
10046      break;
10047    case UNGE:			/* GEU - CF=0 - fails on unordered */
10048      *first_code = GE;
10049      *second_code = UNORDERED;
10050      break;
10051    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
10052      *first_code = GT;
10053      *second_code = UNORDERED;
10054      break;
10055    default:
10056      gcc_unreachable ();
10057    }
10058  if (!TARGET_IEEE_FP)
10059    {
10060      *second_code = UNKNOWN;
10061      *bypass_code = UNKNOWN;
10062    }
10063}
10064
10065/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10066   All following functions use the number of instructions as the cost metric.
10067   In the future this should be tweaked to compute bytes for optimize_size and
10068   take into account the performance of various instructions on various CPUs.  */
10069static int
10070ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10071{
10072  if (!TARGET_IEEE_FP)
10073    return 4;
10074  /* The cost of code output by ix86_expand_fp_compare.  */
10075  switch (code)
10076    {
10077    case UNLE:
10078    case UNLT:
10079    case LTGT:
10080    case GT:
10081    case GE:
10082    case UNORDERED:
10083    case ORDERED:
10084    case UNEQ:
10085      return 4;
10087    case LT:
10088    case NE:
10089    case EQ:
10090    case UNGE:
10091      return 5;
10093    case LE:
10094    case UNGT:
10095      return 6;
10097    default:
10098      gcc_unreachable ();
10099    }
10100}
10101
10102/* Return cost of comparison done using fcomi operation.
10103   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10104static int
10105ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10106{
10107  enum rtx_code bypass_code, first_code, second_code;
10108  /* Return an arbitrarily high cost when the instruction is not supported - this
10109     prevents gcc from using it.  */
10110  if (!TARGET_CMOVE)
10111    return 1024;
10112  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
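  /* Base cost of 2 for the fcomi sequence, plus 1 whenever a bypass or a
     second branch is required.  */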
10113  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10114}
10115
10116/* Return cost of comparison done using sahf operation.
10117   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10118static int
10119ix86_fp_comparison_sahf_cost (enum rtx_code code)
10120{
10121  enum rtx_code bypass_code, first_code, second_code;
10122  /* Return an arbitrarily high cost when the instruction is not preferred -
10123     this prevents gcc from using it.  */
10124  if (!TARGET_USE_SAHF && !optimize_size)
10125    return 1024;
10126  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
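  /* Base cost of 3 for the fnstsw/sahf sequence, plus 1 whenever a bypass
     or a second branch is required.  */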
10127  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10128}
10129
10130/* Compute cost of the comparison done using any method.
10131   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
10132static int
10133ix86_fp_comparison_cost (enum rtx_code code)
10134{
10135  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10136  int min;
10137
10138  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10139  sahf_cost = ix86_fp_comparison_sahf_cost (code);
10140
10141  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10142  if (min > sahf_cost)
10143    min = sahf_cost;
10144  if (min > fcomi_cost)
10145    min = fcomi_cost;
10146  return min;
10147}
10148
10149/* Generate insn patterns to do a floating point compare of OPERANDS.  */
10150
10151static rtx
10152ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10153			rtx *second_test, rtx *bypass_test)
10154{
10155  enum machine_mode fpcmp_mode, intcmp_mode;
10156  rtx tmp, tmp2;
10157  int cost = ix86_fp_comparison_cost (code);
10158  enum rtx_code bypass_code, first_code, second_code;
10159
10160  fpcmp_mode = ix86_fp_compare_mode (code);
10161  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10162
10163  if (second_test)
10164    *second_test = NULL_RTX;
10165  if (bypass_test)
10166    *bypass_test = NULL_RTX;
10167
10168  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10169
10170  /* Do fcomi/sahf based test when profitable.  */
10171  if ((bypass_code == UNKNOWN || bypass_test)
10172      && (second_code == UNKNOWN || second_test)
10173      && ix86_fp_comparison_arithmetics_cost (code) > cost)
10174    {
10175      if (TARGET_CMOVE)
10176	{
10177	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10178	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10179			     tmp);
10180	  emit_insn (tmp);
10181	}
10182      else
10183	{
10184	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10185	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10186	  if (!scratch)
10187	    scratch = gen_reg_rtx (HImode);
10188	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10189	  emit_insn (gen_x86_sahf_1 (scratch));
10190	}
10191
10192      /* The FP codes work out to act like unsigned.  */
10193      intcmp_mode = fpcmp_mode;
10194      code = first_code;
10195      if (bypass_code != UNKNOWN)
10196	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10197				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
10198				       const0_rtx);
10199      if (second_code != UNKNOWN)
10200	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10201				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
10202				       const0_rtx);
10203    }
10204  else
10205    {
10206      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
10207      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10208      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10209      if (!scratch)
10210	scratch = gen_reg_rtx (HImode);
10211      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10212
10213      /* In the unordered case, we have to check C2 for NaN's, which
10214	 doesn't happen to work out to anything nice combination-wise.
10215	 So do some bit twiddling on the value we've got in AH to come
10216	 up with an appropriate set of condition codes.  */
10217
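      /* After fnstsw, the FPU condition bits appear in AH as C0 = 0x01,
	 C2 = 0x04 and C3 = 0x40; mask 0x45 tests all three at once.  */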
10218      intcmp_mode = CCNOmode;
10219      switch (code)
10220	{
10221	case GT:
10222	case UNGT:
10223	  if (code == GT || !TARGET_IEEE_FP)
10224	    {
10225	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10226	      code = EQ;
10227	    }
10228	  else
10229	    {
10230	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10231	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10232	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10233	      intcmp_mode = CCmode;
10234	      code = GEU;
10235	    }
10236	  break;
10237	case LT:
10238	case UNLT:
10239	  if (code == LT && TARGET_IEEE_FP)
10240	    {
10241	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10242	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10243	      intcmp_mode = CCmode;
10244	      code = EQ;
10245	    }
10246	  else
10247	    {
10248	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10249	      code = NE;
10250	    }
10251	  break;
10252	case GE:
10253	case UNGE:
10254	  if (code == GE || !TARGET_IEEE_FP)
10255	    {
10256	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10257	      code = EQ;
10258	    }
10259	  else
10260	    {
10261	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10262	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10263					     GEN_INT (0x01)));
10264	      code = NE;
10265	    }
10266	  break;
10267	case LE:
10268	case UNLE:
10269	  if (code == LE && TARGET_IEEE_FP)
10270	    {
10271	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10272	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10273	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10274	      intcmp_mode = CCmode;
10275	      code = LTU;
10276	    }
10277	  else
10278	    {
10279	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10280	      code = NE;
10281	    }
10282	  break;
10283	case EQ:
10284	case UNEQ:
10285	  if (code == EQ && TARGET_IEEE_FP)
10286	    {
10287	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10288	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10289	      intcmp_mode = CCmode;
10290	      code = EQ;
10291	    }
10292	  else
10293	    {
10294	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10295	      code = NE;
10296	      break;
10297	    }
10298	  break;
10299	case NE:
10300	case LTGT:
10301	  if (code == NE && TARGET_IEEE_FP)
10302	    {
10303	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10304	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10305					     GEN_INT (0x40)));
10306	      code = NE;
10307	    }
10308	  else
10309	    {
10310	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10311	      code = EQ;
10312	    }
10313	  break;
10314
10315	case UNORDERED:
10316	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10317	  code = NE;
10318	  break;
10319	case ORDERED:
10320	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10321	  code = EQ;
10322	  break;
10323
10324	default:
10325	  gcc_unreachable ();
10326	}
10327    }
10328
10329  /* Return the test that should be put into the flags user, i.e.
10330     the bcc, scc, or cmov instruction.  */
10331  return gen_rtx_fmt_ee (code, VOIDmode,
10332			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10333			 const0_rtx);
10334}
10335
10336rtx
10337ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10338{
10339  rtx op0, op1, ret;
10340  op0 = ix86_compare_op0;
10341  op1 = ix86_compare_op1;
10342
10343  if (second_test)
10344    *second_test = NULL_RTX;
10345  if (bypass_test)
10346    *bypass_test = NULL_RTX;
10347
10348  if (ix86_compare_emitted)
10349    {
10350      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10351      ix86_compare_emitted = NULL_RTX;
10352    }
10353  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10354    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10355				  second_test, bypass_test);
10356  else
10357    ret = ix86_expand_int_compare (code, op0, op1);
10358
10359  return ret;
10360}
10361
10362/* Return true if the CODE will result in nontrivial jump sequence.  */
10363bool
10364ix86_fp_jump_nontrivial_p (enum rtx_code code)
10365{
10366  enum rtx_code bypass_code, first_code, second_code;
10367  if (!TARGET_CMOVE)
10368    return true;
10369  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10370  return bypass_code != UNKNOWN || second_code != UNKNOWN;
10371}
10372
10373void
10374ix86_expand_branch (enum rtx_code code, rtx label)
10375{
10376  rtx tmp;
10377
10378  /* If we have emitted a compare insn, go straight to simple.
10379     ix86_expand_compare won't emit anything if ix86_compare_emitted
10380     is non-NULL.  */
10381  if (ix86_compare_emitted)
10382    goto simple;
10383
10384  switch (GET_MODE (ix86_compare_op0))
10385    {
10386    case QImode:
10387    case HImode:
10388    case SImode:
10389      simple:
10390      tmp = ix86_expand_compare (code, NULL, NULL);
10391      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10392				  gen_rtx_LABEL_REF (VOIDmode, label),
10393				  pc_rtx);
10394      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10395      return;
10396
10397    case SFmode:
10398    case DFmode:
10399    case XFmode:
10400      {
10401	rtvec vec;
10402	int use_fcomi;
10403	enum rtx_code bypass_code, first_code, second_code;
10404
10405	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10406					     &ix86_compare_op1);
10407
10408	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10409
10410	/* Check whether we will use the natural sequence with one jump.  If
10411	   so, we can expand jump early.  Otherwise delay expansion by
10412	   creating compound insn to not confuse optimizers.  */
10413	if (bypass_code == UNKNOWN && second_code == UNKNOWN
10414	    && TARGET_CMOVE)
10415	  {
10416	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10417				  gen_rtx_LABEL_REF (VOIDmode, label),
10418				  pc_rtx, NULL_RTX, NULL_RTX);
10419	  }
10420	else
10421	  {
10422	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
10423				  ix86_compare_op0, ix86_compare_op1);
10424	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10425					gen_rtx_LABEL_REF (VOIDmode, label),
10426					pc_rtx);
10427	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10428
10429	    use_fcomi = ix86_use_fcomi_compare (code);
10430	    vec = rtvec_alloc (3 + !use_fcomi);
10431	    RTVEC_ELT (vec, 0) = tmp;
10432	    RTVEC_ELT (vec, 1)
10433	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10434	    RTVEC_ELT (vec, 2)
10435	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10436	    if (! use_fcomi)
10437	      RTVEC_ELT (vec, 3)
10438		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10439
10440	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10441	  }
10442	return;
10443      }
10444
10445    case DImode:
10446      if (TARGET_64BIT)
10447	goto simple;
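      /* FALLTHRU */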
10448    case TImode:
10449      /* Expand DImode/TImode branch into multiple compare+branch.  */
10450      {
10451	rtx lo[2], hi[2], label2;
10452	enum rtx_code code1, code2, code3;
10453	enum machine_mode submode;
10454
10455	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10456	  {
10457	    tmp = ix86_compare_op0;
10458	    ix86_compare_op0 = ix86_compare_op1;
10459	    ix86_compare_op1 = tmp;
10460	    code = swap_condition (code);
10461	  }
10462	if (GET_MODE (ix86_compare_op0) == DImode)
10463	  {
10464	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10465	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10466	    submode = SImode;
10467	  }
10468	else
10469	  {
10470	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10471	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10472	    submode = DImode;
10473	  }
10474
10475	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10476	   avoid two branches.  This costs one extra insn, so disable when
10477	   optimizing for size.  */
10478
10479	if ((code == EQ || code == NE)
10480	    && (!optimize_size
10481	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
10482	  {
10483	    rtx xor0, xor1;
10484
10485	    xor1 = hi[0];
10486	    if (hi[1] != const0_rtx)
10487	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10488				   NULL_RTX, 0, OPTAB_WIDEN);
10489
10490	    xor0 = lo[0];
10491	    if (lo[1] != const0_rtx)
10492	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10493				   NULL_RTX, 0, OPTAB_WIDEN);
10494
10495	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
10496				NULL_RTX, 0, OPTAB_WIDEN);
10497
10498	    ix86_compare_op0 = tmp;
10499	    ix86_compare_op1 = const0_rtx;
10500	    ix86_expand_branch (code, label);
10501	    return;
10502	  }
10503
10504	/* Otherwise, if we are doing less-than or greater-or-equal-than,
10505	   op1 is a constant and the low word is zero, then we can just
10506	   examine the high word.  */
10507
10508	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10509	  switch (code)
10510	    {
10511	    case LT: case LTU: case GE: case GEU:
10512	      ix86_compare_op0 = hi[0];
10513	      ix86_compare_op1 = hi[1];
10514	      ix86_expand_branch (code, label);
10515	      return;
10516	    default:
10517	      break;
10518	    }
10519
10520	/* Otherwise, we need two or three jumps.  */
10521
10522	label2 = gen_label_rtx ();
10523
10524	code1 = code;
10525	code2 = swap_condition (code);
10526	code3 = unsigned_condition (code);
10527
10528	switch (code)
10529	  {
10530	  case LT: case GT: case LTU: case GTU:
10531	    break;
10532
10533	  case LE:   code1 = LT;  code2 = GT;  break;
10534	  case GE:   code1 = GT;  code2 = LT;  break;
10535	  case LEU:  code1 = LTU; code2 = GTU; break;
10536	  case GEU:  code1 = GTU; code2 = LTU; break;
10537
10538	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
10539	  case NE:   code2 = UNKNOWN; break;
10540
10541	  default:
10542	    gcc_unreachable ();
10543	  }
10544
10545	/*
10546	 * a < b =>
10547	 *    if (hi(a) < hi(b)) goto true;
10548	 *    if (hi(a) > hi(b)) goto false;
10549	 *    if (lo(a) < lo(b)) goto true;
10550	 *  false:
10551	 */
10552
10553	ix86_compare_op0 = hi[0];
10554	ix86_compare_op1 = hi[1];
10555
10556	if (code1 != UNKNOWN)
10557	  ix86_expand_branch (code1, label);
10558	if (code2 != UNKNOWN)
10559	  ix86_expand_branch (code2, label2);
10560
10561	ix86_compare_op0 = lo[0];
10562	ix86_compare_op1 = lo[1];
10563	ix86_expand_branch (code3, label);
10564
10565	if (code2 != UNKNOWN)
10566	  emit_label (label2);
10567	return;
10568      }
10569
10570    default:
10571      gcc_unreachable ();
10572    }
10573}
10574
10575/* Split branch based on floating point condition.  */
10576void
10577ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10578		      rtx target1, rtx target2, rtx tmp, rtx pushed)
10579{
10580  rtx second, bypass;
10581  rtx label = NULL_RTX;
10582  rtx condition;
10583  int bypass_probability = -1, second_probability = -1, probability = -1;
10584  rtx i;
10585
10586  if (target2 != pc_rtx)
10587    {
10588      rtx tmp = target2;
10589      code = reverse_condition_maybe_unordered (code);
10590      target2 = target1;
10591      target1 = tmp;
10592    }
10593
10594  condition = ix86_expand_fp_compare (code, op1, op2,
10595				      tmp, &second, &bypass);
10596
10597  /* Remove pushed operand from stack.  */
10598  if (pushed)
10599    ix86_free_from_memory (GET_MODE (pushed));
10600
10601  if (split_branch_probability >= 0)
10602    {
10603      /* Distribute the probabilities across the jumps.
10604	 Assume that the BYPASS and SECOND tests are always
10605	 for UNORDERED.  */
10606      probability = split_branch_probability;
10607
10608      /* A value of 1 is low enough that there is no need for the probability
10609	 to be updated.  Later we may run some experiments and see
10610	 if unordered values are more frequent in practice.  */
10611      if (bypass)
10612	bypass_probability = 1;
10613      if (second)
10614	second_probability = 1;
10615    }
10616  if (bypass != NULL_RTX)
10617    {
10618      label = gen_label_rtx ();
10619      i = emit_jump_insn (gen_rtx_SET
10620			  (VOIDmode, pc_rtx,
10621			   gen_rtx_IF_THEN_ELSE (VOIDmode,
10622						 bypass,
10623						 gen_rtx_LABEL_REF (VOIDmode,
10624								    label),
10625						 pc_rtx)));
10626      if (bypass_probability >= 0)
10627	REG_NOTES (i)
10628	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
10629			       GEN_INT (bypass_probability),
10630			       REG_NOTES (i));
10631    }
10632  i = emit_jump_insn (gen_rtx_SET
10633		      (VOIDmode, pc_rtx,
10634		       gen_rtx_IF_THEN_ELSE (VOIDmode,
10635					     condition, target1, target2)));
10636  if (probability >= 0)
10637    REG_NOTES (i)
10638      = gen_rtx_EXPR_LIST (REG_BR_PROB,
10639			   GEN_INT (probability),
10640			   REG_NOTES (i));
10641  if (second != NULL_RTX)
10642    {
10643      i = emit_jump_insn (gen_rtx_SET
10644			  (VOIDmode, pc_rtx,
10645			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10646						 target2)));
10647      if (second_probability >= 0)
10648	REG_NOTES (i)
10649	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
10650			       GEN_INT (second_probability),
10651			       REG_NOTES (i));
10652    }
10653  if (label != NULL_RTX)
10654    emit_label (label);
10655}
10656
10657int
10658ix86_expand_setcc (enum rtx_code code, rtx dest)
10659{
10660  rtx ret, tmp, tmpreg, equiv;
10661  rtx second_test, bypass_test;
10662
10663  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10664    return 0; /* FAIL */
10665
10666  gcc_assert (GET_MODE (dest) == QImode);
10667
10668  ret = ix86_expand_compare (code, &second_test, &bypass_test);
10669  PUT_MODE (ret, QImode);
10670
10671  tmp = dest;
10672  tmpreg = dest;
10673
10674  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
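  /* When the comparison needs a second or a bypass test, combine it with
     the main result: the second test is ORed in, while the bypass test is
     reversed and ANDed in, forcing the result to zero when the bypass
     (unordered) condition holds.  */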
10675  if (bypass_test || second_test)
10676    {
10677      rtx test = second_test;
10678      int bypass = 0;
10679      rtx tmp2 = gen_reg_rtx (QImode);
10680      if (bypass_test)
10681	{
10682	  gcc_assert (!second_test);
10683	  test = bypass_test;
10684	  bypass = 1;
10685	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10686	}
10687      PUT_MODE (test, QImode);
10688      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10689
10690      if (bypass)
10691	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10692      else
10693	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10694    }
10695
10696  /* Attach a REG_EQUAL note describing the comparison result.  */
10697  if (ix86_compare_op0 && ix86_compare_op1)
10698    {
10699      equiv = simplify_gen_relational (code, QImode,
10700				       GET_MODE (ix86_compare_op0),
10701				       ix86_compare_op0, ix86_compare_op1);
10702      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10703    }
10704
10705  return 1; /* DONE */
10706}
10707
10708/* Expand a comparison setting or clearing the carry flag.  Return true when
10709   successful and set *POP to the resulting comparison.  */
10710static bool
10711ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10712{
10713  enum machine_mode mode =
10714    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10715
10716  /* Do not handle double-word compares that go through the special path.
10717     FP compares are handled below only when they map to a carry flag test.  */
10718  if (mode == (TARGET_64BIT ? TImode : DImode))
10719    return false;
10720  if (FLOAT_MODE_P (mode))
10721    {
10722      rtx second_test = NULL, bypass_test = NULL;
10723      rtx compare_op, compare_seq;
10724
10725      /* Shortcut: the following common codes never translate into carry flag compares.  */
10726      if (code == EQ || code == NE || code == UNEQ || code == LTGT
10727	  || code == ORDERED || code == UNORDERED)
10728	return false;
10729
10730      /* These comparisons require the zero flag; swap the operands so they don't.  */
10731      if ((code == GT || code == UNLE || code == LE || code == UNGT)
10732	  && !TARGET_IEEE_FP)
10733	{
10734	  rtx tmp = op0;
10735	  op0 = op1;
10736	  op1 = tmp;
10737	  code = swap_condition (code);
10738	}
10739
10740      /* Try to expand the comparison and verify that we end up with a carry
10741	 flag based comparison.  This fails to be true only when we decide to
10742	 expand the comparison using arithmetic, which is not a common scenario.  */
10743      start_sequence ();
10744      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10745					   &second_test, &bypass_test);
10746      compare_seq = get_insns ();
10747      end_sequence ();
10748
10749      if (second_test || bypass_test)
10750	return false;
10751      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10752	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10753        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10754      else
10755	code = GET_CODE (compare_op);
10756      if (code != LTU && code != GEU)
10757	return false;
10758      emit_insn (compare_seq);
10759      *pop = compare_op;
10760      return true;
10761    }
10762  if (!INTEGRAL_MODE_P (mode))
10763    return false;
10764  switch (code)
10765    {
10766    case LTU:
10767    case GEU:
10768      break;
10769
10770    /* Convert a==0 into (unsigned)a<1.  */
10771    case EQ:
10772    case NE:
10773      if (op1 != const0_rtx)
10774	return false;
10775      op1 = const1_rtx;
10776      code = (code == EQ ? LTU : GEU);
10777      break;
10778
10779    /* Convert a>b into b<a or a>=b+1.  */
10780    case GTU:
10781    case LEU:
10782      if (GET_CODE (op1) == CONST_INT)
10783	{
10784	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10785	  /* Bail out on overflow.  We could still swap the operands, but that
10786	     would force loading of the constant into a register.  */
10787	  if (op1 == const0_rtx
10788	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10789	    return false;
10790	  code = (code == GTU ? GEU : LTU);
10791	}
10792      else
10793	{
10794	  rtx tmp = op1;
10795	  op1 = op0;
10796	  op0 = tmp;
10797	  code = (code == GTU ? LTU : GEU);
10798	}
10799      break;
10800
10801    /* Convert a>=0 into (unsigned)a<0x80000000.  */
10802    case LT:
10803    case GE:
10804      if (mode == DImode || op1 != const0_rtx)
10805	return false;
10806      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10807      code = (code == LT ? GEU : LTU);
10808      break;
10809    case LE:
10810    case GT:
10811      if (mode == DImode || op1 != constm1_rtx)
10812	return false;
10813      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10814      code = (code == LE ? GEU : LTU);
10815      break;
10816
10817    default:
10818      return false;
10819    }
10820  /* Swapping operands may cause a constant to appear as the first operand.  */
10821  if (!nonimmediate_operand (op0, VOIDmode))
10822    {
10823      if (no_new_pseudos)
10824	return false;
10825      op0 = force_reg (mode, op0);
10826    }
10827  ix86_compare_op0 = op0;
10828  ix86_compare_op1 = op1;
10829  *pop = ix86_expand_compare (code, NULL, NULL);
10830  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10831  return true;
10832}
10833
10834int
10835ix86_expand_int_movcc (rtx operands[])
10836{
10837  enum rtx_code code = GET_CODE (operands[1]), compare_code;
10838  rtx compare_seq, compare_op;
10839  rtx second_test, bypass_test;
10840  enum machine_mode mode = GET_MODE (operands[0]);
10841  bool sign_bit_compare_p = false;
10842
10843  start_sequence ();
10844  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10845  compare_seq = get_insns ();
10846  end_sequence ();
10847
10848  compare_code = GET_CODE (compare_op);
10849
10850  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10851      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10852    sign_bit_compare_p = true;
10853
10854  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10855     HImode insns, we'd be swallowed in word prefix ops.  */
10856
10857  if ((mode != HImode || TARGET_FAST_PREFIX)
10858      && (mode != (TARGET_64BIT ? TImode : DImode))
10859      && GET_CODE (operands[2]) == CONST_INT
10860      && GET_CODE (operands[3]) == CONST_INT)
10861    {
10862      rtx out = operands[0];
10863      HOST_WIDE_INT ct = INTVAL (operands[2]);
10864      HOST_WIDE_INT cf = INTVAL (operands[3]);
10865      HOST_WIDE_INT diff;
10866
10867      diff = ct - cf;
10868      /* Sign bit compares are better done using shifts than by using
10869	  sbb.  */
10870      if (sign_bit_compare_p
10871	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10872					     ix86_compare_op1, &compare_op))
10873	{
10874	  /* Detect overlap between destination and compare sources.  */
10875	  rtx tmp = out;
10876
10877          if (!sign_bit_compare_p)
10878	    {
10879	      bool fpcmp = false;
10880
10881	      compare_code = GET_CODE (compare_op);
10882
10883	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10884		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10885		{
10886		  fpcmp = true;
10887		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
10888		}
10889
10890	      /* To simplify the rest of the code, restrict to the GEU case.  */
10891	      if (compare_code == LTU)
10892		{
10893		  HOST_WIDE_INT tmp = ct;
10894		  ct = cf;
10895		  cf = tmp;
10896		  compare_code = reverse_condition (compare_code);
10897		  code = reverse_condition (code);
10898		}
10899	      else
10900		{
10901		  if (fpcmp)
10902		    PUT_CODE (compare_op,
10903			      reverse_condition_maybe_unordered
10904			        (GET_CODE (compare_op)));
10905		  else
10906		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10907		}
10908	      diff = ct - cf;
10909
10910	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10911		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
10912		tmp = gen_reg_rtx (mode);
10913
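	      /* Emit an sbb-based insn that sets TMP to all ones when the
		 carry flag is set and to zero otherwise.  */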
10914	      if (mode == DImode)
10915		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10916	      else
10917		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10918	    }
10919	  else
10920	    {
10921	      if (code == GT || code == GE)
10922		code = reverse_condition (code);
10923	      else
10924		{
10925		  HOST_WIDE_INT tmp = ct;
10926		  ct = cf;
10927		  cf = tmp;
10928		  diff = ct - cf;
10929		}
10930	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10931				     ix86_compare_op1, VOIDmode, 0, -1);
10932	    }
10933
10934	  if (diff == 1)
10935	    {
10936	      /*
10937	       * cmpl op0,op1
10938	       * sbbl dest,dest
10939	       * [addl dest, ct]
10940	       *
10941	       * Size 5 - 8.
10942	       */
10943	      if (ct)
10944		tmp = expand_simple_binop (mode, PLUS,
10945					   tmp, GEN_INT (ct),
10946					   copy_rtx (tmp), 1, OPTAB_DIRECT);
10947	    }
10948	  else if (cf == -1)
10949	    {
10950	      /*
10951	       * cmpl op0,op1
10952	       * sbbl dest,dest
10953	       * orl $ct, dest
10954	       *
10955	       * Size 8.
10956	       */
10957	      tmp = expand_simple_binop (mode, IOR,
10958					 tmp, GEN_INT (ct),
10959					 copy_rtx (tmp), 1, OPTAB_DIRECT);
10960	    }
10961	  else if (diff == -1 && ct)
10962	    {
10963	      /*
10964	       * cmpl op0,op1
10965	       * sbbl dest,dest
10966	       * notl dest
10967	       * [addl dest, cf]
10968	       *
10969	       * Size 8 - 11.
10970	       */
10971	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10972	      if (cf)
10973		tmp = expand_simple_binop (mode, PLUS,
10974					   copy_rtx (tmp), GEN_INT (cf),
10975					   copy_rtx (tmp), 1, OPTAB_DIRECT);
10976	    }
10977	  else
10978	    {
10979	      /*
10980	       * cmpl op0,op1
10981	       * sbbl dest,dest
10982	       * [notl dest]
10983	       * andl cf - ct, dest
10984	       * [addl dest, ct]
10985	       *
10986	       * Size 8 - 11.
10987	       */
10988
10989	      if (cf == 0)
10990		{
10991		  cf = ct;
10992		  ct = 0;
10993		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10994		}
10995
10996	      tmp = expand_simple_binop (mode, AND,
10997					 copy_rtx (tmp),
10998					 gen_int_mode (cf - ct, mode),
10999					 copy_rtx (tmp), 1, OPTAB_DIRECT);
11000	      if (ct)
11001		tmp = expand_simple_binop (mode, PLUS,
11002					   copy_rtx (tmp), GEN_INT (ct),
11003					   copy_rtx (tmp), 1, OPTAB_DIRECT);
11004	    }
11005
11006	  if (!rtx_equal_p (tmp, out))
11007	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11008
11009	  return 1; /* DONE */
11010	}
11011
11012      if (diff < 0)
11013	{
11014	  HOST_WIDE_INT tmp;
11015	  tmp = ct, ct = cf, cf = tmp;
11016	  diff = -diff;
11017	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11018	    {
11019	      /* We may be reversing an unordered compare to a normal compare,
11020		 which is not valid in general (we may convert a non-trapping
11021		 condition to a trapping one); however, on i386 we currently
11022		 emit all comparisons unordered.  */
11023	      compare_code = reverse_condition_maybe_unordered (compare_code);
11024	      code = reverse_condition_maybe_unordered (code);
11025	    }
11026	  else
11027	    {
11028	      compare_code = reverse_condition (compare_code);
11029	      code = reverse_condition (code);
11030	    }
11031	}
11032
11033      compare_code = UNKNOWN;
11034      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11035	  && GET_CODE (ix86_compare_op1) == CONST_INT)
11036	{
11037	  if (ix86_compare_op1 == const0_rtx
11038	      && (code == LT || code == GE))
11039	    compare_code = code;
11040	  else if (ix86_compare_op1 == constm1_rtx)
11041	    {
11042	      if (code == LE)
11043		compare_code = LT;
11044	      else if (code == GT)
11045		compare_code = GE;
11046	    }
11047	}
11048
11049      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
11050      if (compare_code != UNKNOWN
11051	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11052	  && (cf == -1 || ct == -1))
11053	{
11054	  /* If lea code below could be used, only optimize
11055	     if it results in a 2 insn sequence.  */
11056
11057	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11058		 || diff == 3 || diff == 5 || diff == 9)
11059	      || (compare_code == LT && ct == -1)
11060	      || (compare_code == GE && cf == -1))
11061	    {
11062	      /*
11063	       * notl op1	(if necessary)
11064	       * sarl $31, op1
11065	       * orl cf, op1
11066	       */
11067	      if (ct != -1)
11068		{
11069		  cf = ct;
11070		  ct = -1;
11071		  code = reverse_condition (code);
11072		}
11073
11074	      out = emit_store_flag (out, code, ix86_compare_op0,
11075				     ix86_compare_op1, VOIDmode, 0, -1);
11076
11077	      out = expand_simple_binop (mode, IOR,
11078					 out, GEN_INT (cf),
11079					 out, 1, OPTAB_DIRECT);
11080	      if (out != operands[0])
11081		emit_move_insn (operands[0], out);
11082
11083	      return 1; /* DONE */
11084	    }
11085	}
11086
11087
11088      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11089	   || diff == 3 || diff == 5 || diff == 9)
11090	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11091	  && (mode != DImode
11092	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11093	{
11094	  /*
11095	   * xorl dest,dest
11096	   * cmpl op1,op2
11097	   * setcc dest
11098	   * lea cf(dest*(ct-cf)),dest
11099	   *
11100	   * Size 14.
11101	   *
11102	   * This also catches the degenerate setcc-only case.
11103	   */
11104
11105	  rtx tmp;
11106	  int nops;
11107
11108	  out = emit_store_flag (out, code, ix86_compare_op0,
11109				 ix86_compare_op1, VOIDmode, 0, 1);
11110
11111	  nops = 0;
11112	  /* On x86_64 the lea instruction operates on Pmode, so we need
11113	     to get the arithmetic done in the proper mode to match.  */
11114	  if (diff == 1)
11115	    tmp = copy_rtx (out);
11116	  else
11117	    {
11118	      rtx out1;
11119	      out1 = copy_rtx (out);
11120	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11121	      nops++;
11122	      if (diff & 1)
11123		{
11124		  tmp = gen_rtx_PLUS (mode, tmp, out1);
11125		  nops++;
11126		}
11127	    }
11128	  if (cf != 0)
11129	    {
11130	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11131	      nops++;
11132	    }
11133	  if (!rtx_equal_p (tmp, out))
11134	    {
11135	      if (nops == 1)
11136		out = force_operand (tmp, copy_rtx (out));
11137	      else
11138		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11139	    }
11140	  if (!rtx_equal_p (out, operands[0]))
11141	    emit_move_insn (operands[0], copy_rtx (out));
11142
11143	  return 1; /* DONE */
11144	}
11145
11146      /*
11147       * General case:			Jumpful:
11148       *   xorl dest,dest		cmpl op1, op2
11149       *   cmpl op1, op2		movl ct, dest
11150       *   setcc dest			jcc 1f
11151       *   decl dest			movl cf, dest
11152       *   andl (cf-ct),dest		1:
11153       *   addl ct,dest
11154       *
11155       * Size 20.			Size 14.
11156       *
11157       * This is reasonably steep, but branch mispredict costs are
11158       * high on modern cpus, so consider failing only if optimizing
11159       * for space.
11160       */
11161
11162      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11163	  && BRANCH_COST >= 2)
11164	{
11165	  if (cf == 0)
11166	    {
11167	      cf = ct;
11168	      ct = 0;
11169	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11170		/* We may be reversing an unordered compare to a normal compare,
11171		   which is not valid in general (we may convert a non-trapping
11172		   condition to a trapping one); however, on i386 we currently
11173		   emit all comparisons unordered.  */
11174		code = reverse_condition_maybe_unordered (code);
11175	      else
11176		{
11177		  code = reverse_condition (code);
11178		  if (compare_code != UNKNOWN)
11179		    compare_code = reverse_condition (compare_code);
11180		}
11181	    }
11182
11183	  if (compare_code != UNKNOWN)
11184	    {
11185	      /* notl op1	(if needed)
11186		 sarl $31, op1
11187		 andl (cf-ct), op1
11188		 addl ct, op1
11189
11190		 For x < 0 (resp. x <= -1) there will be no notl,
11191		 so if possible swap the constants to get rid of the
11192		 complement.
11193		 True/false will be -1/0 while code below (store flag
11194		 followed by decrement) is 0/-1, so the constants need
11195		 to be exchanged once more.  */
11196
11197	      if (compare_code == GE || !cf)
11198		{
11199		  code = reverse_condition (code);
11200		  compare_code = LT;
11201		}
11202	      else
11203		{
11204		  HOST_WIDE_INT tmp = cf;
11205		  cf = ct;
11206		  ct = tmp;
11207		}
11208
11209	      out = emit_store_flag (out, code, ix86_compare_op0,
11210				     ix86_compare_op1, VOIDmode, 0, -1);
11211	    }
11212	  else
11213	    {
11214	      out = emit_store_flag (out, code, ix86_compare_op0,
11215				     ix86_compare_op1, VOIDmode, 0, 1);
11216
11217	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11218					 copy_rtx (out), 1, OPTAB_DIRECT);
11219	    }
11220
11221	  out = expand_simple_binop (mode, AND, copy_rtx (out),
11222				     gen_int_mode (cf - ct, mode),
11223				     copy_rtx (out), 1, OPTAB_DIRECT);
11224	  if (ct)
11225	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11226				       copy_rtx (out), 1, OPTAB_DIRECT);
11227	  if (!rtx_equal_p (out, operands[0]))
11228	    emit_move_insn (operands[0], copy_rtx (out));
11229
11230	  return 1; /* DONE */
11231	}
11232    }
11233
11234  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11235    {
11236      /* Try a few things more with specific constants and a variable.  */
11237      /* Try a few more things with specific constants and a variable.  */
11238      optab op;
11239      rtx var, orig_out, out, tmp;
11240
11241      if (BRANCH_COST <= 2)
11242	return 0; /* FAIL */
11243
11244      /* If one of the two operands is an interesting constant, load a 0/-1
11245	 constant via the recursion below and mask the variable in with a logical operation.  */
11246
11247      if (GET_CODE (operands[2]) == CONST_INT)
11248	{
11249	  var = operands[3];
11250	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11251	    operands[3] = constm1_rtx, op = and_optab;
11252	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11253	    operands[3] = const0_rtx, op = ior_optab;
11254	  else
11255	    return 0; /* FAIL */
11256	}
11257      else if (GET_CODE (operands[3]) == CONST_INT)
11258	{
11259	  var = operands[2];
11260	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11261	    operands[2] = constm1_rtx, op = and_optab;
11262	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11263	    operands[2] = const0_rtx, op = ior_optab;
11264	  else
11265	    return 0; /* FAIL */
11266	}
11267      else
11268        return 0; /* FAIL */
11269
11270      orig_out = operands[0];
11271      tmp = gen_reg_rtx (mode);
11272      operands[0] = tmp;
11273
11274      /* Recurse to get the constant loaded.  */
11275      if (ix86_expand_int_movcc (operands) == 0)
11276        return 0; /* FAIL */
11277
11278      /* Mask in the interesting variable.  */
11279      out = expand_binop (mode, op, var, tmp, orig_out, 0,
11280			  OPTAB_WIDEN);
11281      if (!rtx_equal_p (out, orig_out))
11282	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11283
11284      return 1; /* DONE */
11285    }
11286
11287  /*
11288   * For comparison with above,
11289   *
11290   * movl cf,dest
11291   * movl ct,tmp
11292   * cmpl op1,op2
11293   * cmovcc tmp,dest
11294   *
11295   * Size 15.
11296   */
11297
11298  if (! nonimmediate_operand (operands[2], mode))
11299    operands[2] = force_reg (mode, operands[2]);
11300  if (! nonimmediate_operand (operands[3], mode))
11301    operands[3] = force_reg (mode, operands[3]);
11302
11303  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11304    {
11305      rtx tmp = gen_reg_rtx (mode);
11306      emit_move_insn (tmp, operands[3]);
11307      operands[3] = tmp;
11308    }
11309  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11310    {
11311      rtx tmp = gen_reg_rtx (mode);
11312      emit_move_insn (tmp, operands[2]);
11313      operands[2] = tmp;
11314    }
11315
11316  if (! register_operand (operands[2], VOIDmode)
11317      && (mode == QImode
11318          || ! register_operand (operands[3], VOIDmode)))
11319    operands[2] = force_reg (mode, operands[2]);
11320
11321  if (mode == QImode
11322      && ! register_operand (operands[3], VOIDmode))
11323    operands[3] = force_reg (mode, operands[3]);
11324
11325  emit_insn (compare_seq);
11326  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11327			  gen_rtx_IF_THEN_ELSE (mode,
11328						compare_op, operands[2],
11329						operands[3])));
11330  if (bypass_test)
11331    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11332			    gen_rtx_IF_THEN_ELSE (mode,
11333				  bypass_test,
11334				  copy_rtx (operands[3]),
11335				  copy_rtx (operands[0]))));
11336  if (second_test)
11337    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11338			    gen_rtx_IF_THEN_ELSE (mode,
11339				  second_test,
11340				  copy_rtx (operands[2]),
11341				  copy_rtx (operands[0]))));
11342
11343  return 1; /* DONE */
11344}
11345
11346/* Swap, force into registers, or otherwise massage the two operands
11347   to an sse comparison with a mask result.  Thus we differ a bit from
11348   ix86_prepare_fp_compare_args which expects to produce a flags result.
11349
11350   The DEST operand exists to help determine whether to commute commutative
11351   operators.  The POP0/POP1 operands are updated in place.  The new
11352   comparison code is returned, or UNKNOWN if not implementable.  */
11353
11354static enum rtx_code
11355ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11356				  rtx *pop0, rtx *pop1)
11357{
11358  rtx tmp;
11359
11360  switch (code)
11361    {
11362    case LTGT:
11363    case UNEQ:
11364      /* We have no LTGT as an operator.  We could implement it with
11365	 NE & ORDERED, but this requires an extra temporary.  It's
11366	 not clear that it's worth it.  */
11367      return UNKNOWN;
11368
11369    case LT:
11370    case LE:
11371    case UNGT:
11372    case UNGE:
11373      /* These are supported directly.  */
11374      break;
11375
11376    case EQ:
11377    case NE:
11378    case UNORDERED:
11379    case ORDERED:
11380      /* For commutative operators, try to canonicalize the destination
11381	 operand to be first in the comparison - this helps reload to
11382	 avoid extra moves.  */
11383      if (!dest || !rtx_equal_p (dest, *pop1))
11384	break;
11385      /* FALLTHRU */
11386
11387    case GE:
11388    case GT:
11389    case UNLE:
11390    case UNLT:
11391      /* These are not supported directly.  Swap the comparison operands
11392	 to transform into something that is supported.  */
11393      tmp = *pop0;
11394      *pop0 = *pop1;
11395      *pop1 = tmp;
11396      code = swap_condition (code);
11397      break;
11398
11399    default:
11400      gcc_unreachable ();
11401    }
11402
11403  return code;
11404}
11405
11406/* Detect conditional moves that exactly match min/max operational
11407   semantics.  Note that this is IEEE safe, as long as we don't
11408   interchange the operands.
11409
11410   Returns FALSE if this conditional move doesn't match a MIN/MAX,
11411   and TRUE if the operation is successful and instructions are emitted.  */
11412
11413static bool
11414ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11415			   rtx cmp_op1, rtx if_true, rtx if_false)
11416{
11417  enum machine_mode mode;
11418  bool is_min;
11419  rtx tmp;
11420
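  /* UNGE is the exact inverse of LT (including when an operand is a NaN),
     so it is handled by swapping the IF_TRUE and IF_FALSE arms; any other
     code does not match min/max semantics directly.  */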
11421  if (code == LT)
11422    ;
11423  else if (code == UNGE)
11424    {
11425      tmp = if_true;
11426      if_true = if_false;
11427      if_false = tmp;
11428    }
11429  else
11430    return false;
11431
11432  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11433    is_min = true;
11434  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11435    is_min = false;
11436  else
11437    return false;
11438
11439  mode = GET_MODE (dest);
11440
11441  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11442     but MODE may be a vector mode and thus not appropriate.  */
11443  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11444    {
11445      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11446      rtvec v;
11447
11448      if_true = force_reg (mode, if_true);
11449      v = gen_rtvec (2, if_true, if_false);
11450      tmp = gen_rtx_UNSPEC (mode, v, u);
11451    }
11452  else
11453    {
11454      code = is_min ? SMIN : SMAX;
11455      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11456    }
11457
11458  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11459  return true;
11460}
11461
11462/* Expand an sse vector comparison.  Return the register with the result.  */
11463
11464static rtx
11465ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11466		     rtx op_true, rtx op_false)
11467{
11468  enum machine_mode mode = GET_MODE (dest);
11469  rtx x;
11470
11471  cmp_op0 = force_reg (mode, cmp_op0);
11472  if (!nonimmediate_operand (cmp_op1, mode))
11473    cmp_op1 = force_reg (mode, cmp_op1);
11474
11475  if (optimize
11476      || reg_overlap_mentioned_p (dest, op_true)
11477      || reg_overlap_mentioned_p (dest, op_false))
11478    dest = gen_reg_rtx (mode);
11479
11480  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11481  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11482
11483  return dest;
11484}
11485
11486/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11487   operations.  This is used for both scalar and vector conditional moves.  */
11488
11489static void
11490ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11491{
11492  enum machine_mode mode = GET_MODE (dest);
11493  rtx t2, t3, x;
11494
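  /* Compute dest = (cmp & op_true) | (~cmp & op_false), taking shortcuts
     when either arm is the zero constant.  */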
11495  if (op_false == CONST0_RTX (mode))
11496    {
11497      op_true = force_reg (mode, op_true);
11498      x = gen_rtx_AND (mode, cmp, op_true);
11499      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11500    }
11501  else if (op_true == CONST0_RTX (mode))
11502    {
11503      op_false = force_reg (mode, op_false);
11504      x = gen_rtx_NOT (mode, cmp);
11505      x = gen_rtx_AND (mode, x, op_false);
11506      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11507    }
11508  else
11509    {
11510      op_true = force_reg (mode, op_true);
11511      op_false = force_reg (mode, op_false);
11512
11513      t2 = gen_reg_rtx (mode);
11514      if (optimize)
11515	t3 = gen_reg_rtx (mode);
11516      else
11517	t3 = dest;
11518
11519      x = gen_rtx_AND (mode, op_true, cmp);
11520      emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11521
11522      x = gen_rtx_NOT (mode, cmp);
11523      x = gen_rtx_AND (mode, x, op_false);
11524      emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11525
11526      x = gen_rtx_IOR (mode, t3, t2);
11527      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11528    }
11529}
11530
11531/* Expand a floating-point conditional move.  Return true if successful.  */
11532
11533int
11534ix86_expand_fp_movcc (rtx operands[])
11535{
11536  enum machine_mode mode = GET_MODE (operands[0]);
11537  enum rtx_code code = GET_CODE (operands[1]);
11538  rtx tmp, compare_op, second_test, bypass_test;
11539
11540  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11541    {
11542      enum machine_mode cmode;
11543
11544      /* Since we have no cmove for sse registers, don't force bad register
11545	 allocation just to gain access to it.  Deny movcc when the
11546	 comparison mode doesn't match the move mode.  */
11547      cmode = GET_MODE (ix86_compare_op0);
11548      if (cmode == VOIDmode)
11549	cmode = GET_MODE (ix86_compare_op1);
11550      if (cmode != mode)
11551	return 0;
11552
11553      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11554					       &ix86_compare_op0,
11555					       &ix86_compare_op1);
11556      if (code == UNKNOWN)
11557	return 0;
11558
11559      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11560				     ix86_compare_op1, operands[2],
11561				     operands[3]))
11562	return 1;
11563
11564      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11565				 ix86_compare_op1, operands[2], operands[3]);
11566      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11567      return 1;
11568    }
11569
11570  /* The floating point conditional move instructions don't directly
11571     support conditions resulting from a signed integer comparison.  */
11572
11573  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11574
11578  if (!fcmov_comparison_operator (compare_op, VOIDmode))
11579    {
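      /* Materialize the comparison with setcc into a QImode register and
	 compare that register against zero; an integer NE test is always
	 usable by fcmov.  */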
11580      gcc_assert (!second_test && !bypass_test);
11581      tmp = gen_reg_rtx (QImode);
11582      ix86_expand_setcc (code, tmp);
11583      code = NE;
11584      ix86_compare_op0 = tmp;
11585      ix86_compare_op1 = const0_rtx;
11586      compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
11587    }
11588  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11589    {
11590      tmp = gen_reg_rtx (mode);
11591      emit_move_insn (tmp, operands[3]);
11592      operands[3] = tmp;
11593    }
11594  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11595    {
11596      tmp = gen_reg_rtx (mode);
11597      emit_move_insn (tmp, operands[2]);
11598      operands[2] = tmp;
11599    }
11600
11601  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11602			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
11603						operands[2], operands[3])));
11604  if (bypass_test)
11605    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11606			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11607						  operands[3], operands[0])));
11608  if (second_test)
11609    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11610			    gen_rtx_IF_THEN_ELSE (mode, second_test,
11611						  operands[2], operands[0])));
11612
11613  return 1;
11614}
11615
11616/* Expand a floating-point vector conditional move; a vcond operation
11617   rather than a movcc operation.  */
11618
11619bool
11620ix86_expand_fp_vcond (rtx operands[])
11621{
11622  enum rtx_code code = GET_CODE (operands[3]);
11623  rtx cmp;
11624
11625  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11626					   &operands[4], &operands[5]);
11627  if (code == UNKNOWN)
11628    return false;
11629
11630  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11631				 operands[5], operands[1], operands[2]))
11632    return true;
11633
11634  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11635			     operands[1], operands[2]);
11636  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11637  return true;
11638}
11639
11640/* Expand a signed integral vector conditional move.  */
11641
11642bool
11643ix86_expand_int_vcond (rtx operands[])
11644{
11645  enum machine_mode mode = GET_MODE (operands[0]);
11646  enum rtx_code code = GET_CODE (operands[3]);
11647  bool negate = false;
11648  rtx x, cop0, cop1;
11649
11650  cop0 = operands[4];
11651  cop1 = operands[5];
11652
11653  /* Canonicalize the comparison to EQ, GT, GTU.  */
11654  switch (code)
11655    {
11656    case EQ:
11657    case GT:
11658    case GTU:
11659      break;
11660
11661    case NE:
11662    case LE:
11663    case LEU:
11664      code = reverse_condition (code);
11665      negate = true;
11666      break;
11667
11668    case GE:
11669    case GEU:
11670      code = reverse_condition (code);
11671      negate = true;
11672      /* FALLTHRU */
11673
11674    case LT:
11675    case LTU:
11676      code = swap_condition (code);
11677      x = cop0, cop0 = cop1, cop1 = x;
11678      break;
11679
11680    default:
11681      gcc_unreachable ();
11682    }
11683
11684  /* Unsigned parallel compare is not supported by the hardware.  Play some
11685     tricks to turn this into a signed comparison against 0.  */
11686  if (code == GTU)
11687    {
11688      cop0 = force_reg (mode, cop0);
11689
11690      switch (mode)
11691	{
11692	case V4SImode:
11693	  {
11694	    rtx t1, t2, mask;
11695
11696	    /* Perform a parallel modulo subtraction.  */
11697	    t1 = gen_reg_rtx (mode);
11698	    emit_insn (gen_subv4si3 (t1, cop0, cop1));
11699
11700	    /* Extract the original sign bit of op0.  */
11701	    mask = GEN_INT (-0x80000000);
11702	    mask = gen_rtx_CONST_VECTOR (mode,
11703			gen_rtvec (4, mask, mask, mask, mask));
11704	    mask = force_reg (mode, mask);
11705	    t2 = gen_reg_rtx (mode);
11706	    emit_insn (gen_andv4si3 (t2, cop0, mask));
11707
11708	    /* XOR it back into the result of the subtraction.  This results
11709	       in the sign bit set iff we saw unsigned underflow.  */
11710	    x = gen_reg_rtx (mode);
11711	    emit_insn (gen_xorv4si3 (x, t1, t2));
11712
11713	    code = GT;
11714	  }
11715	  break;
11716
11717	case V16QImode:
11718	case V8HImode:
11719	  /* Perform a parallel unsigned saturating subtraction.  */
11720	  x = gen_reg_rtx (mode);
11721	  emit_insn (gen_rtx_SET (VOIDmode, x,
11722				  gen_rtx_US_MINUS (mode, cop0, cop1)));
11723
11724	  code = EQ;
11725	  negate = !negate;
11726	  break;
11727
11728	default:
11729	  gcc_unreachable ();
11730	}
11731
11732      cop0 = x;
11733      cop1 = CONST0_RTX (mode);
11734    }
11735
11736  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11737			   operands[1+negate], operands[2-negate]);
11738
11739  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11740			 operands[2-negate]);
11741  return true;
11742}
11743
11744/* Expand conditional increment or decrement using adc/sbb instructions.
11745   The default case using setcc followed by the conditional move can be
11746   done by generic code.  */
11747int
11748ix86_expand_int_addcc (rtx operands[])
11749{
11750  enum rtx_code code = GET_CODE (operands[1]);
11751  rtx compare_op;
11752  rtx val = const0_rtx;
11753  bool fpcmp = false;
11754  enum machine_mode mode = GET_MODE (operands[0]);
11755
11756  if (operands[3] != const1_rtx
11757      && operands[3] != constm1_rtx)
11758    return 0;
11759  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11760				       ix86_compare_op1, &compare_op))
11761     return 0;
11762  code = GET_CODE (compare_op);
11763
11764  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11765      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11766    {
11767      fpcmp = true;
11768      code = ix86_fp_compare_code_to_integer (code);
11769    }
11770
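  /* CODE is now either LTU or GEU.  The GEU case is handled by reversing
     the flag comparison and using -1 instead of 0 as the constant operand
     of the adc/sbb insn constructed below.  */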
11771  if (code != LTU)
11772    {
11773      val = constm1_rtx;
11774      if (fpcmp)
11775	PUT_CODE (compare_op,
11776		  reverse_condition_maybe_unordered
11777		    (GET_CODE (compare_op)));
11778      else
11779	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11780    }
11781  PUT_MODE (compare_op, mode);
11782
11783  /* Construct either adc or sbb insn.  */
11784  if ((code == LTU) == (operands[3] == constm1_rtx))
11785    {
11786      switch (GET_MODE (operands[0]))
11787	{
11788	  case QImode:
11789            emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11790	    break;
11791	  case HImode:
11792            emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11793	    break;
11794	  case SImode:
11795            emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11796	    break;
11797	  case DImode:
11798            emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11799	    break;
11800	  default:
11801	    gcc_unreachable ();
11802	}
11803    }
11804  else
11805    {
11806      switch (GET_MODE (operands[0]))
11807	{
11808	  case QImode:
11809            emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11810	    break;
11811	  case HImode:
11812            emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11813	    break;
11814	  case SImode:
11815            emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11816	    break;
11817	  case DImode:
11818            emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11819	    break;
11820	  default:
11821	    gcc_unreachable ();
11822	}
11823    }
11824  return 1; /* DONE */
11825}
11826
11827
11828/* Split OPERAND into word-sized PARTS.  Similar to split_di, but this also
11829   works for floating-point parameters and non-offsettable memories.
11830   For pushes, it returns just stack offsets; the values will be saved
11831   in the right order.  At most three parts are generated.  */
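/* For example, on a 32-bit target a DImode or DFmode value splits into two
   SImode parts and an XFmode value into three; on a 64-bit target TImode and
   TFmode values split into two DImode parts, while XFmode splits into a
   DImode part and an SImode part.  */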
11832
11833static int
11834ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11835{
11836  int size;
11837
11838  if (!TARGET_64BIT)
11839    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11840  else
11841    size = (GET_MODE_SIZE (mode) + 4) / 8;
11842
11843  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11844  gcc_assert (size >= 2 && size <= 3);
11845
11846  /* Optimize constant pool reference to immediates.  This is used by fp
11847     moves, that force all constants to memory to allow combining.  */
11848  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11849    {
11850      rtx tmp = maybe_get_pool_constant (operand);
11851      if (tmp)
11852	operand = tmp;
11853    }
11854
11855  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11856    {
11857	      /* The only non-offsettable memories we handle are pushes.  */
11858      int ok = push_operand (operand, VOIDmode);
11859
11860      gcc_assert (ok);
11861
11862      operand = copy_rtx (operand);
11863      PUT_MODE (operand, Pmode);
11864      parts[0] = parts[1] = parts[2] = operand;
11865      return size;
11866    }
11867
11868  if (GET_CODE (operand) == CONST_VECTOR)
11869    {
11870      enum machine_mode imode = int_mode_for_mode (mode);
11871      /* Caution: if we looked through a constant pool memory above,
11872	 the operand may actually have a different mode now.  That's
11873	 ok, since we want to pun this all the way back to an integer.  */
11874      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11875      gcc_assert (operand != NULL);
11876      mode = imode;
11877    }
11878
11879  if (!TARGET_64BIT)
11880    {
11881      if (mode == DImode)
11882	split_di (&operand, 1, &parts[0], &parts[1]);
11883      else
11884	{
11885	  if (REG_P (operand))
11886	    {
11887	      gcc_assert (reload_completed);
11888	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11889	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11890	      if (size == 3)
11891		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11892	    }
11893	  else if (offsettable_memref_p (operand))
11894	    {
11895	      operand = adjust_address (operand, SImode, 0);
11896	      parts[0] = operand;
11897	      parts[1] = adjust_address (operand, SImode, 4);
11898	      if (size == 3)
11899		parts[2] = adjust_address (operand, SImode, 8);
11900	    }
11901	  else if (GET_CODE (operand) == CONST_DOUBLE)
11902	    {
11903	      REAL_VALUE_TYPE r;
11904	      long l[4];
11905
11906	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11907	      switch (mode)
11908		{
11909		case XFmode:
11910		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11911		  parts[2] = gen_int_mode (l[2], SImode);
11912		  break;
11913		case DFmode:
11914		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11915		  break;
11916		default:
11917		  gcc_unreachable ();
11918		}
11919	      parts[1] = gen_int_mode (l[1], SImode);
11920	      parts[0] = gen_int_mode (l[0], SImode);
11921	    }
11922	  else
11923	    gcc_unreachable ();
11924	}
11925    }
11926  else
11927    {
11928      if (mode == TImode)
11929	split_ti (&operand, 1, &parts[0], &parts[1]);
11930      if (mode == XFmode || mode == TFmode)
11931	{
11932	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11933	  if (REG_P (operand))
11934	    {
11935	      gcc_assert (reload_completed);
11936	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11937	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11938	    }
11939	  else if (offsettable_memref_p (operand))
11940	    {
11941	      operand = adjust_address (operand, DImode, 0);
11942	      parts[0] = operand;
11943	      parts[1] = adjust_address (operand, upper_mode, 8);
11944	    }
11945	  else if (GET_CODE (operand) == CONST_DOUBLE)
11946	    {
11947	      REAL_VALUE_TYPE r;
11948	      long l[4];
11949
11950	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11951	      real_to_target (l, &r, mode);
11952
11953	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
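	      /* This branch is compiled even on hosts where HOST_WIDE_INT is
		 only 32 bits, where a literal shift count of 32 would draw a
		 warning; (x << 31) << 1 computes x << 32 once the branch is
		 actually taken, and ((HOST_WIDE_INT) 2 << 31) - 1 is simply
		 the low 32-bit mask 0xffffffff.  */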
11954	      if (HOST_BITS_PER_WIDE_INT >= 64)
11955	        parts[0]
11956		  = gen_int_mode
11957		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11958		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11959		       DImode);
11960	      else
11961	        parts[0] = immed_double_const (l[0], l[1], DImode);
11962
11963	      if (upper_mode == SImode)
11964	        parts[1] = gen_int_mode (l[2], SImode);
11965	      else if (HOST_BITS_PER_WIDE_INT >= 64)
11966	        parts[1]
11967		  = gen_int_mode
11968		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11969		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11970		       DImode);
11971	      else
11972	        parts[1] = immed_double_const (l[2], l[3], DImode);
11973	    }
11974	  else
11975	    gcc_unreachable ();
11976	}
11977    }
11978
11979  return size;
11980}
11981
11982/* Emit insns to perform a move or push of DI, DF, and XF values.
11983   All required insns are emitted here; no normal moves are left for the
11984   caller.  Operands 2-4 receive the destination parts in the correct
11985   order; operands 5-7 receive the corresponding source parts.  */
11986
11987void
11988ix86_split_long_move (rtx operands[])
11989{
11990  rtx part[2][3];
11991  int nparts;
11992  int push = 0;
11993  int collisions = 0;
11994  enum machine_mode mode = GET_MODE (operands[0]);
11995
11996  /* The DFmode expanders may ask us to move a double.
11997     For a 64-bit target this is a single move.  By hiding that fact
11998     here we simplify the i386.md splitters.  */
11999  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12000    {
12001      /* Optimize constant pool reference to immediates.  This is used by
12002	 fp moves, that force all constants to memory to allow combining.  */
12003
12004      if (GET_CODE (operands[1]) == MEM
12005	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12006	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12007	operands[1] = get_pool_constant (XEXP (operands[1], 0));
12008      if (push_operand (operands[0], VOIDmode))
12009	{
12010	  operands[0] = copy_rtx (operands[0]);
12011	  PUT_MODE (operands[0], Pmode);
12012	}
12013      else
12014        operands[0] = gen_lowpart (DImode, operands[0]);
12015      operands[1] = gen_lowpart (DImode, operands[1]);
12016      emit_move_insn (operands[0], operands[1]);
12017      return;
12018    }
12019
12020  /* The only non-offsettable memory we handle is push.  */
12021  if (push_operand (operands[0], VOIDmode))
12022    push = 1;
12023  else
12024    gcc_assert (GET_CODE (operands[0]) != MEM
12025		|| offsettable_memref_p (operands[0]));
12026
12027  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12028  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12029
12030  /* When emitting push, take care for source operands on the stack.  */
12031  if (push && GET_CODE (operands[1]) == MEM
12032      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12033    {
12034      if (nparts == 3)
12035	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12036				     XEXP (part[1][2], 0));
12037      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12038				   XEXP (part[1][1], 0));
12039    }
12040
12041  /* We need to do the copy in the right order in case an address register
12042     of the source overlaps the destination.  */
12043  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12044    {
12045      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12046	collisions++;
12047      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12048	collisions++;
12049      if (nparts == 3
12050	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12051	collisions++;
12052
12053      /* Collision in the middle part can be handled by reordering.  */
12054      if (collisions == 1 && nparts == 3
12055	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12056	{
12057	  rtx tmp;
12058	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12059	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12060	}
12061
12062      /* If there are more collisions, we can't handle them by reordering.
12063	 Do an lea to the last part and use only one colliding move.  */
12064      else if (collisions > 1)
12065	{
12066	  rtx base;
12067
12068	  collisions = 1;
12069
12070	  base = part[0][nparts - 1];
12071
12072	  /* Handle the case when the last part isn't valid for lea.
12073	     Happens in 64-bit mode storing the 12-byte XFmode.  */
12074	  if (GET_MODE (base) != Pmode)
12075	    base = gen_rtx_REG (Pmode, REGNO (base));
12076
12077	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12078	  part[1][0] = replace_equiv_address (part[1][0], base);
12079	  part[1][1] = replace_equiv_address (part[1][1],
12080				      plus_constant (base, UNITS_PER_WORD));
12081	  if (nparts == 3)
12082	    part[1][2] = replace_equiv_address (part[1][2],
12083				      plus_constant (base, 8));
12084	}
12085    }
12086
12087  if (push)
12088    {
12089      if (!TARGET_64BIT)
12090	{
12091	  if (nparts == 3)
12092	    {
12093	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12094                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12095	      emit_move_insn (part[0][2], part[1][2]);
12096	    }
12097	}
12098      else
12099	{
12100	  /* In 64-bit mode we don't have a 32-bit push available.  If this is a
12101	     register, that is OK - we just use the larger counterpart.  We also
12102	     retype the memory - this comes from an attempt to avoid a REX prefix
12103	     when moving the second half of a TFmode value.  */
12104	  if (GET_MODE (part[1][1]) == SImode)
12105	    {
12106	      switch (GET_CODE (part[1][1]))
12107		{
12108		case MEM:
12109		  part[1][1] = adjust_address (part[1][1], DImode, 0);
12110		  break;
12111
12112		case REG:
12113		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12114		  break;
12115
12116		default:
12117		  gcc_unreachable ();
12118		}
12119
12120	      if (GET_MODE (part[1][0]) == SImode)
12121		part[1][0] = part[1][1];
12122	    }
12123	}
12124      emit_move_insn (part[0][1], part[1][1]);
12125      emit_move_insn (part[0][0], part[1][0]);
12126      return;
12127    }
12128
12129  /* Choose the correct order so as not to overwrite the source before it is copied.  */
12130  if ((REG_P (part[0][0])
12131       && REG_P (part[1][1])
12132       && (REGNO (part[0][0]) == REGNO (part[1][1])
12133	   || (nparts == 3
12134	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
12135      || (collisions > 0
12136	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12137    {
12138      if (nparts == 3)
12139	{
12140	  operands[2] = part[0][2];
12141	  operands[3] = part[0][1];
12142	  operands[4] = part[0][0];
12143	  operands[5] = part[1][2];
12144	  operands[6] = part[1][1];
12145	  operands[7] = part[1][0];
12146	}
12147      else
12148	{
12149	  operands[2] = part[0][1];
12150	  operands[3] = part[0][0];
12151	  operands[5] = part[1][1];
12152	  operands[6] = part[1][0];
12153	}
12154    }
12155  else
12156    {
12157      if (nparts == 3)
12158	{
12159	  operands[2] = part[0][0];
12160	  operands[3] = part[0][1];
12161	  operands[4] = part[0][2];
12162	  operands[5] = part[1][0];
12163	  operands[6] = part[1][1];
12164	  operands[7] = part[1][2];
12165	}
12166      else
12167	{
12168	  operands[2] = part[0][0];
12169	  operands[3] = part[0][1];
12170	  operands[5] = part[1][0];
12171	  operands[6] = part[1][1];
12172	}
12173    }
12174
12175  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
12176  if (optimize_size)
12177    {
12178      if (GET_CODE (operands[5]) == CONST_INT
12179	  && operands[5] != const0_rtx
12180	  && REG_P (operands[2]))
12181	{
12182	  if (GET_CODE (operands[6]) == CONST_INT
12183	      && INTVAL (operands[6]) == INTVAL (operands[5]))
12184	    operands[6] = operands[2];
12185
12186	  if (nparts == 3
12187	      && GET_CODE (operands[7]) == CONST_INT
12188	      && INTVAL (operands[7]) == INTVAL (operands[5]))
12189	    operands[7] = operands[2];
12190	}
12191
12192      if (nparts == 3
12193	  && GET_CODE (operands[6]) == CONST_INT
12194	  && operands[6] != const0_rtx
12195	  && REG_P (operands[3])
12196	  && GET_CODE (operands[7]) == CONST_INT
12197	  && INTVAL (operands[7]) == INTVAL (operands[6]))
12198	operands[7] = operands[3];
12199    }
12200
12201  emit_move_insn (operands[2], operands[5]);
12202  emit_move_insn (operands[3], operands[6]);
12203  if (nparts == 3)
12204    emit_move_insn (operands[4], operands[7]);
12205
12206  return;
12207}
12208
12209/* Helper function of ix86_split_ashl used to generate an SImode/DImode
12210   left shift by a constant, either using a single shift or
12211   a sequence of add instructions.  */
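/* Note that MODE is the mode of the double-word value being split; OPERAND
   is one word-sized half, which is why the SImode add/shift patterns are
   used when MODE is DImode and the DImode ones when MODE is TImode.  A
   count of 1 becomes a single add of the operand to itself, and other small
   counts become a run of such adds when that is cheaper than a constant
   shift and we are not optimizing for size.  */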
12212
12213static void
12214ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12215{
12216  if (count == 1)
12217    {
12218      emit_insn ((mode == DImode
12219		  ? gen_addsi3
12220		  : gen_adddi3) (operand, operand, operand));
12221    }
12222  else if (!optimize_size
12223	   && count * ix86_cost->add <= ix86_cost->shift_const)
12224    {
12225      int i;
12226      for (i=0; i<count; i++)
12227	{
12228	  emit_insn ((mode == DImode
12229		      ? gen_addsi3
12230		      : gen_adddi3) (operand, operand, operand));
12231	}
12232    }
12233  else
12234    emit_insn ((mode == DImode
12235		? gen_ashlsi3
12236		: gen_ashldi3) (operand, operand, GEN_INT (count)));
12237}
12238
12239void
12240ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12241{
12242  rtx low[2], high[2];
12243  int count;
12244  const int single_width = mode == DImode ? 32 : 64;
12245
12246  if (GET_CODE (operands[2]) == CONST_INT)
12247    {
12248      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12249      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12250
12251      if (count >= single_width)
12252	{
12253	  emit_move_insn (high[0], low[1]);
12254	  emit_move_insn (low[0], const0_rtx);
12255
12256	  if (count > single_width)
12257	    ix86_expand_ashl_const (high[0], count - single_width, mode);
12258	}
12259      else
12260	{
12261	  if (!rtx_equal_p (operands[0], operands[1]))
12262	    emit_move_insn (operands[0], operands[1]);
12263	  emit_insn ((mode == DImode
12264		     ? gen_x86_shld_1
12265		     : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12266	  ix86_expand_ashl_const (low[0], count, mode);
12267	}
12268      return;
12269    }
12270
12271  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12272
12273  if (operands[1] == const1_rtx)
12274    {
12275      /* Assuming we've chosen QImode-capable registers, 1 << N
12276	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
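      /* The trick: clear both halves, test bit 5 (bit 6 for TImode) of the
	 count, and use sete/setne to drop a 1 into whichever half that bit
	 selects; then shift both halves by the count - the hardware masks
	 the count to the word width, so the 1 lands in its final place.  */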
12277      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12278	{
12279	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12280
12281	  ix86_expand_clear (low[0]);
12282	  ix86_expand_clear (high[0]);
12283	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12284
12285	  d = gen_lowpart (QImode, low[0]);
12286	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12287	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
12288	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
12289
12290	  d = gen_lowpart (QImode, high[0]);
12291	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12292	  s = gen_rtx_NE (QImode, flags, const0_rtx);
12293	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
12294	}
12295
12296      /* Otherwise, we can get the same results by manually performing
12297	 a bit extract operation on bit 5/6, and then performing the two
12298	 shifts.  The two methods of getting 0/1 into low/high are exactly
12299	 the same size.  Avoiding the shift in the bit extract case helps
12300	 pentium4 a bit; no one else seems to care much either way.  */
12301      else
12302	{
12303	  rtx x;
12304
12305	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12306	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12307	  else
12308	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12309	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12310
12311	  emit_insn ((mode == DImode
12312		      ? gen_lshrsi3
12313		      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12314	  emit_insn ((mode == DImode
12315		      ? gen_andsi3
12316		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12317	  emit_move_insn (low[0], high[0]);
12318	  emit_insn ((mode == DImode
12319		      ? gen_xorsi3
12320		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12321	}
12322
12323      emit_insn ((mode == DImode
12324		    ? gen_ashlsi3
12325		    : gen_ashldi3) (low[0], low[0], operands[2]));
12326      emit_insn ((mode == DImode
12327		    ? gen_ashlsi3
12328		    : gen_ashldi3) (high[0], high[0], operands[2]));
12329      return;
12330    }
12331
12332  if (operands[1] == constm1_rtx)
12333    {
12334      /* For -1 << N, we can avoid the shld instruction, because we
12335	 know that we're shifting 0...31/63 ones into a -1.  */
12336      emit_move_insn (low[0], constm1_rtx);
12337      if (optimize_size)
12338	emit_move_insn (high[0], low[0]);
12339      else
12340	emit_move_insn (high[0], constm1_rtx);
12341    }
12342  else
12343    {
12344      if (!rtx_equal_p (operands[0], operands[1]))
12345	emit_move_insn (operands[0], operands[1]);
12346
12347      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12348      emit_insn ((mode == DImode
12349		  ? gen_x86_shld_1
12350		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
12351    }
12352
12353  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12354
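  /* The variable shifts above use only the low 5 (or 6) bits of the count,
     so the halves are correct only for counts below the word width.  The
     shift_adj patterns test the word-width bit of the count and, when it is
     set, move the low word into the high word and clear the low word -
     branch-free via cmov when a scratch register is available, with a
     conditional jump otherwise.  */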
12355  if (TARGET_CMOVE && scratch)
12356    {
12357      ix86_expand_clear (scratch);
12358      emit_insn ((mode == DImode
12359		  ? gen_x86_shift_adj_1
12360		  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12361    }
12362  else
12363    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12364}
12365
12366void
12367ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12368{
12369  rtx low[2], high[2];
12370  int count;
12371  const int single_width = mode == DImode ? 32 : 64;
12372
12373  if (GET_CODE (operands[2]) == CONST_INT)
12374    {
12375      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12376      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12377
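      /* Three constant-count cases: shifting by 2*width-1 just broadcasts
	 the sign bit into both halves; a count of at least the word width
	 moves the high half into the low half and sign-fills the high half;
	 smaller counts use an shrd/sar pair.  */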
12378      if (count == single_width * 2 - 1)
12379	{
12380	  emit_move_insn (high[0], high[1]);
12381	  emit_insn ((mode == DImode
12382		      ? gen_ashrsi3
12383		      : gen_ashrdi3) (high[0], high[0],
12384				      GEN_INT (single_width - 1)));
12385	  emit_move_insn (low[0], high[0]);
12386
12387	}
12388      else if (count >= single_width)
12389	{
12390	  emit_move_insn (low[0], high[1]);
12391	  emit_move_insn (high[0], low[0]);
12392	  emit_insn ((mode == DImode
12393		      ? gen_ashrsi3
12394		      : gen_ashrdi3) (high[0], high[0],
12395				      GEN_INT (single_width - 1)));
12396	  if (count > single_width)
12397	    emit_insn ((mode == DImode
12398			? gen_ashrsi3
12399			: gen_ashrdi3) (low[0], low[0],
12400					GEN_INT (count - single_width)));
12401	}
12402      else
12403	{
12404	  if (!rtx_equal_p (operands[0], operands[1]))
12405	    emit_move_insn (operands[0], operands[1]);
12406	  emit_insn ((mode == DImode
12407		      ? gen_x86_shrd_1
12408		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12409	  emit_insn ((mode == DImode
12410		      ? gen_ashrsi3
12411		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12412	}
12413    }
12414  else
12415    {
12416      if (!rtx_equal_p (operands[0], operands[1]))
12417	emit_move_insn (operands[0], operands[1]);
12418
12419      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12420
12421      emit_insn ((mode == DImode
12422		  ? gen_x86_shrd_1
12423		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12424      emit_insn ((mode == DImode
12425		  ? gen_ashrsi3
12426		  : gen_ashrdi3)  (high[0], high[0], operands[2]));
12427
12428      if (TARGET_CMOVE && scratch)
12429	{
12430	  emit_move_insn (scratch, high[0]);
12431	  emit_insn ((mode == DImode
12432		      ? gen_ashrsi3
12433		      : gen_ashrdi3) (scratch, scratch,
12434				      GEN_INT (single_width - 1)));
12435	  emit_insn ((mode == DImode
12436		      ? gen_x86_shift_adj_1
12437		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12438					 scratch));
12439	}
12440      else
12441	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12442    }
12443}
12444
12445void
12446ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12447{
12448  rtx low[2], high[2];
12449  int count;
12450  const int single_width = mode == DImode ? 32 : 64;
12451
12452  if (GET_CODE (operands[2]) == CONST_INT)
12453    {
12454      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12455      count = INTVAL (operands[2]) & (single_width * 2 - 1);
12456
12457      if (count >= single_width)
12458	{
12459	  emit_move_insn (low[0], high[1]);
12460	  ix86_expand_clear (high[0]);
12461
12462	  if (count > single_width)
12463	    emit_insn ((mode == DImode
12464			? gen_lshrsi3
12465			: gen_lshrdi3) (low[0], low[0],
12466					GEN_INT (count - single_width)));
12467	}
12468      else
12469	{
12470	  if (!rtx_equal_p (operands[0], operands[1]))
12471	    emit_move_insn (operands[0], operands[1]);
12472	  emit_insn ((mode == DImode
12473		      ? gen_x86_shrd_1
12474		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12475	  emit_insn ((mode == DImode
12476		      ? gen_lshrsi3
12477		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12478	}
12479    }
12480  else
12481    {
12482      if (!rtx_equal_p (operands[0], operands[1]))
12483	emit_move_insn (operands[0], operands[1]);
12484
12485      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12486
12487      emit_insn ((mode == DImode
12488		  ? gen_x86_shrd_1
12489		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12490      emit_insn ((mode == DImode
12491		  ? gen_lshrsi3
12492		  : gen_lshrdi3) (high[0], high[0], operands[2]));
12493
12494      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
12495      if (TARGET_CMOVE && scratch)
12496	{
12497	  ix86_expand_clear (scratch);
12498	  emit_insn ((mode == DImode
12499		      ? gen_x86_shift_adj_1
12500		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12501					       scratch));
12502	}
12503      else
12504	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12505    }
12506}
12507
12508/* Helper function for the string operations below.  Test the bits of VARIABLE
12509   given by VALUE; return a label branched to when they are zero (i.e. aligned).  */
12510static rtx
12511ix86_expand_aligntest (rtx variable, int value)
12512{
12513  rtx label = gen_label_rtx ();
12514  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12515  if (GET_MODE (variable) == DImode)
12516    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12517  else
12518    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12519  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12520			   1, label);
12521  return label;
12522}
12523
12524/* Decrement COUNTREG by VALUE.  */
12525static void
12526ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12527{
12528  if (GET_MODE (countreg) == DImode)
12529    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12530  else
12531    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12532}
12533
12534/* Zero extend possibly SImode EXP to Pmode register.  */
12535rtx
12536ix86_zero_extend_to_Pmode (rtx exp)
12537{
12538  rtx r;
12539  if (GET_MODE (exp) == VOIDmode)
12540    return force_reg (Pmode, exp);
12541  if (GET_MODE (exp) == Pmode)
12542    return copy_to_mode_reg (Pmode, exp);
12543  r = gen_reg_rtx (Pmode);
12544  emit_insn (gen_zero_extendsidi2 (r, exp));
12545  return r;
12546}
12547
12548/* Expand string move (memcpy) operation.  Use i386 string operations when
12549   profitable.  expand_clrmem contains similar code.  */
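/* Roughly three strategies follow: a plain rep ; movsb when not optimizing
   (or optimizing for size) and the count is unknown or awkward; an unrolled
   or rep ; movsl/movsq sequence plus a short tail for counts known at
   compile time; and the generic align / rep ; movsl / copy-the-rest scheme
   (or a library call) otherwise.  */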
12550int
12551ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12552{
12553  rtx srcreg, destreg, countreg, srcexp, destexp;
12554  enum machine_mode counter_mode;
12555  HOST_WIDE_INT align = 0;
12556  unsigned HOST_WIDE_INT count = 0;
12557
12558  if (GET_CODE (align_exp) == CONST_INT)
12559    align = INTVAL (align_exp);
12560
12561  /* Can't use any of this if the user has appropriated esi or edi.  */
12562  if (global_regs[4] || global_regs[5])
12563    return 0;
12564
12565  /* This simple hack avoids all inlining code and simplifies code below.  */
12566  if (!TARGET_ALIGN_STRINGOPS)
12567    align = 64;
12568
12569  if (GET_CODE (count_exp) == CONST_INT)
12570    {
12571      count = INTVAL (count_exp);
12572      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12573	return 0;
12574    }
12575
12576  /* Figure out proper mode for counter.  For 32bits it is always SImode,
12577     for 64bits use SImode when possible, otherwise DImode.
12578     Set count to number of bytes copied when known at compile time.  */
12579  if (!TARGET_64BIT
12580      || GET_MODE (count_exp) == SImode
12581      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12582    counter_mode = SImode;
12583  else
12584    counter_mode = DImode;
12585
12586  gcc_assert (counter_mode == SImode || counter_mode == DImode);
12587
12588  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12589  if (destreg != XEXP (dst, 0))
12590    dst = replace_equiv_address_nv (dst, destreg);
12591  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12592  if (srcreg != XEXP (src, 0))
12593    src = replace_equiv_address_nv (src, srcreg);
12594
12595  /* When optimizing for size, emit a simple rep ; movsb instruction for
12596     counts not divisible by 4, except when a (movsl;)*(movsw;)?(movsb;)?
12597     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12598     The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12599     count / 4 + (count & 3); the other sequence is either 4 or 7 bytes,
12600     but we don't know whether the upper 24 (resp. 56) bits of %ecx will be
12601     known to be zero or not.  The rep; movsb sequence causes higher
12602     register pressure though, so take that into account.  */
12603
12604  if ((!optimize || optimize_size)
12605      && (count == 0
12606	  || ((count & 0x03)
12607	      && (!optimize_size
12608		  || count > 5 * 4
12609		  || (count & 3) + count / 4 > 6))))
12610    {
12611      emit_insn (gen_cld ());
12612      countreg = ix86_zero_extend_to_Pmode (count_exp);
12613      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12614      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12615      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12616			      destexp, srcexp));
12617    }
12618
12619  /* For constant aligned (or small unaligned) copies use rep movsl
12620     followed by code copying the rest.  For PentiumPro ensure 8 byte
12621     alignment to allow rep movsl acceleration.  */
12622
12623  else if (count != 0
12624	   && (align >= 8
12625	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12626	       || optimize_size || count < (unsigned int) 64))
12627    {
12628      unsigned HOST_WIDE_INT offset = 0;
12629      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12630      rtx srcmem, dstmem;
12631
12632      emit_insn (gen_cld ());
12633      if (count & ~(size - 1))
12634	{
12635	  if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12636	    {
12637	      enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12638
12639	      while (offset < (count & ~(size - 1)))
12640		{
12641		  srcmem = adjust_automodify_address_nv (src, movs_mode,
12642							 srcreg, offset);
12643		  dstmem = adjust_automodify_address_nv (dst, movs_mode,
12644							 destreg, offset);
12645		  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12646		  offset += size;
12647		}
12648	    }
12649	  else
12650	    {
12651	      countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12652				  & (TARGET_64BIT ? -1 : 0x3fffffff));
12653	      countreg = copy_to_mode_reg (counter_mode, countreg);
12654	      countreg = ix86_zero_extend_to_Pmode (countreg);
12655
12656	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
12657					GEN_INT (size == 4 ? 2 : 3));
12658	      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12659	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12660
12661	      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12662				      countreg, destexp, srcexp));
12663	      offset = count & ~(size - 1);
12664	    }
12665	}
12666      if (size == 8 && (count & 0x04))
12667	{
12668	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12669						 offset);
12670	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12671						 offset);
12672	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12673	  offset += 4;
12674	}
12675      if (count & 0x02)
12676	{
12677	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12678						 offset);
12679	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12680						 offset);
12681	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12682	  offset += 2;
12683	}
12684      if (count & 0x01)
12685	{
12686	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12687						 offset);
12688	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12689						 offset);
12690	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12691	}
12692    }
12693  /* The generic code based on the glibc implementation:
12694     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12695     allowing accelerated copying there)
12696     - copy the data using rep movsl
12697     - copy the rest.  */
12698  else
12699    {
12700      rtx countreg2;
12701      rtx label = NULL;
12702      rtx srcmem, dstmem;
12703      int desired_alignment = (TARGET_PENTIUMPRO
12704			       && (count == 0 || count >= (unsigned int) 260)
12705			       ? 8 : UNITS_PER_WORD);
12706      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
12707      dst = change_address (dst, BLKmode, destreg);
12708      src = change_address (src, BLKmode, srcreg);
12709
12710      /* In case we don't know anything about the alignment, default to
12711         library version, since it is usually equally fast and results in
12712         shorter code.
12713
12714	 Also emit a call when we know that the count is large and call overhead
12715	 will not be important.  */
12716      if (!TARGET_INLINE_ALL_STRINGOPS
12717	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12718	return 0;
12719
12720      if (TARGET_SINGLE_STRINGOP)
12721	emit_insn (gen_cld ());
12722
12723      countreg2 = gen_reg_rtx (Pmode);
12724      countreg = copy_to_mode_reg (counter_mode, count_exp);
12725
12726      /* We don't use loops to align destination and to copy parts smaller
12727         than 4 bytes, because gcc is able to optimize such code better (in
12728         the case the destination or the count really is aligned, gcc is often
12729         able to predict the branches) and also it is friendlier to the
12730         hardware branch prediction.
12731
12732         Using loops is beneficial for the generic case, because we can
12733         handle small counts using the loops.  Many CPUs (such as Athlon)
12734         have large REP prefix setup costs.
12735
12736         This is quite costly.  Maybe we can revisit this decision later or
12737         add some customizability to this code.  */
12738
12739      if (count == 0 && align < desired_alignment)
12740	{
12741	  label = gen_label_rtx ();
12742	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12743				   LEU, 0, counter_mode, 1, label);
12744	}
12745      if (align <= 1)
12746	{
12747	  rtx label = ix86_expand_aligntest (destreg, 1);
12748	  srcmem = change_address (src, QImode, srcreg);
12749	  dstmem = change_address (dst, QImode, destreg);
12750	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12751	  ix86_adjust_counter (countreg, 1);
12752	  emit_label (label);
12753	  LABEL_NUSES (label) = 1;
12754	}
12755      if (align <= 2)
12756	{
12757	  rtx label = ix86_expand_aligntest (destreg, 2);
12758	  srcmem = change_address (src, HImode, srcreg);
12759	  dstmem = change_address (dst, HImode, destreg);
12760	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12761	  ix86_adjust_counter (countreg, 2);
12762	  emit_label (label);
12763	  LABEL_NUSES (label) = 1;
12764	}
12765      if (align <= 4 && desired_alignment > 4)
12766	{
12767	  rtx label = ix86_expand_aligntest (destreg, 4);
12768	  srcmem = change_address (src, SImode, srcreg);
12769	  dstmem = change_address (dst, SImode, destreg);
12770	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12771	  ix86_adjust_counter (countreg, 4);
12772	  emit_label (label);
12773	  LABEL_NUSES (label) = 1;
12774	}
12775
12776      if (label && desired_alignment > 4 && !TARGET_64BIT)
12777	{
12778	  emit_label (label);
12779	  LABEL_NUSES (label) = 1;
12780	  label = NULL_RTX;
12781	}
12782      if (!TARGET_SINGLE_STRINGOP)
12783	emit_insn (gen_cld ());
12784      if (TARGET_64BIT)
12785	{
12786	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12787				  GEN_INT (3)));
12788	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12789	}
12790      else
12791	{
12792	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12793	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12794	}
12795      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12796      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12797      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12798			      countreg2, destexp, srcexp));
12799
12800      if (label)
12801	{
12802	  emit_label (label);
12803	  LABEL_NUSES (label) = 1;
12804	}
12805      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12806	{
12807	  srcmem = change_address (src, SImode, srcreg);
12808	  dstmem = change_address (dst, SImode, destreg);
12809	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12810	}
12811      if ((align <= 4 || count == 0) && TARGET_64BIT)
12812	{
12813	  rtx label = ix86_expand_aligntest (countreg, 4);
12814	  srcmem = change_address (src, SImode, srcreg);
12815	  dstmem = change_address (dst, SImode, destreg);
12816	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12817	  emit_label (label);
12818	  LABEL_NUSES (label) = 1;
12819	}
12820      if (align > 2 && count != 0 && (count & 2))
12821	{
12822	  srcmem = change_address (src, HImode, srcreg);
12823	  dstmem = change_address (dst, HImode, destreg);
12824	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12825	}
12826      if (align <= 2 || count == 0)
12827	{
12828	  rtx label = ix86_expand_aligntest (countreg, 2);
12829	  srcmem = change_address (src, HImode, srcreg);
12830	  dstmem = change_address (dst, HImode, destreg);
12831	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12832	  emit_label (label);
12833	  LABEL_NUSES (label) = 1;
12834	}
12835      if (align > 1 && count != 0 && (count & 1))
12836	{
12837	  srcmem = change_address (src, QImode, srcreg);
12838	  dstmem = change_address (dst, QImode, destreg);
12839	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12840	}
12841      if (align <= 1 || count == 0)
12842	{
12843	  rtx label = ix86_expand_aligntest (countreg, 1);
12844	  srcmem = change_address (src, QImode, srcreg);
12845	  dstmem = change_address (dst, QImode, destreg);
12846	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12847	  emit_label (label);
12848	  LABEL_NUSES (label) = 1;
12849	}
12850    }
12851
12852  return 1;
12853}
12854
12855/* Expand string clear operation (bzero).  Use i386 string operations when
12856   profitable.  expand_movmem contains similar code.  */
12857int
12858ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12859{
12860  rtx destreg, zeroreg, countreg, destexp;
12861  enum machine_mode counter_mode;
12862  HOST_WIDE_INT align = 0;
12863  unsigned HOST_WIDE_INT count = 0;
12864
12865  if (GET_CODE (align_exp) == CONST_INT)
12866    align = INTVAL (align_exp);
12867
12868  /* Can't use any of this if the user has appropriated esi.  */
12869  if (global_regs[4])
12870    return 0;
12871
12872  /* This simple hack avoids all inlining code and simplifies code below.  */
12873  if (!TARGET_ALIGN_STRINGOPS)
12874    align = 32;
12875
12876  if (GET_CODE (count_exp) == CONST_INT)
12877    {
12878      count = INTVAL (count_exp);
12879      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12880	return 0;
12881    }
12882  /* Figure out proper mode for counter.  For 32bits it is always SImode,
12883     for 64bits use SImode when possible, otherwise DImode.
12884     Set count to number of bytes copied when known at compile time.  */
12885  if (!TARGET_64BIT
12886      || GET_MODE (count_exp) == SImode
12887      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12888    counter_mode = SImode;
12889  else
12890    counter_mode = DImode;
12891
12892  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12893  if (destreg != XEXP (dst, 0))
12894    dst = replace_equiv_address_nv (dst, destreg);
12895
12896
12897  /* When optimizing for size, emit a simple rep ; stosb instruction for
12898     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
12899     sequence is 7 bytes long, so if optimizing for size and the count is
12900     small enough that a few stosl, stosw and stosb instructions without
12901     rep are shorter, fall through into the next if.  */
12902
12903  if ((!optimize || optimize_size)
12904      && (count == 0
12905	  || ((count & 0x03)
12906	      && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12907    {
12908      emit_insn (gen_cld ());
12909
12910      countreg = ix86_zero_extend_to_Pmode (count_exp);
12911      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12912      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12913      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12914    }
12915  else if (count != 0
12916	   && (align >= 8
12917	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12918	       || optimize_size || count < (unsigned int) 64))
12919    {
12920      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12921      unsigned HOST_WIDE_INT offset = 0;
12922
12923      emit_insn (gen_cld ());
12924
12925      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12926      if (count & ~(size - 1))
12927	{
12928	  unsigned HOST_WIDE_INT repcount;
12929	  unsigned int max_nonrep;
12930
12931	  repcount = count >> (size == 4 ? 2 : 3);
12932	  if (!TARGET_64BIT)
12933	    repcount &= 0x3fffffff;
12934
12935	  /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12936	     movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12937	     bytes.  In both cases the latter seems to be faster for small
12938	     values of N.  */
12939	  max_nonrep = size == 4 ? 7 : 4;
12940	  if (!optimize_size)
12941	    switch (ix86_tune)
12942	      {
12943	      case PROCESSOR_PENTIUM4:
12944	      case PROCESSOR_NOCONA:
12945	        max_nonrep = 3;
12946	        break;
12947	      default:
12948	        break;
12949	      }
12950
12951	  if (repcount <= max_nonrep)
12952	    while (repcount-- > 0)
12953	      {
12954		rtx mem = adjust_automodify_address_nv (dst,
12955							GET_MODE (zeroreg),
12956							destreg, offset);
12957		emit_insn (gen_strset (destreg, mem, zeroreg));
12958		offset += size;
12959	      }
12960	  else
12961	    {
12962	      countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12963	      countreg = ix86_zero_extend_to_Pmode (countreg);
12964	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
12965					GEN_INT (size == 4 ? 2 : 3));
12966	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12967	      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12968				       destexp));
12969	      offset = count & ~(size - 1);
12970	    }
12971	}
12972      if (size == 8 && (count & 0x04))
12973	{
12974	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12975						  offset);
12976	  emit_insn (gen_strset (destreg, mem,
12977				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12978	  offset += 4;
12979	}
12980      if (count & 0x02)
12981	{
12982	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12983						  offset);
12984	  emit_insn (gen_strset (destreg, mem,
12985				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12986	  offset += 2;
12987	}
12988      if (count & 0x01)
12989	{
12990	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12991						  offset);
12992	  emit_insn (gen_strset (destreg, mem,
12993				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12994	}
12995    }
12996  else
12997    {
12998      rtx countreg2;
12999      rtx label = NULL;
13000      /* Compute desired alignment of the string operation.  */
13001      int desired_alignment = (TARGET_PENTIUMPRO
13002			       && (count == 0 || count >= (unsigned int) 260)
13003			       ? 8 : UNITS_PER_WORD);
13004
13005      /* In case we don't know anything about the alignment, default to
13006         library version, since it is usually equally fast and results in
13007         shorter code.
13008
13009	 Also emit a call when we know that the count is large and call overhead
13010	 will not be important.  */
13011      if (!TARGET_INLINE_ALL_STRINGOPS
13012	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13013	return 0;
13014
13015      if (TARGET_SINGLE_STRINGOP)
13016	emit_insn (gen_cld ());
13017
13018      countreg2 = gen_reg_rtx (Pmode);
13019      countreg = copy_to_mode_reg (counter_mode, count_exp);
13020      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13021      /* Get rid of MEM_OFFSET, it won't be accurate.  */
13022      dst = change_address (dst, BLKmode, destreg);
13023
13024      if (count == 0 && align < desired_alignment)
13025	{
13026	  label = gen_label_rtx ();
13027	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13028				   LEU, 0, counter_mode, 1, label);
13029	}
13030      if (align <= 1)
13031	{
13032	  rtx label = ix86_expand_aligntest (destreg, 1);
13033	  emit_insn (gen_strset (destreg, dst,
13034				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13035	  ix86_adjust_counter (countreg, 1);
13036	  emit_label (label);
13037	  LABEL_NUSES (label) = 1;
13038	}
13039      if (align <= 2)
13040	{
13041	  rtx label = ix86_expand_aligntest (destreg, 2);
13042	  emit_insn (gen_strset (destreg, dst,
13043				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13044	  ix86_adjust_counter (countreg, 2);
13045	  emit_label (label);
13046	  LABEL_NUSES (label) = 1;
13047	}
13048      if (align <= 4 && desired_alignment > 4)
13049	{
13050	  rtx label = ix86_expand_aligntest (destreg, 4);
13051	  emit_insn (gen_strset (destreg, dst,
13052				 (TARGET_64BIT
13053				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13054				  : zeroreg)));
13055	  ix86_adjust_counter (countreg, 4);
13056	  emit_label (label);
13057	  LABEL_NUSES (label) = 1;
13058	}
13059
13060      if (label && desired_alignment > 4 && !TARGET_64BIT)
13061	{
13062	  emit_label (label);
13063	  LABEL_NUSES (label) = 1;
13064	  label = NULL_RTX;
13065	}
13066
13067      if (!TARGET_SINGLE_STRINGOP)
13068	emit_insn (gen_cld ());
13069      if (TARGET_64BIT)
13070	{
13071	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13072				  GEN_INT (3)));
13073	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13074	}
13075      else
13076	{
13077	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13078	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13079	}
13080      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13081      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13082
13083      if (label)
13084	{
13085	  emit_label (label);
13086	  LABEL_NUSES (label) = 1;
13087	}
13088
13089      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13090	emit_insn (gen_strset (destreg, dst,
13091			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
13092      if (TARGET_64BIT && (align <= 4 || count == 0))
13093	{
13094	  rtx label = ix86_expand_aligntest (countreg, 4);
13095	  emit_insn (gen_strset (destreg, dst,
13096				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13097	  emit_label (label);
13098	  LABEL_NUSES (label) = 1;
13099	}
13100      if (align > 2 && count != 0 && (count & 2))
13101	emit_insn (gen_strset (destreg, dst,
13102			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
13103      if (align <= 2 || count == 0)
13104	{
13105	  rtx label = ix86_expand_aligntest (countreg, 2);
13106	  emit_insn (gen_strset (destreg, dst,
13107				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13108	  emit_label (label);
13109	  LABEL_NUSES (label) = 1;
13110	}
13111      if (align > 1 && count != 0 && (count & 1))
13112	emit_insn (gen_strset (destreg, dst,
13113			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
13114      if (align <= 1 || count == 0)
13115	{
13116	  rtx label = ix86_expand_aligntest (countreg, 1);
13117	  emit_insn (gen_strset (destreg, dst,
13118				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13119	  emit_label (label);
13120	  LABEL_NUSES (label) = 1;
13121	}
13122    }
13123  return 1;
13124}
13125
13126/* Expand strlen.  */
13127int
13128ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13129{
13130  rtx addr, scratch1, scratch2, scratch3, scratch4;
13131
13132  /* The generic case of the strlen expander is long.  Avoid expanding it
13133     unless TARGET_INLINE_ALL_STRINGOPS.  */
13134
13135  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13136      && !TARGET_INLINE_ALL_STRINGOPS
13137      && !optimize_size
13138      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13139    return 0;
13140
13141  addr = force_reg (Pmode, XEXP (src, 0));
13142  scratch1 = gen_reg_rtx (Pmode);
13143
13144  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13145      && !optimize_size)
13146    {
13147      /* Well it seems that some optimizer does not combine a call like
13148         foo(strlen(bar), strlen(bar));
13149         when the move and the subtraction are done here.  It does calculate
13150         the length just once when these instructions are done inside of
13151         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
13152         often used and I use one fewer register for the lifetime of
13153         output_strlen_unroll() this is better.  */
13154
13155      emit_move_insn (out, addr);
13156
13157      ix86_expand_strlensi_unroll_1 (out, src, align);
13158
13159      /* strlensi_unroll_1 returns the address of the zero at the end of
13160         the string, like memchr(), so compute the length by subtracting
13161         the start address.  */
13162      if (TARGET_64BIT)
13163	emit_insn (gen_subdi3 (out, out, addr));
13164      else
13165	emit_insn (gen_subsi3 (out, out, addr));
13166    }
13167  else
13168    {
13169      rtx unspec;
13170      scratch2 = gen_reg_rtx (Pmode);
13171      scratch3 = gen_reg_rtx (Pmode);
13172      scratch4 = force_reg (Pmode, constm1_rtx);
13173
13174      emit_move_insn (scratch3, addr);
13175      eoschar = force_reg (QImode, eoschar);
13176
13177      emit_insn (gen_cld ());
13178      src = replace_equiv_address_nv (src, scratch3);
13179
13180      /* If .md starts supporting :P, this can be done in .md.  */
13181      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13182						 scratch4), UNSPEC_SCAS);
13183      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13184      if (TARGET_64BIT)
13185	{
13186	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13187	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13188	}
13189      else
13190	{
13191	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13192	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13193	}
13194    }
13195  return 1;
13196}
13197
13198/* Expand the appropriate insns for doing strlen if not just doing
13199   repnz; scasb
13200
13201   out = result, initialized with the start address
13202   align_rtx = alignment of the address.
13203   scratch = scratch register, initialized with the start address when
13204	not aligned, otherwise undefined
13205
13206   This is just the body. It needs the initializations mentioned above and
13207   some address computing at the end.  These things are done in i386.md.  */
13208
13209static void
13210ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13211{
13212  int align;
13213  rtx tmp;
13214  rtx align_2_label = NULL_RTX;
13215  rtx align_3_label = NULL_RTX;
13216  rtx align_4_label = gen_label_rtx ();
13217  rtx end_0_label = gen_label_rtx ();
13218  rtx mem;
13219  rtx tmpreg = gen_reg_rtx (SImode);
13220  rtx scratch = gen_reg_rtx (SImode);
13221  rtx cmp;
13222
13223  align = 0;
13224  if (GET_CODE (align_rtx) == CONST_INT)
13225    align = INTVAL (align_rtx);
13226
13227  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
13228
13229  /* Is there a known alignment and is it less than 4?  */
13230  if (align < 4)
13231    {
13232      rtx scratch1 = gen_reg_rtx (Pmode);
13233      emit_move_insn (scratch1, out);
13234      /* Is there a known alignment and is it not 2? */
13235      if (align != 2)
13236	{
13237	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13238	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13239
13240	  /* Leave just the 3 lower bits.  */
13241	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13242				    NULL_RTX, 0, OPTAB_WIDEN);
13243
13244	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13245				   Pmode, 1, align_4_label);
13246	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13247				   Pmode, 1, align_2_label);
13248	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13249				   Pmode, 1, align_3_label);
13250	}
13251      else
13252        {
13253	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
13254	     check whether it is aligned to a 4-byte boundary.  */
13255
13256	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13257				    NULL_RTX, 0, OPTAB_WIDEN);
13258
13259	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13260				   Pmode, 1, align_4_label);
13261        }
13262
13263      mem = change_address (src, QImode, out);
13264
13265      /* Now compare the bytes.  */
13266
13267      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
13268      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13269			       QImode, 1, end_0_label);
13270
13271      /* Increment the address.  */
13272      if (TARGET_64BIT)
13273	emit_insn (gen_adddi3 (out, out, const1_rtx));
13274      else
13275	emit_insn (gen_addsi3 (out, out, const1_rtx));
13276
13277      /* Not needed with an alignment of 2 */
13278      if (align != 2)
13279	{
13280	  emit_label (align_2_label);
13281
13282	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13283				   end_0_label);
13284
13285	  if (TARGET_64BIT)
13286	    emit_insn (gen_adddi3 (out, out, const1_rtx));
13287	  else
13288	    emit_insn (gen_addsi3 (out, out, const1_rtx));
13289
13290	  emit_label (align_3_label);
13291	}
13292
13293      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13294			       end_0_label);
13295
13296      if (TARGET_64BIT)
13297	emit_insn (gen_adddi3 (out, out, const1_rtx));
13298      else
13299	emit_insn (gen_addsi3 (out, out, const1_rtx));
13300    }
13301
13302  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
13303     align this loop; doing so only bloats the code and does not help
13304     performance.  */
13305  emit_label (align_4_label);
13306
13307  mem = change_address (src, SImode, out);
13308  emit_move_insn (scratch, mem);
13309  if (TARGET_64BIT)
13310    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13311  else
13312    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13313
13314  /* This formula yields a nonzero result iff one of the bytes is zero.
13315     This saves three branches inside loop and many cycles.  */
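  /* In other words tmpreg = (w - 0x01010101) & ~w & 0x80808080, where w is
     the word just loaded into SCRATCH.  This is nonzero exactly when some
     byte of w is zero; the least significant set bit flags the first zero
     byte, and only that bit is used below.  For example, w = 0x41420043
     gives w - 0x01010101 = 0x4040ff42 and ~w = 0xbebdffbc, leaving
     0x00008000 after the two ands - the flag for the zero byte.  */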
13316
13317  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13318  emit_insn (gen_one_cmplsi2 (scratch, scratch));
13319  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13320  emit_insn (gen_andsi3 (tmpreg, tmpreg,
13321			 gen_int_mode (0x80808080, SImode)));
13322  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13323			   align_4_label);
13324
13325  if (TARGET_CMOVE)
13326    {
13327       rtx reg = gen_reg_rtx (SImode);
13328       rtx reg2 = gen_reg_rtx (Pmode);
13329       emit_move_insn (reg, tmpreg);
13330       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13331
13332       /* If zero is not in the first two bytes, move two bytes forward.  */
13333       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13334       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13335       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13336       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13337			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
13338						     reg,
13339						     tmpreg)));
13340       /* Emit lea manually to avoid clobbering of flags.  */
13341       emit_insn (gen_rtx_SET (SImode, reg2,
13342			       gen_rtx_PLUS (Pmode, out, const2_rtx)));
13343
13344       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13345       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13346       emit_insn (gen_rtx_SET (VOIDmode, out,
13347			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13348						     reg2,
13349						     out)));
13350
13351    }
13352  else
13353    {
13354       rtx end_2_label = gen_label_rtx ();
13355       /* Is zero in the first two bytes? */
13356
13357       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13358       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13359       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13360       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13361                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13362                            pc_rtx);
13363       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13364       JUMP_LABEL (tmp) = end_2_label;
13365
13366       /* Not in the first two.  Move two bytes forward.  */
13367       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13368       if (TARGET_64BIT)
13369	 emit_insn (gen_adddi3 (out, out, const2_rtx));
13370       else
13371	 emit_insn (gen_addsi3 (out, out, const2_rtx));
13372
13373       emit_label (end_2_label);
13374
13375    }
13376
13377  /* Avoid branch in fixing the byte.  */
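  /* Bit 7 of the low byte of TMPREG is set exactly when the zero byte is
     the first of the two remaining candidates, and OUT points four bytes
     past that first candidate.  Doubling the low byte copies the flag into
     the carry flag, so the subtract-with-borrow of 3 backs OUT up by 4 or 3
     bytes, leaving it pointing at the zero byte itself.  */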
13378  tmpreg = gen_lowpart (QImode, tmpreg);
13379  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13380  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13381  if (TARGET_64BIT)
13382    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13383  else
13384    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13385
13386  emit_label (end_0_label);
13387}
13388
13389void
13390ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13391		  rtx callarg2 ATTRIBUTE_UNUSED,
13392		  rtx pop, int sibcall)
13393{
13394  rtx use = NULL, call;
13395
13396  if (pop == const0_rtx)
13397    pop = NULL;
13398  gcc_assert (!TARGET_64BIT || !pop);
13399
13400  if (TARGET_MACHO && !TARGET_64BIT)
13401    {
13402#if TARGET_MACHO
13403      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13404	fnaddr = machopic_indirect_call_target (fnaddr);
13405#endif
13406    }
13407  else
13408    {
13409      /* Static functions and indirect calls don't need the pic register.  */
13410      if (! TARGET_64BIT && flag_pic
13411	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13412	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13413	use_reg (&use, pic_offset_table_rtx);
13414    }
13415
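  /* In the x86-64 SysV ABI, a call to a function taking variable arguments
     passes the number of vector registers used for arguments in %al.
     CALLARG2 carries that count (or a negative value when it does not
     apply), so load it into AL and record AL as used by the call.  */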
13416  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13417    {
13418      rtx al = gen_rtx_REG (QImode, 0);
13419      emit_move_insn (al, callarg2);
13420      use_reg (&use, al);
13421    }
13422
13423  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13424    {
13425      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13426      fnaddr = gen_rtx_MEM (QImode, fnaddr);
13427    }
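  /* For a 64-bit sibling call through a non-constant address, the target
     must be loaded into a register that is neither used for argument
     passing nor callee-saved; R11 satisfies both requirements, so use it.  */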
13428  if (sibcall && TARGET_64BIT
13429      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13430    {
13431      rtx addr;
13432      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13433      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13434      emit_move_insn (fnaddr, addr);
13435      fnaddr = gen_rtx_MEM (QImode, fnaddr);
13436    }
13437
13438  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13439  if (retval)
13440    call = gen_rtx_SET (VOIDmode, retval, call);
13441  if (pop)
13442    {
13443      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13444      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13445      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13446    }
13447
13448  call = emit_call_insn (call);
13449  if (use)
13450    CALL_INSN_FUNCTION_USAGE (call) = use;
13451}
13452
13453
13454/* Clear stack slot assignments remembered from previous functions.
13455   This is called from INIT_EXPANDERS once before RTL is emitted for each
13456   function.  */
13457
13458static struct machine_function *
13459ix86_init_machine_status (void)
13460{
13461  struct machine_function *f;
13462
13463  f = ggc_alloc_cleared (sizeof (struct machine_function));
13464  f->use_fast_prologue_epilogue_nregs = -1;
13465  f->tls_descriptor_call_expanded_p = 0;
13466
13467  return f;
13468}
13469
13470/* Return a MEM corresponding to a stack slot with mode MODE.
13471   Allocate a new slot if necessary.
13472
13473   The RTL for a function can have several slots available: N is
13474   which slot to use.  */
13475
13476rtx
13477assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13478{
13479  struct stack_local_entry *s;
13480
13481  gcc_assert (n < MAX_386_STACK_LOCALS);
13482
13483  /* Virtual slot is valid only before vregs are instantiated.  */
13484  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13485
13486  for (s = ix86_stack_locals; s; s = s->next)
13487    if (s->mode == mode && s->n == n)
13488      return s->rtl;
13489
13490  s = (struct stack_local_entry *)
13491    ggc_alloc (sizeof (struct stack_local_entry));
13492  s->n = n;
13493  s->mode = mode;
13494  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13495
13496  s->next = ix86_stack_locals;
13497  ix86_stack_locals = s;
13498  return s->rtl;
13499}
13500
13501/* Construct the SYMBOL_REF for the tls_get_addr function.  */
13502
13503static GTY(()) rtx ix86_tls_symbol;
13504rtx
13505ix86_tls_get_addr (void)
13506{
13507
13508  if (!ix86_tls_symbol)
13509    {
13510      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13511					    (TARGET_ANY_GNU_TLS
13512					     && !TARGET_64BIT)
13513					    ? "___tls_get_addr"
13514					    : "__tls_get_addr");
13515    }
13516
13517  return ix86_tls_symbol;
13518}
13519
13520/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
13521
13522static GTY(()) rtx ix86_tls_module_base_symbol;
13523rtx
13524ix86_tls_module_base (void)
13525{
13526
13527  if (!ix86_tls_module_base_symbol)
13528    {
13529      ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13530							"_TLS_MODULE_BASE_");
13531      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13532	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13533    }
13534
13535  return ix86_tls_module_base_symbol;
13536}
13537
13538/* Calculate the length of the memory address in the instruction
13539   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
13540
13541int
13542memory_address_length (rtx addr)
13543{
13544  struct ix86_address parts;
13545  rtx base, index, disp;
13546  int len;
13547  int ok;
13548
13549  if (GET_CODE (addr) == PRE_DEC
13550      || GET_CODE (addr) == POST_INC
13551      || GET_CODE (addr) == PRE_MODIFY
13552      || GET_CODE (addr) == POST_MODIFY)
13553    return 0;
13554
13555  ok = ix86_decompose_address (addr, &parts);
13556  gcc_assert (ok);
13557
13558  if (parts.base && GET_CODE (parts.base) == SUBREG)
13559    parts.base = SUBREG_REG (parts.base);
13560  if (parts.index && GET_CODE (parts.index) == SUBREG)
13561    parts.index = SUBREG_REG (parts.index);
13562
13563  base = parts.base;
13564  index = parts.index;
13565  disp = parts.disp;
13566  len = 0;
13567
13568  /* Rule of thumb:
13569       - esp as the base always wants an index,
13570       - ebp as the base always wants a displacement.  */
13571
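  /* Some concrete cases (lengths are bytes beyond the mandatory modrm,
     illustrative only; the logic below is authoritative):
       (%eax)           0  plain register indirect
       (%esp)           1  needs a SIB byte
       (%ebp)           1  needs an explicit zero displacement
       8(%eax)          1  8-bit displacement
       foo(%eax)        4  32-bit displacement
       (%eax,%ebx,2)    1  SIB byte, no displacement  */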
13572  /* Register Indirect.  */
13573  if (base && !index && !disp)
13574    {
13575      /* esp (for its index) and ebp (for its displacement) need
13576	 the two-byte modrm form.  */
13577      if (addr == stack_pointer_rtx
13578	  || addr == arg_pointer_rtx
13579	  || addr == frame_pointer_rtx
13580	  || addr == hard_frame_pointer_rtx)
13581	len = 1;
13582    }
13583
13584  /* Direct Addressing.  */
13585  else if (disp && !base && !index)
13586    len = 4;
13587
13588  else
13589    {
13590      /* Find the length of the displacement constant.  */
13591      if (disp)
13592	{
13593	  if (base && satisfies_constraint_K (disp))
13594	    len = 1;
13595	  else
13596	    len = 4;
13597	}
13598      /* ebp always wants a displacement.  */
13599      else if (base == hard_frame_pointer_rtx)
13600        len = 1;
13601
13602      /* An index requires the two-byte modrm form....  */
13603      if (index
13604	  /* ...like esp, which always wants an index.  */
13605	  || base == stack_pointer_rtx
13606	  || base == arg_pointer_rtx
13607	  || base == frame_pointer_rtx)
13608	len += 1;
13609    }
13610
13611  return len;
13612}
13613
13614/* Compute default value for "length_immediate" attribute.  When SHORTFORM
13615   is set, expect that the insn has an 8-bit immediate alternative.  */
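/* For instance, the group-1 ALU instructions have a sign-extended imm8
   form (opcode 0x83), so "addl $4, %eax" needs only one immediate byte
   while "addl $1000, %eax" needs four.  */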
13616int
13617ix86_attr_length_immediate_default (rtx insn, int shortform)
13618{
13619  int len = 0;
13620  int i;
13621  extract_insn_cached (insn);
13622  for (i = recog_data.n_operands - 1; i >= 0; --i)
13623    if (CONSTANT_P (recog_data.operand[i]))
13624      {
13625	gcc_assert (!len);
13626	if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13627	  len = 1;
13628	else
13629	  {
13630	    switch (get_attr_mode (insn))
13631	      {
13632		case MODE_QI:
13633		  len+=1;
13634		  break;
13635		case MODE_HI:
13636		  len+=2;
13637		  break;
13638		case MODE_SI:
13639		  len+=4;
13640		  break;
13641		/* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
13642		case MODE_DI:
13643		  len+=4;
13644		  break;
13645		default:
13646		  fatal_insn ("unknown insn mode", insn);
13647	      }
13648	  }
13649      }
13650  return len;
13651}
13652/* Compute default value for "length_address" attribute.  */
13653int
13654ix86_attr_length_address_default (rtx insn)
13655{
13656  int i;
13657
13658  if (get_attr_type (insn) == TYPE_LEA)
13659    {
13660      rtx set = PATTERN (insn);
13661
13662      if (GET_CODE (set) == PARALLEL)
13663	set = XVECEXP (set, 0, 0);
13664
13665      gcc_assert (GET_CODE (set) == SET);
13666
13667      return memory_address_length (SET_SRC (set));
13668    }
13669
13670  extract_insn_cached (insn);
13671  for (i = recog_data.n_operands - 1; i >= 0; --i)
13672    if (GET_CODE (recog_data.operand[i]) == MEM)
13673      {
13674	return memory_address_length (XEXP (recog_data.operand[i], 0));
13676      }
13677  return 0;
13678}
13679
13680/* Return the maximum number of instructions a cpu can issue.  */
13681
13682static int
13683ix86_issue_rate (void)
13684{
13685  switch (ix86_tune)
13686    {
13687    case PROCESSOR_PENTIUM:
13688    case PROCESSOR_K6:
13689      return 2;
13690
13691    case PROCESSOR_PENTIUMPRO:
13692    case PROCESSOR_PENTIUM4:
13693    case PROCESSOR_ATHLON:
13694    case PROCESSOR_K8:
13695    case PROCESSOR_NOCONA:
13696    case PROCESSOR_GENERIC32:
13697    case PROCESSOR_GENERIC64:
13698      return 3;
13699
13700    default:
13701      return 1;
13702    }
13703}
13704
13705/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13706   by DEP_INSN and nothing set by DEP_INSN.  */
13707
13708static int
13709ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13710{
13711  rtx set, set2;
13712
13713  /* Simplify the test for uninteresting insns.  */
13714  if (insn_type != TYPE_SETCC
13715      && insn_type != TYPE_ICMOV
13716      && insn_type != TYPE_FCMOV
13717      && insn_type != TYPE_IBR)
13718    return 0;
13719
13720  if ((set = single_set (dep_insn)) != 0)
13721    {
13722      set = SET_DEST (set);
13723      set2 = NULL_RTX;
13724    }
13725  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13726	   && XVECLEN (PATTERN (dep_insn), 0) == 2
13727	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13728	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13729    {
13730      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13731      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13732    }
13733  else
13734    return 0;
13735
13736  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13737    return 0;
13738
13739  /* This test is true if the dependent insn reads the flags but
13740     not any other potentially set register.  */
13741  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13742    return 0;
13743
13744  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13745    return 0;
13746
13747  return 1;
13748}
13749
13750/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13751   address with operands set by DEP_INSN.  */
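/* For example, on the original Pentium a "movl %eax, %esi" followed
   immediately by a load from (%esi) pays an extra cycle, because the
   address of the load cannot be generated until %esi has been written;
   this Address Generation Interlock is what ix86_adjust_cost accounts
   for when this predicate returns true.  */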
13752
13753static int
13754ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13755{
13756  rtx addr;
13757
13758  if (insn_type == TYPE_LEA
13759      && TARGET_PENTIUM)
13760    {
13761      addr = PATTERN (insn);
13762
13763      if (GET_CODE (addr) == PARALLEL)
13764	addr = XVECEXP (addr, 0, 0);
13765
13766      gcc_assert (GET_CODE (addr) == SET);
13767
13768      addr = SET_SRC (addr);
13769    }
13770  else
13771    {
13772      int i;
13773      extract_insn_cached (insn);
13774      for (i = recog_data.n_operands - 1; i >= 0; --i)
13775	if (GET_CODE (recog_data.operand[i]) == MEM)
13776	  {
13777	    addr = XEXP (recog_data.operand[i], 0);
13778	    goto found;
13779	  }
13780      return 0;
13781    found:;
13782    }
13783
13784  return modified_in_p (addr, dep_insn);
13785}
13786
13787static int
13788ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13789{
13790  enum attr_type insn_type, dep_insn_type;
13791  enum attr_memory memory;
13792  rtx set, set2;
13793  int dep_insn_code_number;
13794
13795  /* Anti and output dependencies have zero cost on all CPUs.  */
13796  if (REG_NOTE_KIND (link) != 0)
13797    return 0;
13798
13799  dep_insn_code_number = recog_memoized (dep_insn);
13800
13801  /* If we can't recognize the insns, we can't really do anything.  */
13802  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13803    return cost;
13804
13805  insn_type = get_attr_type (insn);
13806  dep_insn_type = get_attr_type (dep_insn);
13807
13808  switch (ix86_tune)
13809    {
13810    case PROCESSOR_PENTIUM:
13811      /* Address Generation Interlock adds a cycle of latency.  */
13812      if (ix86_agi_dependent (insn, dep_insn, insn_type))
13813	cost += 1;
13814
13815      /* ??? Compares pair with jump/setcc.  */
13816      if (ix86_flags_dependent (insn, dep_insn, insn_type))
13817	cost = 0;
13818
13819      /* Floating point stores require the value to be ready one cycle earlier.  */
13820      if (insn_type == TYPE_FMOV
13821	  && get_attr_memory (insn) == MEMORY_STORE
13822	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
13823	cost += 1;
13824      break;
13825
13826    case PROCESSOR_PENTIUMPRO:
13827      memory = get_attr_memory (insn);
13828
13829      /* INT->FP conversion is expensive.  */
13830      if (get_attr_fp_int_src (dep_insn))
13831	cost += 5;
13832
13833      /* There is one cycle extra latency between an FP op and a store.  */
13834      if (insn_type == TYPE_FMOV
13835	  && (set = single_set (dep_insn)) != NULL_RTX
13836	  && (set2 = single_set (insn)) != NULL_RTX
13837	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13838	  && GET_CODE (SET_DEST (set2)) == MEM)
13839	cost += 1;
13840
13841      /* Show the ability of the reorder buffer to hide the latency of a load
13842	 by executing it in parallel with the previous instruction when the
13843	 previous instruction is not needed to compute the address.  */
13844      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13845	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
13846	{
13847	  /* Claim moves to take one cycle, as the core can issue one load
13848	     at a time and the next load can start a cycle later.  */
13849	  if (dep_insn_type == TYPE_IMOV
13850	      || dep_insn_type == TYPE_FMOV)
13851	    cost = 1;
13852	  else if (cost > 1)
13853	    cost--;
13854	}
13855      break;
13856
13857    case PROCESSOR_K6:
13858      memory = get_attr_memory (insn);
13859
13860      /* The esp dependency is resolved before the instruction is really
13861         finished.  */
13862      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13863	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13864	return 1;
13865
13866      /* INT->FP conversion is expensive.  */
13867      if (get_attr_fp_int_src (dep_insn))
13868	cost += 5;
13869
13870      /* Show the ability of the reorder buffer to hide the latency of a load
13871	 by executing it in parallel with the previous instruction when the
13872	 previous instruction is not needed to compute the address.  */
13873      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13874	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
13875	{
13876	  /* Claim moves to take one cycle, as the core can issue one load
13877	     at a time and the next load can start a cycle later.  */
13878	  if (dep_insn_type == TYPE_IMOV
13879	      || dep_insn_type == TYPE_FMOV)
13880	    cost = 1;
13881	  else if (cost > 2)
13882	    cost -= 2;
13883	  else
13884	    cost = 1;
13885	}
13886      break;
13887
13888    case PROCESSOR_ATHLON:
13889    case PROCESSOR_K8:
13890    case PROCESSOR_GENERIC32:
13891    case PROCESSOR_GENERIC64:
13892      memory = get_attr_memory (insn);
13893
13894      /* Show the ability of the reorder buffer to hide the latency of a load
13895	 by executing it in parallel with the previous instruction when the
13896	 previous instruction is not needed to compute the address.  */
13897      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13898	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
13899	{
13900	  enum attr_unit unit = get_attr_unit (insn);
13901	  int loadcost = 3;
13902
13903	  /* Because of the difference between the length of integer and
13904	     floating unit pipeline preparation stages, the memory operands
13905	     for floating point are cheaper.
13906
13907	     ??? For Athlon the difference is most probably 2.  */
13908	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13909	    loadcost = 3;
13910	  else
13911	    loadcost = TARGET_ATHLON ? 2 : 0;
13912
13913	  if (cost >= loadcost)
13914	    cost -= loadcost;
13915	  else
13916	    cost = 0;
13917	}
13918
13919    default:
13920      break;
13921    }
13922
13923  return cost;
13924}
13925
13926/* How many alternative schedules to try.  This should be as wide as the
13927   scheduling freedom in the DFA, but no wider.  Making this value too
13928   large results in extra work for the scheduler.  */
13929
13930static int
13931ia32_multipass_dfa_lookahead (void)
13932{
13933  if (ix86_tune == PROCESSOR_PENTIUM)
13934    return 2;
13935
13936  if (ix86_tune == PROCESSOR_PENTIUMPRO
13937      || ix86_tune == PROCESSOR_K6)
13938    return 1;
13939
13940  else
13941    return 0;
13942}
13943
13944
13945/* Compute the alignment given to a constant that is being placed in memory.
13946   EXP is the constant and ALIGN is the alignment that the object would
13947   ordinarily have.
13948   The value of this function is used instead of that alignment to align
13949   the object.  */
13950
13951int
13952ix86_constant_alignment (tree exp, int align)
13953{
13954  if (TREE_CODE (exp) == REAL_CST)
13955    {
13956      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13957	return 64;
13958      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13959	return 128;
13960    }
13961  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13962      	   && !TARGET_NO_ALIGN_LONG_STRINGS
13963	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13964    return BITS_PER_WORD;
13965
13966  return align;
13967}
13968
13969/* Compute the alignment for a static variable.
13970   TYPE is the data type, and ALIGN is the alignment that
13971   the object would ordinarily have.  The value of this function is used
13972   instead of that alignment to align the object.  */
13973
13974int
13975ix86_data_alignment (tree type, int align)
13976{
13977  int max_align = optimize_size ? BITS_PER_WORD : 256;
13978
13979  if (AGGREGATE_TYPE_P (type)
13980      && TYPE_SIZE (type)
13981      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13982      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13983	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13984      && align < max_align)
13985    align = max_align;
13986
13987  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13988     to a 16-byte boundary.  */
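  /* E.g. a 24-byte "char buf[24]" at file scope gets 128-bit alignment
     from the check below, even though char itself only needs byte
     alignment.  */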
13989  if (TARGET_64BIT)
13990    {
13991      if (AGGREGATE_TYPE_P (type)
13992	   && TYPE_SIZE (type)
13993	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13994	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13995	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13996	return 128;
13997    }
13998
13999  if (TREE_CODE (type) == ARRAY_TYPE)
14000    {
14001      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14002	return 64;
14003      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14004	return 128;
14005    }
14006  else if (TREE_CODE (type) == COMPLEX_TYPE)
14007    {
14008
14009      if (TYPE_MODE (type) == DCmode && align < 64)
14010	return 64;
14011      if (TYPE_MODE (type) == XCmode && align < 128)
14012	return 128;
14013    }
14014  else if ((TREE_CODE (type) == RECORD_TYPE
14015	    || TREE_CODE (type) == UNION_TYPE
14016	    || TREE_CODE (type) == QUAL_UNION_TYPE)
14017	   && TYPE_FIELDS (type))
14018    {
14019      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14020	return 64;
14021      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14022	return 128;
14023    }
14024  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14025	   || TREE_CODE (type) == INTEGER_TYPE)
14026    {
14027      if (TYPE_MODE (type) == DFmode && align < 64)
14028	return 64;
14029      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14030	return 128;
14031    }
14032
14033  return align;
14034}
14035
14036/* Compute the alignment for a local variable.
14037   TYPE is the data type, and ALIGN is the alignment that
14038   the object would ordinarily have.  The value of this function is used
14039   instead of that alignment to align the object.  */
14040
14041int
14042ix86_local_alignment (tree type, int align)
14043{
14044  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14045     to a 16-byte boundary.  */
14046  if (TARGET_64BIT)
14047    {
14048      if (AGGREGATE_TYPE_P (type)
14049	   && TYPE_SIZE (type)
14050	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14051	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14052	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14053	return 128;
14054    }
14055  if (TREE_CODE (type) == ARRAY_TYPE)
14056    {
14057      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14058	return 64;
14059      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14060	return 128;
14061    }
14062  else if (TREE_CODE (type) == COMPLEX_TYPE)
14063    {
14064      if (TYPE_MODE (type) == DCmode && align < 64)
14065	return 64;
14066      if (TYPE_MODE (type) == XCmode && align < 128)
14067	return 128;
14068    }
14069  else if ((TREE_CODE (type) == RECORD_TYPE
14070	    || TREE_CODE (type) == UNION_TYPE
14071	    || TREE_CODE (type) == QUAL_UNION_TYPE)
14072	   && TYPE_FIELDS (type))
14073    {
14074      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14075	return 64;
14076      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14077	return 128;
14078    }
14079  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14080	   || TREE_CODE (type) == INTEGER_TYPE)
14081    {
14082
14083      if (TYPE_MODE (type) == DFmode && align < 64)
14084	return 64;
14085      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14086	return 128;
14087    }
14088  return align;
14089}
14090
14091/* Emit RTL insns to initialize the variable parts of a trampoline.
14092   FNADDR is an RTX for the address of the function's pure code.
14093   CXT is an RTX for the static chain value for the function.  */
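/* A sketch of the byte sequences emitted below:

     32-bit:  b9 <cxt:4>        movl   $cxt, %ecx
              e9 <disp:4>       jmp    fnaddr           (ip-relative)

     64-bit:  41 bb <fnaddr:4>  movl   $fnaddr, %r11d   (or 49 bb <fnaddr:8>)
              49 ba <cxt:8>     movabs $cxt, %r10
              49 ff e3          jmp    *%r11  */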
14094void
14095x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14096{
14097  if (!TARGET_64BIT)
14098    {
14099      /* Compute offset from the end of the jmp to the target function.  */
14100      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14101			       plus_constant (tramp, 10),
14102			       NULL_RTX, 1, OPTAB_DIRECT);
14103      emit_move_insn (gen_rtx_MEM (QImode, tramp),
14104		      gen_int_mode (0xb9, QImode));
14105      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14106      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14107		      gen_int_mode (0xe9, QImode));
14108      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14109    }
14110  else
14111    {
14112      int offset = 0;
14113      /* Try to load the address using the shorter movl instead of movabs.
14114         We may want to support movq for kernel mode, but the kernel does
14115         not use trampolines at the moment.  */
14116      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14117	{
14118	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
14119	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14120			  gen_int_mode (0xbb41, HImode));
14121	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14122			  gen_lowpart (SImode, fnaddr));
14123	  offset += 6;
14124	}
14125      else
14126	{
14127	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14128			  gen_int_mode (0xbb49, HImode));
14129	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14130			  fnaddr);
14131	  offset += 10;
14132	}
14133      /* Load static chain using movabs to r10.  */
14134      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14135		      gen_int_mode (0xba49, HImode));
14136      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14137		      cxt);
14138      offset += 10;
14139      /* Jump to r11.  */
14140      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14141		      gen_int_mode (0xff49, HImode));
14142      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14143		      gen_int_mode (0xe3, QImode));
14144      offset += 3;
14145      gcc_assert (offset <= TRAMPOLINE_SIZE);
14146    }
14147
14148#ifdef ENABLE_EXECUTE_STACK
14149  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14150		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14151#endif
14152}
14153
14154/* Codes for all the SSE/MMX builtins.  */
14155enum ix86_builtins
14156{
14157  IX86_BUILTIN_ADDPS,
14158  IX86_BUILTIN_ADDSS,
14159  IX86_BUILTIN_DIVPS,
14160  IX86_BUILTIN_DIVSS,
14161  IX86_BUILTIN_MULPS,
14162  IX86_BUILTIN_MULSS,
14163  IX86_BUILTIN_SUBPS,
14164  IX86_BUILTIN_SUBSS,
14165
14166  IX86_BUILTIN_CMPEQPS,
14167  IX86_BUILTIN_CMPLTPS,
14168  IX86_BUILTIN_CMPLEPS,
14169  IX86_BUILTIN_CMPGTPS,
14170  IX86_BUILTIN_CMPGEPS,
14171  IX86_BUILTIN_CMPNEQPS,
14172  IX86_BUILTIN_CMPNLTPS,
14173  IX86_BUILTIN_CMPNLEPS,
14174  IX86_BUILTIN_CMPNGTPS,
14175  IX86_BUILTIN_CMPNGEPS,
14176  IX86_BUILTIN_CMPORDPS,
14177  IX86_BUILTIN_CMPUNORDPS,
14178  IX86_BUILTIN_CMPEQSS,
14179  IX86_BUILTIN_CMPLTSS,
14180  IX86_BUILTIN_CMPLESS,
14181  IX86_BUILTIN_CMPNEQSS,
14182  IX86_BUILTIN_CMPNLTSS,
14183  IX86_BUILTIN_CMPNLESS,
14184  IX86_BUILTIN_CMPNGTSS,
14185  IX86_BUILTIN_CMPNGESS,
14186  IX86_BUILTIN_CMPORDSS,
14187  IX86_BUILTIN_CMPUNORDSS,
14188
14189  IX86_BUILTIN_COMIEQSS,
14190  IX86_BUILTIN_COMILTSS,
14191  IX86_BUILTIN_COMILESS,
14192  IX86_BUILTIN_COMIGTSS,
14193  IX86_BUILTIN_COMIGESS,
14194  IX86_BUILTIN_COMINEQSS,
14195  IX86_BUILTIN_UCOMIEQSS,
14196  IX86_BUILTIN_UCOMILTSS,
14197  IX86_BUILTIN_UCOMILESS,
14198  IX86_BUILTIN_UCOMIGTSS,
14199  IX86_BUILTIN_UCOMIGESS,
14200  IX86_BUILTIN_UCOMINEQSS,
14201
14202  IX86_BUILTIN_CVTPI2PS,
14203  IX86_BUILTIN_CVTPS2PI,
14204  IX86_BUILTIN_CVTSI2SS,
14205  IX86_BUILTIN_CVTSI642SS,
14206  IX86_BUILTIN_CVTSS2SI,
14207  IX86_BUILTIN_CVTSS2SI64,
14208  IX86_BUILTIN_CVTTPS2PI,
14209  IX86_BUILTIN_CVTTSS2SI,
14210  IX86_BUILTIN_CVTTSS2SI64,
14211
14212  IX86_BUILTIN_MAXPS,
14213  IX86_BUILTIN_MAXSS,
14214  IX86_BUILTIN_MINPS,
14215  IX86_BUILTIN_MINSS,
14216
14217  IX86_BUILTIN_LOADUPS,
14218  IX86_BUILTIN_STOREUPS,
14219  IX86_BUILTIN_MOVSS,
14220
14221  IX86_BUILTIN_MOVHLPS,
14222  IX86_BUILTIN_MOVLHPS,
14223  IX86_BUILTIN_LOADHPS,
14224  IX86_BUILTIN_LOADLPS,
14225  IX86_BUILTIN_STOREHPS,
14226  IX86_BUILTIN_STORELPS,
14227
14228  IX86_BUILTIN_MASKMOVQ,
14229  IX86_BUILTIN_MOVMSKPS,
14230  IX86_BUILTIN_PMOVMSKB,
14231
14232  IX86_BUILTIN_MOVNTPS,
14233  IX86_BUILTIN_MOVNTQ,
14234
14235  IX86_BUILTIN_LOADDQU,
14236  IX86_BUILTIN_STOREDQU,
14237
14238  IX86_BUILTIN_PACKSSWB,
14239  IX86_BUILTIN_PACKSSDW,
14240  IX86_BUILTIN_PACKUSWB,
14241
14242  IX86_BUILTIN_PADDB,
14243  IX86_BUILTIN_PADDW,
14244  IX86_BUILTIN_PADDD,
14245  IX86_BUILTIN_PADDQ,
14246  IX86_BUILTIN_PADDSB,
14247  IX86_BUILTIN_PADDSW,
14248  IX86_BUILTIN_PADDUSB,
14249  IX86_BUILTIN_PADDUSW,
14250  IX86_BUILTIN_PSUBB,
14251  IX86_BUILTIN_PSUBW,
14252  IX86_BUILTIN_PSUBD,
14253  IX86_BUILTIN_PSUBQ,
14254  IX86_BUILTIN_PSUBSB,
14255  IX86_BUILTIN_PSUBSW,
14256  IX86_BUILTIN_PSUBUSB,
14257  IX86_BUILTIN_PSUBUSW,
14258
14259  IX86_BUILTIN_PAND,
14260  IX86_BUILTIN_PANDN,
14261  IX86_BUILTIN_POR,
14262  IX86_BUILTIN_PXOR,
14263
14264  IX86_BUILTIN_PAVGB,
14265  IX86_BUILTIN_PAVGW,
14266
14267  IX86_BUILTIN_PCMPEQB,
14268  IX86_BUILTIN_PCMPEQW,
14269  IX86_BUILTIN_PCMPEQD,
14270  IX86_BUILTIN_PCMPGTB,
14271  IX86_BUILTIN_PCMPGTW,
14272  IX86_BUILTIN_PCMPGTD,
14273
14274  IX86_BUILTIN_PMADDWD,
14275
14276  IX86_BUILTIN_PMAXSW,
14277  IX86_BUILTIN_PMAXUB,
14278  IX86_BUILTIN_PMINSW,
14279  IX86_BUILTIN_PMINUB,
14280
14281  IX86_BUILTIN_PMULHUW,
14282  IX86_BUILTIN_PMULHW,
14283  IX86_BUILTIN_PMULLW,
14284
14285  IX86_BUILTIN_PSADBW,
14286  IX86_BUILTIN_PSHUFW,
14287
14288  IX86_BUILTIN_PSLLW,
14289  IX86_BUILTIN_PSLLD,
14290  IX86_BUILTIN_PSLLQ,
14291  IX86_BUILTIN_PSRAW,
14292  IX86_BUILTIN_PSRAD,
14293  IX86_BUILTIN_PSRLW,
14294  IX86_BUILTIN_PSRLD,
14295  IX86_BUILTIN_PSRLQ,
14296  IX86_BUILTIN_PSLLWI,
14297  IX86_BUILTIN_PSLLDI,
14298  IX86_BUILTIN_PSLLQI,
14299  IX86_BUILTIN_PSRAWI,
14300  IX86_BUILTIN_PSRADI,
14301  IX86_BUILTIN_PSRLWI,
14302  IX86_BUILTIN_PSRLDI,
14303  IX86_BUILTIN_PSRLQI,
14304
14305  IX86_BUILTIN_PUNPCKHBW,
14306  IX86_BUILTIN_PUNPCKHWD,
14307  IX86_BUILTIN_PUNPCKHDQ,
14308  IX86_BUILTIN_PUNPCKLBW,
14309  IX86_BUILTIN_PUNPCKLWD,
14310  IX86_BUILTIN_PUNPCKLDQ,
14311
14312  IX86_BUILTIN_SHUFPS,
14313
14314  IX86_BUILTIN_RCPPS,
14315  IX86_BUILTIN_RCPSS,
14316  IX86_BUILTIN_RSQRTPS,
14317  IX86_BUILTIN_RSQRTSS,
14318  IX86_BUILTIN_SQRTPS,
14319  IX86_BUILTIN_SQRTSS,
14320
14321  IX86_BUILTIN_UNPCKHPS,
14322  IX86_BUILTIN_UNPCKLPS,
14323
14324  IX86_BUILTIN_ANDPS,
14325  IX86_BUILTIN_ANDNPS,
14326  IX86_BUILTIN_ORPS,
14327  IX86_BUILTIN_XORPS,
14328
14329  IX86_BUILTIN_EMMS,
14330  IX86_BUILTIN_LDMXCSR,
14331  IX86_BUILTIN_STMXCSR,
14332  IX86_BUILTIN_SFENCE,
14333
14334  /* 3DNow! Original */
14335  IX86_BUILTIN_FEMMS,
14336  IX86_BUILTIN_PAVGUSB,
14337  IX86_BUILTIN_PF2ID,
14338  IX86_BUILTIN_PFACC,
14339  IX86_BUILTIN_PFADD,
14340  IX86_BUILTIN_PFCMPEQ,
14341  IX86_BUILTIN_PFCMPGE,
14342  IX86_BUILTIN_PFCMPGT,
14343  IX86_BUILTIN_PFMAX,
14344  IX86_BUILTIN_PFMIN,
14345  IX86_BUILTIN_PFMUL,
14346  IX86_BUILTIN_PFRCP,
14347  IX86_BUILTIN_PFRCPIT1,
14348  IX86_BUILTIN_PFRCPIT2,
14349  IX86_BUILTIN_PFRSQIT1,
14350  IX86_BUILTIN_PFRSQRT,
14351  IX86_BUILTIN_PFSUB,
14352  IX86_BUILTIN_PFSUBR,
14353  IX86_BUILTIN_PI2FD,
14354  IX86_BUILTIN_PMULHRW,
14355
14356  /* 3DNow! Athlon Extensions */
14357  IX86_BUILTIN_PF2IW,
14358  IX86_BUILTIN_PFNACC,
14359  IX86_BUILTIN_PFPNACC,
14360  IX86_BUILTIN_PI2FW,
14361  IX86_BUILTIN_PSWAPDSI,
14362  IX86_BUILTIN_PSWAPDSF,
14363
14364  /* SSE2 */
14365  IX86_BUILTIN_ADDPD,
14366  IX86_BUILTIN_ADDSD,
14367  IX86_BUILTIN_DIVPD,
14368  IX86_BUILTIN_DIVSD,
14369  IX86_BUILTIN_MULPD,
14370  IX86_BUILTIN_MULSD,
14371  IX86_BUILTIN_SUBPD,
14372  IX86_BUILTIN_SUBSD,
14373
14374  IX86_BUILTIN_CMPEQPD,
14375  IX86_BUILTIN_CMPLTPD,
14376  IX86_BUILTIN_CMPLEPD,
14377  IX86_BUILTIN_CMPGTPD,
14378  IX86_BUILTIN_CMPGEPD,
14379  IX86_BUILTIN_CMPNEQPD,
14380  IX86_BUILTIN_CMPNLTPD,
14381  IX86_BUILTIN_CMPNLEPD,
14382  IX86_BUILTIN_CMPNGTPD,
14383  IX86_BUILTIN_CMPNGEPD,
14384  IX86_BUILTIN_CMPORDPD,
14385  IX86_BUILTIN_CMPUNORDPD,
14386  IX86_BUILTIN_CMPNEPD,
14387  IX86_BUILTIN_CMPEQSD,
14388  IX86_BUILTIN_CMPLTSD,
14389  IX86_BUILTIN_CMPLESD,
14390  IX86_BUILTIN_CMPNEQSD,
14391  IX86_BUILTIN_CMPNLTSD,
14392  IX86_BUILTIN_CMPNLESD,
14393  IX86_BUILTIN_CMPORDSD,
14394  IX86_BUILTIN_CMPUNORDSD,
14395  IX86_BUILTIN_CMPNESD,
14396
14397  IX86_BUILTIN_COMIEQSD,
14398  IX86_BUILTIN_COMILTSD,
14399  IX86_BUILTIN_COMILESD,
14400  IX86_BUILTIN_COMIGTSD,
14401  IX86_BUILTIN_COMIGESD,
14402  IX86_BUILTIN_COMINEQSD,
14403  IX86_BUILTIN_UCOMIEQSD,
14404  IX86_BUILTIN_UCOMILTSD,
14405  IX86_BUILTIN_UCOMILESD,
14406  IX86_BUILTIN_UCOMIGTSD,
14407  IX86_BUILTIN_UCOMIGESD,
14408  IX86_BUILTIN_UCOMINEQSD,
14409
14410  IX86_BUILTIN_MAXPD,
14411  IX86_BUILTIN_MAXSD,
14412  IX86_BUILTIN_MINPD,
14413  IX86_BUILTIN_MINSD,
14414
14415  IX86_BUILTIN_ANDPD,
14416  IX86_BUILTIN_ANDNPD,
14417  IX86_BUILTIN_ORPD,
14418  IX86_BUILTIN_XORPD,
14419
14420  IX86_BUILTIN_SQRTPD,
14421  IX86_BUILTIN_SQRTSD,
14422
14423  IX86_BUILTIN_UNPCKHPD,
14424  IX86_BUILTIN_UNPCKLPD,
14425
14426  IX86_BUILTIN_SHUFPD,
14427
14428  IX86_BUILTIN_LOADUPD,
14429  IX86_BUILTIN_STOREUPD,
14430  IX86_BUILTIN_MOVSD,
14431
14432  IX86_BUILTIN_LOADHPD,
14433  IX86_BUILTIN_LOADLPD,
14434
14435  IX86_BUILTIN_CVTDQ2PD,
14436  IX86_BUILTIN_CVTDQ2PS,
14437
14438  IX86_BUILTIN_CVTPD2DQ,
14439  IX86_BUILTIN_CVTPD2PI,
14440  IX86_BUILTIN_CVTPD2PS,
14441  IX86_BUILTIN_CVTTPD2DQ,
14442  IX86_BUILTIN_CVTTPD2PI,
14443
14444  IX86_BUILTIN_CVTPI2PD,
14445  IX86_BUILTIN_CVTSI2SD,
14446  IX86_BUILTIN_CVTSI642SD,
14447
14448  IX86_BUILTIN_CVTSD2SI,
14449  IX86_BUILTIN_CVTSD2SI64,
14450  IX86_BUILTIN_CVTSD2SS,
14451  IX86_BUILTIN_CVTSS2SD,
14452  IX86_BUILTIN_CVTTSD2SI,
14453  IX86_BUILTIN_CVTTSD2SI64,
14454
14455  IX86_BUILTIN_CVTPS2DQ,
14456  IX86_BUILTIN_CVTPS2PD,
14457  IX86_BUILTIN_CVTTPS2DQ,
14458
14459  IX86_BUILTIN_MOVNTI,
14460  IX86_BUILTIN_MOVNTPD,
14461  IX86_BUILTIN_MOVNTDQ,
14462
14463  /* SSE2 MMX */
14464  IX86_BUILTIN_MASKMOVDQU,
14465  IX86_BUILTIN_MOVMSKPD,
14466  IX86_BUILTIN_PMOVMSKB128,
14467
14468  IX86_BUILTIN_PACKSSWB128,
14469  IX86_BUILTIN_PACKSSDW128,
14470  IX86_BUILTIN_PACKUSWB128,
14471
14472  IX86_BUILTIN_PADDB128,
14473  IX86_BUILTIN_PADDW128,
14474  IX86_BUILTIN_PADDD128,
14475  IX86_BUILTIN_PADDQ128,
14476  IX86_BUILTIN_PADDSB128,
14477  IX86_BUILTIN_PADDSW128,
14478  IX86_BUILTIN_PADDUSB128,
14479  IX86_BUILTIN_PADDUSW128,
14480  IX86_BUILTIN_PSUBB128,
14481  IX86_BUILTIN_PSUBW128,
14482  IX86_BUILTIN_PSUBD128,
14483  IX86_BUILTIN_PSUBQ128,
14484  IX86_BUILTIN_PSUBSB128,
14485  IX86_BUILTIN_PSUBSW128,
14486  IX86_BUILTIN_PSUBUSB128,
14487  IX86_BUILTIN_PSUBUSW128,
14488
14489  IX86_BUILTIN_PAND128,
14490  IX86_BUILTIN_PANDN128,
14491  IX86_BUILTIN_POR128,
14492  IX86_BUILTIN_PXOR128,
14493
14494  IX86_BUILTIN_PAVGB128,
14495  IX86_BUILTIN_PAVGW128,
14496
14497  IX86_BUILTIN_PCMPEQB128,
14498  IX86_BUILTIN_PCMPEQW128,
14499  IX86_BUILTIN_PCMPEQD128,
14500  IX86_BUILTIN_PCMPGTB128,
14501  IX86_BUILTIN_PCMPGTW128,
14502  IX86_BUILTIN_PCMPGTD128,
14503
14504  IX86_BUILTIN_PMADDWD128,
14505
14506  IX86_BUILTIN_PMAXSW128,
14507  IX86_BUILTIN_PMAXUB128,
14508  IX86_BUILTIN_PMINSW128,
14509  IX86_BUILTIN_PMINUB128,
14510
14511  IX86_BUILTIN_PMULUDQ,
14512  IX86_BUILTIN_PMULUDQ128,
14513  IX86_BUILTIN_PMULHUW128,
14514  IX86_BUILTIN_PMULHW128,
14515  IX86_BUILTIN_PMULLW128,
14516
14517  IX86_BUILTIN_PSADBW128,
14518  IX86_BUILTIN_PSHUFHW,
14519  IX86_BUILTIN_PSHUFLW,
14520  IX86_BUILTIN_PSHUFD,
14521
14522  IX86_BUILTIN_PSLLW128,
14523  IX86_BUILTIN_PSLLD128,
14524  IX86_BUILTIN_PSLLQ128,
14525  IX86_BUILTIN_PSRAW128,
14526  IX86_BUILTIN_PSRAD128,
14527  IX86_BUILTIN_PSRLW128,
14528  IX86_BUILTIN_PSRLD128,
14529  IX86_BUILTIN_PSRLQ128,
14530  IX86_BUILTIN_PSLLDQI128,
14531  IX86_BUILTIN_PSLLWI128,
14532  IX86_BUILTIN_PSLLDI128,
14533  IX86_BUILTIN_PSLLQI128,
14534  IX86_BUILTIN_PSRAWI128,
14535  IX86_BUILTIN_PSRADI128,
14536  IX86_BUILTIN_PSRLDQI128,
14537  IX86_BUILTIN_PSRLWI128,
14538  IX86_BUILTIN_PSRLDI128,
14539  IX86_BUILTIN_PSRLQI128,
14540
14541  IX86_BUILTIN_PUNPCKHBW128,
14542  IX86_BUILTIN_PUNPCKHWD128,
14543  IX86_BUILTIN_PUNPCKHDQ128,
14544  IX86_BUILTIN_PUNPCKHQDQ128,
14545  IX86_BUILTIN_PUNPCKLBW128,
14546  IX86_BUILTIN_PUNPCKLWD128,
14547  IX86_BUILTIN_PUNPCKLDQ128,
14548  IX86_BUILTIN_PUNPCKLQDQ128,
14549
14550  IX86_BUILTIN_CLFLUSH,
14551  IX86_BUILTIN_MFENCE,
14552  IX86_BUILTIN_LFENCE,
14553
14554  /* Prescott New Instructions.  */
14555  IX86_BUILTIN_ADDSUBPS,
14556  IX86_BUILTIN_HADDPS,
14557  IX86_BUILTIN_HSUBPS,
14558  IX86_BUILTIN_MOVSHDUP,
14559  IX86_BUILTIN_MOVSLDUP,
14560  IX86_BUILTIN_ADDSUBPD,
14561  IX86_BUILTIN_HADDPD,
14562  IX86_BUILTIN_HSUBPD,
14563  IX86_BUILTIN_LDDQU,
14564
14565  IX86_BUILTIN_MONITOR,
14566  IX86_BUILTIN_MWAIT,
14567
14568  IX86_BUILTIN_VEC_INIT_V2SI,
14569  IX86_BUILTIN_VEC_INIT_V4HI,
14570  IX86_BUILTIN_VEC_INIT_V8QI,
14571  IX86_BUILTIN_VEC_EXT_V2DF,
14572  IX86_BUILTIN_VEC_EXT_V2DI,
14573  IX86_BUILTIN_VEC_EXT_V4SF,
14574  IX86_BUILTIN_VEC_EXT_V4SI,
14575  IX86_BUILTIN_VEC_EXT_V8HI,
14576  IX86_BUILTIN_VEC_EXT_V16QI,
14577  IX86_BUILTIN_VEC_EXT_V2SI,
14578  IX86_BUILTIN_VEC_EXT_V4HI,
14579  IX86_BUILTIN_VEC_SET_V8HI,
14580  IX86_BUILTIN_VEC_SET_V4HI,
14581
14582  IX86_BUILTIN_MAX
14583};
14584
14585#define def_builtin(MASK, NAME, TYPE, CODE)				\
14586do {									\
14587  if ((MASK) & target_flags						\
14588      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
14589    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
14590				 NULL, NULL_TREE);			\
14591} while (0)
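/* A typical use later in this file looks like (names shown for
   illustration only):

     def_builtin (MASK_SSE, "__builtin_ia32_loadups",
		  v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);

   i.e. a builtin is only registered when its ISA mask is enabled in
   target_flags, and MASK_64BIT builtins only when compiling for 64-bit.  */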
14592
14593/* Bits for builtin_description.flag.  */
14594
14595/* Set when we don't support the comparison natively, and should
14596   swap_comparison in order to support it.  */
14597#define BUILTIN_DESC_SWAP_OPERANDS	1
14598
14599struct builtin_description
14600{
14601  const unsigned int mask;
14602  const enum insn_code icode;
14603  const char *const name;
14604  const enum ix86_builtins code;
14605  const enum rtx_code comparison;
14606  const unsigned int flag;
14607};
14608
14609static const struct builtin_description bdesc_comi[] =
14610{
14611  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14612  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14613  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14614  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14615  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14616  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14617  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14618  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14619  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14620  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14621  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14622  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14623  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14624  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14625  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14626  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14627  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14628  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14629  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14630  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14631  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14632  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14633  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14634  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14635};
14636
14637static const struct builtin_description bdesc_2arg[] =
14638{
14639  /* SSE */
14640  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14641  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14642  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14643  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14644  { MASK_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14645  { MASK_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14646  { MASK_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14647  { MASK_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14648
14649  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14650  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14651  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14652  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14653    BUILTIN_DESC_SWAP_OPERANDS },
14654  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14655    BUILTIN_DESC_SWAP_OPERANDS },
14656  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14657  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14658  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14659  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14660  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14661    BUILTIN_DESC_SWAP_OPERANDS },
14662  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14663    BUILTIN_DESC_SWAP_OPERANDS },
14664  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14665  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14666  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14667  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14668  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14669  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14670  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14671  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14672  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14673    BUILTIN_DESC_SWAP_OPERANDS },
14674  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14675    BUILTIN_DESC_SWAP_OPERANDS },
14676  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
14677
14678  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14679  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14680  { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14681  { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14682
14683  { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14684  { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14685  { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14686  { MASK_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14687
14688  { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14689  { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14690  { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14691  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14692  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14693
14694  /* MMX */
14695  { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14696  { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14697  { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14698  { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14699  { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14700  { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14701  { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14702  { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14703
14704  { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14705  { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14706  { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14707  { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14708  { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14709  { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14710  { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14711  { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14712
14713  { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14714  { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14715  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14716
14717  { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14718  { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14719  { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14720  { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14721
14722  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14723  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14724
14725  { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14726  { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14727  { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14728  { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14729  { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14730  { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14731
14732  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14733  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14734  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14735  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14736
14737  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14738  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14739  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14740  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14741  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14742  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14743
14744  /* Special.  */
14745  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14746  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14747  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14748
14749  { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14750  { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14751  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14752
14753  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14754  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14755  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14756  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14757  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14758  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14759
14760  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14761  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14762  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14763  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14764  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14765  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14766
14767  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14768  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14769  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14770  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14771
14772  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14773  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14774
14775  /* SSE2 */
14776  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14777  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14778  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14779  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14780  { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14781  { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14782  { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14783  { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14784
14785  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14786  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14787  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14788  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14789    BUILTIN_DESC_SWAP_OPERANDS },
14790  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14791    BUILTIN_DESC_SWAP_OPERANDS },
14792  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14793  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14794  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14795  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14796  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14797    BUILTIN_DESC_SWAP_OPERANDS },
14798  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14799    BUILTIN_DESC_SWAP_OPERANDS },
14800  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14801  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14802  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14803  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14804  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14805  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14806  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14807  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14808  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14809
14810  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14811  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14812  { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14813  { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14814
14815  { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14816  { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14817  { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14818  { MASK_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14819
14820  { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14821  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14822  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14823
14824  /* SSE2 MMX */
14825  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14826  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14827  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14828  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14829  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14830  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14831  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14832  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14833
14834  { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14835  { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14836  { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14837  { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14838  { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14839  { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14840  { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14841  { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14842
14843  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14844  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14845
14846  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14847  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14848  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14849  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14850
14851  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14852  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14853
14854  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14855  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14856  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14857  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14858  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14859  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14860
14861  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14862  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14863  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14864  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14865
14866  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14867  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14868  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14869  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14870  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14871  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14872  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14873  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14874
14875  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14876  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14877  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14878
14879  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14880  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14881
14882  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14883  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14884
14885  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14886  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14887  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14888
14889  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14890  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14891  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14892
14893  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14894  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14895
14896  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14897
14898  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14899  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14900  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14901  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14902
14903  /* SSE3 MMX */
14904  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14905  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14906  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14907  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14908  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14909  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
14910};
14911
14912static const struct builtin_description bdesc_1arg[] =
14913{
14914  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14915  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14916
14917  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14918  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14919  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14920
14921  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14922  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14923  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14924  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14925  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14926  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14927
14928  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14929  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14930
14931  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14932
14933  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14934  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14935
14936  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14937  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14938  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14939  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14940  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14941
14942  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14943
14944  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14945  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14946  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14947  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14948
14949  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14950  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14951  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14952
14953  /* SSE3 */
14954  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14955  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14956};
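
/* How the bdesc_* tables are consumed: an entry that carries a
   "__builtin_ia32_*" name is registered automatically (for bdesc_2arg,
   by the loop in ix86_init_mmx_sse_builtins below), while an entry
   whose name field is 0 -- every row of bdesc_1arg above -- is only
   used at expansion time to look up the insn code, and gets its
   user-visible name from an explicit def_builtin call instead.

   Illustrative user-level view; the wrapper shown is an assumption
   about the <xmmintrin.h> shipped with this compiler, not a quotation
   of it:

     #include <xmmintrin.h>

     __m128
     root (__m128 x)
     {
       return _mm_sqrt_ps (x);
     }

   In that header _mm_sqrt_ps is a thin wrapper around
   __builtin_ia32_sqrtps, which is declared by a def_builtin call below
   and expands through the IX86_BUILTIN_SQRTPS row above
   (CODE_FOR_sqrtv4sf2).  */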
14957
14958static void
14959ix86_init_builtins (void)
14960{
14961  if (TARGET_MMX)
14962    ix86_init_mmx_sse_builtins ();
14963}
14964
14965/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
14966   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
14967   builtins.  */
14968static void
14969ix86_init_mmx_sse_builtins (void)
14970{
14971  const struct builtin_description * d;
14972  size_t i;
14973
14974  tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14975  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14976  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14977  tree V2DI_type_node
14978    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14979  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14980  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14981  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14982  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14983  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14984  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14985
14986  tree pchar_type_node = build_pointer_type (char_type_node);
14987  tree pcchar_type_node = build_pointer_type (
14988			     build_type_variant (char_type_node, 1, 0));
14989  tree pfloat_type_node = build_pointer_type (float_type_node);
14990  tree pcfloat_type_node = build_pointer_type (
14991			     build_type_variant (float_type_node, 1, 0));
14992  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14993  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14994  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14995
14996  /* Comparisons.  */
14997  tree int_ftype_v4sf_v4sf
14998    = build_function_type_list (integer_type_node,
14999				V4SF_type_node, V4SF_type_node, NULL_TREE);
15000  tree v4si_ftype_v4sf_v4sf
15001    = build_function_type_list (V4SI_type_node,
15002				V4SF_type_node, V4SF_type_node, NULL_TREE);
15003  /* MMX/SSE/integer conversions.  */
15004  tree int_ftype_v4sf
15005    = build_function_type_list (integer_type_node,
15006				V4SF_type_node, NULL_TREE);
15007  tree int64_ftype_v4sf
15008    = build_function_type_list (long_long_integer_type_node,
15009				V4SF_type_node, NULL_TREE);
15010  tree int_ftype_v8qi
15011    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15012  tree v4sf_ftype_v4sf_int
15013    = build_function_type_list (V4SF_type_node,
15014				V4SF_type_node, integer_type_node, NULL_TREE);
15015  tree v4sf_ftype_v4sf_int64
15016    = build_function_type_list (V4SF_type_node,
15017				V4SF_type_node, long_long_integer_type_node,
15018				NULL_TREE);
15019  tree v4sf_ftype_v4sf_v2si
15020    = build_function_type_list (V4SF_type_node,
15021				V4SF_type_node, V2SI_type_node, NULL_TREE);
15022
15023  /* Miscellaneous.  */
15024  tree v8qi_ftype_v4hi_v4hi
15025    = build_function_type_list (V8QI_type_node,
15026				V4HI_type_node, V4HI_type_node, NULL_TREE);
15027  tree v4hi_ftype_v2si_v2si
15028    = build_function_type_list (V4HI_type_node,
15029				V2SI_type_node, V2SI_type_node, NULL_TREE);
15030  tree v4sf_ftype_v4sf_v4sf_int
15031    = build_function_type_list (V4SF_type_node,
15032				V4SF_type_node, V4SF_type_node,
15033				integer_type_node, NULL_TREE);
15034  tree v2si_ftype_v4hi_v4hi
15035    = build_function_type_list (V2SI_type_node,
15036				V4HI_type_node, V4HI_type_node, NULL_TREE);
15037  tree v4hi_ftype_v4hi_int
15038    = build_function_type_list (V4HI_type_node,
15039				V4HI_type_node, integer_type_node, NULL_TREE);
15040  tree v4hi_ftype_v4hi_di
15041    = build_function_type_list (V4HI_type_node,
15042				V4HI_type_node, long_long_unsigned_type_node,
15043				NULL_TREE);
15044  tree v2si_ftype_v2si_di
15045    = build_function_type_list (V2SI_type_node,
15046				V2SI_type_node, long_long_unsigned_type_node,
15047				NULL_TREE);
15048  tree void_ftype_void
15049    = build_function_type (void_type_node, void_list_node);
15050  tree void_ftype_unsigned
15051    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15052  tree void_ftype_unsigned_unsigned
15053    = build_function_type_list (void_type_node, unsigned_type_node,
15054				unsigned_type_node, NULL_TREE);
15055  tree void_ftype_pcvoid_unsigned_unsigned
15056    = build_function_type_list (void_type_node, const_ptr_type_node,
15057				unsigned_type_node, unsigned_type_node,
15058				NULL_TREE);
15059  tree unsigned_ftype_void
15060    = build_function_type (unsigned_type_node, void_list_node);
15061  tree v2si_ftype_v4sf
15062    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15063  /* Loads/stores.  */
15064  tree void_ftype_v8qi_v8qi_pchar
15065    = build_function_type_list (void_type_node,
15066				V8QI_type_node, V8QI_type_node,
15067				pchar_type_node, NULL_TREE);
15068  tree v4sf_ftype_pcfloat
15069    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15070  /* @@@ the type is bogus */
15071  tree v4sf_ftype_v4sf_pv2si
15072    = build_function_type_list (V4SF_type_node,
15073				V4SF_type_node, pv2si_type_node, NULL_TREE);
15074  tree void_ftype_pv2si_v4sf
15075    = build_function_type_list (void_type_node,
15076				pv2si_type_node, V4SF_type_node, NULL_TREE);
15077  tree void_ftype_pfloat_v4sf
15078    = build_function_type_list (void_type_node,
15079				pfloat_type_node, V4SF_type_node, NULL_TREE);
15080  tree void_ftype_pdi_di
15081    = build_function_type_list (void_type_node,
15082				pdi_type_node, long_long_unsigned_type_node,
15083				NULL_TREE);
15084  tree void_ftype_pv2di_v2di
15085    = build_function_type_list (void_type_node,
15086				pv2di_type_node, V2DI_type_node, NULL_TREE);
15087  /* Normal vector unops.  */
15088  tree v4sf_ftype_v4sf
15089    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15090
15091  /* Normal vector binops.  */
15092  tree v4sf_ftype_v4sf_v4sf
15093    = build_function_type_list (V4SF_type_node,
15094				V4SF_type_node, V4SF_type_node, NULL_TREE);
15095  tree v8qi_ftype_v8qi_v8qi
15096    = build_function_type_list (V8QI_type_node,
15097				V8QI_type_node, V8QI_type_node, NULL_TREE);
15098  tree v4hi_ftype_v4hi_v4hi
15099    = build_function_type_list (V4HI_type_node,
15100				V4HI_type_node, V4HI_type_node, NULL_TREE);
15101  tree v2si_ftype_v2si_v2si
15102    = build_function_type_list (V2SI_type_node,
15103				V2SI_type_node, V2SI_type_node, NULL_TREE);
15104  tree di_ftype_di_di
15105    = build_function_type_list (long_long_unsigned_type_node,
15106				long_long_unsigned_type_node,
15107				long_long_unsigned_type_node, NULL_TREE);
15108
15109  tree v2si_ftype_v2sf
15110    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15111  tree v2sf_ftype_v2si
15112    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15113  tree v2si_ftype_v2si
15114    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15115  tree v2sf_ftype_v2sf
15116    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15117  tree v2sf_ftype_v2sf_v2sf
15118    = build_function_type_list (V2SF_type_node,
15119				V2SF_type_node, V2SF_type_node, NULL_TREE);
15120  tree v2si_ftype_v2sf_v2sf
15121    = build_function_type_list (V2SI_type_node,
15122				V2SF_type_node, V2SF_type_node, NULL_TREE);
15123  tree pint_type_node    = build_pointer_type (integer_type_node);
15124  tree pdouble_type_node = build_pointer_type (double_type_node);
15125  tree pcdouble_type_node = build_pointer_type (
15126				build_type_variant (double_type_node, 1, 0));
15127  tree int_ftype_v2df_v2df
15128    = build_function_type_list (integer_type_node,
15129				V2DF_type_node, V2DF_type_node, NULL_TREE);
15130
15131  tree void_ftype_pcvoid
15132    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15133  tree v4sf_ftype_v4si
15134    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15135  tree v4si_ftype_v4sf
15136    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15137  tree v2df_ftype_v4si
15138    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15139  tree v4si_ftype_v2df
15140    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15141  tree v2si_ftype_v2df
15142    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15143  tree v4sf_ftype_v2df
15144    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15145  tree v2df_ftype_v2si
15146    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15147  tree v2df_ftype_v4sf
15148    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15149  tree int_ftype_v2df
15150    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15151  tree int64_ftype_v2df
15152    = build_function_type_list (long_long_integer_type_node,
15153				V2DF_type_node, NULL_TREE);
15154  tree v2df_ftype_v2df_int
15155    = build_function_type_list (V2DF_type_node,
15156				V2DF_type_node, integer_type_node, NULL_TREE);
15157  tree v2df_ftype_v2df_int64
15158    = build_function_type_list (V2DF_type_node,
15159				V2DF_type_node, long_long_integer_type_node,
15160				NULL_TREE);
15161  tree v4sf_ftype_v4sf_v2df
15162    = build_function_type_list (V4SF_type_node,
15163				V4SF_type_node, V2DF_type_node, NULL_TREE);
15164  tree v2df_ftype_v2df_v4sf
15165    = build_function_type_list (V2DF_type_node,
15166				V2DF_type_node, V4SF_type_node, NULL_TREE);
15167  tree v2df_ftype_v2df_v2df_int
15168    = build_function_type_list (V2DF_type_node,
15169				V2DF_type_node, V2DF_type_node,
15170				integer_type_node,
15171				NULL_TREE);
15172  tree v2df_ftype_v2df_pcdouble
15173    = build_function_type_list (V2DF_type_node,
15174				V2DF_type_node, pcdouble_type_node, NULL_TREE);
15175  tree void_ftype_pdouble_v2df
15176    = build_function_type_list (void_type_node,
15177				pdouble_type_node, V2DF_type_node, NULL_TREE);
15178  tree void_ftype_pint_int
15179    = build_function_type_list (void_type_node,
15180				pint_type_node, integer_type_node, NULL_TREE);
15181  tree void_ftype_v16qi_v16qi_pchar
15182    = build_function_type_list (void_type_node,
15183				V16QI_type_node, V16QI_type_node,
15184				pchar_type_node, NULL_TREE);
15185  tree v2df_ftype_pcdouble
15186    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15187  tree v2df_ftype_v2df_v2df
15188    = build_function_type_list (V2DF_type_node,
15189				V2DF_type_node, V2DF_type_node, NULL_TREE);
15190  tree v16qi_ftype_v16qi_v16qi
15191    = build_function_type_list (V16QI_type_node,
15192				V16QI_type_node, V16QI_type_node, NULL_TREE);
15193  tree v8hi_ftype_v8hi_v8hi
15194    = build_function_type_list (V8HI_type_node,
15195				V8HI_type_node, V8HI_type_node, NULL_TREE);
15196  tree v4si_ftype_v4si_v4si
15197    = build_function_type_list (V4SI_type_node,
15198				V4SI_type_node, V4SI_type_node, NULL_TREE);
15199  tree v2di_ftype_v2di_v2di
15200    = build_function_type_list (V2DI_type_node,
15201				V2DI_type_node, V2DI_type_node, NULL_TREE);
15202  tree v2di_ftype_v2df_v2df
15203    = build_function_type_list (V2DI_type_node,
15204				V2DF_type_node, V2DF_type_node, NULL_TREE);
15205  tree v2df_ftype_v2df
15206    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15207  tree v2di_ftype_v2di_int
15208    = build_function_type_list (V2DI_type_node,
15209				V2DI_type_node, integer_type_node, NULL_TREE);
15210  tree v4si_ftype_v4si_int
15211    = build_function_type_list (V4SI_type_node,
15212				V4SI_type_node, integer_type_node, NULL_TREE);
15213  tree v8hi_ftype_v8hi_int
15214    = build_function_type_list (V8HI_type_node,
15215				V8HI_type_node, integer_type_node, NULL_TREE);
15216  tree v4si_ftype_v8hi_v8hi
15217    = build_function_type_list (V4SI_type_node,
15218				V8HI_type_node, V8HI_type_node, NULL_TREE);
15219  tree di_ftype_v8qi_v8qi
15220    = build_function_type_list (long_long_unsigned_type_node,
15221				V8QI_type_node, V8QI_type_node, NULL_TREE);
15222  tree di_ftype_v2si_v2si
15223    = build_function_type_list (long_long_unsigned_type_node,
15224				V2SI_type_node, V2SI_type_node, NULL_TREE);
15225  tree v2di_ftype_v16qi_v16qi
15226    = build_function_type_list (V2DI_type_node,
15227				V16QI_type_node, V16QI_type_node, NULL_TREE);
15228  tree v2di_ftype_v4si_v4si
15229    = build_function_type_list (V2DI_type_node,
15230				V4SI_type_node, V4SI_type_node, NULL_TREE);
15231  tree int_ftype_v16qi
15232    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15233  tree v16qi_ftype_pcchar
15234    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15235  tree void_ftype_pchar_v16qi
15236    = build_function_type_list (void_type_node,
15237			        pchar_type_node, V16QI_type_node, NULL_TREE);
15238
15239  tree float80_type;
15240  tree float128_type;
15241  tree ftype;
15242
15243  /* The __float80 type.  */
15244  if (TYPE_MODE (long_double_type_node) == XFmode)
15245    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15246					       "__float80");
15247  else
15248    {
15249      /* The __float80 type.  */
15250      float80_type = make_node (REAL_TYPE);
15251      TYPE_PRECISION (float80_type) = 80;
15252      layout_type (float80_type);
15253      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15254    }
15255
15256  if (TARGET_64BIT)
15257    {
15258      float128_type = make_node (REAL_TYPE);
15259      TYPE_PRECISION (float128_type) = 128;
15260      layout_type (float128_type);
15261      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15262    }
15263
15264  /* Add all builtins that are more or less simple operations on two
15265     operands.  */
15266  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15267    {
15268      /* Use one of the operands; the target can have a different mode for
15269	 mask-generating compares.  */
15270      enum machine_mode mode;
15271      tree type;
15272
15273      if (d->name == 0)
15274	continue;
15275      mode = insn_data[d->icode].operand[1].mode;
15276
15277      switch (mode)
15278	{
15279	case V16QImode:
15280	  type = v16qi_ftype_v16qi_v16qi;
15281	  break;
15282	case V8HImode:
15283	  type = v8hi_ftype_v8hi_v8hi;
15284	  break;
15285	case V4SImode:
15286	  type = v4si_ftype_v4si_v4si;
15287	  break;
15288	case V2DImode:
15289	  type = v2di_ftype_v2di_v2di;
15290	  break;
15291	case V2DFmode:
15292	  type = v2df_ftype_v2df_v2df;
15293	  break;
15294	case V4SFmode:
15295	  type = v4sf_ftype_v4sf_v4sf;
15296	  break;
15297	case V8QImode:
15298	  type = v8qi_ftype_v8qi_v8qi;
15299	  break;
15300	case V4HImode:
15301	  type = v4hi_ftype_v4hi_v4hi;
15302	  break;
15303	case V2SImode:
15304	  type = v2si_ftype_v2si_v2si;
15305	  break;
15306	case DImode:
15307	  type = di_ftype_di_di;
15308	  break;
15309
15310	default:
15311	  gcc_unreachable ();
15312	}
15313
15314      /* Override for comparisons.  */
15315      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15316	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15317	type = v4si_ftype_v4sf_v4sf;
15318
15319      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15320	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15321	type = v2di_ftype_v2df_v2df;
15322
15323      def_builtin (d->mask, d->name, type, d->code);
15324    }
15325
15326  /* Add the remaining MMX insns with somewhat more complicated types.  */
15327  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15328  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15329  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15330  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15331
15332  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15333  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15334  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15335
15336  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15337  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15338
15339  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15340  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15341
15342  /* comi/ucomi insns.  */
15343  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15344    if (d->mask == MASK_SSE2)
15345      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15346    else
15347      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15348
15349  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15350  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15351  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15352
15353  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15354  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15355  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15356  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15357  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15358  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15359  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15360  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15361  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15362  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15363  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15364
15365  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15366
15367  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15368  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15369
15370  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15371  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15372  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15373  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15374
15375  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15376  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15377  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15378  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15379
15380  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15381
15382  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15383
15384  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15385  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15386  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15387  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15388  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15389  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15390
15391  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15392
15393  /* Original 3DNow!  */
15394  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15395  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15396  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15397  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15398  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15399  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15400  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15401  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15402  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15403  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15404  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15405  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15406  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15407  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15408  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15409  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15410  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15411  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15412  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15413  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15414
15415  /* 3DNow! extension as used in the Athlon CPU.  */
15416  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15417  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15418  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15419  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15420  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15421  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15422
15423  /* SSE2 */
15424  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15425
15426  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15427  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15428
15429  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15430  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15431
15432  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15433  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15434  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15435  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15436  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15437
15438  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15439  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15440  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15441  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15442
15443  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15444  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15445
15446  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15447
15448  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15449  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15450
15451  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15452  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15453  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15454  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15455  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15456
15457  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15458
15459  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15460  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15461  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15462  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15463
15464  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15465  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15466  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15467
15468  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15469  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15470  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15471  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15472
15473  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15474  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15475  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15476
15477  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15478  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15479
15480  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15481  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15482
15483  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
15484  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
15485  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15486
15487  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
15488  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
15489  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15490
15491  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
15492  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
15493
15494  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15495  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15496  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15497  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15498
15499  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15500  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15501  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15502  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15503
15504  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15505  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15506
15507  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15508
15509  /* Prescott New Instructions.  */
15510  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15511	       void_ftype_pcvoid_unsigned_unsigned,
15512	       IX86_BUILTIN_MONITOR);
15513  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15514	       void_ftype_unsigned_unsigned,
15515	       IX86_BUILTIN_MWAIT);
15516  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15517	       v4sf_ftype_v4sf,
15518	       IX86_BUILTIN_MOVSHDUP);
15519  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15520	       v4sf_ftype_v4sf,
15521	       IX86_BUILTIN_MOVSLDUP);
15522  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15523	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15524
15525  /* Access to the vec_init patterns.  */
15526  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15527				    integer_type_node, NULL_TREE);
15528  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15529	       ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15530
15531  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15532				    short_integer_type_node,
15533				    short_integer_type_node,
15534				    short_integer_type_node, NULL_TREE);
15535  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15536	       ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15537
15538  ftype = build_function_type_list (V8QI_type_node, char_type_node,
15539				    char_type_node, char_type_node,
15540				    char_type_node, char_type_node,
15541				    char_type_node, char_type_node,
15542				    char_type_node, NULL_TREE);
15543  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15544	       ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15545
15546  /* Access to the vec_extract patterns.  */
15547  ftype = build_function_type_list (double_type_node, V2DF_type_node,
15548				    integer_type_node, NULL_TREE);
15549  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
15550	       ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15551
15552  ftype = build_function_type_list (long_long_integer_type_node,
15553				    V2DI_type_node, integer_type_node,
15554				    NULL_TREE);
15555  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
15556	       ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15557
15558  ftype = build_function_type_list (float_type_node, V4SF_type_node,
15559				    integer_type_node, NULL_TREE);
15560  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15561	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15562
15563  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15564				    integer_type_node, NULL_TREE);
15565  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
15566	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15567
15568  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15569				    integer_type_node, NULL_TREE);
15570  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
15571	       ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15572
15573  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15574				    integer_type_node, NULL_TREE);
15575  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15576	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15577
15578  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15579				    integer_type_node, NULL_TREE);
15580  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15581	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15582
15583  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15584				    integer_type_node, NULL_TREE);
15585  def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
15586
15587  /* Access to the vec_set patterns.  */
15588  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15589				    intHI_type_node,
15590				    integer_type_node, NULL_TREE);
15591  def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
15592	       ftype, IX86_BUILTIN_VEC_SET_V8HI);
15593
15594  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15595				    intHI_type_node,
15596				    integer_type_node, NULL_TREE);
15597  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15598	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
15599}
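
/* Illustrative user-level view of the registration above.  The wrapper
   shown is an assumption about <emmintrin.h>; the builtin names and
   masks are the ones defined in this function:

     #include <emmintrin.h>

     __m128i
     add_words (__m128i a, __m128i b)
     {
       return _mm_add_epi16 (a, b);
     }

   With -msse2 the MASK_SSE2 builtins above are declared, _mm_add_epi16
   reduces to __builtin_ia32_paddw128, and the call is expanded through
   the IX86_BUILTIN_PADDW128 entry of bdesc_2arg (CODE_FOR_addv8hi3).
   Without -msse2 the MASK_SSE2 builtins are simply not registered.  */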
15600
15601/* Errors in the source file can cause expand_expr to return const0_rtx
15602   where we expect a vector.  To avoid crashing, use one of the vector
15603   clear instructions.  */
15604static rtx
15605safe_vector_operand (rtx x, enum machine_mode mode)
15606{
15607  if (x == const0_rtx)
15608    x = CONST0_RTX (mode);
15609  return x;
15610}
15611
15612/* Subroutine of ix86_expand_builtin to take care of binop insns.  */
15613
15614static rtx
15615ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15616{
15617  rtx pat, xops[3];
15618  tree arg0 = TREE_VALUE (arglist);
15619  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15620  rtx op0 = expand_normal (arg0);
15621  rtx op1 = expand_normal (arg1);
15622  enum machine_mode tmode = insn_data[icode].operand[0].mode;
15623  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15624  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15625
15626  if (VECTOR_MODE_P (mode0))
15627    op0 = safe_vector_operand (op0, mode0);
15628  if (VECTOR_MODE_P (mode1))
15629    op1 = safe_vector_operand (op1, mode1);
15630
15631  if (optimize || !target
15632      || GET_MODE (target) != tmode
15633      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15634    target = gen_reg_rtx (tmode);
15635
15636  if (GET_MODE (op1) == SImode && mode1 == TImode)
15637    {
15638      rtx x = gen_reg_rtx (V4SImode);
15639      emit_insn (gen_sse2_loadd (x, op1));
15640      op1 = gen_lowpart (TImode, x);
15641    }
15642
15643  /* The insn must want input operands in the same modes as the
15644     result.  */
15645  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15646	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15647
15648  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15649    op0 = copy_to_mode_reg (mode0, op0);
15650  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15651    op1 = copy_to_mode_reg (mode1, op1);
15652
15653  /* ??? Using ix86_fixup_binary_operands is problematic when
15654     we've got mismatched modes.  Fake it.  */
15655
15656  xops[0] = target;
15657  xops[1] = op0;
15658  xops[2] = op1;
15659
15660  if (tmode == mode0 && tmode == mode1)
15661    {
15662      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15663      op0 = xops[1];
15664      op1 = xops[2];
15665    }
15666  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15667    {
15668      op0 = force_reg (mode0, op0);
15669      op1 = force_reg (mode1, op1);
15670      target = gen_reg_rtx (tmode);
15671    }
15672
15673  pat = GEN_FCN (icode) (target, op0, op1);
15674  if (! pat)
15675    return 0;
15676  emit_insn (pat);
15677  return target;
15678}
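
/* Worked example for the two-operand path.  The vector typedef below
   is local to the example; the intrinsic headers provide equivalents:

     typedef short sketch_v8hi __attribute__ ((__vector_size__ (16)));

     sketch_v8hi
     mul_high (sketch_v8hi a, sketch_v8hi b)
     {
       return __builtin_ia32_pmulhw128 (a, b);
     }

   ix86_expand_builtin looks IX86_BUILTIN_PMULHW128 up in bdesc_2arg
   and arrives here with CODE_FOR_sse2_smulv8hi3_highpart; each operand
   is forced into a register if the operand predicate demands it, and a
   single insn is emitted via GEN_FCN.  The SImode-to-TImode conversion
   above covers patterns whose second operand is TImode while the
   builtin argument is a plain int.  */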
15679
15680/* Subroutine of ix86_expand_builtin to take care of stores.  */
15681
15682static rtx
15683ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15684{
15685  rtx pat;
15686  tree arg0 = TREE_VALUE (arglist);
15687  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15688  rtx op0 = expand_normal (arg0);
15689  rtx op1 = expand_normal (arg1);
15690  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15691  enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15692
15693  if (VECTOR_MODE_P (mode1))
15694    op1 = safe_vector_operand (op1, mode1);
15695
15696  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15697  op1 = copy_to_mode_reg (mode1, op1);
15698
15699  pat = GEN_FCN (icode) (op0, op1);
15700  if (pat)
15701    emit_insn (pat);
15702  return 0;
15703}
15704
15705/* Subroutine of ix86_expand_builtin to take care of unop insns.  */
15706
15707static rtx
15708ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15709			  rtx target, int do_load)
15710{
15711  rtx pat;
15712  tree arg0 = TREE_VALUE (arglist);
15713  rtx op0 = expand_normal (arg0);
15714  enum machine_mode tmode = insn_data[icode].operand[0].mode;
15715  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15716
15717  if (optimize || !target
15718      || GET_MODE (target) != tmode
15719      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15720    target = gen_reg_rtx (tmode);
15721  if (do_load)
15722    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15723  else
15724    {
15725      if (VECTOR_MODE_P (mode0))
15726	op0 = safe_vector_operand (op0, mode0);
15727
15728      if ((optimize && !register_operand (op0, mode0))
15729	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15730	op0 = copy_to_mode_reg (mode0, op0);
15731    }
15732
15733  pat = GEN_FCN (icode) (target, op0);
15734  if (! pat)
15735    return 0;
15736  emit_insn (pat);
15737  return target;
15738}
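
/* Example of the do_load case.  The wrapper is an assumption about
   <xmmintrin.h>; the builtin itself is registered above:

     #include <xmmintrin.h>

     __m128
     load (const float *p)
     {
       return _mm_loadu_ps (p);
     }

   _mm_loadu_ps reduces to __builtin_ia32_loadups, which reaches this
   function from the IX86_BUILTIN_LOADUPS case with do_load == 1: the
   pointer argument is copied to a Pmode register and wrapped in a MEM
   so that CODE_FOR_sse_movups can consume it directly.  */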
15739
15740/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15741   sqrtss, rsqrtss, rcpss.  */
15742
15743static rtx
15744ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15745{
15746  rtx pat;
15747  tree arg0 = TREE_VALUE (arglist);
15748  rtx op1, op0 = expand_normal (arg0);
15749  enum machine_mode tmode = insn_data[icode].operand[0].mode;
15750  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15751
15752  if (optimize || !target
15753      || GET_MODE (target) != tmode
15754      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15755    target = gen_reg_rtx (tmode);
15756
15757  if (VECTOR_MODE_P (mode0))
15758    op0 = safe_vector_operand (op0, mode0);
15759
15760  if ((optimize && !register_operand (op0, mode0))
15761      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15762    op0 = copy_to_mode_reg (mode0, op0);
15763
15764  op1 = op0;
15765  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15766    op1 = copy_to_mode_reg (mode0, op1);
15767
15768  pat = GEN_FCN (icode) (target, op0, op1);
15769  if (! pat)
15770    return 0;
15771  emit_insn (pat);
15772  return target;
15773}
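
/* These are the scalar (vm) forms, which merge the result into the
   upper elements of the first operand, so the same value is supplied
   for both input operands above.  User-level sketch; the wrapper is an
   assumption about <xmmintrin.h>:

     #include <xmmintrin.h>

     __m128
     scalar_root (__m128 x)
     {
       return _mm_sqrt_ss (x);
     }

   _mm_sqrt_ss reduces to __builtin_ia32_sqrtss; the IX86_BUILTIN_SQRTSS
   case in ix86_expand_builtin routes it here with
   CODE_FOR_sse_vmsqrtv4sf2, so element 0 of the result is the square
   root of element 0 of X while elements 1-3 are taken from X
   unchanged.  */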
15774
15775/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
15776
15777static rtx
15778ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15779			 rtx target)
15780{
15781  rtx pat;
15782  tree arg0 = TREE_VALUE (arglist);
15783  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15784  rtx op0 = expand_normal (arg0);
15785  rtx op1 = expand_normal (arg1);
15786  rtx op2;
15787  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15788  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15789  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15790  enum rtx_code comparison = d->comparison;
15791
15792  if (VECTOR_MODE_P (mode0))
15793    op0 = safe_vector_operand (op0, mode0);
15794  if (VECTOR_MODE_P (mode1))
15795    op1 = safe_vector_operand (op1, mode1);
15796
15797  /* Swap operands if we have a comparison that isn't available in
15798     hardware.  */
15799  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15800    {
15801      rtx tmp = gen_reg_rtx (mode1);
15802      emit_move_insn (tmp, op1);
15803      op1 = op0;
15804      op0 = tmp;
15805    }
15806
15807  if (optimize || !target
15808      || GET_MODE (target) != tmode
15809      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15810    target = gen_reg_rtx (tmode);
15811
15812  if ((optimize && !register_operand (op0, mode0))
15813      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15814    op0 = copy_to_mode_reg (mode0, op0);
15815  if ((optimize && !register_operand (op1, mode1))
15816      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15817    op1 = copy_to_mode_reg (mode1, op1);
15818
15819  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15820  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15821  if (! pat)
15822    return 0;
15823  emit_insn (pat);
15824  return target;
15825}
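
/* The hardware comparison predicates only cover eq/lt/le/unord and
   their negations, so table entries for the reversed predicates are
   flagged BUILTIN_DESC_SWAP_OPERANDS and carry the mirrored rtx code;
   the swap above then yields an available instruction with the
   operands exchanged.  Sketch (the exact table rows appear earlier in
   this file, and the wrapper is an assumption about <xmmintrin.h>):

     #include <xmmintrin.h>

     __m128
     greater (__m128 a, __m128 b)
     {
       return _mm_cmpgt_ps (a, b);
     }

   _mm_cmpgt_ps reduces to __builtin_ia32_cmpgtps, whose entry requests
   LT with swapped operands, so the emitted insn is effectively cmpltps
   with B and A interchanged.  */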
15826
15827/* Subroutine of ix86_expand_builtin to take care of comi insns.  */
15828
15829static rtx
15830ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15831		      rtx target)
15832{
15833  rtx pat;
15834  tree arg0 = TREE_VALUE (arglist);
15835  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15836  rtx op0 = expand_normal (arg0);
15837  rtx op1 = expand_normal (arg1);
15838  rtx op2;
15839  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15840  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15841  enum rtx_code comparison = d->comparison;
15842
15843  if (VECTOR_MODE_P (mode0))
15844    op0 = safe_vector_operand (op0, mode0);
15845  if (VECTOR_MODE_P (mode1))
15846    op1 = safe_vector_operand (op1, mode1);
15847
15848  /* Swap operands if we have a comparison that isn't available in
15849     hardware.  */
15850  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15851    {
15852      rtx tmp = op1;
15853      op1 = op0;
15854      op0 = tmp;
15855    }
15856
15857  target = gen_reg_rtx (SImode);
15858  emit_move_insn (target, const0_rtx);
15859  target = gen_rtx_SUBREG (QImode, target, 0);
15860
15861  if ((optimize && !register_operand (op0, mode0))
15862      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15863    op0 = copy_to_mode_reg (mode0, op0);
15864  if ((optimize && !register_operand (op1, mode1))
15865      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15866    op1 = copy_to_mode_reg (mode1, op1);
15867
15868  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15869  pat = GEN_FCN (d->icode) (op0, op1);
15870  if (! pat)
15871    return 0;
15872  emit_insn (pat);
15873  emit_insn (gen_rtx_SET (VOIDmode,
15874			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15875			  gen_rtx_fmt_ee (comparison, QImode,
15876					  SET_DEST (pat),
15877					  const0_rtx)));
15878
15879  return SUBREG_REG (target);
15880}
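
/* The comi/ucomi builtins return a plain int.  Sketch; the wrapper is
   an assumption about <xmmintrin.h>:

     #include <xmmintrin.h>

     int
     less (__m128 a, __m128 b)
     {
       return _mm_comilt_ss (a, b);
     }

   _mm_comilt_ss reduces to __builtin_ia32_comilt; the code above emits
   the comparison insn itself and then a setcc-style store of the
   comparison of the flags result into the low byte of a zeroed SImode
   register, which is the value handed back to the caller.  */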
15881
15882/* Return the integer constant in ARG.  Constrain it to be in the range
15883   of the subparts of VEC_TYPE; issue an error if not.  */
15884
15885static int
15886get_element_number (tree vec_type, tree arg)
15887{
15888  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15889
15890  if (!host_integerp (arg, 1)
15891      || (elt = tree_low_cst (arg, 1), elt > max))
15892    {
15893      error ("selector must be an integer constant in the range 0..%wi", max);
15894      return 0;
15895    }
15896
15897  return elt;
15898}
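
/* For instance, with the extract builtin registered above (the typedef
   is local to the example):

     typedef float sketch_v4sf __attribute__ ((__vector_size__ (16)));

     float
     third (sketch_v4sf x)
     {
       return __builtin_ia32_vec_ext_v4sf (x, 2);
     }

   is accepted because the V4SF type has four subparts, so selectors
   0..3 are valid; a selector of 4 or more, or one that is not an
   integer constant, is rejected with the error above and element 0 is
   used so that compilation can continue.  */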
15899
15900/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
15901   ix86_expand_vector_init.  We DO have language-level syntax for this, in
15902   the form of  (type){ init-list }.  Except that since we can't place emms
15903   instructions from inside the compiler, we can't allow the use of MMX
15904   registers unless the user explicitly asks for it.  So we do *not* define
15905   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
15906   we have builtins invoked by mmintrin.h that give us license to emit
15907   these sorts of instructions.  */
15908
15909static rtx
15910ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15911{
15912  enum machine_mode tmode = TYPE_MODE (type);
15913  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15914  int i, n_elt = GET_MODE_NUNITS (tmode);
15915  rtvec v = rtvec_alloc (n_elt);
15916
15917  gcc_assert (VECTOR_MODE_P (tmode));
15918
15919  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15920    {
15921      rtx x = expand_normal (TREE_VALUE (arglist));
15922      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15923    }
15924
15925  gcc_assert (arglist == NULL);
15926
15927  if (!target || !register_operand (target, tmode))
15928    target = gen_reg_rtx (tmode);
15929
15930  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15931  return target;
15932}
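
/* User-level sketch; the wrapper is an assumption about <mmintrin.h>:

     #include <mmintrin.h>

     __m64
     pack (short a, short b, short c, short d)
     {
       return _mm_set_pi16 (a, b, c, d);
     }

   The underlying __builtin_ia32_vec_init_v2si/v4hi/v8qi builtins take
   their elements lowest first; each argument is expanded, narrowed to
   the inner mode with gen_lowpart, and handed to
   ix86_expand_vector_init with its first (MMX-permission) argument
   true, which is what licenses the use of MMX registers here.  */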
15933
15934/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
15935   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
15936   had a language-level syntax for referencing vector elements.  */
15937
15938static rtx
15939ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15940{
15941  enum machine_mode tmode, mode0;
15942  tree arg0, arg1;
15943  int elt;
15944  rtx op0;
15945
15946  arg0 = TREE_VALUE (arglist);
15947  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15948
15949  op0 = expand_normal (arg0);
15950  elt = get_element_number (TREE_TYPE (arg0), arg1);
15951
15952  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15953  mode0 = TYPE_MODE (TREE_TYPE (arg0));
15954  gcc_assert (VECTOR_MODE_P (mode0));
15955
15956  op0 = force_reg (mode0, op0);
15957
15958  if (optimize || !target || !register_operand (target, tmode))
15959    target = gen_reg_rtx (tmode);
15960
15961  ix86_expand_vector_extract (true, target, op0, elt);
15962
15963  return target;
15964}
15965
15966/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
15967   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
15968   a language-level syntax for referencing vector elements.  */
15969
15970static rtx
15971ix86_expand_vec_set_builtin (tree arglist)
15972{
15973  enum machine_mode tmode, mode1;
15974  tree arg0, arg1, arg2;
15975  int elt;
15976  rtx op0, op1, target;
15977
15978  arg0 = TREE_VALUE (arglist);
15979  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15980  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15981
15982  tmode = TYPE_MODE (TREE_TYPE (arg0));
15983  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15984  gcc_assert (VECTOR_MODE_P (tmode));
15985
15986  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15987  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15988  elt = get_element_number (TREE_TYPE (arg0), arg2);
15989
15990  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15991    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15992
15993  op0 = force_reg (tmode, op0);
15994  op1 = force_reg (mode1, op1);
15995
15996  /* OP0 is the source of these builtin functions and shouldn't be
15997     modified.  Create a copy, use it and return it as target.  */
15998  target = gen_reg_rtx (tmode);
15999  emit_move_insn (target, op0);
16000  ix86_expand_vector_set (true, target, op1, elt);
16001
16002  return target;
16003}
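
/* User-level sketch; the wrapper is an assumption about <xmmintrin.h>,
   which supplies _mm_insert_pi16 when SSE or the Athlon 3DNow!
   extension is enabled:

     #include <xmmintrin.h>

     __m64
     put_word (__m64 v, int w)
     {
       return _mm_insert_pi16 (v, w, 1);
     }

   _mm_insert_pi16 reduces to __builtin_ia32_vec_set_v4hi; the element
   number is validated by get_element_number, the original vector is
   copied into a fresh register, and ix86_expand_vector_set rewrites
   only the selected element, so the builtin never modifies its first
   argument in place.  */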
16004
16005/* Expand an expression EXP that calls a built-in function,
16006   with result going to TARGET if that's convenient
16007   (and in mode MODE if that's convenient).
16008   SUBTARGET may be used as the target for computing one of EXP's operands.
16009   IGNORE is nonzero if the value is to be ignored.  */
16010
16011static rtx
16012ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16013		     enum machine_mode mode ATTRIBUTE_UNUSED,
16014		     int ignore ATTRIBUTE_UNUSED)
16015{
16016  const struct builtin_description *d;
16017  size_t i;
16018  enum insn_code icode;
16019  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16020  tree arglist = TREE_OPERAND (exp, 1);
16021  tree arg0, arg1, arg2;
16022  rtx op0, op1, op2, pat;
16023  enum machine_mode tmode, mode0, mode1, mode2;
16024  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16025
16026  switch (fcode)
16027    {
16028    case IX86_BUILTIN_EMMS:
16029      emit_insn (gen_mmx_emms ());
16030      return 0;
16031
16032    case IX86_BUILTIN_SFENCE:
16033      emit_insn (gen_sse_sfence ());
16034      return 0;
16035
16036    case IX86_BUILTIN_MASKMOVQ:
16037    case IX86_BUILTIN_MASKMOVDQU:
16038      icode = (fcode == IX86_BUILTIN_MASKMOVQ
16039	       ? CODE_FOR_mmx_maskmovq
16040	       : CODE_FOR_sse2_maskmovdqu);
16041      /* Note the arg order is different from the operand order.  */
16042      arg1 = TREE_VALUE (arglist);
16043      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16044      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16045      op0 = expand_normal (arg0);
16046      op1 = expand_normal (arg1);
16047      op2 = expand_normal (arg2);
16048      mode0 = insn_data[icode].operand[0].mode;
16049      mode1 = insn_data[icode].operand[1].mode;
16050      mode2 = insn_data[icode].operand[2].mode;
16051
16052      op0 = force_reg (Pmode, op0);
16053      op0 = gen_rtx_MEM (mode1, op0);
16054
16055      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16056	op0 = copy_to_mode_reg (mode0, op0);
16057      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16058	op1 = copy_to_mode_reg (mode1, op1);
16059      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16060	op2 = copy_to_mode_reg (mode2, op2);
16061      pat = GEN_FCN (icode) (op0, op1, op2);
16062      if (! pat)
16063	return 0;
16064      emit_insn (pat);
16065      return 0;
16066
16067    case IX86_BUILTIN_SQRTSS:
16068      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16069    case IX86_BUILTIN_RSQRTSS:
16070      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16071    case IX86_BUILTIN_RCPSS:
16072      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16073
16074    case IX86_BUILTIN_LOADUPS:
16075      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16076
16077    case IX86_BUILTIN_STOREUPS:
16078      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16079
16080    case IX86_BUILTIN_LOADHPS:
16081    case IX86_BUILTIN_LOADLPS:
16082    case IX86_BUILTIN_LOADHPD:
16083    case IX86_BUILTIN_LOADLPD:
16084      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16085	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16086	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16087	       : CODE_FOR_sse2_loadlpd);
16088      arg0 = TREE_VALUE (arglist);
16089      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16090      op0 = expand_normal (arg0);
16091      op1 = expand_normal (arg1);
16092      tmode = insn_data[icode].operand[0].mode;
16093      mode0 = insn_data[icode].operand[1].mode;
16094      mode1 = insn_data[icode].operand[2].mode;
16095
16096      op0 = force_reg (mode0, op0);
16097      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16098      if (optimize || target == 0
16099	  || GET_MODE (target) != tmode
16100	  || !register_operand (target, tmode))
16101	target = gen_reg_rtx (tmode);
16102      pat = GEN_FCN (icode) (target, op0, op1);
16103      if (! pat)
16104	return 0;
16105      emit_insn (pat);
16106      return target;
16107
16108    case IX86_BUILTIN_STOREHPS:
16109    case IX86_BUILTIN_STORELPS:
16110      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16111	       : CODE_FOR_sse_storelps);
16112      arg0 = TREE_VALUE (arglist);
16113      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16114      op0 = expand_normal (arg0);
16115      op1 = expand_normal (arg1);
16116      mode0 = insn_data[icode].operand[0].mode;
16117      mode1 = insn_data[icode].operand[1].mode;
16118
16119      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16120      op1 = force_reg (mode1, op1);
16121
16122      pat = GEN_FCN (icode) (op0, op1);
16123      if (! pat)
16124	return 0;
16125      emit_insn (pat);
16126      return const0_rtx;
16127
16128    case IX86_BUILTIN_MOVNTPS:
16129      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16130    case IX86_BUILTIN_MOVNTQ:
16131      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16132
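    /* The ldmxcsr and stmxcsr instructions only take a memory operand, so
       the value is staged through a scratch stack slot below.  */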
16133    case IX86_BUILTIN_LDMXCSR:
16134      op0 = expand_normal (TREE_VALUE (arglist));
16135      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16136      emit_move_insn (target, op0);
16137      emit_insn (gen_sse_ldmxcsr (target));
16138      return 0;
16139
16140    case IX86_BUILTIN_STMXCSR:
16141      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16142      emit_insn (gen_sse_stmxcsr (target));
16143      return copy_to_mode_reg (SImode, target);
16144
16145    case IX86_BUILTIN_SHUFPS:
16146    case IX86_BUILTIN_SHUFPD:
16147      icode = (fcode == IX86_BUILTIN_SHUFPS
16148	       ? CODE_FOR_sse_shufps
16149	       : CODE_FOR_sse2_shufpd);
16150      arg0 = TREE_VALUE (arglist);
16151      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16152      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16153      op0 = expand_normal (arg0);
16154      op1 = expand_normal (arg1);
16155      op2 = expand_normal (arg2);
16156      tmode = insn_data[icode].operand[0].mode;
16157      mode0 = insn_data[icode].operand[1].mode;
16158      mode1 = insn_data[icode].operand[2].mode;
16159      mode2 = insn_data[icode].operand[3].mode;
16160
16161      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16162	op0 = copy_to_mode_reg (mode0, op0);
16163      if ((optimize && !register_operand (op1, mode1))
16164	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16165	op1 = copy_to_mode_reg (mode1, op1);
16166      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16167	{
16168	  /* @@@ better error message */
16169	  error ("mask must be an immediate");
16170	  return gen_reg_rtx (tmode);
16171	}
16172      if (optimize || target == 0
16173	  || GET_MODE (target) != tmode
16174	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16175	target = gen_reg_rtx (tmode);
16176      pat = GEN_FCN (icode) (target, op0, op1, op2);
16177      if (! pat)
16178	return 0;
16179      emit_insn (pat);
16180      return target;
16181
16182    case IX86_BUILTIN_PSHUFW:
16183    case IX86_BUILTIN_PSHUFD:
16184    case IX86_BUILTIN_PSHUFHW:
16185    case IX86_BUILTIN_PSHUFLW:
16186      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16187	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16188	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16189	       : CODE_FOR_mmx_pshufw);
16190      arg0 = TREE_VALUE (arglist);
16191      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16192      op0 = expand_normal (arg0);
16193      op1 = expand_normal (arg1);
16194      tmode = insn_data[icode].operand[0].mode;
16195      mode1 = insn_data[icode].operand[1].mode;
16196      mode2 = insn_data[icode].operand[2].mode;
16197
16198      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16199	op0 = copy_to_mode_reg (mode1, op0);
16200      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16201	{
16202	  /* @@@ better error message */
16203	  error ("mask must be an immediate");
16204	  return const0_rtx;
16205	}
16206      if (target == 0
16207	  || GET_MODE (target) != tmode
16208	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16209	target = gen_reg_rtx (tmode);
16210      pat = GEN_FCN (icode) (target, op0, op1);
16211      if (! pat)
16212	return 0;
16213      emit_insn (pat);
16214      return target;
16215
16216    case IX86_BUILTIN_PSLLWI128:
16217      icode = CODE_FOR_ashlv8hi3;
16218      goto do_pshifti;
16219    case IX86_BUILTIN_PSLLDI128:
16220      icode = CODE_FOR_ashlv4si3;
16221      goto do_pshifti;
16222    case IX86_BUILTIN_PSLLQI128:
16223      icode = CODE_FOR_ashlv2di3;
16224      goto do_pshifti;
16225    case IX86_BUILTIN_PSRAWI128:
16226      icode = CODE_FOR_ashrv8hi3;
16227      goto do_pshifti;
16228    case IX86_BUILTIN_PSRADI128:
16229      icode = CODE_FOR_ashrv4si3;
16230      goto do_pshifti;
16231    case IX86_BUILTIN_PSRLWI128:
16232      icode = CODE_FOR_lshrv8hi3;
16233      goto do_pshifti;
16234    case IX86_BUILTIN_PSRLDI128:
16235      icode = CODE_FOR_lshrv4si3;
16236      goto do_pshifti;
16237    case IX86_BUILTIN_PSRLQI128:
16238      icode = CODE_FOR_lshrv2di3;
16239      goto do_pshifti;
16240    do_pshifti:
16241      arg0 = TREE_VALUE (arglist);
16242      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16243      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16244      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16245
16246      if (GET_CODE (op1) != CONST_INT)
16247	{
16248	  error ("shift must be an immediate");
16249	  return const0_rtx;
16250	}
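      /* A count outside 0..255 behaves like a count of 255: the hardware
	 shifts the whole element out (or fills with the sign bit for the
	 arithmetic forms), so clamping preserves the semantics.  */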
16251      if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
16252	op1 = GEN_INT (255);
16253
16254      tmode = insn_data[icode].operand[0].mode;
16255      mode1 = insn_data[icode].operand[1].mode;
16256      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16257	op0 = copy_to_reg (op0);
16258
16259      target = gen_reg_rtx (tmode);
16260      pat = GEN_FCN (icode) (target, op0, op1);
16261      if (!pat)
16262	return 0;
16263      emit_insn (pat);
16264      return target;
16265
16266    case IX86_BUILTIN_PSLLW128:
16267      icode = CODE_FOR_ashlv8hi3;
16268      goto do_pshift;
16269    case IX86_BUILTIN_PSLLD128:
16270      icode = CODE_FOR_ashlv4si3;
16271      goto do_pshift;
16272    case IX86_BUILTIN_PSLLQ128:
16273      icode = CODE_FOR_ashlv2di3;
16274      goto do_pshift;
16275    case IX86_BUILTIN_PSRAW128:
16276      icode = CODE_FOR_ashrv8hi3;
16277      goto do_pshift;
16278    case IX86_BUILTIN_PSRAD128:
16279      icode = CODE_FOR_ashrv4si3;
16280      goto do_pshift;
16281    case IX86_BUILTIN_PSRLW128:
16282      icode = CODE_FOR_lshrv8hi3;
16283      goto do_pshift;
16284    case IX86_BUILTIN_PSRLD128:
16285      icode = CODE_FOR_lshrv4si3;
16286      goto do_pshift;
16287    case IX86_BUILTIN_PSRLQ128:
16288      icode = CODE_FOR_lshrv2di3;
16289      goto do_pshift;
16290    do_pshift:
16291      arg0 = TREE_VALUE (arglist);
16292      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16293      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16294      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16295
16296      tmode = insn_data[icode].operand[0].mode;
16297      mode1 = insn_data[icode].operand[1].mode;
16298
16299      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16300	op0 = copy_to_reg (op0);
16301
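      /* The non-immediate shift patterns take the count as a TImode
	 operand (only the low 64 bits matter to the hardware), so
	 reinterpret the count vector via a subreg before matching the
	 predicate.  */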
16302      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
16303      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
16304	op1 = copy_to_reg (op1);
16305
16306      target = gen_reg_rtx (tmode);
16307      pat = GEN_FCN (icode) (target, op0, op1);
16308      if (!pat)
16309	return 0;
16310      emit_insn (pat);
16311      return target;
16312
16313    case IX86_BUILTIN_PSLLDQI128:
16314    case IX86_BUILTIN_PSRLDQI128:
16315      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16316	       : CODE_FOR_sse2_lshrti3);
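      /* These shift the full 128-bit value; the builtin's count is in
	 bits, with the byte-granular intrinsic wrappers expected to scale
	 their argument accordingly.  */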
16317      arg0 = TREE_VALUE (arglist);
16318      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16319      op0 = expand_normal (arg0);
16320      op1 = expand_normal (arg1);
16321      tmode = insn_data[icode].operand[0].mode;
16322      mode1 = insn_data[icode].operand[1].mode;
16323      mode2 = insn_data[icode].operand[2].mode;
16324
16325      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16326	{
16327	  op0 = copy_to_reg (op0);
16328	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16329	}
16330      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16331	{
16332	  error ("shift must be an immediate");
16333	  return const0_rtx;
16334	}
16335      target = gen_reg_rtx (V2DImode);
16336      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
16337			     op0, op1);
16338      if (! pat)
16339	return 0;
16340      emit_insn (pat);
16341      return target;
16342
16343    case IX86_BUILTIN_FEMMS:
16344      emit_insn (gen_mmx_femms ());
16345      return NULL_RTX;
16346
16347    case IX86_BUILTIN_PAVGUSB:
16348      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16349
16350    case IX86_BUILTIN_PF2ID:
16351      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16352
16353    case IX86_BUILTIN_PFACC:
16354      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16355
16356    case IX86_BUILTIN_PFADD:
16357      return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16358
16359    case IX86_BUILTIN_PFCMPEQ:
16360      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16361
16362    case IX86_BUILTIN_PFCMPGE:
16363      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16364
16365    case IX86_BUILTIN_PFCMPGT:
16366      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16367
16368    case IX86_BUILTIN_PFMAX:
16369      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16370
16371    case IX86_BUILTIN_PFMIN:
16372      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16373
16374    case IX86_BUILTIN_PFMUL:
16375      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16376
16377    case IX86_BUILTIN_PFRCP:
16378      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16379
16380    case IX86_BUILTIN_PFRCPIT1:
16381      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16382
16383    case IX86_BUILTIN_PFRCPIT2:
16384      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16385
16386    case IX86_BUILTIN_PFRSQIT1:
16387      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16388
16389    case IX86_BUILTIN_PFRSQRT:
16390      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16391
16392    case IX86_BUILTIN_PFSUB:
16393      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16394
16395    case IX86_BUILTIN_PFSUBR:
16396      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16397
16398    case IX86_BUILTIN_PI2FD:
16399      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16400
16401    case IX86_BUILTIN_PMULHRW:
16402      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16403
16404    case IX86_BUILTIN_PF2IW:
16405      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16406
16407    case IX86_BUILTIN_PFNACC:
16408      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16409
16410    case IX86_BUILTIN_PFPNACC:
16411      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16412
16413    case IX86_BUILTIN_PI2FW:
16414      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16415
16416    case IX86_BUILTIN_PSWAPDSI:
16417      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16418
16419    case IX86_BUILTIN_PSWAPDSF:
16420      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16421
16422    case IX86_BUILTIN_SQRTSD:
16423      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16424    case IX86_BUILTIN_LOADUPD:
16425      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16426    case IX86_BUILTIN_STOREUPD:
16427      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16428
16429    case IX86_BUILTIN_MFENCE:
16430      emit_insn (gen_sse2_mfence ());
16431      return 0;
16432    case IX86_BUILTIN_LFENCE:
16433      emit_insn (gen_sse2_lfence ());
16434      return 0;
16435
16436    case IX86_BUILTIN_CLFLUSH:
16437      arg0 = TREE_VALUE (arglist);
16438      op0 = expand_normal (arg0);
16439      icode = CODE_FOR_sse2_clflush;
16440      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16441	op0 = copy_to_mode_reg (Pmode, op0);
16442
16443      emit_insn (gen_sse2_clflush (op0));
16444      return 0;
16445
16446    case IX86_BUILTIN_MOVNTPD:
16447      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16448    case IX86_BUILTIN_MOVNTDQ:
16449      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16450    case IX86_BUILTIN_MOVNTI:
16451      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16452
16453    case IX86_BUILTIN_LOADDQU:
16454      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16455    case IX86_BUILTIN_STOREDQU:
16456      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16457
16458    case IX86_BUILTIN_MONITOR:
16459      arg0 = TREE_VALUE (arglist);
16460      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16461      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16462      op0 = expand_normal (arg0);
16463      op1 = expand_normal (arg1);
16464      op2 = expand_normal (arg2);
16465      if (!REG_P (op0))
16466	op0 = copy_to_mode_reg (Pmode, op0);
16467      if (!REG_P (op1))
16468	op1 = copy_to_mode_reg (SImode, op1);
16469      if (!REG_P (op2))
16470	op2 = copy_to_mode_reg (SImode, op2);
16471      if (!TARGET_64BIT)
16472	emit_insn (gen_sse3_monitor (op0, op1, op2));
16473      else
16474	emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16475      return 0;
16476
16477    case IX86_BUILTIN_MWAIT:
16478      arg0 = TREE_VALUE (arglist);
16479      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16480      op0 = expand_normal (arg0);
16481      op1 = expand_normal (arg1);
16482      if (!REG_P (op0))
16483	op0 = copy_to_mode_reg (SImode, op0);
16484      if (!REG_P (op1))
16485	op1 = copy_to_mode_reg (SImode, op1);
16486      emit_insn (gen_sse3_mwait (op0, op1));
16487      return 0;
16488
16489    case IX86_BUILTIN_LDDQU:
16490      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16491				       target, 1);
16492
16493    case IX86_BUILTIN_VEC_INIT_V2SI:
16494    case IX86_BUILTIN_VEC_INIT_V4HI:
16495    case IX86_BUILTIN_VEC_INIT_V8QI:
16496      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16497
16498    case IX86_BUILTIN_VEC_EXT_V2DF:
16499    case IX86_BUILTIN_VEC_EXT_V2DI:
16500    case IX86_BUILTIN_VEC_EXT_V4SF:
16501    case IX86_BUILTIN_VEC_EXT_V4SI:
16502    case IX86_BUILTIN_VEC_EXT_V8HI:
16503    case IX86_BUILTIN_VEC_EXT_V16QI:
16504    case IX86_BUILTIN_VEC_EXT_V2SI:
16505    case IX86_BUILTIN_VEC_EXT_V4HI:
16506      return ix86_expand_vec_ext_builtin (arglist, target);
16507
16508    case IX86_BUILTIN_VEC_SET_V8HI:
16509    case IX86_BUILTIN_VEC_SET_V4HI:
16510      return ix86_expand_vec_set_builtin (arglist);
16511
16512    default:
16513      break;
16514    }
16515
16516  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16517    if (d->code == fcode)
16518      {
16519	/* Compares are treated specially.  */
16520	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16521	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16522	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
16523	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16524	  return ix86_expand_sse_compare (d, arglist, target);
16525
16526	return ix86_expand_binop_builtin (d->icode, arglist, target);
16527      }
16528
16529  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16530    if (d->code == fcode)
16531      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16532
16533  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16534    if (d->code == fcode)
16535      return ix86_expand_sse_comi (d, arglist, target);
16536
16537  gcc_unreachable ();
16538}
16539
16540/* Store OPERAND to the memory after reload is completed.  This means
16541   that we can't easily use assign_stack_local.  */
16542rtx
16543ix86_force_to_memory (enum machine_mode mode, rtx operand)
16544{
16545  rtx result;
16546
16547  gcc_assert (reload_completed);
16548  if (TARGET_RED_ZONE)
16549    {
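      /* With a red zone we may store below the stack pointer without
	 adjusting it; the 128 bytes below %rsp are reserved for such use
	 by the x86-64 ABI.  */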
16550      result = gen_rtx_MEM (mode,
16551			    gen_rtx_PLUS (Pmode,
16552					  stack_pointer_rtx,
16553					  GEN_INT (-RED_ZONE_SIZE)));
16554      emit_move_insn (result, operand);
16555    }
16556  else if (!TARGET_RED_ZONE && TARGET_64BIT)
16557    {
16558      switch (mode)
16559	{
16560	case HImode:
16561	case SImode:
16562	  operand = gen_lowpart (DImode, operand);
16563	  /* FALLTHRU */
16564	case DImode:
16565	  emit_insn (
16566		      gen_rtx_SET (VOIDmode,
16567				   gen_rtx_MEM (DImode,
16568						gen_rtx_PRE_DEC (DImode,
16569							stack_pointer_rtx)),
16570				   operand));
16571	  break;
16572	default:
16573	  gcc_unreachable ();
16574	}
16575      result = gen_rtx_MEM (mode, stack_pointer_rtx);
16576    }
16577  else
16578    {
16579      switch (mode)
16580	{
16581	case DImode:
16582	  {
16583	    rtx operands[2];
16584	    split_di (&operand, 1, operands, operands + 1);
16585	    emit_insn (
16586			gen_rtx_SET (VOIDmode,
16587				     gen_rtx_MEM (SImode,
16588						  gen_rtx_PRE_DEC (Pmode,
16589							stack_pointer_rtx)),
16590				     operands[1]));
16591	    emit_insn (
16592			gen_rtx_SET (VOIDmode,
16593				     gen_rtx_MEM (SImode,
16594						  gen_rtx_PRE_DEC (Pmode,
16595							stack_pointer_rtx)),
16596				     operands[0]));
16597	  }
16598	  break;
16599	case HImode:
16600	  /* Store HImodes as SImodes.  */
16601	  operand = gen_lowpart (SImode, operand);
16602	  /* FALLTHRU */
16603	case SImode:
16604	  emit_insn (
16605		      gen_rtx_SET (VOIDmode,
16606				   gen_rtx_MEM (GET_MODE (operand),
16607						gen_rtx_PRE_DEC (SImode,
16608							stack_pointer_rtx)),
16609				   operand));
16610	  break;
16611	default:
16612	  gcc_unreachable ();
16613	}
16614      result = gen_rtx_MEM (mode, stack_pointer_rtx);
16615    }
16616  return result;
16617}
16618
16619/* Free operand from the memory.  */
16620void
16621ix86_free_from_memory (enum machine_mode mode)
16622{
16623  if (!TARGET_RED_ZONE)
16624    {
16625      int size;
16626
16627      if (mode == DImode || TARGET_64BIT)
16628	size = 8;
16629      else
16630	size = 4;
16631      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
16632         to a pop or add instruction if registers are available.  */
16633      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16634			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16635					    GEN_INT (size))));
16636    }
16637}
16638
16639/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16640   QImode must go into class Q_REGS.
16641   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
16642   movdf to do mem-to-mem moves through integer regs.  */
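/* For example, asking to reload (const_double 1.0) into an SSE class yields
   NO_REGS, forcing the constant to memory, while with 387 math the same
   constant can stay in FLOAT_REGS because fld1 materializes it directly.  */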
16643enum reg_class
16644ix86_preferred_reload_class (rtx x, enum reg_class class)
16645{
16646  enum machine_mode mode = GET_MODE (x);
16647
16648  /* We're only allowed to return a subclass of CLASS.  Many of the
16649     following checks fail for NO_REGS, so eliminate that early.  */
16650  if (class == NO_REGS)
16651    return NO_REGS;
16652
16653  /* All classes can load zeros.  */
16654  if (x == CONST0_RTX (mode))
16655    return class;
16656
16657  /* Force constants into memory if we are loading a (nonzero) constant into
16658     an MMX or SSE register.  This is because there are no MMX/SSE instructions
16659     to load from a constant.  */
16660  if (CONSTANT_P (x)
16661      && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16662    return NO_REGS;
16663
16664  /* Prefer SSE regs only, if we can use them for math.  */
16665  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16666    return SSE_CLASS_P (class) ? class : NO_REGS;
16667
16668  /* Floating-point constants need more complex checks.  */
16669  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16670    {
16671      /* General regs can load everything.  */
16672      if (reg_class_subset_p (class, GENERAL_REGS))
16673        return class;
16674
16675      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
16676	 zero above.  We only want to wind up preferring 80387 registers if
16677	 we plan on doing computation with them.  */
16678      if (TARGET_80387
16679	  && standard_80387_constant_p (x))
16680	{
16681	  /* Limit class to non-sse.  */
16682	  if (class == FLOAT_SSE_REGS)
16683	    return FLOAT_REGS;
16684	  if (class == FP_TOP_SSE_REGS)
16685	    return FP_TOP_REG;
16686	  if (class == FP_SECOND_SSE_REGS)
16687	    return FP_SECOND_REG;
16688	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16689	    return class;
16690	}
16691
16692      return NO_REGS;
16693    }
16694
16695  /* Generally when we see PLUS here, it's the function invariant
16696     (plus soft-fp const_int), which can only be computed into general
16697     regs.  */
16698  if (GET_CODE (x) == PLUS)
16699    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16700
16701  /* QImode constants are easy to load, but non-constant QImode data
16702     must go into Q_REGS.  */
16703  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16704    {
16705      if (reg_class_subset_p (class, Q_REGS))
16706	return class;
16707      if (reg_class_subset_p (Q_REGS, class))
16708	return Q_REGS;
16709      return NO_REGS;
16710    }
16711
16712  return class;
16713}
16714
16715/* Discourage putting floating-point values in SSE registers unless
16716   SSE math is being used, and likewise for the 387 registers.  */
16717enum reg_class
16718ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16719{
16720  enum machine_mode mode = GET_MODE (x);
16721
16722  /* Restrict the output reload class to the register bank that we are doing
16723     math on.  If we would like not to return a subset of CLASS, reject this
16724     alternative: if reload cannot do this, it will still use its choice.  */
16725  mode = GET_MODE (x);
16726  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16727    return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16728
16729  if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16730    {
16731      if (class == FP_TOP_SSE_REGS)
16732	return FP_TOP_REG;
16733      else if (class == FP_SECOND_SSE_REGS)
16734	return FP_SECOND_REG;
16735      else
16736	return FLOAT_CLASS_P (class) ? class : NO_REGS;
16737    }
16738
16739  return class;
16740}
16741
16742/* If we are copying between general and FP registers, we need a memory
16743   location. The same is true for SSE and MMX registers.
16744
16745   The macro can't work reliably when one of the CLASSES is a class containing
16746   registers from multiple units (SSE, MMX, integer).  We avoid this by never
16747   combining those units in single alternative in the machine description.
16748   Ensure that this constraint holds to avoid unexpected surprises.
16749
16750   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16751   enforce these sanity checks.  */
16752
16753int
16754ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16755			      enum machine_mode mode, int strict)
16756{
16757  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16758      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16759      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16760      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16761      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16762      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16763    {
16764      gcc_assert (!strict);
16765      return true;
16766    }
16767
16768  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16769    return true;
16770
16771  /* ??? This is a lie.  We do have moves between mmx/general, and between
16772     mmx/sse2.  But by saying we need secondary memory we discourage the
16773     register allocator from using the mmx registers unless needed.  */
16774  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16775    return true;
16776
16777  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16778    {
16779      /* SSE1 doesn't have any direct moves from other classes.  */
16780      if (!TARGET_SSE2)
16781	return true;
16782
16783      /* If the target says that inter-unit moves are more expensive
16784	 than moving through memory, then don't generate them.  */
16785      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16786	return true;
16787
16788      /* Between SSE and general, we have moves no larger than word size.  */
16789      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16790	return true;
16791
16792      /* ??? For the cost of one register reformat penalty, we could use
16793	 the same instructions to move SFmode and DFmode data, but the
16794	 relevant move patterns don't support those alternatives.  */
16795      if (mode == SFmode || mode == DFmode)
16796	return true;
16797    }
16798
16799  return false;
16800}
16801
16802/* Return true if the registers in CLASS cannot represent the change from
16803   modes FROM to TO.  */
16804
16805bool
16806ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16807			       enum reg_class class)
16808{
16809  if (from == to)
16810    return false;
16811
16812  /* x87 registers can't do subreg at all, as all values are reformatted
16813     to extended precision.  */
16814  if (MAYBE_FLOAT_CLASS_P (class))
16815    return true;
16816
16817  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16818    {
16819      /* Vector registers do not support QI or HImode loads.  If we don't
16820	 disallow a change to these modes, reload will assume it's ok to
16821	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
16822	 the vec_dupv4hi pattern.  */
16823      if (GET_MODE_SIZE (from) < 4)
16824	return true;
16825
16826      /* Vector registers do not support subreg with nonzero offsets, which
16827	 are otherwise valid for integer registers.  Since we can't see
16828	 whether we have a nonzero offset from here, prohibit all
16829         nonparadoxical subregs changing size.  */
16830      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16831	return true;
16832    }
16833
16834  return false;
16835}
16836
16837/* Return the cost of moving data from a register in class CLASS1 to
16838   one in class CLASS2.
16839
16840   It is not required that the cost always equal 2 when FROM is the same as TO;
16841   on some machines it is expensive to move between registers if they are not
16842   general registers.  */
16843
16844int
16845ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16846			 enum reg_class class2)
16847{
16848  /* In case we require secondary memory, compute cost of the store followed
16849     by load.  In order to avoid bad register allocation choices, we need
16850     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
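  /* For example, an SImode move between the SSE and integer classes that
     needs secondary memory costs 1 plus the larger of the load/store costs
     for each class, before the extra penalties added below.  */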
16851
16852  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16853    {
16854      int cost = 1;
16855
16856      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16857		   MEMORY_MOVE_COST (mode, class1, 1));
16858      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16859		   MEMORY_MOVE_COST (mode, class2, 1));
16860
16861      /* In the case of copying from a general-purpose register we may emit
16862         multiple stores followed by a single load, causing a memory size
16863         mismatch stall.  Count this as an arbitrarily high cost of 20.  */
16864      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16865	cost += 20;
16866
16867      /* In the case of FP/MMX moves, the registers actually overlap, and we
16868	 have to switch modes in order to treat them differently.  */
16869      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16870          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16871	cost += 20;
16872
16873      return cost;
16874    }
16875
16876  /* Moves between SSE/MMX and integer unit are expensive.  */
16877  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16878      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16879    return ix86_cost->mmxsse_to_integer;
16880  if (MAYBE_FLOAT_CLASS_P (class1))
16881    return ix86_cost->fp_move;
16882  if (MAYBE_SSE_CLASS_P (class1))
16883    return ix86_cost->sse_move;
16884  if (MAYBE_MMX_CLASS_P (class1))
16885    return ix86_cost->mmx_move;
16886  return 2;
16887}
16888
16889/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
16890
16891bool
16892ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16893{
16894  /* Flags and only flags can only hold CCmode values.  */
16895  if (CC_REGNO_P (regno))
16896    return GET_MODE_CLASS (mode) == MODE_CC;
16897  if (GET_MODE_CLASS (mode) == MODE_CC
16898      || GET_MODE_CLASS (mode) == MODE_RANDOM
16899      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16900    return 0;
16901  if (FP_REGNO_P (regno))
16902    return VALID_FP_MODE_P (mode);
16903  if (SSE_REGNO_P (regno))
16904    {
16905      /* We implement the move patterns for all vector modes into and
16906	 out of SSE registers, even when no operation instructions
16907	 are available.  */
16908      return (VALID_SSE_REG_MODE (mode)
16909	      || VALID_SSE2_REG_MODE (mode)
16910	      || VALID_MMX_REG_MODE (mode)
16911	      || VALID_MMX_REG_MODE_3DNOW (mode));
16912    }
16913  if (MMX_REGNO_P (regno))
16914    {
16915      /* We implement the move patterns for 3DNOW modes even in MMX mode,
16916	 so if the register is available at all, then we can move data of
16917	 the given mode into or out of it.  */
16918      return (VALID_MMX_REG_MODE (mode)
16919	      || VALID_MMX_REG_MODE_3DNOW (mode));
16920    }
16921
16922  if (mode == QImode)
16923    {
16924      /* Take care with QImode values - they can be in non-QI regs,
16925	 but then they do cause partial register stalls.  */
16926      if (regno < 4 || TARGET_64BIT)
16927	return 1;
16928      if (!TARGET_PARTIAL_REG_STALL)
16929	return 1;
16930      return reload_in_progress || reload_completed;
16931    }
16932  /* We handle both integer and floats in the general purpose registers.  */
16933  else if (VALID_INT_MODE_P (mode))
16934    return 1;
16935  else if (VALID_FP_MODE_P (mode))
16936    return 1;
16937  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
16938     on to use that value in smaller contexts, this can easily force a
16939     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
16940     supporting DImode, allow it.  */
16941  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16942    return 1;
16943
16944  return 0;
16945}
16946
16947/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
16948   tieable integer mode.  */
16949
16950static bool
16951ix86_tieable_integer_mode_p (enum machine_mode mode)
16952{
16953  switch (mode)
16954    {
16955    case HImode:
16956    case SImode:
16957      return true;
16958
16959    case QImode:
16960      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16961
16962    case DImode:
16963      return TARGET_64BIT;
16964
16965    default:
16966      return false;
16967    }
16968}
16969
16970/* Return true if MODE1 is accessible in a register that can hold MODE2
16971   without copying.  That is, all register classes that can hold MODE2
16972   can also hold MODE1.  */
16973
16974bool
16975ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16976{
16977  if (mode1 == mode2)
16978    return true;
16979
16980  if (ix86_tieable_integer_mode_p (mode1)
16981      && ix86_tieable_integer_mode_p (mode2))
16982    return true;
16983
16984  /* MODE2 being XFmode implies fp stack or general regs, which means we
16985     can tie any smaller floating point modes to it.  Note that we do not
16986     tie this with TFmode.  */
16987  if (mode2 == XFmode)
16988    return mode1 == SFmode || mode1 == DFmode;
16989
16990  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16991     that we can tie it with SFmode.  */
16992  if (mode2 == DFmode)
16993    return mode1 == SFmode;
16994
16995  /* If MODE2 is only appropriate for an SSE register, then tie with
16996     any other mode acceptable to SSE registers.  */
16997  if (GET_MODE_SIZE (mode2) >= 8
16998      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16999    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17000
17001  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17002     with any other mode acceptable to MMX registers.  */
17003  if (GET_MODE_SIZE (mode2) == 8
17004      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17005    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17006
17007  return false;
17008}
17009
17010/* Return the cost of moving data of mode M between a
17011   register and memory.  A value of 2 is the default; this cost is
17012   relative to those in `REGISTER_MOVE_COST'.
17013
17014   If moving between registers and memory is more expensive than
17015   between two registers, you should define this macro to express the
17016   relative cost.
17017
17018   Also model the increased cost of moving QImode registers in
17019   non-Q_REGS classes.
17020 */
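/* For example, a DFmode load into FLOAT_REGS costs fp_load[1], while a
   16-byte vector store from SSE_REGS costs sse_store[2], both taken from
   the active cost table.  */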
17021int
17022ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17023{
17024  if (FLOAT_CLASS_P (class))
17025    {
17026      int index;
17027      switch (mode)
17028	{
17029	  case SFmode:
17030	    index = 0;
17031	    break;
17032	  case DFmode:
17033	    index = 1;
17034	    break;
17035	  case XFmode:
17036	    index = 2;
17037	    break;
17038	  default:
17039	    return 100;
17040	}
17041      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17042    }
17043  if (SSE_CLASS_P (class))
17044    {
17045      int index;
17046      switch (GET_MODE_SIZE (mode))
17047	{
17048	  case 4:
17049	    index = 0;
17050	    break;
17051	  case 8:
17052	    index = 1;
17053	    break;
17054	  case 16:
17055	    index = 2;
17056	    break;
17057	  default:
17058	    return 100;
17059	}
17060      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17061    }
17062  if (MMX_CLASS_P (class))
17063    {
17064      int index;
17065      switch (GET_MODE_SIZE (mode))
17066	{
17067	  case 4:
17068	    index = 0;
17069	    break;
17070	  case 8:
17071	    index = 1;
17072	    break;
17073	  default:
17074	    return 100;
17075	}
17076      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17077    }
17078  switch (GET_MODE_SIZE (mode))
17079    {
17080      case 1:
17081	if (in)
17082	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17083		  : ix86_cost->movzbl_load);
17084	else
17085	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17086		  : ix86_cost->int_store[0] + 4);
17087	break;
17088      case 2:
17089	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17090      default:
17091	/* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
17092	if (mode == TFmode)
17093	  mode = XFmode;
17094	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17095		* (((int) GET_MODE_SIZE (mode)
17096		    + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17097    }
17098}
17099
17100/* Compute a (partial) cost for rtx X.  Return true if the complete
17101   cost has been computed, and false if subexpressions should be
17102   scanned.  In either case, *TOTAL contains the cost result.  */
17103
17104static bool
17105ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17106{
17107  enum machine_mode mode = GET_MODE (x);
17108
17109  switch (code)
17110    {
17111    case CONST_INT:
17112    case CONST:
17113    case LABEL_REF:
17114    case SYMBOL_REF:
17115      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17116	*total = 3;
17117      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17118	*total = 2;
17119      else if (flag_pic && SYMBOLIC_CONST (x)
17120	       && (!TARGET_64BIT
17121		   || (GET_CODE (x) != LABEL_REF
17122		       && (GET_CODE (x) != SYMBOL_REF
17123		           || !SYMBOL_REF_LOCAL_P (x)))))
17124	*total = 1;
17125      else
17126	*total = 0;
17127      return true;
17128
17129    case CONST_DOUBLE:
17130      if (mode == VOIDmode)
17131	*total = 0;
17132      else
17133	switch (standard_80387_constant_p (x))
17134	  {
17135	  case 1: /* 0.0 */
17136	    *total = 1;
17137	    break;
17138	  default: /* Other constants */
17139	    *total = 2;
17140	    break;
17141	  case 0:
17142	  case -1:
17143	    /* Start with (MEM (SYMBOL_REF)), since that's where
17144	       it'll probably end up.  Add a penalty for size.  */
17145	    *total = (COSTS_N_INSNS (1)
17146		      + (flag_pic != 0 && !TARGET_64BIT)
17147		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17148	    break;
17149	  }
17150      return true;
17151
17152    case ZERO_EXTEND:
17153      /* The zero extension is often completely free on x86_64, so make
17154	 it as cheap as possible.  */
17155      if (TARGET_64BIT && mode == DImode
17156	  && GET_MODE (XEXP (x, 0)) == SImode)
17157	*total = 1;
17158      else if (TARGET_ZERO_EXTEND_WITH_AND)
17159	*total = ix86_cost->add;
17160      else
17161	*total = ix86_cost->movzx;
17162      return false;
17163
17164    case SIGN_EXTEND:
17165      *total = ix86_cost->movsx;
17166      return false;
17167
17168    case ASHIFT:
17169      if (GET_CODE (XEXP (x, 1)) == CONST_INT
17170	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17171	{
17172	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17173	  if (value == 1)
17174	    {
17175	      *total = ix86_cost->add;
17176	      return false;
17177	    }
17178	  if ((value == 2 || value == 3)
17179	      && ix86_cost->lea <= ix86_cost->shift_const)
17180	    {
17181	      *total = ix86_cost->lea;
17182	      return false;
17183	    }
17184	}
17185      /* FALLTHRU */
17186
17187    case ROTATE:
17188    case ASHIFTRT:
17189    case LSHIFTRT:
17190    case ROTATERT:
17191      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17192	{
17193	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17194	    {
17195	      if (INTVAL (XEXP (x, 1)) > 32)
17196		*total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17197	      else
17198		*total = ix86_cost->shift_const * 2;
17199	    }
17200	  else
17201	    {
17202	      if (GET_CODE (XEXP (x, 1)) == AND)
17203		*total = ix86_cost->shift_var * 2;
17204	      else
17205		*total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17206	    }
17207	}
17208      else
17209	{
17210	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17211	    *total = ix86_cost->shift_const;
17212	  else
17213	    *total = ix86_cost->shift_var;
17214	}
17215      return false;
17216
17217    case MULT:
17218      if (FLOAT_MODE_P (mode))
17219	{
17220	  *total = ix86_cost->fmul;
17221	  return false;
17222	}
17223      else
17224	{
17225	  rtx op0 = XEXP (x, 0);
17226	  rtx op1 = XEXP (x, 1);
17227	  int nbits;
17228	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17229	    {
17230	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17231	      for (nbits = 0; value != 0; value &= value - 1)
17232	        nbits++;
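	      /* NBITS is now the number of set bits in the constant; e.g. a
		 multiply by 10 (binary 1010) counts two bits, feeding the
		 per-set-bit term of the cost below.  */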
17233	    }
17234	  else
17235	    /* This is arbitrary.  */
17236	    nbits = 7;
17237
17238	  /* Compute costs correctly for widening multiplication.  */
17239	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17240	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17241	         == GET_MODE_SIZE (mode))
17242	    {
17243	      int is_mulwiden = 0;
17244	      enum machine_mode inner_mode = GET_MODE (op0);
17245
17246	      if (GET_CODE (op0) == GET_CODE (op1))
17247		is_mulwiden = 1, op1 = XEXP (op1, 0);
17248	      else if (GET_CODE (op1) == CONST_INT)
17249		{
17250		  if (GET_CODE (op0) == SIGN_EXTEND)
17251		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17252			          == INTVAL (op1);
17253		  else
17254		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17255	        }
17256
17257	      if (is_mulwiden)
17258	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17259	    }
17260
17261  	  *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17262		    + nbits * ix86_cost->mult_bit
17263	            + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17264
17265          return true;
17266	}
17267
17268    case DIV:
17269    case UDIV:
17270    case MOD:
17271    case UMOD:
17272      if (FLOAT_MODE_P (mode))
17273	*total = ix86_cost->fdiv;
17274      else
17275	*total = ix86_cost->divide[MODE_INDEX (mode)];
17276      return false;
17277
17278    case PLUS:
17279      if (FLOAT_MODE_P (mode))
17280	*total = ix86_cost->fadd;
17281      else if (GET_MODE_CLASS (mode) == MODE_INT
17282	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17283	{
17284	  if (GET_CODE (XEXP (x, 0)) == PLUS
17285	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17286	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17287	      && CONSTANT_P (XEXP (x, 1)))
17288	    {
17289	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17290	      if (val == 2 || val == 4 || val == 8)
17291		{
17292		  *total = ix86_cost->lea;
17293		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17294		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17295				      outer_code);
17296		  *total += rtx_cost (XEXP (x, 1), outer_code);
17297		  return true;
17298		}
17299	    }
17300	  else if (GET_CODE (XEXP (x, 0)) == MULT
17301		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17302	    {
17303	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17304	      if (val == 2 || val == 4 || val == 8)
17305		{
17306		  *total = ix86_cost->lea;
17307		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17308		  *total += rtx_cost (XEXP (x, 1), outer_code);
17309		  return true;
17310		}
17311	    }
17312	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
17313	    {
17314	      *total = ix86_cost->lea;
17315	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17316	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17317	      *total += rtx_cost (XEXP (x, 1), outer_code);
17318	      return true;
17319	    }
17320	}
17321      /* FALLTHRU */
17322
17323    case MINUS:
17324      if (FLOAT_MODE_P (mode))
17325	{
17326	  *total = ix86_cost->fadd;
17327	  return false;
17328	}
17329      /* FALLTHRU */
17330
17331    case AND:
17332    case IOR:
17333    case XOR:
17334      if (!TARGET_64BIT && mode == DImode)
17335	{
17336	  *total = (ix86_cost->add * 2
17337		    + (rtx_cost (XEXP (x, 0), outer_code)
17338		       << (GET_MODE (XEXP (x, 0)) != DImode))
17339		    + (rtx_cost (XEXP (x, 1), outer_code)
17340	               << (GET_MODE (XEXP (x, 1)) != DImode)));
17341	  return true;
17342	}
17343      /* FALLTHRU */
17344
17345    case NEG:
17346      if (FLOAT_MODE_P (mode))
17347	{
17348	  *total = ix86_cost->fchs;
17349	  return false;
17350	}
17351      /* FALLTHRU */
17352
17353    case NOT:
17354      if (!TARGET_64BIT && mode == DImode)
17355	*total = ix86_cost->add * 2;
17356      else
17357	*total = ix86_cost->add;
17358      return false;
17359
17360    case COMPARE:
17361      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17362	  && XEXP (XEXP (x, 0), 1) == const1_rtx
17363	  && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17364	  && XEXP (x, 1) == const0_rtx)
17365	{
17366	  /* This kind of construct is implemented using test[bwl].
17367	     Treat it as if we had an AND.  */
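	  /* e.g. (compare (zero_extract X 1 5) 0) tests a single bit and is
	     emitted as a test instruction with mask 0x20.  */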
17368	  *total = (ix86_cost->add
17369		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17370		    + rtx_cost (const1_rtx, outer_code));
17371	  return true;
17372	}
17373      return false;
17374
17375    case FLOAT_EXTEND:
17376      if (!TARGET_SSE_MATH
17377	  || mode == XFmode
17378	  || (mode == DFmode && !TARGET_SSE2))
17379	/* For standard 80387 constants, raise the cost to prevent
17380	   compress_float_constant() from generating a load from memory.  */
17381	switch (standard_80387_constant_p (XEXP (x, 0)))
17382	  {
17383	  case -1:
17384	  case 0:
17385	    *total = 0;
17386	    break;
17387	  case 1: /* 0.0 */
17388	    *total = 1;
17389	    break;
17390	  default:
17391	    *total = (x86_ext_80387_constants & TUNEMASK
17392		      || optimize_size
17393		      ? 1 : 0);
17394	  }
17395      return false;
17396
17397    case ABS:
17398      if (FLOAT_MODE_P (mode))
17399	*total = ix86_cost->fabs;
17400      return false;
17401
17402    case SQRT:
17403      if (FLOAT_MODE_P (mode))
17404	*total = ix86_cost->fsqrt;
17405      return false;
17406
17407    case UNSPEC:
17408      if (XINT (x, 1) == UNSPEC_TP)
17409	*total = 0;
17410      return false;
17411
17412    default:
17413      return false;
17414    }
17415}
17416
17417#if TARGET_MACHO
17418
17419static int current_machopic_label_num;
17420
17421/* Given a symbol name and its associated stub, write out the
17422   definition of the stub.  */
17423
17424void
17425machopic_output_stub (FILE *file, const char *symb, const char *stub)
17426{
17427  unsigned int length;
17428  char *binder_name, *symbol_name, lazy_ptr_name[32];
17429  int label = ++current_machopic_label_num;
17430
17431  /* For 64-bit we shouldn't get here.  */
17432  gcc_assert (!TARGET_64BIT);
17433
17434  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
17435  symb = (*targetm.strip_name_encoding) (symb);
17436
17437  length = strlen (stub);
17438  binder_name = alloca (length + 32);
17439  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17440
17441  length = strlen (symb);
17442  symbol_name = alloca (length + 32);
17443  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17444
17445  sprintf (lazy_ptr_name, "L%d$lz", label);
17446
17447  if (MACHOPIC_PURE)
17448    switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17449  else
17450    switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17451
17452  fprintf (file, "%s:\n", stub);
17453  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17454
17455  if (MACHOPIC_PURE)
17456    {
17457      fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17458      fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17459      fprintf (file, "\tjmp\t*%%edx\n");
17460    }
17461  else
17462    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17463
17464  fprintf (file, "%s:\n", binder_name);
17465
17466  if (MACHOPIC_PURE)
17467    {
17468      fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17469      fprintf (file, "\tpushl\t%%eax\n");
17470    }
17471  else
17472    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17473
17474  fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17475
17476  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17477  fprintf (file, "%s:\n", lazy_ptr_name);
17478  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17479  fprintf (file, "\t.long %s\n", binder_name);
17480}
17481
17482void
17483darwin_x86_file_end (void)
17484{
17485  darwin_file_end ();
17486  ix86_file_end ();
17487}
17488#endif /* TARGET_MACHO */
17489
17490/* Order the registers for register allocator.  */
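/* Call-used (caller-saved) general registers are handed out first,
   presumably so that short-lived values avoid save/restore code, followed
   by call-saved ones, then the x87, SSE and MMX banks ordered by whether
   SSE math is in use.  */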
17491
17492void
17493x86_order_regs_for_local_alloc (void)
17494{
17495   int pos = 0;
17496   int i;
17497
17498   /* First allocate the local general purpose registers.  */
17499   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17500     if (GENERAL_REGNO_P (i) && call_used_regs[i])
17501	reg_alloc_order [pos++] = i;
17502
17503   /* Global general purpose registers.  */
17504   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17505     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17506	reg_alloc_order [pos++] = i;
17507
17508   /* x87 registers come first in case we are doing FP math
17509      using them.  */
17510   if (!TARGET_SSE_MATH)
17511     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17512       reg_alloc_order [pos++] = i;
17513
17514   /* SSE registers.  */
17515   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17516     reg_alloc_order [pos++] = i;
17517   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17518     reg_alloc_order [pos++] = i;
17519
17520   /* x87 registers.  */
17521   if (TARGET_SSE_MATH)
17522     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17523       reg_alloc_order [pos++] = i;
17524
17525   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17526     reg_alloc_order [pos++] = i;
17527
17528   /* Initialize the rest of the array, as we do not allocate some registers
17529      at all.  */
17530   while (pos < FIRST_PSEUDO_REGISTER)
17531     reg_alloc_order [pos++] = 0;
17532}
17533
17534/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17535   struct attribute_spec.handler.  */
17536static tree
17537ix86_handle_struct_attribute (tree *node, tree name,
17538			      tree args ATTRIBUTE_UNUSED,
17539			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17540{
17541  tree *type = NULL;
17542  if (DECL_P (*node))
17543    {
17544      if (TREE_CODE (*node) == TYPE_DECL)
17545	type = &TREE_TYPE (*node);
17546    }
17547  else
17548    type = node;
17549
17550  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17551		 || TREE_CODE (*type) == UNION_TYPE)))
17552    {
17553      warning (OPT_Wattributes, "%qs attribute ignored",
17554	       IDENTIFIER_POINTER (name));
17555      *no_add_attrs = true;
17556    }
17557
17558  else if ((is_attribute_p ("ms_struct", name)
17559	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17560	   || ((is_attribute_p ("gcc_struct", name)
17561		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17562    {
17563      warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17564               IDENTIFIER_POINTER (name));
17565      *no_add_attrs = true;
17566    }
17567
17568  return NULL_TREE;
17569}
17570
17571static bool
17572ix86_ms_bitfield_layout_p (tree record_type)
17573{
17574  return ((TARGET_MS_BITFIELD_LAYOUT
17575	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17576	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
17577}
17578
17579/* Returns an expression indicating where the this parameter is
17580   located on entry to the FUNCTION.  */
17581
17582static rtx
17583x86_this_parameter (tree function)
17584{
17585  tree type = TREE_TYPE (function);
17586
17587  if (TARGET_64BIT)
17588    {
17589      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17590      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17591    }
17592
17593  if (ix86_function_regparm (type, function) > 0)
17594    {
17595      tree parm;
17596
17597      parm = TYPE_ARG_TYPES (type);
17598      /* Figure out whether or not the function has a variable number of
17599	 arguments.  */
17600      for (; parm; parm = TREE_CHAIN (parm))
17601	if (TREE_VALUE (parm) == void_type_node)
17602	  break;
17603      /* If not, the this parameter is in the first argument.  */
17604      if (parm)
17605	{
17606	  int regno = 0;
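	  /* With regparm the first argument (and hence this) arrives in
	     %eax (regno 0); fastcall uses %ecx (regno 2) instead.  */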
17607	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17608	    regno = 2;
17609	  return gen_rtx_REG (SImode, regno);
17610	}
17611    }
17612
17613  if (aggregate_value_p (TREE_TYPE (type), type))
17614    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17615  else
17616    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17617}
17618
17619/* Determine whether x86_output_mi_thunk can succeed.  */
17620
17621static bool
17622x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17623			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17624			 HOST_WIDE_INT vcall_offset, tree function)
17625{
17626  /* 64-bit can handle anything.  */
17627  if (TARGET_64BIT)
17628    return true;
17629
17630  /* For 32-bit, everything's fine if we have one free register.  */
17631  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17632    return true;
17633
17634  /* Need a free register for vcall_offset.  */
17635  if (vcall_offset)
17636    return false;
17637
17638  /* Need a free register for GOT references.  */
17639  if (flag_pic && !(*targetm.binds_local_p) (function))
17640    return false;
17641
17642  /* Otherwise ok.  */
17643  return true;
17644}
17645
17646/* Output the assembler code for a thunk function.  THUNK_DECL is the
17647   declaration for the thunk function itself, FUNCTION is the decl for
17648   the target function.  DELTA is an immediate constant offset to be
17649   added to THIS.  If VCALL_OFFSET is nonzero, the word at
17650   *(*this + vcall_offset) should be added to THIS.  */
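/* For instance, a simple 32-bit thunk with only a DELTA adjustment and a
   locally bound target reduces to an addl of DELTA to the incoming this
   slot (4(%esp), or 8(%esp) when the result is returned in memory),
   followed by a direct jmp to the target.  */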
17651
17652static void
17653x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17654		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17655		     HOST_WIDE_INT vcall_offset, tree function)
17656{
17657  rtx xops[3];
17658  rtx this = x86_this_parameter (function);
17659  rtx this_reg, tmp;
17660
17661  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
17662     pull it in now and let DELTA benefit.  */
17663  if (REG_P (this))
17664    this_reg = this;
17665  else if (vcall_offset)
17666    {
17667      /* Put the this parameter into %eax.  */
17668      xops[0] = this;
17669      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17670      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17671    }
17672  else
17673    this_reg = NULL_RTX;
17674
17675  /* Adjust the this parameter by a fixed constant.  */
17676  if (delta)
17677    {
17678      xops[0] = GEN_INT (delta);
17679      xops[1] = this_reg ? this_reg : this;
17680      if (TARGET_64BIT)
17681	{
17682	  if (!x86_64_general_operand (xops[0], DImode))
17683	    {
17684	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17685	      xops[1] = tmp;
17686	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17687	      xops[0] = tmp;
17688	      xops[1] = this;
17689	    }
17690	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17691	}
17692      else
17693	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17694    }
17695
17696  /* Adjust the this parameter by a value stored in the vtable.  */
17697  if (vcall_offset)
17698    {
17699      if (TARGET_64BIT)
17700	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17701      else
17702	{
17703	  int tmp_regno = 2 /* ECX */;
17704	  if (lookup_attribute ("fastcall",
17705	      TYPE_ATTRIBUTES (TREE_TYPE (function))))
17706	    tmp_regno = 0 /* EAX */;
17707	  tmp = gen_rtx_REG (SImode, tmp_regno);
17708	}
17709
17710      xops[0] = gen_rtx_MEM (Pmode, this_reg);
17711      xops[1] = tmp;
17712      if (TARGET_64BIT)
17713	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17714      else
17715	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17716
17717      /* Adjust the this parameter.  */
17718      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17719      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17720	{
17721	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17722	  xops[0] = GEN_INT (vcall_offset);
17723	  xops[1] = tmp2;
17724	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17725	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17726	}
17727      xops[1] = this_reg;
17728      if (TARGET_64BIT)
17729	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17730      else
17731	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17732    }
17733
17734  /* If necessary, drop THIS back to its stack slot.  */
17735  if (this_reg && this_reg != this)
17736    {
17737      xops[0] = this_reg;
17738      xops[1] = this;
17739      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17740    }
17741
17742  xops[0] = XEXP (DECL_RTL (function), 0);
17743  if (TARGET_64BIT)
17744    {
17745      if (!flag_pic || (*targetm.binds_local_p) (function))
17746	output_asm_insn ("jmp\t%P0", xops);
17747      else
17748	{
17749	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17750	  tmp = gen_rtx_CONST (Pmode, tmp);
17751	  tmp = gen_rtx_MEM (QImode, tmp);
17752	  xops[0] = tmp;
17753	  output_asm_insn ("jmp\t%A0", xops);
17754	}
17755    }
17756  else
17757    {
17758      if (!flag_pic || (*targetm.binds_local_p) (function))
17759	output_asm_insn ("jmp\t%P0", xops);
17760      else
17761#if TARGET_MACHO
17762	if (TARGET_MACHO)
17763	  {
17764	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
17765	    tmp = (gen_rtx_SYMBOL_REF
17766		   (Pmode,
17767		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17768	    tmp = gen_rtx_MEM (QImode, tmp);
17769	    xops[0] = tmp;
17770	    output_asm_insn ("jmp\t%0", xops);
17771	  }
17772	else
17773#endif /* TARGET_MACHO */
17774	{
17775	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17776	  output_set_got (tmp, NULL_RTX);
17777
17778	  xops[1] = tmp;
17779	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17780	  output_asm_insn ("jmp\t{*}%1", xops);
17781	}
17782    }
17783}
17784
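/* Emit assembler directives at the start of the output file: the default
   preamble plus the .version, __fltused and .intel_syntax directives when
   the corresponding target macros request them.  */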
17785static void
17786x86_file_start (void)
17787{
17788  default_file_start ();
17789#if TARGET_MACHO
17790  darwin_file_start ();
17791#endif
17792  if (X86_FILE_START_VERSION_DIRECTIVE)
17793    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17794  if (X86_FILE_START_FLTUSED)
17795    fputs ("\t.global\t__fltused\n", asm_out_file);
17796  if (ix86_asm_dialect == ASM_INTEL)
17797    fputs ("\t.intel_syntax\n", asm_out_file);
17798}
17799
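/* Return the alignment, in bits, to give FIELD, where COMPUTED is the
   alignment the language-independent code came up with.  On 32-bit targets
   without -malign-double, double and (complex) integer fields are capped
   at 32-bit alignment.  */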
17800int
17801x86_field_alignment (tree field, int computed)
17802{
17803  enum machine_mode mode;
17804  tree type = TREE_TYPE (field);
17805
17806  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17807    return computed;
17808  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17809		    ? get_inner_array_type (type) : type);
17810  if (mode == DFmode || mode == DCmode
17811      || GET_MODE_CLASS (mode) == MODE_INT
17812      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17813    return MIN (32, computed);
17814  return computed;
17815}
17816
17817/* Output assembler code to FILE to increment profiler label # LABELNO
17818   for profiling a function entry.  */
17819void
17820x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17821{
17822  if (TARGET_64BIT)
17823    if (flag_pic)
17824      {
17825#ifndef NO_PROFILE_COUNTERS
17826	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17827#endif
17828	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17829      }
17830    else
17831      {
17832#ifndef NO_PROFILE_COUNTERS
17833	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17834#endif
17835	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17836      }
17837  else if (flag_pic)
17838    {
17839#ifndef NO_PROFILE_COUNTERS
17840      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17841	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17842#endif
17843      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17844    }
17845  else
17846    {
17847#ifndef NO_PROFILE_COUNTERS
17848      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17849	       PROFILE_COUNT_REGISTER);
17850#endif
17851      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17852    }
17853}
17854
17855/* We don't have exact information about the insn sizes, but we may assume
17856   quite safely that we are informed about all 1 byte insns and memory
17857   address sizes.  This is enough to eliminate unnecessary padding in
17858   99% of cases.  */
17859
17860static int
17861min_insn_size (rtx insn)
17862{
17863  int l = 0;
17864
17865  if (!INSN_P (insn) || !active_insn_p (insn))
17866    return 0;
17867
17868  /* Discard alignments we have emitted, and jump tables.  */
17869  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17870      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17871    return 0;
17872  if (GET_CODE (insn) == JUMP_INSN
17873      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17874	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17875    return 0;
17876
17877  /* Important case - calls are always 5 bytes.
17878     It is common to have many calls in a row.  */
17879  if (GET_CODE (insn) == CALL_INSN
17880      && symbolic_reference_mentioned_p (PATTERN (insn))
17881      && !SIBLING_CALL_P (insn))
17882    return 5;
17883  if (get_attr_length (insn) <= 1)
17884    return 1;
17885
17886  /* For normal instructions we may rely on the sizes of addresses
17887     and the presence of a symbol to require 4 bytes of encoding.
17888     This is not the case for jumps where references are PC relative.  */
17889  if (GET_CODE (insn) != JUMP_INSN)
17890    {
17891      l = get_attr_length_address (insn);
17892      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17893	l = 4;
17894    }
17895  if (l)
17896    return 1+l;
17897  else
17898    return 2;
17899}
17900
17901/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
17902   16 byte window.  */
17903
17904static void
17905ix86_avoid_jump_misspredicts (void)
17906{
17907  rtx insn, start = get_insns ();
17908  int nbytes = 0, njumps = 0;
17909  int isjump = 0;
17910
17911  /* Look for all minimal intervals of instructions containing 4 jumps.
17912     The intervals are bounded by START and INSN.  NBYTES is the total
17913     size of the instructions in the interval, including INSN but not
17914     including START.  When NBYTES is smaller than 16, it is possible
17915     that the ends of START and INSN fall into the same 16 byte window.
17916
17917     The smallest offset within that window at which INSN can start occurs
17918     when START ends at offset 0; the offset of INSN is then
17919     NBYTES - sizeof (INSN).  We emit a p2align for the 16 byte window with
17920     maxskip 15 - NBYTES + sizeof (INSN).  */
17921  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17922    {
17923
17924      nbytes += min_insn_size (insn);
17925      if (dump_file)
17926        fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17927		INSN_UID (insn), min_insn_size (insn));
17928      if ((GET_CODE (insn) == JUMP_INSN
17929	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
17930	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17931	  || GET_CODE (insn) == CALL_INSN)
17932	njumps++;
17933      else
17934	continue;
17935
17936      while (njumps > 3)
17937	{
17938	  start = NEXT_INSN (start);
17939	  if ((GET_CODE (start) == JUMP_INSN
17940	       && GET_CODE (PATTERN (start)) != ADDR_VEC
17941	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17942	      || GET_CODE (start) == CALL_INSN)
17943	    njumps--, isjump = 1;
17944	  else
17945	    isjump = 0;
17946	  nbytes -= min_insn_size (start);
17947	}
17948      gcc_assert (njumps >= 0);
17949      if (dump_file)
17950        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17951		INSN_UID (start), INSN_UID (insn), nbytes);
17952
17953      if (njumps == 3 && isjump && nbytes < 16)
17954	{
17955	  int padsize = 15 - nbytes + min_insn_size (insn);
17956
17957	  if (dump_file)
17958	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17959		     INSN_UID (insn), padsize);
17960          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
17961	}
17962    }
17963}
17964
17965/* The AMD Athlon works faster
17966   when RET is not the destination of a conditional jump or directly preceded
17967   by another jump instruction.  We avoid the penalty by inserting a NOP just
17968   before the RET instruction in such cases.  */
17969static void
17970ix86_pad_returns (void)
17971{
17972  edge e;
17973  edge_iterator ei;
17974
17975  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17976    {
17977      basic_block bb = e->src;
17978      rtx ret = BB_END (bb);
17979      rtx prev;
17980      bool replace = false;
17981
17982      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17983	  || !maybe_hot_bb_p (bb))
17984	continue;
17985      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17986	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17987	  break;
17988      if (prev && GET_CODE (prev) == CODE_LABEL)
17989	{
17990	  edge e;
17991	  edge_iterator ei;
17992
17993	  FOR_EACH_EDGE (e, ei, bb->preds)
17994	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
17995		&& !(e->flags & EDGE_FALLTHRU))
17996	      replace = true;
17997	}
17998      if (!replace)
17999	{
18000	  prev = prev_active_insn (ret);
18001	  if (prev
18002	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18003		  || GET_CODE (prev) == CALL_INSN))
18004	    replace = true;
18005	  /* Empty functions get a branch mispredict even when the jump destination
18006	     is not visible to us.  */
18007	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18008	    replace = true;
18009	}
18010      if (replace)
18011	{
18012	  emit_insn_before (gen_return_internal_long (), ret);
18013	  delete_insn (ret);
18014	}
18015    }
18016}
18017
18018/* Implement machine specific optimizations.  We implement padding of returns
18019   for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
18020static void
18021ix86_reorg (void)
18022{
18023  if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18024    ix86_pad_returns ();
18025  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18026    ix86_avoid_jump_misspredicts ();
18027}
18028
18029/* Return nonzero when a QImode register that must be represented via a REX
18030   prefix is used.  */
18031bool
18032x86_extended_QIreg_mentioned_p (rtx insn)
18033{
18034  int i;
18035  extract_insn_cached (insn);
18036  for (i = 0; i < recog_data.n_operands; i++)
18037    if (REG_P (recog_data.operand[i])
18038	&& REGNO (recog_data.operand[i]) >= 4)
18039       return true;
18040  return false;
18041}
18042
18043/* Return nonzero when P points to a register encoded via a REX prefix.
18044   Called via for_each_rtx.  */
18045static int
18046extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18047{
18048   unsigned int regno;
18049   if (!REG_P (*p))
18050     return 0;
18051   regno = REGNO (*p);
18052   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18053}
18054
18055/* Return true when INSN mentions a register that must be encoded using a REX
18056   prefix.  */
18057bool
18058x86_extended_reg_mentioned_p (rtx insn)
18059{
18060  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18061}
18062
18063/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
18064   optabs would emit if we didn't have TFmode patterns.  */
18065
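/* Non-negative inputs are converted directly.  Negative inputs (i.e. large
   unsigned values) are halved with the low bit folded back in,
   (i >> 1) | (i & 1), converted, and then doubled, so the result rounds
   the same way the full unsigned value would.  */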
18066void
18067x86_emit_floatuns (rtx operands[2])
18068{
18069  rtx neglab, donelab, i0, i1, f0, in, out;
18070  enum machine_mode mode, inmode;
18071
18072  inmode = GET_MODE (operands[1]);
18073  gcc_assert (inmode == SImode || inmode == DImode);
18074
18075  out = operands[0];
18076  in = force_reg (inmode, operands[1]);
18077  mode = GET_MODE (out);
18078  neglab = gen_label_rtx ();
18079  donelab = gen_label_rtx ();
18080  i1 = gen_reg_rtx (Pmode);
18081  f0 = gen_reg_rtx (mode);
18082
18083  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18084
18085  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18086  emit_jump_insn (gen_jump (donelab));
18087  emit_barrier ();
18088
18089  emit_label (neglab);
18090
18091  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18092  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18093  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18094  expand_float (f0, i0, 0);
18095  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18096
18097  emit_label (donelab);
18098}
18099
18100/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18101   with all elements equal to VAL.  Return true if successful.  */
18102
18103static bool
18104ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18105				   rtx target, rtx val)
18106{
18107  enum machine_mode smode, wsmode, wvmode;
18108  rtx x;
18109
18110  switch (mode)
18111    {
18112    case V2SImode:
18113    case V2SFmode:
18114      if (!mmx_ok)
18115	return false;
18116      /* FALLTHRU */
18117
18118    case V2DFmode:
18119    case V2DImode:
18120    case V4SFmode:
18121    case V4SImode:
18122      val = force_reg (GET_MODE_INNER (mode), val);
18123      x = gen_rtx_VEC_DUPLICATE (mode, val);
18124      emit_insn (gen_rtx_SET (VOIDmode, target, x));
18125      return true;
18126
18127    case V4HImode:
18128      if (!mmx_ok)
18129	return false;
18130      if (TARGET_SSE || TARGET_3DNOW_A)
18131	{
18132	  val = gen_lowpart (SImode, val);
18133	  x = gen_rtx_TRUNCATE (HImode, val);
18134	  x = gen_rtx_VEC_DUPLICATE (mode, x);
18135	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
18136	  return true;
18137	}
18138      else
18139	{
18140	  smode = HImode;
18141	  wsmode = SImode;
18142	  wvmode = V2SImode;
18143	  goto widen;
18144	}
18145
18146    case V8QImode:
18147      if (!mmx_ok)
18148	return false;
18149      smode = QImode;
18150      wsmode = HImode;
18151      wvmode = V4HImode;
18152      goto widen;
18153    case V8HImode:
18154      if (TARGET_SSE2)
18155	{
18156	  rtx tmp1, tmp2;
18157	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
18158	  tmp1 = gen_reg_rtx (SImode);
18159	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
18160	  /* Insert the SImode value as the low element of a V4SImode vector.  */
18161	  tmp2 = gen_reg_rtx (V4SImode);
18162	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18163				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18164				    CONST0_RTX (V4SImode),
18165				    const1_rtx);
18166	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18167	  /* Cast the V4SImode vector back to a V8HImode vector.  */
18168	  tmp1 = gen_reg_rtx (V8HImode);
18169	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18170	  /* Duplicate the low short through the whole low SImode word.  */
18171	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18172	  /* Cast the V8HImode vector back to a V4SImode vector.  */
18173	  tmp2 = gen_reg_rtx (V4SImode);
18174	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18175	  /* Replicate the low element of the V4SImode vector.  */
18176	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18177	  /* Cast the V4SImode vector back to V8HImode, and store in target.  */
18178	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18179	  return true;
18180	}
18181      smode = HImode;
18182      wsmode = SImode;
18183      wvmode = V4SImode;
18184      goto widen;
18185    case V16QImode:
18186      if (TARGET_SSE2)
18187	{
18188	  rtx tmp1, tmp2;
18189	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
18190	  tmp1 = gen_reg_rtx (SImode);
18191	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
18192	  /* Insert the SImode value as the low element of a V4SImode vector.  */
18193	  tmp2 = gen_reg_rtx (V4SImode);
18194	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18195				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18196				    CONST0_RTX (V4SImode),
18197				    const1_rtx);
18198	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18199	  /* Cast the V4SImode vector back to a V16QImode vector.  */
18200	  tmp1 = gen_reg_rtx (V16QImode);
18201	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18202	  /* Duplicate the low byte through the whole low SImode word.  */
18203	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18204	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18205	  /* Cast the V16QImode vector back to a V4SImode vector.  */
18206	  tmp2 = gen_reg_rtx (V4SImode);
18207	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18208	  /* Replicate the low element of the V4SImode vector.  */
18209	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18210	  /* Cast the V4SImode vector back to V16QImode, and store in target.  */
18211	  emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18212	  return true;
18213	}
18214      smode = QImode;
18215      wsmode = HImode;
18216      wvmode = V8HImode;
18217      goto widen;
18218    widen:
18219      /* Replicate the value once into the next wider mode and recurse.  */
18220      val = convert_modes (wsmode, smode, val, true);
18221      x = expand_simple_binop (wsmode, ASHIFT, val,
18222			       GEN_INT (GET_MODE_BITSIZE (smode)),
18223			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
18224      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18225
18226      x = gen_reg_rtx (wvmode);
18227      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18228	gcc_unreachable ();
18229      emit_move_insn (target, gen_lowpart (mode, x));
18230      return true;
18231
18232    default:
18233      return false;
18234    }
18235}
18236
18237/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18238   whose ONE_VAR element is VAR, and other elements are zero.  Return true
18239   if successful.  */
18240
18241static bool
18242ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18243				     rtx target, rtx var, int one_var)
18244{
18245  enum machine_mode vsimode;
18246  rtx new_target;
18247  rtx x, tmp;
18248
18249  switch (mode)
18250    {
18251    case V2SFmode:
18252    case V2SImode:
18253      if (!mmx_ok)
18254	return false;
18255      /* FALLTHRU */
18256
18257    case V2DFmode:
18258    case V2DImode:
18259      if (one_var != 0)
18260	return false;
18261      var = force_reg (GET_MODE_INNER (mode), var);
18262      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18263      emit_insn (gen_rtx_SET (VOIDmode, target, x));
18264      return true;
18265
18266    case V4SFmode:
18267    case V4SImode:
18268      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18269	new_target = gen_reg_rtx (mode);
18270      else
18271	new_target = target;
18272      var = force_reg (GET_MODE_INNER (mode), var);
18273      x = gen_rtx_VEC_DUPLICATE (mode, var);
18274      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18275      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18276      if (one_var != 0)
18277	{
18278	  /* We need to shuffle the value to the correct position, so
18279	     create a new pseudo to store the intermediate result.  */
18280
18281	  /* With SSE2, we can use the integer shuffle insns.  */
18282	  if (mode != V4SFmode && TARGET_SSE2)
18283	    {
18284	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18285					    GEN_INT (1),
18286					    GEN_INT (one_var == 1 ? 0 : 1),
18287					    GEN_INT (one_var == 2 ? 0 : 1),
18288					    GEN_INT (one_var == 3 ? 0 : 1)));
18289	      if (target != new_target)
18290		emit_move_insn (target, new_target);
18291	      return true;
18292	    }
18293
18294	  /* Otherwise convert the intermediate result to V4SFmode and
18295	     use the SSE1 shuffle instructions.  */
18296	  if (mode != V4SFmode)
18297	    {
18298	      tmp = gen_reg_rtx (V4SFmode);
18299	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18300	    }
18301	  else
18302	    tmp = new_target;
18303
18304	  emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18305				       GEN_INT (1),
18306				       GEN_INT (one_var == 1 ? 0 : 1),
18307				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
18308				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18309
18310	  if (mode != V4SFmode)
18311	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18312	  else if (tmp != target)
18313	    emit_move_insn (target, tmp);
18314	}
18315      else if (target != new_target)
18316	emit_move_insn (target, new_target);
18317      return true;
18318
18319    case V8HImode:
18320    case V16QImode:
18321      vsimode = V4SImode;
18322      goto widen;
18323    case V4HImode:
18324    case V8QImode:
18325      if (!mmx_ok)
18326	return false;
18327      vsimode = V2SImode;
18328      goto widen;
18329    widen:
18330      if (one_var != 0)
18331	return false;
18332
18333      /* Zero extend the variable element to SImode and recurse.  */
18334      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18335
18336      x = gen_reg_rtx (vsimode);
18337      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18338						var, one_var))
18339	gcc_unreachable ();
18340
18341      emit_move_insn (target, gen_lowpart (mode, x));
18342      return true;
18343
18344    default:
18345      return false;
18346    }
18347}
18348
18349/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18350   consisting of the values in VALS.  It is known that all elements
18351   except ONE_VAR are constants.  Return true if successful.  */
18352
18353static bool
18354ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18355				 rtx target, rtx vals, int one_var)
18356{
18357  rtx var = XVECEXP (vals, 0, one_var);
18358  enum machine_mode wmode;
18359  rtx const_vec, x;
18360
18361  const_vec = copy_rtx (vals);
18362  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18363  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18364
18365  switch (mode)
18366    {
18367    case V2DFmode:
18368    case V2DImode:
18369    case V2SFmode:
18370    case V2SImode:
18371      /* For the two element vectors, it's just as easy to use
18372	 the general case.  */
18373      return false;
18374
18375    case V4SFmode:
18376    case V4SImode:
18377    case V8HImode:
18378    case V4HImode:
18379      break;
18380
18381    case V16QImode:
18382      wmode = V8HImode;
18383      goto widen;
18384    case V8QImode:
18385      wmode = V4HImode;
18386      goto widen;
18387    widen:
18388      /* There's no way to set one QImode entry easily.  Combine
18389	 the variable value with its adjacent constant value, and
18390	 promote to an HImode set.  */
18391      x = XVECEXP (vals, 0, one_var ^ 1);
18392      if (one_var & 1)
18393	{
18394	  var = convert_modes (HImode, QImode, var, true);
18395	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18396				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
18397	  x = GEN_INT (INTVAL (x) & 0xff);
18398	}
18399      else
18400	{
18401	  var = convert_modes (HImode, QImode, var, true);
18402	  x = gen_int_mode (INTVAL (x) << 8, HImode);
18403	}
18404      if (x != const0_rtx)
18405	var = expand_simple_binop (HImode, IOR, var, x, var,
18406				   1, OPTAB_LIB_WIDEN);
18407
18408      x = gen_reg_rtx (wmode);
18409      emit_move_insn (x, gen_lowpart (wmode, const_vec));
18410      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18411
18412      emit_move_insn (target, gen_lowpart (mode, x));
18413      return true;
18414
18415    default:
18416      return false;
18417    }
18418
18419  emit_move_insn (target, const_vec);
18420  ix86_expand_vector_set (mmx_ok, target, var, one_var);
18421  return true;
18422}
18423
18424/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
18425   all values variable, and none identical.  */
18426
18427static void
18428ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18429				 rtx target, rtx vals)
18430{
18431  enum machine_mode half_mode = GET_MODE_INNER (mode);
18432  rtx op0 = NULL, op1 = NULL;
18433  bool use_vec_concat = false;
18434
18435  switch (mode)
18436    {
18437    case V2SFmode:
18438    case V2SImode:
18439      if (!mmx_ok && !TARGET_SSE)
18440	break;
18441      /* FALLTHRU */
18442
18443    case V2DFmode:
18444    case V2DImode:
18445      /* For the two element vectors, we always implement VEC_CONCAT.  */
18446      op0 = XVECEXP (vals, 0, 0);
18447      op1 = XVECEXP (vals, 0, 1);
18448      use_vec_concat = true;
18449      break;
18450
18451    case V4SFmode:
18452      half_mode = V2SFmode;
18453      goto half;
18454    case V4SImode:
18455      half_mode = V2SImode;
18456      goto half;
18457    half:
18458      {
18459	rtvec v;
18460
18461	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
18462	   Recurse to load the two halves.  */
18463
18464	op0 = gen_reg_rtx (half_mode);
18465	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18466	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18467
18468	op1 = gen_reg_rtx (half_mode);
18469	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18470	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18471
18472	use_vec_concat = true;
18473      }
18474      break;
18475
18476    case V8HImode:
18477    case V16QImode:
18478    case V4HImode:
18479    case V8QImode:
18480      break;
18481
18482    default:
18483      gcc_unreachable ();
18484    }
18485
18486  if (use_vec_concat)
18487    {
18488      if (!register_operand (op0, half_mode))
18489	op0 = force_reg (half_mode, op0);
18490      if (!register_operand (op1, half_mode))
18491	op1 = force_reg (half_mode, op1);
18492
18493      emit_insn (gen_rtx_SET (VOIDmode, target,
18494			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
18495    }
18496  else
18497    {
18498      int i, j, n_elts, n_words, n_elt_per_word;
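      /* Build each word of the vector in an integer register by shifting
	 the elements into place and IORing them together, then assemble the
	 words into the full vector (recursing through V4SImode when there
	 are four words).  */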
18499      enum machine_mode inner_mode;
18500      rtx words[4], shift;
18501
18502      inner_mode = GET_MODE_INNER (mode);
18503      n_elts = GET_MODE_NUNITS (mode);
18504      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18505      n_elt_per_word = n_elts / n_words;
18506      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18507
18508      for (i = 0; i < n_words; ++i)
18509	{
18510	  rtx word = NULL_RTX;
18511
18512	  for (j = 0; j < n_elt_per_word; ++j)
18513	    {
18514	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18515	      elt = convert_modes (word_mode, inner_mode, elt, true);
18516
18517	      if (j == 0)
18518		word = elt;
18519	      else
18520		{
18521		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18522					      word, 1, OPTAB_LIB_WIDEN);
18523		  word = expand_simple_binop (word_mode, IOR, word, elt,
18524					      word, 1, OPTAB_LIB_WIDEN);
18525		}
18526	    }
18527
18528	  words[i] = word;
18529	}
18530
18531      if (n_words == 1)
18532	emit_move_insn (target, gen_lowpart (mode, words[0]));
18533      else if (n_words == 2)
18534	{
18535	  rtx tmp = gen_reg_rtx (mode);
18536	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18537	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18538	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18539	  emit_move_insn (target, tmp);
18540	}
18541      else if (n_words == 4)
18542	{
18543	  rtx tmp = gen_reg_rtx (V4SImode);
18544	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18545	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18546	  emit_move_insn (target, gen_lowpart (mode, tmp));
18547	}
18548      else
18549	gcc_unreachable ();
18550    }
18551}
18552
18553/* Initialize vector TARGET via VALS.  Suppress the use of MMX
18554   instructions unless MMX_OK is true.  */
18555
18556void
18557ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18558{
18559  enum machine_mode mode = GET_MODE (target);
18560  enum machine_mode inner_mode = GET_MODE_INNER (mode);
18561  int n_elts = GET_MODE_NUNITS (mode);
18562  int n_var = 0, one_var = -1;
18563  bool all_same = true, all_const_zero = true;
18564  int i;
18565  rtx x;
18566
18567  for (i = 0; i < n_elts; ++i)
18568    {
18569      x = XVECEXP (vals, 0, i);
18570      if (!CONSTANT_P (x))
18571	n_var++, one_var = i;
18572      else if (x != CONST0_RTX (inner_mode))
18573	all_const_zero = false;
18574      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18575	all_same = false;
18576    }
18577
18578  /* Constants are best loaded from the constant pool.  */
18579  if (n_var == 0)
18580    {
18581      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18582      return;
18583    }
18584
18585  /* If all values are identical, broadcast the value.  */
18586  if (all_same
18587      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18588					    XVECEXP (vals, 0, 0)))
18589    return;
18590
18591  /* Values where only one field is non-constant are best loaded from
18592     the pool and overwritten via move later.  */
18593  if (n_var == 1)
18594    {
18595      if (all_const_zero
18596	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18597						  XVECEXP (vals, 0, one_var),
18598						  one_var))
18599	return;
18600
18601      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18602	return;
18603    }
18604
18605  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18606}
18607
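/* Store scalar VAL into element ELT of vector TARGET.  MMX instructions
   are used only when MMX_OK is true; when no direct instruction sequence
   is available, the element is stored through a stack temporary.  */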
18608void
18609ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18610{
18611  enum machine_mode mode = GET_MODE (target);
18612  enum machine_mode inner_mode = GET_MODE_INNER (mode);
18613  bool use_vec_merge = false;
18614  rtx tmp;
18615
18616  switch (mode)
18617    {
18618    case V2SFmode:
18619    case V2SImode:
18620      if (mmx_ok)
18621	{
18622	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18623	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18624	  if (elt == 0)
18625	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18626	  else
18627	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18628	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18629	  return;
18630	}
18631      break;
18632
18633    case V2DFmode:
18634    case V2DImode:
18635      {
18636	rtx op0, op1;
18637
18638	/* For the two element vectors, we implement a VEC_CONCAT with
18639	   the extraction of the other element.  */
18640
18641	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18642	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18643
18644	if (elt == 0)
18645	  op0 = val, op1 = tmp;
18646	else
18647	  op0 = tmp, op1 = val;
18648
18649	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18650	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18651      }
18652      return;
18653
18654    case V4SFmode:
18655      switch (elt)
18656	{
18657	case 0:
18658	  use_vec_merge = true;
18659	  break;
18660
18661	case 1:
18662	  /* tmp = target = A B C D */
18663	  tmp = copy_to_reg (target);
18664	  /* target = A A B B */
18665	  emit_insn (gen_sse_unpcklps (target, target, target));
18666	  /* target = X A B B */
18667	  ix86_expand_vector_set (false, target, val, 0);
18668	  /* target = A X C D  */
18669	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
18670				       GEN_INT (1), GEN_INT (0),
18671				       GEN_INT (2+4), GEN_INT (3+4)));
18672	  return;
18673
18674	case 2:
18675	  /* tmp = target = A B C D */
18676	  tmp = copy_to_reg (target);
18677	  /* tmp = X B C D */
18678	  ix86_expand_vector_set (false, tmp, val, 0);
18679	  /* target = A B X D */
18680	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
18681				       GEN_INT (0), GEN_INT (1),
18682				       GEN_INT (0+4), GEN_INT (3+4)));
18683	  return;
18684
18685	case 3:
18686	  /* tmp = target = A B C D */
18687	  tmp = copy_to_reg (target);
18688	  /* tmp = X B C D */
18689	  ix86_expand_vector_set (false, tmp, val, 0);
18690	  /* target = A B C X */
18691	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
18692				       GEN_INT (0), GEN_INT (1),
18693				       GEN_INT (2+4), GEN_INT (0+4)));
18694	  return;
18695
18696	default:
18697	  gcc_unreachable ();
18698	}
18699      break;
18700
18701    case V4SImode:
18702      /* Element 0 handled by vec_merge below.  */
18703      if (elt == 0)
18704	{
18705	  use_vec_merge = true;
18706	  break;
18707	}
18708
18709      if (TARGET_SSE2)
18710	{
18711	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
18712	     store into element 0, then shuffle them back.  */
18713
18714	  rtx order[4];
18715
18716	  order[0] = GEN_INT (elt);
18717	  order[1] = const1_rtx;
18718	  order[2] = const2_rtx;
18719	  order[3] = GEN_INT (3);
18720	  order[elt] = const0_rtx;
18721
18722	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18723					order[1], order[2], order[3]));
18724
18725	  ix86_expand_vector_set (false, target, val, 0);
18726
18727	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18728					order[1], order[2], order[3]));
18729	}
18730      else
18731	{
18732	  /* For SSE1, we have to reuse the V4SF code.  */
18733	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18734				  gen_lowpart (SFmode, val), elt);
18735	}
18736      return;
18737
18738    case V8HImode:
18739      use_vec_merge = TARGET_SSE2;
18740      break;
18741    case V4HImode:
18742      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18743      break;
18744
18745    case V16QImode:
18746    case V8QImode:
18747    default:
18748      break;
18749    }
18750
18751  if (use_vec_merge)
18752    {
18753      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18754      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18755      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18756    }
18757  else
18758    {
18759      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18760
18761      emit_move_insn (mem, target);
18762
18763      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18764      emit_move_insn (tmp, val);
18765
18766      emit_move_insn (target, mem);
18767    }
18768}
18769
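/* Extract element ELT of vector VEC into scalar TARGET.  MMX instructions
   are used only when MMX_OK is true; when no direct instruction sequence
   is available, the element is loaded through a stack temporary.  */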
18770void
18771ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18772{
18773  enum machine_mode mode = GET_MODE (vec);
18774  enum machine_mode inner_mode = GET_MODE_INNER (mode);
18775  bool use_vec_extr = false;
18776  rtx tmp;
18777
18778  switch (mode)
18779    {
18780    case V2SImode:
18781    case V2SFmode:
18782      if (!mmx_ok)
18783	break;
18784      /* FALLTHRU */
18785
18786    case V2DFmode:
18787    case V2DImode:
18788      use_vec_extr = true;
18789      break;
18790
18791    case V4SFmode:
18792      switch (elt)
18793	{
18794	case 0:
18795	  tmp = vec;
18796	  break;
18797
18798	case 1:
18799	case 3:
18800	  tmp = gen_reg_rtx (mode);
18801	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18802				       GEN_INT (elt), GEN_INT (elt),
18803				       GEN_INT (elt+4), GEN_INT (elt+4)));
18804	  break;
18805
18806	case 2:
18807	  tmp = gen_reg_rtx (mode);
18808	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18809	  break;
18810
18811	default:
18812	  gcc_unreachable ();
18813	}
18814      vec = tmp;
18815      use_vec_extr = true;
18816      elt = 0;
18817      break;
18818
18819    case V4SImode:
18820      if (TARGET_SSE2)
18821	{
18822	  switch (elt)
18823	    {
18824	    case 0:
18825	      tmp = vec;
18826	      break;
18827
18828	    case 1:
18829	    case 3:
18830	      tmp = gen_reg_rtx (mode);
18831	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18832					    GEN_INT (elt), GEN_INT (elt),
18833					    GEN_INT (elt), GEN_INT (elt)));
18834	      break;
18835
18836	    case 2:
18837	      tmp = gen_reg_rtx (mode);
18838	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18839	      break;
18840
18841	    default:
18842	      gcc_unreachable ();
18843	    }
18844	  vec = tmp;
18845	  use_vec_extr = true;
18846	  elt = 0;
18847	}
18848      else
18849	{
18850	  /* For SSE1, we have to reuse the V4SF code.  */
18851	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18852				      gen_lowpart (V4SFmode, vec), elt);
18853	  return;
18854	}
18855      break;
18856
18857    case V8HImode:
18858      use_vec_extr = TARGET_SSE2;
18859      break;
18860    case V4HImode:
18861      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18862      break;
18863
18864    case V16QImode:
18865    case V8QImode:
18866      /* ??? Could extract the appropriate HImode element and shift.  */
18867    default:
18868      break;
18869    }
18870
18871  if (use_vec_extr)
18872    {
18873      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18874      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18875
18876      /* Let the rtl optimizers know about the zero extension performed.  */
18877      if (inner_mode == HImode)
18878	{
18879	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18880	  target = gen_lowpart (SImode, target);
18881	}
18882
18883      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18884    }
18885  else
18886    {
18887      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18888
18889      emit_move_insn (mem, vec);
18890
18891      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18892      emit_move_insn (target, tmp);
18893    }
18894}
18895
18896/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
18897   pattern to reduce; DEST is the destination; IN is the input vector.  */
18898
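/* movhlps copies elements 2 and 3 of IN into the low half of a temporary;
   FN then combines that with IN, leaving IN[0] op IN[2] and IN[1] op IN[3]
   in elements 0 and 1.  A shuffle broadcasts element 1 of this partial
   result, and a final FN leaves the complete reduction in element 0 of
   DEST.  */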
18899void
18900ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18901{
18902  rtx tmp1, tmp2, tmp3;
18903
18904  tmp1 = gen_reg_rtx (V4SFmode);
18905  tmp2 = gen_reg_rtx (V4SFmode);
18906  tmp3 = gen_reg_rtx (V4SFmode);
18907
18908  emit_insn (gen_sse_movhlps (tmp1, in, in));
18909  emit_insn (fn (tmp2, tmp1, in));
18910
18911  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18912			       GEN_INT (1), GEN_INT (1),
18913			       GEN_INT (1+4), GEN_INT (1+4)));
18914  emit_insn (fn (dest, tmp2, tmp3));
18915}
18916
18917/* Target hook for scalar_mode_supported_p.  */
18918static bool
18919ix86_scalar_mode_supported_p (enum machine_mode mode)
18920{
18921  if (DECIMAL_FLOAT_MODE_P (mode))
18922    return true;
18923  else
18924    return default_scalar_mode_supported_p (mode);
18925}
18926
18927/* Implements target hook vector_mode_supported_p.  */
18928static bool
18929ix86_vector_mode_supported_p (enum machine_mode mode)
18930{
18931  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18932    return true;
18933  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18934    return true;
18935  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18936    return true;
18937  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18938    return true;
18939  return false;
18940}
18941
18942/* Worker function for TARGET_MD_ASM_CLOBBERS.
18943
18944   We do this in the new i386 backend to maintain source compatibility
18945   with the old cc0-based compiler.  */
18946
18947static tree
18948ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18949		      tree inputs ATTRIBUTE_UNUSED,
18950		      tree clobbers)
18951{
18952  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18953			clobbers);
18954  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18955			clobbers);
18956  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18957			clobbers);
18958  return clobbers;
18959}
18960
18961/* Return true if this goes in large data/bss.  */
18962
18963static bool
18964ix86_in_large_data_p (tree exp)
18965{
18966  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18967    return false;
18968
18969  /* Functions are never large data.  */
18970  if (TREE_CODE (exp) == FUNCTION_DECL)
18971    return false;
18972
18973  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18974    {
18975      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18976      if (strcmp (section, ".ldata") == 0
18977	  || strcmp (section, ".lbss") == 0)
18978	return true;
18979      return false;
18980    }
18981  else
18982    {
18983      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18984
18985      /* If this is an incomplete type with size 0, then we can't put it
18986	 in data because it might be too big when completed.  */
18987      if (!size || size > ix86_section_threshold)
18988	return true;
18989    }
18990
18991  return false;
18992}
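
/* Encode section information for DECL: after the default handling, mark
   static and external variables placed in the large data/bss sections with
   SYMBOL_FLAG_FAR_ADDR.  */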
18993static void
18994ix86_encode_section_info (tree decl, rtx rtl, int first)
18995{
18996  default_encode_section_info (decl, rtl, first);
18997
18998  if (TREE_CODE (decl) == VAR_DECL
18999      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19000      && ix86_in_large_data_p (decl))
19001    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19002}
19003
19004/* Worker function for REVERSE_CONDITION.  */
19005
19006enum rtx_code
19007ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19008{
19009  return (mode != CCFPmode && mode != CCFPUmode
19010	  ? reverse_condition (code)
19011	  : reverse_condition_maybe_unordered (code));
19012}
19013
19014/* Output code to perform an x87 FP register move, from OPERANDS[1]
19015   to OPERANDS[0].  */
19016
19017const char *
19018output_387_reg_move (rtx insn, rtx *operands)
19019{
19020  if (REG_P (operands[1])
19021      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19022    {
19023      if (REGNO (operands[0]) == FIRST_STACK_REG)
19024	return output_387_ffreep (operands, 0);
19025      return "fstp\t%y0";
19026    }
19027  if (STACK_TOP_P (operands[0]))
19028    return "fld%z1\t%y1";
19029  return "fst\t%y0";
19030}
19031
19032/* Output code to perform a conditional jump to LABEL, if the C2 flag in
19033   the FP status register is set.  */
19034
19035void
19036ix86_emit_fp_unordered_jump (rtx label)
19037{
19038  rtx reg = gen_reg_rtx (HImode);
19039  rtx temp;
19040
19041  emit_insn (gen_x86_fnstsw_1 (reg));
19042
19043  if (TARGET_USE_SAHF)
19044    {
19045      emit_insn (gen_x86_sahf_1 (reg));
19046
19047      temp = gen_rtx_REG (CCmode, FLAGS_REG);
19048      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19049    }
19050  else
19051    {
19052      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19053
19054      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19055      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19056    }
19057
19058  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19059			      gen_rtx_LABEL_REF (VOIDmode, label),
19060			      pc_rtx);
19061  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19062  emit_jump_insn (temp);
19063}
19064
19065/* Output code to perform a log1p XFmode calculation.  */
19066
19067void ix86_emit_i387_log1p (rtx op0, rtx op1)
19068{
19069  rtx label1 = gen_label_rtx ();
19070  rtx label2 = gen_label_rtx ();
19071
19072  rtx tmp = gen_reg_rtx (XFmode);
19073  rtx tmp2 = gen_reg_rtx (XFmode);
19074
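  /* The x87 fyl2xp1 instruction is only specified for arguments whose
     magnitude is below 1 - sqrt(2)/2, which is the constant compared
     against below; larger arguments branch to label1 and use fyl2x on
     1 + op1 instead.  */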
19075  emit_insn (gen_absxf2 (tmp, op1));
19076  emit_insn (gen_cmpxf (tmp,
19077    CONST_DOUBLE_FROM_REAL_VALUE (
19078       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19079       XFmode)));
19080  emit_jump_insn (gen_bge (label1));
19081
19082  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19083  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19084  emit_jump (label2);
19085
19086  emit_label (label1);
19087  emit_move_insn (tmp, CONST1_RTX (XFmode));
19088  emit_insn (gen_addxf3 (tmp, op1, tmp));
19089  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19090  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19091
19092  emit_label (label2);
19093}
19094
19095/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
19096
19097static void
19098i386_solaris_elf_named_section (const char *name, unsigned int flags,
19099				tree decl)
19100{
19101  /* With Binutils 2.15, the "@unwind" marker must be specified on
19102     every occurrence of the ".eh_frame" section, not just the first
19103     one.  */
19104  if (TARGET_64BIT
19105      && strcmp (name, ".eh_frame") == 0)
19106    {
19107      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19108	       flags & SECTION_WRITE ? "aw" : "a");
19109      return;
19110    }
19111  default_elf_asm_named_section (name, flags, decl);
19112}
19113
19114/* Return the mangling of TYPE if it is an extended fundamental type.  */
19115
19116static const char *
19117ix86_mangle_fundamental_type (tree type)
19118{
19119  switch (TYPE_MODE (type))
19120    {
19121    case TFmode:
19122      /* __float128 is "g".  */
19123      return "g";
19124    case XFmode:
19125      /* "long double" or __float80 is "e".  */
19126      return "e";
19127    default:
19128      return NULL;
19129    }
19130}
19131
19132/* For 32-bit code we can save the PIC register setup by using the hidden
19133   function __stack_chk_fail_local instead of calling __stack_chk_fail
19134   directly.  64-bit code doesn't need to set up any PIC register, so it is
19135   better to call __stack_chk_fail directly.  */
19136
19137static tree
19138ix86_stack_protect_fail (void)
19139{
19140  return TARGET_64BIT
19141	 ? default_external_stack_protect_fail ()
19142	 : default_hidden_stack_protect_fail ();
19143}
19144
19145/* Select a format to encode pointers in exception handling data.  CODE
19146   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
19147   true if the symbol may be affected by dynamic relocations.
19148
19149   ??? All x86 object file formats are capable of representing this.
19150   After all, the relocation needed is the same as for the call insn.
19151   Whether or not a particular assembler allows us to enter such, I
19152   guess we'll have to see.  */
19153int
19154asm_preferred_eh_data_format (int code, int global)
19155{
19156  if (flag_pic)
19157    {
19158      int type = DW_EH_PE_sdata8;
19159      if (!TARGET_64BIT
19160	  || ix86_cmodel == CM_SMALL_PIC
19161	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19162	type = DW_EH_PE_sdata4;
19163      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19164    }
19165  if (ix86_cmodel == CM_SMALL
19166      || (ix86_cmodel == CM_MEDIUM && code))
19167    return DW_EH_PE_udata4;
19168  return DW_EH_PE_absptr;
19169}
19170
19171#include "gt-i386.h"
19172