i386.c (259563) i386.c (260074)
1/* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING. If not, write to
19the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
21
22/* $FreeBSD: stable/10/contrib/gcc/config/i386/i386.c 259563 2013-12-18 19:07:29Z pfg $ */
22/* $FreeBSD: stable/10/contrib/gcc/config/i386/i386.c 260074 2013-12-30 03:39:46Z pfg $ */
23
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tree.h"
30#include "tm_p.h"
31#include "regs.h"
32#include "hard-reg-set.h"
33#include "real.h"
34#include "insn-config.h"
35#include "conditions.h"
36#include "output.h"
37#include "insn-codes.h"
38#include "insn-attr.h"
39#include "flags.h"
40#include "except.h"
41#include "function.h"
42#include "recog.h"
43#include "expr.h"
44#include "optabs.h"
45#include "toplev.h"
46#include "basic-block.h"
47#include "ggc.h"
48#include "target.h"
49#include "target-def.h"
50#include "langhooks.h"
51#include "cgraph.h"
52#include "tree-gimple.h"
53#include "dwarf2.h"
54#include "tm-constrs.h"
55
56#ifndef CHECK_STACK_LIMIT
57#define CHECK_STACK_LIMIT (-1)
58#endif
59
60/* Return index of given mode in mult and division cost tables. */
61#define MODE_INDEX(mode) \
62 ((mode) == QImode ? 0 \
63 : (mode) == HImode ? 1 \
64 : (mode) == SImode ? 2 \
65 : (mode) == DImode ? 3 \
66 : 4)
67
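/* Illustrative sketch: MODE_INDEX selects the per-mode entry of the mult
   and divide cost arrays in struct processor_costs; the mult_init and
   divide field names are assumed from the processor_costs declaration in
   i386.h.

     int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];
     int div_cost = ix86_cost->divide[MODE_INDEX (DImode)];

   Any mode other than QI/HI/SI/DI falls through to index 4, the "other"
   slot of each table below.  */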
68/* Processor costs (relative to an add) */
69/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
70#define COSTS_N_BYTES(N) ((N) * 2)
71
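/* Worked example: with COSTS_N_INSNS (N) == (N) * 4 as assumed above,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a two-byte add is scored
   as exactly one instruction unit and the size-tuned table below stays on
   the same scale as the speed-tuned tables that follow.  */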
72static const
73struct processor_costs size_cost = { /* costs for tuning for size */
74 COSTS_N_BYTES (2), /* cost of an add instruction */
75 COSTS_N_BYTES (3), /* cost of a lea instruction */
76 COSTS_N_BYTES (2), /* variable shift costs */
77 COSTS_N_BYTES (3), /* constant shift costs */
78 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
79 COSTS_N_BYTES (3), /* HI */
80 COSTS_N_BYTES (3), /* SI */
81 COSTS_N_BYTES (3), /* DI */
82 COSTS_N_BYTES (5)}, /* other */
83 0, /* cost of multiply per each bit set */
84 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 COSTS_N_BYTES (3), /* cost of movsx */
90 COSTS_N_BYTES (3), /* cost of movzx */
91 0, /* "large" insn */
92 2, /* MOVE_RATIO */
93 2, /* cost for loading QImode using movzbl */
94 {2, 2, 2}, /* cost of loading integer registers
95 in QImode, HImode and SImode.
96 Relative to reg-reg move (2). */
97 {2, 2, 2}, /* cost of storing integer registers */
98 2, /* cost of reg,reg fld/fst */
99 {2, 2, 2}, /* cost of loading fp registers
100 in SFmode, DFmode and XFmode */
101 {2, 2, 2}, /* cost of storing fp registers
102 in SFmode, DFmode and XFmode */
103 3, /* cost of moving MMX register */
104 {3, 3}, /* cost of loading MMX registers
105 in SImode and DImode */
106 {3, 3}, /* cost of storing MMX registers
107 in SImode and DImode */
108 3, /* cost of moving SSE register */
109 {3, 3, 3}, /* cost of loading SSE registers
110 in SImode, DImode and TImode */
111 {3, 3, 3}, /* cost of storing SSE registers
112 in SImode, DImode and TImode */
113 3, /* MMX or SSE register to integer */
114 0, /* size of prefetch block */
115 0, /* number of parallel prefetches */
116 2, /* Branch cost */
117 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
118 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
119 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
120 COSTS_N_BYTES (2), /* cost of FABS instruction. */
121 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
122 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
123};
124
125/* Processor costs (relative to an add) */
126static const
127struct processor_costs i386_cost = { /* 386 specific costs */
128 COSTS_N_INSNS (1), /* cost of an add instruction */
129 COSTS_N_INSNS (1), /* cost of a lea instruction */
130 COSTS_N_INSNS (3), /* variable shift costs */
131 COSTS_N_INSNS (2), /* constant shift costs */
132 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
133 COSTS_N_INSNS (6), /* HI */
134 COSTS_N_INSNS (6), /* SI */
135 COSTS_N_INSNS (6), /* DI */
136 COSTS_N_INSNS (6)}, /* other */
137 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
138 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
139 COSTS_N_INSNS (23), /* HI */
140 COSTS_N_INSNS (23), /* SI */
141 COSTS_N_INSNS (23), /* DI */
142 COSTS_N_INSNS (23)}, /* other */
143 COSTS_N_INSNS (3), /* cost of movsx */
144 COSTS_N_INSNS (2), /* cost of movzx */
145 15, /* "large" insn */
146 3, /* MOVE_RATIO */
147 4, /* cost for loading QImode using movzbl */
148 {2, 4, 2}, /* cost of loading integer registers
149 in QImode, HImode and SImode.
150 Relative to reg-reg move (2). */
151 {2, 4, 2}, /* cost of storing integer registers */
152 2, /* cost of reg,reg fld/fst */
153 {8, 8, 8}, /* cost of loading fp registers
154 in SFmode, DFmode and XFmode */
155 {8, 8, 8}, /* cost of storing fp registers
156 in SFmode, DFmode and XFmode */
157 2, /* cost of moving MMX register */
158 {4, 8}, /* cost of loading MMX registers
159 in SImode and DImode */
160 {4, 8}, /* cost of storing MMX registers
161 in SImode and DImode */
162 2, /* cost of moving SSE register */
163 {4, 8, 16}, /* cost of loading SSE registers
164 in SImode, DImode and TImode */
165 {4, 8, 16}, /* cost of storing SSE registers
166 in SImode, DImode and TImode */
167 3, /* MMX or SSE register to integer */
168 0, /* size of prefetch block */
169 0, /* number of parallel prefetches */
170 1, /* Branch cost */
171 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
172 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
173 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
174 COSTS_N_INSNS (22), /* cost of FABS instruction. */
175 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
176 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
177};
178
179static const
180struct processor_costs i486_cost = { /* 486 specific costs */
181 COSTS_N_INSNS (1), /* cost of an add instruction */
182 COSTS_N_INSNS (1), /* cost of a lea instruction */
183 COSTS_N_INSNS (3), /* variable shift costs */
184 COSTS_N_INSNS (2), /* constant shift costs */
185 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
186 COSTS_N_INSNS (12), /* HI */
187 COSTS_N_INSNS (12), /* SI */
188 COSTS_N_INSNS (12), /* DI */
189 COSTS_N_INSNS (12)}, /* other */
190 1, /* cost of multiply per each bit set */
191 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
192 COSTS_N_INSNS (40), /* HI */
193 COSTS_N_INSNS (40), /* SI */
194 COSTS_N_INSNS (40), /* DI */
195 COSTS_N_INSNS (40)}, /* other */
196 COSTS_N_INSNS (3), /* cost of movsx */
197 COSTS_N_INSNS (2), /* cost of movzx */
198 15, /* "large" insn */
199 3, /* MOVE_RATIO */
200 4, /* cost for loading QImode using movzbl */
201 {2, 4, 2}, /* cost of loading integer registers
202 in QImode, HImode and SImode.
203 Relative to reg-reg move (2). */
204 {2, 4, 2}, /* cost of storing integer registers */
205 2, /* cost of reg,reg fld/fst */
206 {8, 8, 8}, /* cost of loading fp registers
207 in SFmode, DFmode and XFmode */
208 {8, 8, 8}, /* cost of storing fp registers
209 in SFmode, DFmode and XFmode */
210 2, /* cost of moving MMX register */
211 {4, 8}, /* cost of loading MMX registers
212 in SImode and DImode */
213 {4, 8}, /* cost of storing MMX registers
214 in SImode and DImode */
215 2, /* cost of moving SSE register */
216 {4, 8, 16}, /* cost of loading SSE registers
217 in SImode, DImode and TImode */
218 {4, 8, 16}, /* cost of storing SSE registers
219 in SImode, DImode and TImode */
220 3, /* MMX or SSE register to integer */
221 0, /* size of prefetch block */
222 0, /* number of parallel prefetches */
223 1, /* Branch cost */
224 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
225 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
226 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
227 COSTS_N_INSNS (3), /* cost of FABS instruction. */
228 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
229 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
230};
231
232static const
233struct processor_costs pentium_cost = {
234 COSTS_N_INSNS (1), /* cost of an add instruction */
235 COSTS_N_INSNS (1), /* cost of a lea instruction */
236 COSTS_N_INSNS (4), /* variable shift costs */
237 COSTS_N_INSNS (1), /* constant shift costs */
238 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
239 COSTS_N_INSNS (11), /* HI */
240 COSTS_N_INSNS (11), /* SI */
241 COSTS_N_INSNS (11), /* DI */
242 COSTS_N_INSNS (11)}, /* other */
243 0, /* cost of multiply per each bit set */
244 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
245 COSTS_N_INSNS (25), /* HI */
246 COSTS_N_INSNS (25), /* SI */
247 COSTS_N_INSNS (25), /* DI */
248 COSTS_N_INSNS (25)}, /* other */
249 COSTS_N_INSNS (3), /* cost of movsx */
250 COSTS_N_INSNS (2), /* cost of movzx */
251 8, /* "large" insn */
252 6, /* MOVE_RATIO */
253 6, /* cost for loading QImode using movzbl */
254 {2, 4, 2}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 4, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of storing fp registers
262 in SFmode, DFmode and XFmode */
263 8, /* cost of moving MMX register */
264 {8, 8}, /* cost of loading MMX registers
265 in SImode and DImode */
266 {8, 8}, /* cost of storing MMX registers
267 in SImode and DImode */
268 2, /* cost of moving SSE register */
269 {4, 8, 16}, /* cost of loading SSE registers
270 in SImode, DImode and TImode */
271 {4, 8, 16}, /* cost of storing SSE registers
272 in SImode, DImode and TImode */
273 3, /* MMX or SSE register to integer */
274 0, /* size of prefetch block */
275 0, /* number of parallel prefetches */
276 2, /* Branch cost */
277 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
278 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
279 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
280 COSTS_N_INSNS (1), /* cost of FABS instruction. */
281 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
282 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
283};
284
285static const
286struct processor_costs pentiumpro_cost = {
287 COSTS_N_INSNS (1), /* cost of an add instruction */
288 COSTS_N_INSNS (1), /* cost of a lea instruction */
289 COSTS_N_INSNS (1), /* variable shift costs */
290 COSTS_N_INSNS (1), /* constant shift costs */
291 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
292 COSTS_N_INSNS (4), /* HI */
293 COSTS_N_INSNS (4), /* SI */
294 COSTS_N_INSNS (4), /* DI */
295 COSTS_N_INSNS (4)}, /* other */
296 0, /* cost of multiply per each bit set */
297 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
298 COSTS_N_INSNS (17), /* HI */
299 COSTS_N_INSNS (17), /* SI */
300 COSTS_N_INSNS (17), /* DI */
301 COSTS_N_INSNS (17)}, /* other */
302 COSTS_N_INSNS (1), /* cost of movsx */
303 COSTS_N_INSNS (1), /* cost of movzx */
304 8, /* "large" insn */
305 6, /* MOVE_RATIO */
306 2, /* cost for loading QImode using movzbl */
307 {4, 4, 4}, /* cost of loading integer registers
308 in QImode, HImode and SImode.
309 Relative to reg-reg move (2). */
310 {2, 2, 2}, /* cost of storing integer registers */
311 2, /* cost of reg,reg fld/fst */
312 {2, 2, 6}, /* cost of loading fp registers
313 in SFmode, DFmode and XFmode */
314 {4, 4, 6}, /* cost of storing fp registers
315 in SFmode, DFmode and XFmode */
316 2, /* cost of moving MMX register */
317 {2, 2}, /* cost of loading MMX registers
318 in SImode and DImode */
319 {2, 2}, /* cost of storing MMX registers
320 in SImode and DImode */
321 2, /* cost of moving SSE register */
322 {2, 2, 8}, /* cost of loading SSE registers
323 in SImode, DImode and TImode */
324 {2, 2, 8}, /* cost of storing SSE registers
325 in SImode, DImode and TImode */
326 3, /* MMX or SSE register to integer */
327 32, /* size of prefetch block */
328 6, /* number of parallel prefetches */
329 2, /* Branch cost */
330 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
331 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
332 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
333 COSTS_N_INSNS (2), /* cost of FABS instruction. */
334 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
335 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
336};
337
338static const
339struct processor_costs geode_cost = {
340 COSTS_N_INSNS (1), /* cost of an add instruction */
341 COSTS_N_INSNS (1), /* cost of a lea instruction */
342 COSTS_N_INSNS (2), /* variable shift costs */
343 COSTS_N_INSNS (1), /* constant shift costs */
344 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
345 COSTS_N_INSNS (4), /* HI */
346 COSTS_N_INSNS (7), /* SI */
347 COSTS_N_INSNS (7), /* DI */
348 COSTS_N_INSNS (7)}, /* other */
349 0, /* cost of multiply per each bit set */
350 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
351 COSTS_N_INSNS (23), /* HI */
352 COSTS_N_INSNS (39), /* SI */
353 COSTS_N_INSNS (39), /* DI */
354 COSTS_N_INSNS (39)}, /* other */
355 COSTS_N_INSNS (1), /* cost of movsx */
356 COSTS_N_INSNS (1), /* cost of movzx */
357 8, /* "large" insn */
358 4, /* MOVE_RATIO */
359 1, /* cost for loading QImode using movzbl */
360 {1, 1, 1}, /* cost of loading integer registers
361 in QImode, HImode and SImode.
362 Relative to reg-reg move (2). */
363 {1, 1, 1}, /* cost of storing integer registers */
364 1, /* cost of reg,reg fld/fst */
365 {1, 1, 1}, /* cost of loading fp registers
366 in SFmode, DFmode and XFmode */
367 {4, 6, 6}, /* cost of storing fp registers
368 in SFmode, DFmode and XFmode */
369
370 1, /* cost of moving MMX register */
371 {1, 1}, /* cost of loading MMX registers
372 in SImode and DImode */
373 {1, 1}, /* cost of storing MMX registers
374 in SImode and DImode */
375 1, /* cost of moving SSE register */
376 {1, 1, 1}, /* cost of loading SSE registers
377 in SImode, DImode and TImode */
378 {1, 1, 1}, /* cost of storing SSE registers
379 in SImode, DImode and TImode */
380 1, /* MMX or SSE register to integer */
381 32, /* size of prefetch block */
382 1, /* number of parallel prefetches */
383 1, /* Branch cost */
384 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
385 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
386 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
387 COSTS_N_INSNS (1), /* cost of FABS instruction. */
388 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
389 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
390};
391
392static const
393struct processor_costs k6_cost = {
394 COSTS_N_INSNS (1), /* cost of an add instruction */
395 COSTS_N_INSNS (2), /* cost of a lea instruction */
396 COSTS_N_INSNS (1), /* variable shift costs */
397 COSTS_N_INSNS (1), /* constant shift costs */
398 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
399 COSTS_N_INSNS (3), /* HI */
400 COSTS_N_INSNS (3), /* SI */
401 COSTS_N_INSNS (3), /* DI */
402 COSTS_N_INSNS (3)}, /* other */
403 0, /* cost of multiply per each bit set */
404 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
405 COSTS_N_INSNS (18), /* HI */
406 COSTS_N_INSNS (18), /* SI */
407 COSTS_N_INSNS (18), /* DI */
408 COSTS_N_INSNS (18)}, /* other */
409 COSTS_N_INSNS (2), /* cost of movsx */
410 COSTS_N_INSNS (2), /* cost of movzx */
411 8, /* "large" insn */
412 4, /* MOVE_RATIO */
413 3, /* cost for loading QImode using movzbl */
414 {4, 5, 4}, /* cost of loading integer registers
415 in QImode, HImode and SImode.
416 Relative to reg-reg move (2). */
417 {2, 3, 2}, /* cost of storing integer registers */
418 4, /* cost of reg,reg fld/fst */
419 {6, 6, 6}, /* cost of loading fp registers
420 in SFmode, DFmode and XFmode */
421 {4, 4, 4}, /* cost of storing fp registers
422 in SFmode, DFmode and XFmode */
423 2, /* cost of moving MMX register */
424 {2, 2}, /* cost of loading MMX registers
425 in SImode and DImode */
426 {2, 2}, /* cost of storing MMX registers
427 in SImode and DImode */
428 2, /* cost of moving SSE register */
429 {2, 2, 8}, /* cost of loading SSE registers
430 in SImode, DImode and TImode */
431 {2, 2, 8}, /* cost of storing SSE registers
432 in SImode, DImode and TImode */
433 6, /* MMX or SSE register to integer */
434 32, /* size of prefetch block */
435 1, /* number of parallel prefetches */
436 1, /* Branch cost */
437 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
438 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
439 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
440 COSTS_N_INSNS (2), /* cost of FABS instruction. */
441 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
442 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
443};
444
445static const
446struct processor_costs athlon_cost = {
447 COSTS_N_INSNS (1), /* cost of an add instruction */
448 COSTS_N_INSNS (2), /* cost of a lea instruction */
449 COSTS_N_INSNS (1), /* variable shift costs */
450 COSTS_N_INSNS (1), /* constant shift costs */
451 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
452 COSTS_N_INSNS (5), /* HI */
453 COSTS_N_INSNS (5), /* SI */
454 COSTS_N_INSNS (5), /* DI */
455 COSTS_N_INSNS (5)}, /* other */
456 0, /* cost of multiply per each bit set */
457 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
458 COSTS_N_INSNS (26), /* HI */
459 COSTS_N_INSNS (42), /* SI */
460 COSTS_N_INSNS (74), /* DI */
461 COSTS_N_INSNS (74)}, /* other */
462 COSTS_N_INSNS (1), /* cost of movsx */
463 COSTS_N_INSNS (1), /* cost of movzx */
464 8, /* "large" insn */
465 9, /* MOVE_RATIO */
466 4, /* cost for loading QImode using movzbl */
467 {3, 4, 3}, /* cost of loading integer registers
468 in QImode, HImode and SImode.
469 Relative to reg-reg move (2). */
470 {3, 4, 3}, /* cost of storing integer registers */
471 4, /* cost of reg,reg fld/fst */
472 {4, 4, 12}, /* cost of loading fp registers
473 in SFmode, DFmode and XFmode */
474 {6, 6, 8}, /* cost of storing fp registers
475 in SFmode, DFmode and XFmode */
476 2, /* cost of moving MMX register */
477 {4, 4}, /* cost of loading MMX registers
478 in SImode and DImode */
479 {4, 4}, /* cost of storing MMX registers
480 in SImode and DImode */
481 2, /* cost of moving SSE register */
482 {4, 4, 6}, /* cost of loading SSE registers
483 in SImode, DImode and TImode */
484 {4, 4, 5}, /* cost of storing SSE registers
485 in SImode, DImode and TImode */
486 5, /* MMX or SSE register to integer */
487 64, /* size of prefetch block */
488 6, /* number of parallel prefetches */
489 5, /* Branch cost */
490 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
491 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
492 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
493 COSTS_N_INSNS (2), /* cost of FABS instruction. */
494 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
495 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
496};
497
498static const
499struct processor_costs k8_cost = {
500 COSTS_N_INSNS (1), /* cost of an add instruction */
501 COSTS_N_INSNS (2), /* cost of a lea instruction */
502 COSTS_N_INSNS (1), /* variable shift costs */
503 COSTS_N_INSNS (1), /* constant shift costs */
504 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
505 COSTS_N_INSNS (4), /* HI */
506 COSTS_N_INSNS (3), /* SI */
507 COSTS_N_INSNS (4), /* DI */
508 COSTS_N_INSNS (5)}, /* other */
509 0, /* cost of multiply per each bit set */
510 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
511 COSTS_N_INSNS (26), /* HI */
512 COSTS_N_INSNS (42), /* SI */
513 COSTS_N_INSNS (74), /* DI */
514 COSTS_N_INSNS (74)}, /* other */
515 COSTS_N_INSNS (1), /* cost of movsx */
516 COSTS_N_INSNS (1), /* cost of movzx */
517 8, /* "large" insn */
518 9, /* MOVE_RATIO */
519 4, /* cost for loading QImode using movzbl */
520 {3, 4, 3}, /* cost of loading integer registers
521 in QImode, HImode and SImode.
522 Relative to reg-reg move (2). */
523 {3, 4, 3}, /* cost of storing integer registers */
524 4, /* cost of reg,reg fld/fst */
525 {4, 4, 12}, /* cost of loading fp registers
526 in SFmode, DFmode and XFmode */
527 {6, 6, 8}, /* cost of storing fp registers
528 in SFmode, DFmode and XFmode */
529 2, /* cost of moving MMX register */
530 {3, 3}, /* cost of loading MMX registers
531 in SImode and DImode */
532 {4, 4}, /* cost of storing MMX registers
533 in SImode and DImode */
534 2, /* cost of moving SSE register */
535 {4, 3, 6}, /* cost of loading SSE registers
536 in SImode, DImode and TImode */
537 {4, 4, 5}, /* cost of storing SSE registers
538 in SImode, DImode and TImode */
539 5, /* MMX or SSE register to integer */
540 64, /* size of prefetch block */
541 6, /* number of parallel prefetches */
542 5, /* Branch cost */
543 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
544 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
545 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
546 COSTS_N_INSNS (2), /* cost of FABS instruction. */
547 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
548 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
549};
550
551struct processor_costs amdfam10_cost = {
552 COSTS_N_INSNS (1), /* cost of an add instruction */
553 COSTS_N_INSNS (2), /* cost of a lea instruction */
554 COSTS_N_INSNS (1), /* variable shift costs */
555 COSTS_N_INSNS (1), /* constant shift costs */
556 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
557 COSTS_N_INSNS (4), /* HI */
558 COSTS_N_INSNS (3), /* SI */
559 COSTS_N_INSNS (4), /* DI */
560 COSTS_N_INSNS (5)}, /* other */
561 0, /* cost of multiply per each bit set */
562 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
563 COSTS_N_INSNS (35), /* HI */
564 COSTS_N_INSNS (51), /* SI */
565 COSTS_N_INSNS (83), /* DI */
566 COSTS_N_INSNS (83)}, /* other */
567 COSTS_N_INSNS (1), /* cost of movsx */
568 COSTS_N_INSNS (1), /* cost of movzx */
569 8, /* "large" insn */
570 9, /* MOVE_RATIO */
571 4, /* cost for loading QImode using movzbl */
572 {3, 4, 3}, /* cost of loading integer registers
573 in QImode, HImode and SImode.
574 Relative to reg-reg move (2). */
575 {3, 4, 3}, /* cost of storing integer registers */
576 4, /* cost of reg,reg fld/fst */
577 {4, 4, 12}, /* cost of loading fp registers
578 in SFmode, DFmode and XFmode */
579 {6, 6, 8}, /* cost of storing fp registers
580 in SFmode, DFmode and XFmode */
581 2, /* cost of moving MMX register */
582 {3, 3}, /* cost of loading MMX registers
583 in SImode and DImode */
584 {4, 4}, /* cost of storing MMX registers
585 in SImode and DImode */
586 2, /* cost of moving SSE register */
587 {4, 4, 3}, /* cost of loading SSE registers
588 in SImode, DImode and TImode */
589 {4, 4, 5}, /* cost of storing SSE registers
590 in SImode, DImode and TImode */
591 3, /* MMX or SSE register to integer */
592 /* On K8
593 MOVD reg64, xmmreg Double FSTORE 4
594 MOVD reg32, xmmreg Double FSTORE 4
595 On AMDFAM10
596 MOVD reg64, xmmreg Double FADD 3
597 1/1 1/1
598 MOVD reg32, xmmreg Double FADD 3
599 1/1 1/1 */
600 64, /* size of prefetch block */
 601 /* New AMD processors never drop prefetches; if they cannot be performed
 602 immediately, they are queued. We set the number of simultaneous prefetches
 603 to a large constant to reflect this (it is probably not a good idea to leave
 604 the number of prefetches entirely unlimited, as their execution also takes some
 605 time). */
606 100, /* number of parallel prefetches */
607 5, /* Branch cost */
608 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
609 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
610 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
611 COSTS_N_INSNS (2), /* cost of FABS instruction. */
612 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
613 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
614};
615
616static const
617struct processor_costs pentium4_cost = {
618 COSTS_N_INSNS (1), /* cost of an add instruction */
619 COSTS_N_INSNS (3), /* cost of a lea instruction */
620 COSTS_N_INSNS (4), /* variable shift costs */
621 COSTS_N_INSNS (4), /* constant shift costs */
622 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
623 COSTS_N_INSNS (15), /* HI */
624 COSTS_N_INSNS (15), /* SI */
625 COSTS_N_INSNS (15), /* DI */
626 COSTS_N_INSNS (15)}, /* other */
627 0, /* cost of multiply per each bit set */
628 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
629 COSTS_N_INSNS (56), /* HI */
630 COSTS_N_INSNS (56), /* SI */
631 COSTS_N_INSNS (56), /* DI */
632 COSTS_N_INSNS (56)}, /* other */
633 COSTS_N_INSNS (1), /* cost of movsx */
634 COSTS_N_INSNS (1), /* cost of movzx */
635 16, /* "large" insn */
636 6, /* MOVE_RATIO */
637 2, /* cost for loading QImode using movzbl */
638 {4, 5, 4}, /* cost of loading integer registers
639 in QImode, HImode and SImode.
640 Relative to reg-reg move (2). */
641 {2, 3, 2}, /* cost of storing integer registers */
642 2, /* cost of reg,reg fld/fst */
643 {2, 2, 6}, /* cost of loading fp registers
644 in SFmode, DFmode and XFmode */
645 {4, 4, 6}, /* cost of storing fp registers
646 in SFmode, DFmode and XFmode */
647 2, /* cost of moving MMX register */
648 {2, 2}, /* cost of loading MMX registers
649 in SImode and DImode */
650 {2, 2}, /* cost of storing MMX registers
651 in SImode and DImode */
652 12, /* cost of moving SSE register */
653 {12, 12, 12}, /* cost of loading SSE registers
654 in SImode, DImode and TImode */
655 {2, 2, 8}, /* cost of storing SSE registers
656 in SImode, DImode and TImode */
657 10, /* MMX or SSE register to integer */
658 64, /* size of prefetch block */
659 6, /* number of parallel prefetches */
660 2, /* Branch cost */
661 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
662 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
663 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
664 COSTS_N_INSNS (2), /* cost of FABS instruction. */
665 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
666 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
667};
668
669static const
670struct processor_costs nocona_cost = {
671 COSTS_N_INSNS (1), /* cost of an add instruction */
672 COSTS_N_INSNS (1), /* cost of a lea instruction */
673 COSTS_N_INSNS (1), /* variable shift costs */
674 COSTS_N_INSNS (1), /* constant shift costs */
675 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
676 COSTS_N_INSNS (10), /* HI */
677 COSTS_N_INSNS (10), /* SI */
678 COSTS_N_INSNS (10), /* DI */
679 COSTS_N_INSNS (10)}, /* other */
680 0, /* cost of multiply per each bit set */
681 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
682 COSTS_N_INSNS (66), /* HI */
683 COSTS_N_INSNS (66), /* SI */
684 COSTS_N_INSNS (66), /* DI */
685 COSTS_N_INSNS (66)}, /* other */
686 COSTS_N_INSNS (1), /* cost of movsx */
687 COSTS_N_INSNS (1), /* cost of movzx */
688 16, /* "large" insn */
689 17, /* MOVE_RATIO */
690 4, /* cost for loading QImode using movzbl */
691 {4, 4, 4}, /* cost of loading integer registers
692 in QImode, HImode and SImode.
693 Relative to reg-reg move (2). */
694 {4, 4, 4}, /* cost of storing integer registers */
695 3, /* cost of reg,reg fld/fst */
696 {12, 12, 12}, /* cost of loading fp registers
697 in SFmode, DFmode and XFmode */
698 {4, 4, 4}, /* cost of storing fp registers
699 in SFmode, DFmode and XFmode */
700 6, /* cost of moving MMX register */
701 {12, 12}, /* cost of loading MMX registers
702 in SImode and DImode */
703 {12, 12}, /* cost of storing MMX registers
704 in SImode and DImode */
705 6, /* cost of moving SSE register */
706 {12, 12, 12}, /* cost of loading SSE registers
707 in SImode, DImode and TImode */
708 {12, 12, 12}, /* cost of storing SSE registers
709 in SImode, DImode and TImode */
710 8, /* MMX or SSE register to integer */
711 128, /* size of prefetch block */
712 8, /* number of parallel prefetches */
713 1, /* Branch cost */
714 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
715 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
716 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
717 COSTS_N_INSNS (3), /* cost of FABS instruction. */
718 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
719 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
720};
721
722static const
723struct processor_costs core2_cost = {
724 COSTS_N_INSNS (1), /* cost of an add instruction */
725 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
726 COSTS_N_INSNS (1), /* variable shift costs */
727 COSTS_N_INSNS (1), /* constant shift costs */
728 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
729 COSTS_N_INSNS (3), /* HI */
730 COSTS_N_INSNS (3), /* SI */
731 COSTS_N_INSNS (3), /* DI */
732 COSTS_N_INSNS (3)}, /* other */
733 0, /* cost of multiply per each bit set */
734 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
735 COSTS_N_INSNS (22), /* HI */
736 COSTS_N_INSNS (22), /* SI */
737 COSTS_N_INSNS (22), /* DI */
738 COSTS_N_INSNS (22)}, /* other */
739 COSTS_N_INSNS (1), /* cost of movsx */
740 COSTS_N_INSNS (1), /* cost of movzx */
741 8, /* "large" insn */
742 16, /* MOVE_RATIO */
743 2, /* cost for loading QImode using movzbl */
744 {6, 6, 6}, /* cost of loading integer registers
745 in QImode, HImode and SImode.
746 Relative to reg-reg move (2). */
747 {4, 4, 4}, /* cost of storing integer registers */
748 2, /* cost of reg,reg fld/fst */
749 {6, 6, 6}, /* cost of loading fp registers
750 in SFmode, DFmode and XFmode */
 751 {4, 4, 4}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
752 2, /* cost of moving MMX register */
753 {6, 6}, /* cost of loading MMX registers
754 in SImode and DImode */
755 {4, 4}, /* cost of storing MMX registers
756 in SImode and DImode */
757 2, /* cost of moving SSE register */
758 {6, 6, 6}, /* cost of loading SSE registers
759 in SImode, DImode and TImode */
760 {4, 4, 4}, /* cost of storing SSE registers
761 in SImode, DImode and TImode */
762 2, /* MMX or SSE register to integer */
763 128, /* size of prefetch block */
764 8, /* number of parallel prefetches */
765 3, /* Branch cost */
766 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
767 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
768 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
769 COSTS_N_INSNS (1), /* cost of FABS instruction. */
770 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
771 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
772};
773
774/* Generic64 should produce code tuned for Nocona and K8. */
775static const
776struct processor_costs generic64_cost = {
777 COSTS_N_INSNS (1), /* cost of an add instruction */
778 /* On all chips taken into consideration lea is 2 cycles and more. With
779 this cost however our current implementation of synth_mult results in
780 use of unnecessary temporary registers causing regression on several
781 SPECfp benchmarks. */
782 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
783 COSTS_N_INSNS (1), /* variable shift costs */
784 COSTS_N_INSNS (1), /* constant shift costs */
785 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
786 COSTS_N_INSNS (4), /* HI */
787 COSTS_N_INSNS (3), /* SI */
788 COSTS_N_INSNS (4), /* DI */
789 COSTS_N_INSNS (2)}, /* other */
790 0, /* cost of multiply per each bit set */
791 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
792 COSTS_N_INSNS (26), /* HI */
793 COSTS_N_INSNS (42), /* SI */
794 COSTS_N_INSNS (74), /* DI */
795 COSTS_N_INSNS (74)}, /* other */
796 COSTS_N_INSNS (1), /* cost of movsx */
797 COSTS_N_INSNS (1), /* cost of movzx */
798 8, /* "large" insn */
799 17, /* MOVE_RATIO */
800 4, /* cost for loading QImode using movzbl */
801 {4, 4, 4}, /* cost of loading integer registers
802 in QImode, HImode and SImode.
803 Relative to reg-reg move (2). */
804 {4, 4, 4}, /* cost of storing integer registers */
805 4, /* cost of reg,reg fld/fst */
806 {12, 12, 12}, /* cost of loading fp registers
807 in SFmode, DFmode and XFmode */
808 {6, 6, 8}, /* cost of storing fp registers
809 in SFmode, DFmode and XFmode */
810 2, /* cost of moving MMX register */
811 {8, 8}, /* cost of loading MMX registers
812 in SImode and DImode */
813 {8, 8}, /* cost of storing MMX registers
814 in SImode and DImode */
815 2, /* cost of moving SSE register */
816 {8, 8, 8}, /* cost of loading SSE registers
817 in SImode, DImode and TImode */
818 {8, 8, 8}, /* cost of storing SSE registers
819 in SImode, DImode and TImode */
820 5, /* MMX or SSE register to integer */
821 64, /* size of prefetch block */
822 6, /* number of parallel prefetches */
 823 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
 824 is increased to the perhaps more appropriate value of 5. */
825 3, /* Branch cost */
826 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
827 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
828 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
829 COSTS_N_INSNS (8), /* cost of FABS instruction. */
830 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
831 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
832};
833
834/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
835static const
836struct processor_costs generic32_cost = {
837 COSTS_N_INSNS (1), /* cost of an add instruction */
838 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
839 COSTS_N_INSNS (1), /* variable shift costs */
840 COSTS_N_INSNS (1), /* constant shift costs */
841 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
842 COSTS_N_INSNS (4), /* HI */
843 COSTS_N_INSNS (3), /* SI */
844 COSTS_N_INSNS (4), /* DI */
845 COSTS_N_INSNS (2)}, /* other */
846 0, /* cost of multiply per each bit set */
847 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
848 COSTS_N_INSNS (26), /* HI */
849 COSTS_N_INSNS (42), /* SI */
850 COSTS_N_INSNS (74), /* DI */
851 COSTS_N_INSNS (74)}, /* other */
852 COSTS_N_INSNS (1), /* cost of movsx */
853 COSTS_N_INSNS (1), /* cost of movzx */
854 8, /* "large" insn */
855 17, /* MOVE_RATIO */
856 4, /* cost for loading QImode using movzbl */
857 {4, 4, 4}, /* cost of loading integer registers
858 in QImode, HImode and SImode.
859 Relative to reg-reg move (2). */
860 {4, 4, 4}, /* cost of storing integer registers */
861 4, /* cost of reg,reg fld/fst */
862 {12, 12, 12}, /* cost of loading fp registers
863 in SFmode, DFmode and XFmode */
864 {6, 6, 8}, /* cost of storing fp registers
865 in SFmode, DFmode and XFmode */
866 2, /* cost of moving MMX register */
867 {8, 8}, /* cost of loading MMX registers
868 in SImode and DImode */
869 {8, 8}, /* cost of storing MMX registers
870 in SImode and DImode */
871 2, /* cost of moving SSE register */
872 {8, 8, 8}, /* cost of loading SSE registers
873 in SImode, DImode and TImode */
874 {8, 8, 8}, /* cost of storing SSE registers
875 in SImode, DImode and TImode */
876 5, /* MMX or SSE register to integer */
877 64, /* size of prefetch block */
878 6, /* number of parallel prefetches */
879 3, /* Branch cost */
880 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
881 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
882 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
883 COSTS_N_INSNS (8), /* cost of FABS instruction. */
884 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
885 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
886};
887
888const struct processor_costs *ix86_cost = &pentium_cost;
889
890/* Processor feature/optimization bitmasks. */
891#define m_386 (1<<PROCESSOR_I386)
892#define m_486 (1<<PROCESSOR_I486)
893#define m_PENT (1<<PROCESSOR_PENTIUM)
894#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
895#define m_GEODE (1<<PROCESSOR_GEODE)
896#define m_K6_GEODE (m_K6 | m_GEODE)
897#define m_K6 (1<<PROCESSOR_K6)
898#define m_ATHLON (1<<PROCESSOR_ATHLON)
899#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
900#define m_K8 (1<<PROCESSOR_K8)
901#define m_ATHLON_K8 (m_K8 | m_ATHLON)
902#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
903#define m_NOCONA (1<<PROCESSOR_NOCONA)
904#define m_CORE2 (1<<PROCESSOR_CORE2)
905#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
906#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
907#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
908#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
909
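/* Illustrative sketch of how the masks are consumed; the TUNEMASK and
   TARGET_USE_LEAVE definitions shown here are assumed from i386.h:

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so e.g. -mtune=k8 makes TARGET_USE_LEAVE nonzero because m_K8 is set in
   x86_use_leave below.  */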
910/* Generic instruction choice should be common subset of supported CPUs
911 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
912
 913/* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
 914 Generic64 seems like a good code-size tradeoff. We can't enable it for 32bit
 915 generic because it does not work well with PPro-based chips. */
916const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
917 | m_GENERIC64;
918const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
919 | m_NOCONA | m_CORE2 | m_GENERIC;
920const int x86_zero_extend_with_and = m_486 | m_PENT;
 921/* Enable zero-extending integer registers to avoid partial dependencies. */
922const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
923 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
924const int x86_double_with_add = ~m_386;
925const int x86_use_bit_test = m_386;
926const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
927 | m_K6 | m_CORE2 | m_GENERIC;
928const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
929 | m_NOCONA;
930const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
931const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
932 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
 933/* Branch hints were put into the P4 based on simulation results. But
 934 after the P4 was made, no performance benefit was observed with
 935 branch hints; they also increase the code size. As a result,
 936 icc never generates branch hints. */
937const int x86_branch_hints = 0;
938const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
939 /*m_GENERIC | m_ATHLON_K8 ? */
 940/* We probably ought to watch for partial register stalls on the Generic32
 941 compilation setting as well. However, in the current implementation
 942 partial register stalls are not eliminated very well - they can
 943 be introduced via subregs synthesized by combine and can happen
 944 in caller/callee saving sequences.
 945 Because this option pays back little on PPro-based chips and conflicts
 946 with the partial register dependencies used by Athlon/P4-based chips, it is better
 947 to leave it off for generic32 for now. */
948const int x86_partial_reg_stall = m_PPRO;
949const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
950const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
951const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
952 | m_CORE2 | m_GENERIC);
953const int x86_use_mov0 = m_K6;
954const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
955const int x86_read_modify_write = ~m_PENT;
956const int x86_read_modify = ~(m_PENT | m_PPRO);
957const int x86_split_long_moves = m_PPRO;
958const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
959 | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
960 /* m_PENT4 ? */
961const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
962const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
963const int x86_qimode_math = ~(0);
964const int x86_promote_qi_regs = 0;
965/* On PPro this flag is meant to avoid partial register stalls. Just like
966 the x86_partial_reg_stall this option might be considered for Generic32
967 if our scheme for avoiding partial stalls was more effective. */
968const int x86_himode_math = ~(m_PPRO);
969const int x86_promote_hi_regs = m_PPRO;
970/* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
971const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
972 | m_CORE2 | m_GENERIC;
973const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
974 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
975const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
976 | m_CORE2 | m_GENERIC;
977const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
978 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
979/* Enable if integer moves are preferred for DFmode copies */
980const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
981 | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
982const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
983 | m_CORE2 | m_GENERIC;
984const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
985 | m_CORE2 | m_GENERIC;
986/* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
987 for outgoing arguments will be computed and placed into the variable
988 `current_function_outgoing_args_size'. No space will be pushed onto the stack
989 for each call; instead, the function prologue should increase the stack frame
990 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
991 not proper. */
992const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
993 | m_NOCONA | m_PPRO | m_CORE2
994 | m_GENERIC;
995const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
996const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
997const int x86_shift1 = ~m_486;
998const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
999 | m_ATHLON_K8_AMDFAM10 | m_PENT4
1000 | m_NOCONA | m_CORE2 | m_GENERIC;
1001/* In the Generic model we have a conflict here between PPro/Pentium4-based chips,
1002 which treat 128bit SSE registers as single units, and K8-based chips, which
1003 divide SSE registers into two 64bit halves.
1004 x86_sse_partial_reg_dependency promotes all store destinations to 128bit
1005 to allow register renaming on 128bit SSE units, but usually results in one
1006 extra microop on 64bit SSE units. Experimental results show that disabling
1007 this option on the P4 brings over a 20% SPECfp regression, while enabling it on
1008 the K8 brings roughly a 2.4% regression that can be partly masked by careful scheduling
1009 of moves. */
1010const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1011 | m_GENERIC | m_AMDFAM10;
1012/* Set for machines where the type and dependencies are resolved on SSE
1013 register parts instead of whole registers, so we may maintain just the
1014 lower part of scalar values in proper format, leaving the upper part
1015 undefined. */
1016const int x86_sse_split_regs = m_ATHLON_K8;
1017/* Code generation for scalar reg-reg moves of single and double precision data:
1018 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
1019 movaps reg, reg
1020 else
1021 movss reg, reg
1022 if (x86_sse_partial_reg_dependency == true)
1023 movapd reg, reg
1024 else
1025 movsd reg, reg
1026
1027 Code generation for scalar loads of double precision data:
1028 if (x86_sse_split_regs == true)
1029 movlpd mem, reg (gas syntax)
1030 else
1031 movsd mem, reg
1032
1033 Code generation for unaligned packed loads of single precision data
1034 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1035 if (x86_sse_unaligned_move_optimal)
1036 movups mem, reg
1037
1038 if (x86_sse_partial_reg_dependency == true)
1039 {
1040 xorps reg, reg
1041 movlps mem, reg
1042 movhps mem+8, reg
1043 }
1044 else
1045 {
1046 movlps mem, reg
1047 movhps mem+8, reg
1048 }
1049
1050 Code generation for unaligned packed loads of double precision data
1051 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1052 if (x86_sse_unaligned_move_optimal)
1053 movupd mem, reg
1054
1055 if (x86_sse_split_regs == true)
1056 {
1057 movlpd mem, reg
1058 movhpd mem+8, reg
1059 }
1060 else
1061 {
1062 movsd mem, reg
1063 movhpd mem+8, reg
1064 }
1065 */
1066const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
1067const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
1068const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1069const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
1070const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
1071const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1072
1073/* ??? Allowing interunit moves makes it all too easy for the compiler to put
1074 integer data in xmm registers, which results in pretty abysmal code. */
1075const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
1076
1077const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
1078 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1079/* Some CPU cores are not able to predict more than 4 branch instructions in
1080 the 16 byte window. */
1081const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1082 | m_NOCONA | m_CORE2 | m_GENERIC;
1083const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
1084 | m_CORE2 | m_GENERIC;
1085const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
1086/* Compare and exchange was added for 80486. */
1087const int x86_cmpxchg = ~m_386;
1088/* Compare and exchange 8 bytes was added for pentium. */
1089const int x86_cmpxchg8b = ~(m_386 | m_486);
1090/* Exchange and add was added for 80486. */
1091const int x86_xadd = ~m_386;
1092/* Byteswap was added for 80486. */
1093const int x86_bswap = ~m_386;
1094const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1095
1096/* If the average insn count for a single function invocation is
1097 lower than this constant, emit fast (but longer) prologue and
1098 epilogue code. */
1099#define FAST_PROLOGUE_INSN_COUNT 20
1100
1101/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1102static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1103static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1104static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1105
1106/* Array of the smallest class containing reg number REGNO, indexed by
1107 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1108
1109enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1110{
1111 /* ax, dx, cx, bx */
1112 AREG, DREG, CREG, BREG,
1113 /* si, di, bp, sp */
1114 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1115 /* FP registers */
1116 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1117 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1118 /* arg pointer */
1119 NON_Q_REGS,
1120 /* flags, fpsr, dirflag, frame */
1121 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1122 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1123 SSE_REGS, SSE_REGS,
1124 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1125 MMX_REGS, MMX_REGS,
1126 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1127 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1128 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1129 SSE_REGS, SSE_REGS,
1130};
1131
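/* Illustrative sketch: regclass_map is consumed through REGNO_REG_CLASS,
   assumed to be defined in i386.h roughly as

     #define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])

   so REGNO_REG_CLASS (0) is AREG (%eax) and hard register 7 (%esp) falls
   into NON_Q_REGS.  */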
1132/* The "default" register map used in 32bit mode. */
1133
1134int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1135{
1136 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1137 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1138 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1139 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1140 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1141 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1142 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1143};
1144
1145static int const x86_64_int_parameter_registers[6] =
1146{
1147 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1148 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1149};
1150
1151static int const x86_64_int_return_registers[4] =
1152{
 1153 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1154};
1155
1156/* The "default" register map used in 64bit mode. */
1157int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1158{
1159 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1160 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1161 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1162 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1163 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1164 8,9,10,11,12,13,14,15, /* extended integer registers */
1165 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1166};
1167
1168/* Define the register numbers to be used in Dwarf debugging information.
1169 The SVR4 reference port C compiler uses the following register numbers
1170 in its Dwarf output code:
1171 0 for %eax (gcc regno = 0)
1172 1 for %ecx (gcc regno = 2)
1173 2 for %edx (gcc regno = 1)
1174 3 for %ebx (gcc regno = 3)
1175 4 for %esp (gcc regno = 7)
1176 5 for %ebp (gcc regno = 6)
1177 6 for %esi (gcc regno = 4)
1178 7 for %edi (gcc regno = 5)
1179 The following three DWARF register numbers are never generated by
1180 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1181 believes these numbers have these meanings.
1182 8 for %eip (no gcc equivalent)
1183 9 for %eflags (gcc regno = 17)
1184 10 for %trapno (no gcc equivalent)
1185 It is not at all clear how we should number the FP stack registers
1186 for the x86 architecture. If the version of SDB on x86/svr4 were
1187 a bit less brain dead with respect to floating-point then we would
1188 have a precedent to follow with respect to DWARF register numbers
1189 for x86 FP registers, but the SDB on x86/svr4 is so completely
1190 broken with respect to FP registers that it is hardly worth thinking
1191 of it as something to strive for compatibility with.
1192 The version of x86/svr4 SDB I have at the moment does (partially)
1193 seem to believe that DWARF register number 11 is associated with
1194 the x86 register %st(0), but that's about all. Higher DWARF
1195 register numbers don't seem to be associated with anything in
1196 particular, and even for DWARF regno 11, SDB only seems to under-
1197 stand that it should say that a variable lives in %st(0) (when
1198 asked via an `=' command) if we said it was in DWARF regno 11,
1199 but SDB still prints garbage when asked for the value of the
1200 variable in question (via a `/' command).
1201 (Also note that the labels SDB prints for various FP stack regs
1202 when doing an `x' command are all wrong.)
1203 Note that these problems generally don't affect the native SVR4
1204 C compiler because it doesn't allow the use of -O with -g and
1205 because when it is *not* optimizing, it allocates a memory
1206 location for each floating-point variable, and the memory
1207 location is what gets described in the DWARF AT_location
1208 attribute for the variable in question.
1209 Regardless of the severe mental illness of the x86/svr4 SDB, we
1210 do something sensible here and we use the following DWARF
1211 register numbers. Note that these are all stack-top-relative
1212 numbers.
1213 11 for %st(0) (gcc regno = 8)
1214 12 for %st(1) (gcc regno = 9)
1215 13 for %st(2) (gcc regno = 10)
1216 14 for %st(3) (gcc regno = 11)
1217 15 for %st(4) (gcc regno = 12)
1218 16 for %st(5) (gcc regno = 13)
1219 17 for %st(6) (gcc regno = 14)
1220 18 for %st(7) (gcc regno = 15)
1221*/
1222int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1223{
1224 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1225 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1226 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1227 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1228 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1230 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1231};
1232
1233/* Test and compare insns in i386.md store the information needed to
1234 generate branch and scc insns here. */
1235
1236rtx ix86_compare_op0 = NULL_RTX;
1237rtx ix86_compare_op1 = NULL_RTX;
1238rtx ix86_compare_emitted = NULL_RTX;
1239
1240/* Size of the register save area. */
1241#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
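/* Worked example, assuming the usual 64bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8:
   X86_64_VARARGS_SIZE == 6 * 8 + 8 * 16 == 176 bytes, i.e. 48 bytes for
   the six integer argument registers plus 128 bytes for the eight SSE
   argument registers in the varargs register save area.  */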
1242
1243/* Define the structure for the machine field in struct function. */
1244
1245struct stack_local_entry GTY(())
1246{
1247 unsigned short mode;
1248 unsigned short n;
1249 rtx rtl;
1250 struct stack_local_entry *next;
1251};
1252
1253/* Structure describing stack frame layout.
1254 Stack grows downward:
1255
1256 [arguments]
1257 <- ARG_POINTER
1258 saved pc
1259
1260 saved frame pointer if frame_pointer_needed
1261 <- HARD_FRAME_POINTER
1262 [saved regs]
1263
1264 [padding1] \
1265 )
1266 [va_arg registers] (
1267 > to_allocate <- FRAME_POINTER
1268 [frame] (
1269 )
1270 [padding2] /
1271 */
1272struct ix86_frame
1273{
1274 int nregs;
1275 int padding1;
1276 int va_arg_size;
1277 HOST_WIDE_INT frame;
1278 int padding2;
1279 int outgoing_arguments_size;
1280 int red_zone_size;
1281
1282 HOST_WIDE_INT to_allocate;
1283 /* The offsets relative to ARG_POINTER. */
1284 HOST_WIDE_INT frame_pointer_offset;
1285 HOST_WIDE_INT hard_frame_pointer_offset;
1286 HOST_WIDE_INT stack_pointer_offset;
1287
1288 /* When save_regs_using_mov is set, emit prologue using
1289 move instead of push instructions. */
1290 bool save_regs_using_mov;
1291};
1292
1293/* Code model option. */
1294enum cmodel ix86_cmodel;
1295/* Asm dialect. */
1296enum asm_dialect ix86_asm_dialect = ASM_ATT;
1297/* TLS dialects. */
1298enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1299
1300/* Which unit we are generating floating point math for. */
1301enum fpmath_unit ix86_fpmath;
1302
1303/* Which cpu are we scheduling for. */
1304enum processor_type ix86_tune;
1305/* Which instruction set architecture to use. */
1306enum processor_type ix86_arch;
1307
1308/* true if sse prefetch instruction is not NOOP. */
1309int x86_prefetch_sse;
1310
1311/* true if cmpxchg16b is supported. */
1312int x86_cmpxchg16b;
1313
1314/* ix86_regparm_string as a number */
1315static int ix86_regparm;
1316
1317/* -mstackrealign option */
1318extern int ix86_force_align_arg_pointer;
1319static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1320
1321/* Preferred alignment for stack boundary in bits. */
1322unsigned int ix86_preferred_stack_boundary;
1323
1324/* Values 1-5: see jump.c */
1325int ix86_branch_cost;
1326
1327/* Variables which are this size or smaller are put in the data/bss
1328 or ldata/lbss sections. */
1329
1330int ix86_section_threshold = 65536;
1331
1332/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1333char internal_label_prefix[16];
1334int internal_label_prefix_len;
1335
1336static bool ix86_handle_option (size_t, const char *, int);
1337static void output_pic_addr_const (FILE *, rtx, int);
1338static void put_condition_code (enum rtx_code, enum machine_mode,
1339 int, int, FILE *);
1340static const char *get_some_local_dynamic_name (void);
1341static int get_some_local_dynamic_name_1 (rtx *, void *);
1342static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1343static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1344 rtx *);
1345static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1346static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1347 enum machine_mode);
1348static rtx get_thread_pointer (int);
1349static rtx legitimize_tls_address (rtx, enum tls_model, int);
1350static void get_pc_thunk_name (char [32], unsigned int);
1351static rtx gen_push (rtx);
1352static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1353static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1354static struct machine_function * ix86_init_machine_status (void);
1355static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1356static int ix86_nsaved_regs (void);
1357static void ix86_emit_save_regs (void);
1358static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1359static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1360static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1361static HOST_WIDE_INT ix86_GOT_alias_set (void);
1362static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1363static rtx ix86_expand_aligntest (rtx, int);
1364static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1365static int ix86_issue_rate (void);
1366static int ix86_adjust_cost (rtx, rtx, rtx, int);
1367static int ia32_multipass_dfa_lookahead (void);
1368static void ix86_init_mmx_sse_builtins (void);
1369static rtx x86_this_parameter (tree);
1370static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1371 HOST_WIDE_INT, tree);
1372static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1373static void x86_file_start (void);
1374static void ix86_reorg (void);
1375static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1376static tree ix86_build_builtin_va_list (void);
1377static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1378 tree, int *, int);
1379static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1380static bool ix86_scalar_mode_supported_p (enum machine_mode);
1381static bool ix86_vector_mode_supported_p (enum machine_mode);
1382
1383static int ix86_address_cost (rtx);
1384static bool ix86_cannot_force_const_mem (rtx);
1385static rtx ix86_delegitimize_address (rtx);
1386
1387static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1388
1389struct builtin_description;
1390static rtx ix86_expand_sse_comi (const struct builtin_description *,
1391 tree, rtx);
1392static rtx ix86_expand_sse_compare (const struct builtin_description *,
1393 tree, rtx);
1394static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1395static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1396static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1397static rtx ix86_expand_store_builtin (enum insn_code, tree);
1398static rtx safe_vector_operand (rtx, enum machine_mode);
1399static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1400static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1401static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1402static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1403static int ix86_fp_comparison_cost (enum rtx_code code);
1404static unsigned int ix86_select_alt_pic_regnum (void);
1405static int ix86_save_reg (unsigned int, int);
1406static void ix86_compute_frame_layout (struct ix86_frame *);
1407static int ix86_comp_type_attributes (tree, tree);
1408static int ix86_function_regparm (tree, tree);
1409const struct attribute_spec ix86_attribute_table[];
1410static bool ix86_function_ok_for_sibcall (tree, tree);
1411static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1412static int ix86_value_regno (enum machine_mode, tree, tree);
1413static bool contains_128bit_aligned_vector_p (tree);
1414static rtx ix86_struct_value_rtx (tree, int);
1415static bool ix86_ms_bitfield_layout_p (tree);
1416static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1417static int extended_reg_mentioned_1 (rtx *, void *);
1418static bool ix86_rtx_costs (rtx, int, int, int *);
1419static int min_insn_size (rtx);
1420static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1421static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1422static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1423 tree, bool);
1424static void ix86_init_builtins (void);
1425static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1426static const char *ix86_mangle_fundamental_type (tree);
1427static tree ix86_stack_protect_fail (void);
1428static rtx ix86_internal_arg_pointer (void);
1429static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1430
1431/* This function is only used on Solaris. */
1432static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1433 ATTRIBUTE_UNUSED;
1434
1435/* Register class used for passing a given 64-bit part of the argument.
1436   These represent classes as documented by the PS ABI, with the exception
1437   of the SSESF and SSEDF classes, which are basically the SSE class except
1438   that gcc uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1439
1440   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1441   whenever possible (the upper half does contain padding).
1442 */
1443enum x86_64_reg_class
1444 {
1445 X86_64_NO_CLASS,
1446 X86_64_INTEGER_CLASS,
1447 X86_64_INTEGERSI_CLASS,
1448 X86_64_SSE_CLASS,
1449 X86_64_SSESF_CLASS,
1450 X86_64_SSEDF_CLASS,
1451 X86_64_SSEUP_CLASS,
1452 X86_64_X87_CLASS,
1453 X86_64_X87UP_CLASS,
1454 X86_64_COMPLEX_X87_CLASS,
1455 X86_64_MEMORY_CLASS
1456 };
1457static const char * const x86_64_reg_class_name[] = {
1458 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1459 "sseup", "x87", "x87up", "cplx87", "no"
1460};
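/* For example, under this classification a type such as
     struct { double d; long l; }
   occupies two eightbytes: the first is classified X86_64_SSEDF_CLASS (the
   double travels in an SSE register, moved in DFmode) and the second
   X86_64_INTEGER_CLASS (the long travels in a general register).  The
   actual classification is computed by classify_argument later in this
   file; the struct here is only an illustration. */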
1461
1462#define MAX_CLASSES 4
1463
1464/* Table of constants used by fldpi, fldln2, etc.... */
1465static REAL_VALUE_TYPE ext_80387_constants_table [5];
1466static bool ext_80387_constants_init = 0;
1467static void init_ext_80387_constants (void);
1468static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1469static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1470static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1471static section *x86_64_elf_select_section (tree decl, int reloc,
1472 unsigned HOST_WIDE_INT align)
1473 ATTRIBUTE_UNUSED;
1474
1475/* Initialize the GCC target structure. */
1476#undef TARGET_ATTRIBUTE_TABLE
1477#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1478#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1479# undef TARGET_MERGE_DECL_ATTRIBUTES
1480# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1481#endif
1482
1483#undef TARGET_COMP_TYPE_ATTRIBUTES
1484#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1485
1486#undef TARGET_INIT_BUILTINS
1487#define TARGET_INIT_BUILTINS ix86_init_builtins
1488#undef TARGET_EXPAND_BUILTIN
1489#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1490
1491#undef TARGET_ASM_FUNCTION_EPILOGUE
1492#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1493
1494#undef TARGET_ENCODE_SECTION_INFO
1495#ifndef SUBTARGET_ENCODE_SECTION_INFO
1496#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1497#else
1498#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1499#endif
1500
1501#undef TARGET_ASM_OPEN_PAREN
1502#define TARGET_ASM_OPEN_PAREN ""
1503#undef TARGET_ASM_CLOSE_PAREN
1504#define TARGET_ASM_CLOSE_PAREN ""
1505
1506#undef TARGET_ASM_ALIGNED_HI_OP
1507#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1508#undef TARGET_ASM_ALIGNED_SI_OP
1509#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1510#ifdef ASM_QUAD
1511#undef TARGET_ASM_ALIGNED_DI_OP
1512#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1513#endif
1514
1515#undef TARGET_ASM_UNALIGNED_HI_OP
1516#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1517#undef TARGET_ASM_UNALIGNED_SI_OP
1518#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1519#undef TARGET_ASM_UNALIGNED_DI_OP
1520#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1521
1522#undef TARGET_SCHED_ADJUST_COST
1523#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1524#undef TARGET_SCHED_ISSUE_RATE
1525#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1526#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1527#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1528 ia32_multipass_dfa_lookahead
1529
1530#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1531#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1532
1533#ifdef HAVE_AS_TLS
1534#undef TARGET_HAVE_TLS
1535#define TARGET_HAVE_TLS true
1536#endif
1537#undef TARGET_CANNOT_FORCE_CONST_MEM
1538#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1539#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1540#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1541
1542#undef TARGET_DELEGITIMIZE_ADDRESS
1543#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1544
1545#undef TARGET_MS_BITFIELD_LAYOUT_P
1546#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1547
1548#if TARGET_MACHO
1549#undef TARGET_BINDS_LOCAL_P
1550#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1551#endif
1552
1553#undef TARGET_ASM_OUTPUT_MI_THUNK
1554#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1555#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1556#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1557
1558#undef TARGET_ASM_FILE_START
1559#define TARGET_ASM_FILE_START x86_file_start
1560
1561#undef TARGET_DEFAULT_TARGET_FLAGS
1562#define TARGET_DEFAULT_TARGET_FLAGS \
1563 (TARGET_DEFAULT \
1564 | TARGET_64BIT_DEFAULT \
1565 | TARGET_SUBTARGET_DEFAULT \
1566 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1567
1568#undef TARGET_HANDLE_OPTION
1569#define TARGET_HANDLE_OPTION ix86_handle_option
1570
1571#undef TARGET_RTX_COSTS
1572#define TARGET_RTX_COSTS ix86_rtx_costs
1573#undef TARGET_ADDRESS_COST
1574#define TARGET_ADDRESS_COST ix86_address_cost
1575
1576#undef TARGET_FIXED_CONDITION_CODE_REGS
1577#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1578#undef TARGET_CC_MODES_COMPATIBLE
1579#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1580
1581#undef TARGET_MACHINE_DEPENDENT_REORG
1582#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1583
1584#undef TARGET_BUILD_BUILTIN_VA_LIST
1585#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1586
1587#undef TARGET_MD_ASM_CLOBBERS
1588#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1589
1590#undef TARGET_PROMOTE_PROTOTYPES
1591#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1592#undef TARGET_STRUCT_VALUE_RTX
1593#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1594#undef TARGET_SETUP_INCOMING_VARARGS
1595#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1596#undef TARGET_MUST_PASS_IN_STACK
1597#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1598#undef TARGET_PASS_BY_REFERENCE
1599#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1600#undef TARGET_INTERNAL_ARG_POINTER
1601#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1602#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1603#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1604
1605#undef TARGET_GIMPLIFY_VA_ARG_EXPR
1606#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1607
1608#undef TARGET_SCALAR_MODE_SUPPORTED_P
1609#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1610
1611#undef TARGET_VECTOR_MODE_SUPPORTED_P
1612#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1613
1614#ifdef HAVE_AS_TLS
1615#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1616#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1617#endif
1618
1619#ifdef SUBTARGET_INSERT_ATTRIBUTES
1620#undef TARGET_INSERT_ATTRIBUTES
1621#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1622#endif
1623
1624#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1625#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1626
1627#undef TARGET_STACK_PROTECT_FAIL
1628#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1629
1630#undef TARGET_FUNCTION_VALUE
1631#define TARGET_FUNCTION_VALUE ix86_function_value
1632
1633struct gcc_target targetm = TARGET_INITIALIZER;
1634
1635
1636/* The svr4 ABI for the i386 says that records and unions are returned
1637 in memory. */
1638#ifndef DEFAULT_PCC_STRUCT_RETURN
1639#define DEFAULT_PCC_STRUCT_RETURN 1
1640#endif
1641
1642/* Implement TARGET_HANDLE_OPTION. */
1643
1644static bool
1645ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1646{
1647 switch (code)
1648 {
1649 case OPT_m3dnow:
1650 if (!value)
1651 {
1652 target_flags &= ~MASK_3DNOW_A;
1653 target_flags_explicit |= MASK_3DNOW_A;
1654 }
1655 return true;
1656
1657 case OPT_mmmx:
1658 if (!value)
1659 {
1660 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1661 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1662 }
1663 return true;
1664
1665 case OPT_msse:
1666 if (!value)
1667 {
1668 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1669 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1670 }
1671 return true;
1672
1673 case OPT_msse2:
1674 if (!value)
1675 {
1676 target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1677 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1678 }
1679 return true;
1680
1681 case OPT_msse3:
1682 if (!value)
1683 {
1684 target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
1685 target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A;
1686 }
1687 return true;
1688
1689 case OPT_maes:
1690 if (!value)
1691 {
1692 target_flags &= ~MASK_AES;
1693 target_flags_explicit |= MASK_AES;
1694 }
1695 return true;
1696
1697 default:
1698 return true;
1699 }
1700}
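/* For example, "-mno-sse" (OPT_msse with VALUE == 0) also clears the
   MASK_SSE2, MASK_SSE3, MASK_SSSE3 and MASK_SSE4A flags above, since those
   instruction sets depend on SSE.  The positive implications (e.g. -msse3
   turning on SSE2) are handled in override_options below, not here. */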
1701
1702/* Sometimes certain combinations of command options do not make
1703 sense on a particular target machine. You can define a macro
1704 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1705 defined, is executed once just after all the command options have
1706 been parsed.
1707
1708 Don't use this macro to turn on various extra optimizations for
1709 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1710
1711void
1712override_options (void)
1713{
1714 int i;
1715 int ix86_tune_defaulted = 0;
1716
1717 /* Comes from final.c -- no real reason to change it. */
1718#define MAX_CODE_ALIGN 16
1719
1720 static struct ptt
1721 {
1722 const struct processor_costs *cost; /* Processor costs */
1723 const int target_enable; /* Target flags to enable. */
1724 const int target_disable; /* Target flags to disable. */
1725 const int align_loop; /* Default alignments. */
1726 const int align_loop_max_skip;
1727 const int align_jump;
1728 const int align_jump_max_skip;
1729 const int align_func;
1730 }
1731 const processor_target_table[PROCESSOR_max] =
1732 {
1733 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1734 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1735 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1736 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1737 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1738 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1739 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1740 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1741 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1742 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1743 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1744 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1745 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1746 {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32}
1747 };
1748
1749 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1750 static struct pta
1751 {
1752 const char *const name; /* processor name or nickname. */
1753 const enum processor_type processor;
1754 const enum pta_flags
1755 {
1756 PTA_SSE = 1,
1757 PTA_SSE2 = 2,
1758 PTA_SSE3 = 4,
1759 PTA_MMX = 8,
1760 PTA_PREFETCH_SSE = 16,
1761 PTA_3DNOW = 32,
1762 PTA_3DNOW_A = 64,
1763 PTA_64BIT = 128,
1764 PTA_SSSE3 = 256,
1765 PTA_CX16 = 512,
1766 PTA_POPCNT = 1024,
1767 PTA_ABM = 2048,
1768 PTA_SSE4A = 4096
1769 } flags;
1770 }
1771 const processor_alias_table[] =
1772 {
1773 {"i386", PROCESSOR_I386, 0},
1774 {"i486", PROCESSOR_I486, 0},
1775 {"i586", PROCESSOR_PENTIUM, 0},
1776 {"pentium", PROCESSOR_PENTIUM, 0},
1777 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1778 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1779 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1780 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1781 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1782 {"i686", PROCESSOR_PENTIUMPRO, 0},
1783 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1784 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1785 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1786 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1787 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1788 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1789 | PTA_MMX | PTA_PREFETCH_SSE},
1790 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1791 | PTA_MMX | PTA_PREFETCH_SSE},
1792 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1793 | PTA_MMX | PTA_PREFETCH_SSE},
1794 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1795 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1796 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1797 | PTA_64BIT | PTA_MMX
1798 | PTA_PREFETCH_SSE | PTA_CX16},
1799 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1800 | PTA_3DNOW_A},
1801 {"k6", PROCESSOR_K6, PTA_MMX},
1802 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1803 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1804 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1805 | PTA_3DNOW_A},
1806 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1807 | PTA_3DNOW | PTA_3DNOW_A},
1808 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1809 | PTA_3DNOW_A | PTA_SSE},
1810 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1811 | PTA_3DNOW_A | PTA_SSE},
1812 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1813 | PTA_3DNOW_A | PTA_SSE},
1814 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1815 | PTA_SSE | PTA_SSE2 },
1816 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1817 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1818 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1819 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1820 | PTA_SSE3 },
1821 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1822 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1823 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1824 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1825 | PTA_SSE3 },
1826 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1827 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1828 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1829 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1830 | PTA_SSE3 },
1831 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1832 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1833 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1834 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1835 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1836 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1837 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1838 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1839 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1840 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1841 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1842 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1843 };
1844
1845 int const pta_size = ARRAY_SIZE (processor_alias_table);
1846
1847#ifdef SUBTARGET_OVERRIDE_OPTIONS
1848 SUBTARGET_OVERRIDE_OPTIONS;
1849#endif
1850
1851#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1852 SUBSUBTARGET_OVERRIDE_OPTIONS;
1853#endif
1854
1855  /* -fPIC is the default for 64-bit Mach-O (Darwin). */
1856 if (TARGET_MACHO && TARGET_64BIT)
1857 flag_pic = 2;
1858
1859 /* Set the default values for switches whose default depends on TARGET_64BIT
1860 in case they weren't overwritten by command line options. */
1861 if (TARGET_64BIT)
1862 {
1863 /* Mach-O doesn't support omitting the frame pointer for now. */
1864 if (flag_omit_frame_pointer == 2)
1865 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1866 if (flag_asynchronous_unwind_tables == 2)
1867 flag_asynchronous_unwind_tables = 1;
1868 if (flag_pcc_struct_return == 2)
1869 flag_pcc_struct_return = 0;
1870 }
1871 else
1872 {
1873 if (flag_omit_frame_pointer == 2)
1874 flag_omit_frame_pointer = 0;
1875 if (flag_asynchronous_unwind_tables == 2)
1876 flag_asynchronous_unwind_tables = 0;
1877 if (flag_pcc_struct_return == 2)
1878 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1879 }
1880
1881 /* Need to check -mtune=generic first. */
1882 if (ix86_tune_string)
1883 {
1884 if (!strcmp (ix86_tune_string, "generic")
1885 || !strcmp (ix86_tune_string, "i686")
1886 /* As special support for cross compilers we read -mtune=native
1887 as -mtune=generic. With native compilers we won't see the
1888 -mtune=native, as it was changed by the driver. */
1889 || !strcmp (ix86_tune_string, "native"))
1890 {
1891 if (TARGET_64BIT)
1892 ix86_tune_string = "generic64";
1893 else
1894 ix86_tune_string = "generic32";
1895 }
1896 else if (!strncmp (ix86_tune_string, "generic", 7))
1897 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1898 }
1899 else
1900 {
1901 if (ix86_arch_string)
1902 ix86_tune_string = ix86_arch_string;
1903 if (!ix86_tune_string)
1904 {
1905 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1906 ix86_tune_defaulted = 1;
1907 }
1908
1909 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1910 need to use a sensible tune option. */
1911 if (!strcmp (ix86_tune_string, "generic")
1912 || !strcmp (ix86_tune_string, "x86-64")
1913 || !strcmp (ix86_tune_string, "i686"))
1914 {
1915 if (TARGET_64BIT)
1916 ix86_tune_string = "generic64";
1917 else
1918 ix86_tune_string = "generic32";
1919 }
1920 }
1921 if (!strcmp (ix86_tune_string, "x86-64"))
1922 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1923 "-mtune=generic instead as appropriate.");
1924
1925 if (!ix86_arch_string)
1926 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1927 if (!strcmp (ix86_arch_string, "generic"))
1928 error ("generic CPU can be used only for -mtune= switch");
1929 if (!strncmp (ix86_arch_string, "generic", 7))
1930 error ("bad value (%s) for -march= switch", ix86_arch_string);
1931
1932 if (ix86_cmodel_string != 0)
1933 {
1934 if (!strcmp (ix86_cmodel_string, "small"))
1935 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1936 else if (!strcmp (ix86_cmodel_string, "medium"))
1937 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1938 else if (flag_pic)
1939 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1940 else if (!strcmp (ix86_cmodel_string, "32"))
1941 ix86_cmodel = CM_32;
1942 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1943 ix86_cmodel = CM_KERNEL;
1944 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1945 ix86_cmodel = CM_LARGE;
1946 else
1947 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1948 }
1949 else
1950 {
1951 ix86_cmodel = CM_32;
1952 if (TARGET_64BIT)
1953 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1954 }
1955 if (ix86_asm_string != 0)
1956 {
1957 if (! TARGET_MACHO
1958 && !strcmp (ix86_asm_string, "intel"))
1959 ix86_asm_dialect = ASM_INTEL;
1960 else if (!strcmp (ix86_asm_string, "att"))
1961 ix86_asm_dialect = ASM_ATT;
1962 else
1963 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1964 }
1965 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1966 error ("code model %qs not supported in the %s bit mode",
1967 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1968 if (ix86_cmodel == CM_LARGE)
1969 sorry ("code model %<large%> not supported yet");
1970 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1971 sorry ("%i-bit mode not compiled in",
1972 (target_flags & MASK_64BIT) ? 64 : 32);
1973
1974 for (i = 0; i < pta_size; i++)
1975 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1976 {
1977 ix86_arch = processor_alias_table[i].processor;
1978 /* Default cpu tuning to the architecture. */
1979 ix86_tune = ix86_arch;
1980 if (processor_alias_table[i].flags & PTA_MMX
1981 && !(target_flags_explicit & MASK_MMX))
1982 target_flags |= MASK_MMX;
1983 if (processor_alias_table[i].flags & PTA_3DNOW
1984 && !(target_flags_explicit & MASK_3DNOW))
1985 target_flags |= MASK_3DNOW;
1986 if (processor_alias_table[i].flags & PTA_3DNOW_A
1987 && !(target_flags_explicit & MASK_3DNOW_A))
1988 target_flags |= MASK_3DNOW_A;
1989 if (processor_alias_table[i].flags & PTA_SSE
1990 && !(target_flags_explicit & MASK_SSE))
1991 target_flags |= MASK_SSE;
1992 if (processor_alias_table[i].flags & PTA_SSE2
1993 && !(target_flags_explicit & MASK_SSE2))
1994 target_flags |= MASK_SSE2;
1995 if (processor_alias_table[i].flags & PTA_SSE3
1996 && !(target_flags_explicit & MASK_SSE3))
1997 target_flags |= MASK_SSE3;
1998 if (processor_alias_table[i].flags & PTA_SSSE3
1999 && !(target_flags_explicit & MASK_SSSE3))
2000 target_flags |= MASK_SSSE3;
2001 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2002 x86_prefetch_sse = true;
2003 if (processor_alias_table[i].flags & PTA_CX16)
2004 x86_cmpxchg16b = true;
2005 if (processor_alias_table[i].flags & PTA_POPCNT
2006 && !(target_flags_explicit & MASK_POPCNT))
2007 target_flags |= MASK_POPCNT;
2008 if (processor_alias_table[i].flags & PTA_ABM
2009 && !(target_flags_explicit & MASK_ABM))
2010 target_flags |= MASK_ABM;
2011 if (processor_alias_table[i].flags & PTA_SSE4A
2012 && !(target_flags_explicit & MASK_SSE4A))
2013 target_flags |= MASK_SSE4A;
2014 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2015 error ("CPU you selected does not support x86-64 "
2016 "instruction set");
2017 break;
2018 }
2019
2020 if (i == pta_size)
2021 error ("bad value (%s) for -march= switch", ix86_arch_string);
2022
2023 for (i = 0; i < pta_size; i++)
2024 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2025 {
2026 ix86_tune = processor_alias_table[i].processor;
2027 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2028 {
2029 if (ix86_tune_defaulted)
2030 {
2031 ix86_tune_string = "x86-64";
2032 for (i = 0; i < pta_size; i++)
2033 if (! strcmp (ix86_tune_string,
2034 processor_alias_table[i].name))
2035 break;
2036 ix86_tune = processor_alias_table[i].processor;
2037 }
2038 else
2039 error ("CPU you selected does not support x86-64 "
2040 "instruction set");
2041 }
2042 /* Intel CPUs have always interpreted SSE prefetch instructions as
2043 NOPs; so, we can enable SSE prefetch instructions even when
2044 -mtune (rather than -march) points us to a processor that has them.
2045 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2046 higher processors. */
2047 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2048 x86_prefetch_sse = true;
2049 break;
2050 }
2051 if (i == pta_size)
2052 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2053
2054 if (optimize_size)
2055 ix86_cost = &size_cost;
2056 else
2057 ix86_cost = processor_target_table[ix86_tune].cost;
2058 target_flags |= processor_target_table[ix86_tune].target_enable;
2059 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2060
2061 /* Arrange to set up i386_stack_locals for all functions. */
2062 init_machine_status = ix86_init_machine_status;
2063
2064 /* Validate -mregparm= value. */
2065 if (ix86_regparm_string)
2066 {
2067 i = atoi (ix86_regparm_string);
2068 if (i < 0 || i > REGPARM_MAX)
2069 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2070 else
2071 ix86_regparm = i;
2072 }
2073 else
2074 if (TARGET_64BIT)
2075 ix86_regparm = REGPARM_MAX;
2076
2077 /* If the user has provided any of the -malign-* options,
2078 warn and use that value only if -falign-* is not set.
2079 Remove this code in GCC 3.2 or later. */
2080 if (ix86_align_loops_string)
2081 {
2082 warning (0, "-malign-loops is obsolete, use -falign-loops");
2083 if (align_loops == 0)
2084 {
2085 i = atoi (ix86_align_loops_string);
2086 if (i < 0 || i > MAX_CODE_ALIGN)
2087 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2088 else
2089 align_loops = 1 << i;
2090 }
2091 }
2092
2093 if (ix86_align_jumps_string)
2094 {
2095 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2096 if (align_jumps == 0)
2097 {
2098 i = atoi (ix86_align_jumps_string);
2099 if (i < 0 || i > MAX_CODE_ALIGN)
2100	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2101 else
2102 align_jumps = 1 << i;
2103 }
2104 }
2105
2106 if (ix86_align_funcs_string)
2107 {
2108 warning (0, "-malign-functions is obsolete, use -falign-functions");
2109 if (align_functions == 0)
2110 {
2111 i = atoi (ix86_align_funcs_string);
2112 if (i < 0 || i > MAX_CODE_ALIGN)
2113	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2114 else
2115 align_functions = 1 << i;
2116 }
2117 }
2118
2119 /* Default align_* from the processor table. */
2120 if (align_loops == 0)
2121 {
2122 align_loops = processor_target_table[ix86_tune].align_loop;
2123 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2124 }
2125 if (align_jumps == 0)
2126 {
2127 align_jumps = processor_target_table[ix86_tune].align_jump;
2128 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2129 }
2130 if (align_functions == 0)
2131 {
2132 align_functions = processor_target_table[ix86_tune].align_func;
2133 }
2134
2135 /* Validate -mbranch-cost= value, or provide default. */
2136 ix86_branch_cost = ix86_cost->branch_cost;
2137 if (ix86_branch_cost_string)
2138 {
2139 i = atoi (ix86_branch_cost_string);
2140 if (i < 0 || i > 5)
2141 error ("-mbranch-cost=%d is not between 0 and 5", i);
2142 else
2143 ix86_branch_cost = i;
2144 }
2145 if (ix86_section_threshold_string)
2146 {
2147 i = atoi (ix86_section_threshold_string);
2148 if (i < 0)
2149 error ("-mlarge-data-threshold=%d is negative", i);
2150 else
2151 ix86_section_threshold = i;
2152 }
2153
2154 if (ix86_tls_dialect_string)
2155 {
2156 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2157 ix86_tls_dialect = TLS_DIALECT_GNU;
2158 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2159 ix86_tls_dialect = TLS_DIALECT_GNU2;
2160 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2161 ix86_tls_dialect = TLS_DIALECT_SUN;
2162 else
2163 error ("bad value (%s) for -mtls-dialect= switch",
2164 ix86_tls_dialect_string);
2165 }
2166
2167 /* Keep nonleaf frame pointers. */
2168 if (flag_omit_frame_pointer)
2169 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2170 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2171 flag_omit_frame_pointer = 1;
2172
2173 /* If we're doing fast math, we don't care about comparison order
2174 wrt NaNs. This lets us use a shorter comparison sequence. */
2175 if (flag_finite_math_only)
2176 target_flags &= ~MASK_IEEE_FP;
2177
2178 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2179 since the insns won't need emulation. */
2180 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2181 target_flags &= ~MASK_NO_FANCY_MATH_387;
2182
2183 /* Likewise, if the target doesn't have a 387, or we've specified
2184 software floating point, don't use 387 inline intrinsics. */
2185 if (!TARGET_80387)
2186 target_flags |= MASK_NO_FANCY_MATH_387;
2187
2188 /* Turn on SSE3 builtins for -mssse3. */
2189 if (TARGET_SSSE3)
2190 target_flags |= MASK_SSE3;
2191
2192 /* Turn on SSE3 builtins for -msse4a. */
2193 if (TARGET_SSE4A)
2194 target_flags |= MASK_SSE3;
2195
2196 /* Turn on SSE2 builtins for -msse3. */
2197 if (TARGET_SSE3)
2198 target_flags |= MASK_SSE2;
2199
2200 /* Turn on SSE2 builtins for -maes. */
2201 if (TARGET_AES)
2202 target_flags |= MASK_SSE2;
2203
2204 /* Turn on SSE builtins for -msse2. */
2205 if (TARGET_SSE2)
2206 target_flags |= MASK_SSE;
2207
2208 /* Turn on MMX builtins for -msse. */
2209 if (TARGET_SSE)
2210 {
2211 target_flags |= MASK_MMX & ~target_flags_explicit;
2212 x86_prefetch_sse = true;
2213 }
2214
2215 /* Turn on MMX builtins for 3Dnow. */
2216 if (TARGET_3DNOW)
2217 target_flags |= MASK_MMX;
2218
2219 /* Turn on POPCNT builtins for -mabm. */
2220 if (TARGET_ABM)
2221 target_flags |= MASK_POPCNT;
2222
2223 if (TARGET_64BIT)
2224 {
2225 if (TARGET_ALIGN_DOUBLE)
2226 error ("-malign-double makes no sense in the 64bit mode");
2227 if (TARGET_RTD)
2228 error ("-mrtd calling convention not supported in the 64bit mode");
2229
2230 /* Enable by default the SSE and MMX builtins. Do allow the user to
2231 explicitly disable any of these. In particular, disabling SSE and
2232 MMX for kernel code is extremely useful. */
2233 target_flags
2234 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2235 & ~target_flags_explicit);
2236 }
2237 else
2238 {
2239      /* The i386 ABI does not specify a red zone.  It still makes sense to use
2240	 it when the programmer takes care to keep the stack from being destroyed. */
2241 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2242 target_flags |= MASK_NO_RED_ZONE;
2243 }
2244
2245  /* Validate -mpreferred-stack-boundary= value, or provide default.
2246     The default of 128 bits is for Pentium III's SSE __m128.  We can't
2247     change it even for optimize_size; otherwise we couldn't mix object
2248     files compiled with -Os and -On. */
2249 ix86_preferred_stack_boundary = 128;
2250 if (ix86_preferred_stack_boundary_string)
2251 {
2252 i = atoi (ix86_preferred_stack_boundary_string);
2253 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2254 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2255 TARGET_64BIT ? 4 : 2);
2256 else
2257 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2258 }
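  /* For example, "-mpreferred-stack-boundary=4" requests (1 << 4) * 8
     == 128 bits (16 bytes), matching the default above, while
     "-mpreferred-stack-boundary=2" requests the minimal 4-byte alignment
     allowed for 32-bit code. */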
2259
2260 /* Accept -msseregparm only if at least SSE support is enabled. */
2261 if (TARGET_SSEREGPARM
2262 && ! TARGET_SSE)
2263 error ("-msseregparm used without SSE enabled");
2264
2265 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2266
2267 if (ix86_fpmath_string != 0)
2268 {
2269 if (! strcmp (ix86_fpmath_string, "387"))
2270 ix86_fpmath = FPMATH_387;
2271 else if (! strcmp (ix86_fpmath_string, "sse"))
2272 {
2273 if (!TARGET_SSE)
2274 {
2275 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2276 ix86_fpmath = FPMATH_387;
2277 }
2278 else
2279 ix86_fpmath = FPMATH_SSE;
2280 }
2281 else if (! strcmp (ix86_fpmath_string, "387,sse")
2282 || ! strcmp (ix86_fpmath_string, "sse,387"))
2283 {
2284 if (!TARGET_SSE)
2285 {
2286 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2287 ix86_fpmath = FPMATH_387;
2288 }
2289 else if (!TARGET_80387)
2290 {
2291 warning (0, "387 instruction set disabled, using SSE arithmetics");
2292 ix86_fpmath = FPMATH_SSE;
2293 }
2294 else
2295 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2296 }
2297 else
2298 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2299 }
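  /* For example, "-msse2 -mfpmath=sse" does scalar floating point math in
     the SSE unit only, while "-mfpmath=sse,387" (or "387,sse") allows both
     the SSE and x87 units to be used when both are enabled. */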
2300
2301 /* If the i387 is disabled, then do not return values in it. */
2302 if (!TARGET_80387)
2303 target_flags &= ~MASK_FLOAT_RETURNS;
2304
2305 if ((x86_accumulate_outgoing_args & TUNEMASK)
2306 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2307 && !optimize_size)
2308 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2309
2310 /* ??? Unwind info is not correct around the CFG unless either a frame
2311 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2312 unwind info generation to be aware of the CFG and propagating states
2313 around edges. */
2314 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2315 || flag_exceptions || flag_non_call_exceptions)
2316 && flag_omit_frame_pointer
2317 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2318 {
2319 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2320 warning (0, "unwind tables currently require either a frame pointer "
2321 "or -maccumulate-outgoing-args for correctness");
2322 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2323 }
2324
2325 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2326 {
2327 char *p;
2328 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2329 p = strchr (internal_label_prefix, 'X');
2330 internal_label_prefix_len = p - internal_label_prefix;
2331 *p = '\0';
2332 }
2333
2334  /* When no scheduling description is available, disable the scheduler pass
2335     so it won't slow down the compilation and make x87 code slower. */
2336 if (!TARGET_SCHEDULE)
2337 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2338}
2339
2340/* Switch to the appropriate section for output of DECL.
2341 DECL is either a `VAR_DECL' node or a constant of some sort.
2342 RELOC indicates whether forming the initial value of DECL requires
2343 link-time relocations. */
2344
2345static section *
2346x86_64_elf_select_section (tree decl, int reloc,
2347 unsigned HOST_WIDE_INT align)
2348{
2349 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2350 && ix86_in_large_data_p (decl))
2351 {
2352 const char *sname = NULL;
2353 unsigned int flags = SECTION_WRITE;
2354 switch (categorize_decl_for_section (decl, reloc))
2355 {
2356 case SECCAT_DATA:
2357 sname = ".ldata";
2358 break;
2359 case SECCAT_DATA_REL:
2360 sname = ".ldata.rel";
2361 break;
2362 case SECCAT_DATA_REL_LOCAL:
2363 sname = ".ldata.rel.local";
2364 break;
2365 case SECCAT_DATA_REL_RO:
2366 sname = ".ldata.rel.ro";
2367 break;
2368 case SECCAT_DATA_REL_RO_LOCAL:
2369 sname = ".ldata.rel.ro.local";
2370 break;
2371 case SECCAT_BSS:
2372 sname = ".lbss";
2373 flags |= SECTION_BSS;
2374 break;
2375 case SECCAT_RODATA:
2376 case SECCAT_RODATA_MERGE_STR:
2377 case SECCAT_RODATA_MERGE_STR_INIT:
2378 case SECCAT_RODATA_MERGE_CONST:
2379 sname = ".lrodata";
2380 flags = 0;
2381 break;
2382 case SECCAT_SRODATA:
2383 case SECCAT_SDATA:
2384 case SECCAT_SBSS:
2385 gcc_unreachable ();
2386 case SECCAT_TEXT:
2387 case SECCAT_TDATA:
2388 case SECCAT_TBSS:
2389	  /* We don't split these for the medium model.  Place them into
2390	     default sections and hope for the best. */
2391 break;
2392 }
2393 if (sname)
2394 {
2395 /* We might get called with string constants, but get_named_section
2396 doesn't like them as they are not DECLs. Also, we need to set
2397 flags in that case. */
2398 if (!DECL_P (decl))
2399 return get_section (sname, flags, NULL);
2400 return get_named_section (decl, sname, reloc);
2401 }
2402 }
2403 return default_elf_select_section (decl, reloc, align);
2404}
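/* For example, with -mcmodel=medium an initialized variable large enough to
   satisfy ix86_in_large_data_p (typically anything bigger than
   -mlarge-data-threshold) goes into .ldata rather than .data, and a
   zero-initialized one into .lbss rather than .bss. */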
2405
2406/* Build up a unique section name, expressed as a
2407 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2408   RELOC indicates whether the initial value of DECL requires
2409 link-time relocations. */
2410
2411static void
2412x86_64_elf_unique_section (tree decl, int reloc)
2413{
2414 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2415 && ix86_in_large_data_p (decl))
2416 {
2417 const char *prefix = NULL;
2418 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2419 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2420
2421 switch (categorize_decl_for_section (decl, reloc))
2422 {
2423 case SECCAT_DATA:
2424 case SECCAT_DATA_REL:
2425 case SECCAT_DATA_REL_LOCAL:
2426 case SECCAT_DATA_REL_RO:
2427 case SECCAT_DATA_REL_RO_LOCAL:
2428 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2429 break;
2430 case SECCAT_BSS:
2431 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2432 break;
2433 case SECCAT_RODATA:
2434 case SECCAT_RODATA_MERGE_STR:
2435 case SECCAT_RODATA_MERGE_STR_INIT:
2436 case SECCAT_RODATA_MERGE_CONST:
2437 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2438 break;
2439 case SECCAT_SRODATA:
2440 case SECCAT_SDATA:
2441 case SECCAT_SBSS:
2442 gcc_unreachable ();
2443 case SECCAT_TEXT:
2444 case SECCAT_TDATA:
2445 case SECCAT_TBSS:
2446	  /* We don't split these for the medium model.  Place them into
2447	     default sections and hope for the best. */
2448 break;
2449 }
2450 if (prefix)
2451 {
2452 const char *name;
2453 size_t nlen, plen;
2454 char *string;
2455 plen = strlen (prefix);
2456
2457 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2458 name = targetm.strip_name_encoding (name);
2459 nlen = strlen (name);
2460
2461 string = alloca (nlen + plen + 1);
2462 memcpy (string, prefix, plen);
2463 memcpy (string + plen, name, nlen + 1);
2464
2465 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2466 return;
2467 }
2468 }
2469 default_unique_section (decl, reloc);
2470}
2471
2472#ifdef COMMON_ASM_OP
2473/* This says how to output assembler code to declare an
2474 uninitialized external linkage data object.
2475
2476   For x86-64 with the medium code model we need to use the .largecomm
2477   directive for large objects. */
2478void
2479x86_elf_aligned_common (FILE *file,
2480 const char *name, unsigned HOST_WIDE_INT size,
2481 int align)
2482{
2483 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2484 && size > (unsigned int)ix86_section_threshold)
2485 fprintf (file, ".largecomm\t");
2486 else
2487 fprintf (file, "%s", COMMON_ASM_OP);
2488 assemble_name (file, name);
2489 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2490 size, align / BITS_PER_UNIT);
2491}
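/* For example, with -mcmodel=medium and the default 64 KB
   -mlarge-data-threshold, a hypothetical 512 KB common symbol "big_buf"
   with 32-byte alignment would be announced as
	.largecomm	big_buf,524288,32
   while smaller objects still go through COMMON_ASM_OP (normally ".comm"). */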
2492
2493/* Utility function for targets to use in implementing
2494 ASM_OUTPUT_ALIGNED_BSS. */
2495
2496void
2497x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2498 const char *name, unsigned HOST_WIDE_INT size,
2499 int align)
2500{
2501 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2502 && size > (unsigned int)ix86_section_threshold)
2503 switch_to_section (get_named_section (decl, ".lbss", 0));
2504 else
2505 switch_to_section (bss_section);
2506 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2507#ifdef ASM_DECLARE_OBJECT_NAME
2508 last_assemble_variable_decl = decl;
2509 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2510#else
2511 /* Standard thing is just output label for the object. */
2512 ASM_OUTPUT_LABEL (file, name);
2513#endif /* ASM_DECLARE_OBJECT_NAME */
2514 ASM_OUTPUT_SKIP (file, size ? size : 1);
2515}
2516#endif
2517
2518void
2519optimization_options (int level, int size ATTRIBUTE_UNUSED)
2520{
2521 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2522 make the problem with not enough registers even worse. */
2523#ifdef INSN_SCHEDULING
2524 if (level > 1)
2525 flag_schedule_insns = 0;
2526#endif
2527
2528 if (TARGET_MACHO)
2529 /* The Darwin libraries never set errno, so we might as well
2530 avoid calling them when that's the only reason we would. */
2531 flag_errno_math = 0;
2532
2533  /* The default values of these switches depend on TARGET_64BIT,
2534     which is not known at this moment.  Mark these values with 2 and
2535     let the user override them.  If no command line option
2536     specifies them, we will set the defaults in override_options. */
2537 if (optimize >= 1)
2538 flag_omit_frame_pointer = 2;
2539 flag_pcc_struct_return = 2;
2540 flag_asynchronous_unwind_tables = 2;
2541#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2542 SUBTARGET_OPTIMIZATION_OPTIONS;
2543#endif
2544}
2545
2546/* Table of valid machine attributes. */
2547const struct attribute_spec ix86_attribute_table[] =
2548{
2549 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2550 /* Stdcall attribute says callee is responsible for popping arguments
2551 if they are not variable. */
2552 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2553 /* Fastcall attribute says callee is responsible for popping arguments
2554 if they are not variable. */
2555 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2556 /* Cdecl attribute says the callee is a normal C declaration */
2557 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2558 /* Regparm attribute specifies how many integer arguments are to be
2559 passed in registers. */
2560 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2561 /* Sseregparm attribute says we are using x86_64 calling conventions
2562 for FP arguments. */
2563 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2564 /* force_align_arg_pointer says this function realigns the stack at entry. */
2565 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2566 false, true, true, ix86_handle_cconv_attribute },
2567#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2568 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2569 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2570 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2571#endif
2572 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2573 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2574#ifdef SUBTARGET_ATTRIBUTE_TABLE
2575 SUBTARGET_ATTRIBUTE_TABLE,
2576#endif
2577 { NULL, 0, 0, false, false, false, NULL }
2578};
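/* Illustrative uses of the calling convention attributes above:

     int __attribute__((stdcall))  f (int a, int b);    callee pops 8 bytes
     int __attribute__((fastcall)) g (int a, int b);    a in %ecx, b in %edx
     int __attribute__((regparm (3))) h (int a, int b, int c);
						  a, b, c in %eax, %edx, %ecx  */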
2579
2580/* Decide whether we can make a sibling call to a function. DECL is the
2581 declaration of the function being targeted by the call and EXP is the
2582 CALL_EXPR representing the call. */
2583
2584static bool
2585ix86_function_ok_for_sibcall (tree decl, tree exp)
2586{
2587 tree func;
2588 rtx a, b;
2589
2590 /* If we are generating position-independent code, we cannot sibcall
2591 optimize any indirect call, or a direct call to a global function,
2592 as the PLT requires %ebx be live. */
2593 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2594 return false;
2595
2596 if (decl)
2597 func = decl;
2598 else
2599 {
2600 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2601 if (POINTER_TYPE_P (func))
2602 func = TREE_TYPE (func);
2603 }
2604
2605  /* Check that the return value locations are the same.  For example,
2606     if we are returning floats on the 80387 register stack, we cannot
2607 make a sibcall from a function that doesn't return a float to a
2608 function that does or, conversely, from a function that does return
2609 a float to a function that doesn't; the necessary stack adjustment
2610 would not be executed. This is also the place we notice
2611 differences in the return value ABI. Note that it is ok for one
2612 of the functions to have void return type as long as the return
2613 value of the other is passed in a register. */
2614 a = ix86_function_value (TREE_TYPE (exp), func, false);
2615 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2616 cfun->decl, false);
2617 if (STACK_REG_P (a) || STACK_REG_P (b))
2618 {
2619 if (!rtx_equal_p (a, b))
2620 return false;
2621 }
2622 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2623 ;
2624 else if (!rtx_equal_p (a, b))
2625 return false;
2626
2627 /* If this call is indirect, we'll need to be able to use a call-clobbered
2628 register for the address of the target function. Make sure that all
2629 such registers are not used for passing parameters. */
2630 if (!decl && !TARGET_64BIT)
2631 {
2632 tree type;
2633
2634 /* We're looking at the CALL_EXPR, we need the type of the function. */
2635 type = TREE_OPERAND (exp, 0); /* pointer expression */
2636 type = TREE_TYPE (type); /* pointer type */
2637 type = TREE_TYPE (type); /* function type */
2638
2639 if (ix86_function_regparm (type, NULL) >= 3)
2640 {
2641 /* ??? Need to count the actual number of registers to be used,
2642 not the possible number of registers. Fix later. */
2643 return false;
2644 }
2645 }
2646
2647#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2648 /* Dllimport'd functions are also called indirectly. */
2649 if (decl && DECL_DLLIMPORT_P (decl)
2650 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2651 return false;
2652#endif
2653
2654  /* If we force-aligned the stack, then sibcalling would unalign the
2655 stack, which may break the called function. */
2656 if (cfun->machine->force_align_arg_pointer)
2657 return false;
2658
2659 /* Otherwise okay. That also includes certain types of indirect calls. */
2660 return true;
2661}
2662
2663/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2664 calling convention attributes;
2665 arguments as in struct attribute_spec.handler. */
2666
2667static tree
2668ix86_handle_cconv_attribute (tree *node, tree name,
2669 tree args,
2670 int flags ATTRIBUTE_UNUSED,
2671 bool *no_add_attrs)
2672{
2673 if (TREE_CODE (*node) != FUNCTION_TYPE
2674 && TREE_CODE (*node) != METHOD_TYPE
2675 && TREE_CODE (*node) != FIELD_DECL
2676 && TREE_CODE (*node) != TYPE_DECL)
2677 {
2678 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2679 IDENTIFIER_POINTER (name));
2680 *no_add_attrs = true;
2681 return NULL_TREE;
2682 }
2683
2684 /* Can combine regparm with all attributes but fastcall. */
2685 if (is_attribute_p ("regparm", name))
2686 {
2687 tree cst;
2688
2689 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2690 {
2691 error ("fastcall and regparm attributes are not compatible");
2692 }
2693
2694 cst = TREE_VALUE (args);
2695 if (TREE_CODE (cst) != INTEGER_CST)
2696 {
2697 warning (OPT_Wattributes,
2698 "%qs attribute requires an integer constant argument",
2699 IDENTIFIER_POINTER (name));
2700 *no_add_attrs = true;
2701 }
2702 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2703 {
2704 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2705 IDENTIFIER_POINTER (name), REGPARM_MAX);
2706 *no_add_attrs = true;
2707 }
2708
2709 if (!TARGET_64BIT
2710 && lookup_attribute (ix86_force_align_arg_pointer_string,
2711 TYPE_ATTRIBUTES (*node))
2712 && compare_tree_int (cst, REGPARM_MAX-1))
2713 {
2714 error ("%s functions limited to %d register parameters",
2715 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2716 }
2717
2718 return NULL_TREE;
2719 }
2720
2721 if (TARGET_64BIT)
2722 {
2723 warning (OPT_Wattributes, "%qs attribute ignored",
2724 IDENTIFIER_POINTER (name));
2725 *no_add_attrs = true;
2726 return NULL_TREE;
2727 }
2728
2729 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2730 if (is_attribute_p ("fastcall", name))
2731 {
2732 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2733 {
2734 error ("fastcall and cdecl attributes are not compatible");
2735 }
2736 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2737 {
2738 error ("fastcall and stdcall attributes are not compatible");
2739 }
2740 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2741 {
2742 error ("fastcall and regparm attributes are not compatible");
2743 }
2744 }
2745
2746 /* Can combine stdcall with fastcall (redundant), regparm and
2747 sseregparm. */
2748 else if (is_attribute_p ("stdcall", name))
2749 {
2750 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2751 {
2752 error ("stdcall and cdecl attributes are not compatible");
2753 }
2754 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2755 {
2756 error ("stdcall and fastcall attributes are not compatible");
2757 }
2758 }
2759
2760 /* Can combine cdecl with regparm and sseregparm. */
2761 else if (is_attribute_p ("cdecl", name))
2762 {
2763 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2764 {
2765 error ("stdcall and cdecl attributes are not compatible");
2766 }
2767 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2768 {
2769 error ("fastcall and cdecl attributes are not compatible");
2770 }
2771 }
2772
2773 /* Can combine sseregparm with all attributes. */
2774
2775 return NULL_TREE;
2776}
2777
2778/* Return 0 if the attributes for two types are incompatible, 1 if they
2779 are compatible, and 2 if they are nearly compatible (which causes a
2780 warning to be generated). */
2781
2782static int
2783ix86_comp_type_attributes (tree type1, tree type2)
2784{
2785 /* Check for mismatch of non-default calling convention. */
2786 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2787
2788 if (TREE_CODE (type1) != FUNCTION_TYPE)
2789 return 1;
2790
2791 /* Check for mismatched fastcall/regparm types. */
2792 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2793 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2794 || (ix86_function_regparm (type1, NULL)
2795 != ix86_function_regparm (type2, NULL)))
2796 return 0;
2797
2798 /* Check for mismatched sseregparm types. */
2799 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2800 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2801 return 0;
2802
2803 /* Check for mismatched return types (cdecl vs stdcall). */
2804 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2805 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2806 return 0;
2807
2808 return 1;
2809}
2810
2811/* Return the regparm value for a function with the indicated TYPE and DECL.
2812 DECL may be NULL when calling function indirectly
2813 or considering a libcall. */
2814
2815static int
2816ix86_function_regparm (tree type, tree decl)
2817{
2818 tree attr;
2819 int regparm = ix86_regparm;
2820 bool user_convention = false;
2821
2822 if (!TARGET_64BIT)
2823 {
2824 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2825 if (attr)
2826 {
2827 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2828 user_convention = true;
2829 }
2830
2831 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2832 {
2833 regparm = 2;
2834 user_convention = true;
2835 }
2836
2837 /* Use register calling convention for local functions when possible. */
2838 if (!TARGET_64BIT && !user_convention && decl
2839 && flag_unit_at_a_time && !profile_flag)
2840 {
2841 struct cgraph_local_info *i = cgraph_local_info (decl);
2842 if (i && i->local)
2843 {
2844 int local_regparm, globals = 0, regno;
2845
2846 /* Make sure no regparm register is taken by a global register
2847 variable. */
2848 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2849 if (global_regs[local_regparm])
2850 break;
2851	    /* We can't use regparm(3) for nested functions as these use
2852	       the static chain pointer in the third argument. */
2853 if (local_regparm == 3
2854 && decl_function_context (decl)
2855 && !DECL_NO_STATIC_CHAIN (decl))
2856 local_regparm = 2;
2857	    /* If the function realigns its stack pointer, the
2858 prologue will clobber %ecx. If we've already
2859 generated code for the callee, the callee
2860 DECL_STRUCT_FUNCTION is gone, so we fall back to
2861 scanning the attributes for the self-realigning
2862 property. */
2863 if ((DECL_STRUCT_FUNCTION (decl)
2864 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2865 || (!DECL_STRUCT_FUNCTION (decl)
2866 && lookup_attribute (ix86_force_align_arg_pointer_string,
2867 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2868 local_regparm = 2;
2869	    /* Each global register variable increases register pressure,
2870	       so the more global register variables there are, the less the
2871	       regparm optimization can be used, unless the user explicitly requests it. */
2872 for (regno = 0; regno < 6; regno++)
2873 if (global_regs[regno])
2874 globals++;
2875 local_regparm
2876 = globals < local_regparm ? local_regparm - globals : 0;
2877
2878 if (local_regparm > regparm)
2879 regparm = local_regparm;
2880 }
2881 }
2882 }
2883 return regparm;
2884}
2885
2886/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2887 DFmode (2) arguments in SSE registers for a function with the
2888 indicated TYPE and DECL. DECL may be NULL when calling function
2889 indirectly or considering a libcall. Otherwise return 0. */
2890
2891static int
2892ix86_function_sseregparm (tree type, tree decl)
2893{
2894 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2895 by the sseregparm attribute. */
2896 if (TARGET_SSEREGPARM
2897 || (type
2898 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2899 {
2900 if (!TARGET_SSE)
2901 {
2902 if (decl)
2903 error ("Calling %qD with attribute sseregparm without "
2904 "SSE/SSE2 enabled", decl);
2905 else
2906 error ("Calling %qT with attribute sseregparm without "
2907 "SSE/SSE2 enabled", type);
2908 return 0;
2909 }
2910
2911 return 2;
2912 }
2913
2914 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2915 (and DFmode for SSE2) arguments in SSE registers,
2916 even for 32-bit targets. */
2917 if (!TARGET_64BIT && decl
2918 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2919 {
2920 struct cgraph_local_info *i = cgraph_local_info (decl);
2921 if (i && i->local)
2922 return TARGET_SSE2 ? 2 : 1;
2923 }
2924
2925 return 0;
2926}
2927
2928/* Return true if EAX is live at the start of the function. Used by
2929 ix86_expand_prologue to determine if we need special help before
2930 calling allocate_stack_worker. */
2931
2932static bool
2933ix86_eax_live_at_start_p (void)
2934{
2935 /* Cheat. Don't bother working forward from ix86_function_regparm
2936 to the function type to whether an actual argument is located in
2937 eax. Instead just look at cfg info, which is still close enough
2938 to correct at this point. This gives false positives for broken
2939 functions that might use uninitialized data that happens to be
2940 allocated in eax, but who cares? */
2941 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2942}
2943
2944/* Value is the number of bytes of arguments automatically
2945 popped when returning from a subroutine call.
2946 FUNDECL is the declaration node of the function (as a tree),
2947 FUNTYPE is the data type of the function (as a tree),
2948 or for a library call it is an identifier node for the subroutine name.
2949 SIZE is the number of bytes of arguments passed on the stack.
2950
2951 On the 80386, the RTD insn may be used to pop them if the number
2952 of args is fixed, but if the number is variable then the caller
2953 must pop them all. RTD can't be used for library calls now
2954 because the library is compiled with the Unix compiler.
2955 Use of RTD is a selectable option, since it is incompatible with
2956 standard Unix calling sequences. If the option is not selected,
2957 the caller must always pop the args.
2958
2959 The attribute stdcall is equivalent to RTD on a per module basis. */
2960
2961int
2962ix86_return_pops_args (tree fundecl, tree funtype, int size)
2963{
2964 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2965
2966 /* Cdecl functions override -mrtd, and never pop the stack. */
2967 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2968
2969 /* Stdcall and fastcall functions will pop the stack if not
2970 variable args. */
2971 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2972 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2973 rtd = 1;
2974
2975 if (rtd
2976 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2977 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2978 == void_type_node)))
2979 return size;
2980 }
2981
2982 /* Lose any fake structure return argument if it is passed on the stack. */
2983 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2984 && !TARGET_64BIT
2985 && !KEEP_AGGREGATE_RETURN_POINTER)
2986 {
2987 int nregs = ix86_function_regparm (funtype, fundecl);
2988
2989 if (!nregs)
2990 return GET_MODE_SIZE (Pmode);
2991 }
2992
2993 return 0;
2994}
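
/* Illustrative example, assuming GNU C attribute syntax: for

     int __attribute__ ((stdcall)) f (int a, int b);

   ix86_return_pops_args returns 8, so the callee epilogue uses "ret $8"
   and pops its own arguments, while a cdecl or variadic function gets 0
   and leaves the cleanup to the caller.  */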
2995
2996/* Argument support functions. */
2997
2998/* Return true when a register may be used to pass function parameters.  */
2999bool
3000ix86_function_arg_regno_p (int regno)
3001{
3002 int i;
3003 if (!TARGET_64BIT)
3004 {
3005 if (TARGET_MACHO)
3006 return (regno < REGPARM_MAX
3007 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3008 else
3009 return (regno < REGPARM_MAX
3010 || (TARGET_MMX && MMX_REGNO_P (regno)
3011 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3012 || (TARGET_SSE && SSE_REGNO_P (regno)
3013 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3014 }
3015
3016 if (TARGET_MACHO)
3017 {
3018 if (SSE_REGNO_P (regno) && TARGET_SSE)
3019 return true;
3020 }
3021 else
3022 {
3023 if (TARGET_SSE && SSE_REGNO_P (regno)
3024 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3025 return true;
3026 }
3027  /* RAX is used as a hidden argument to va_arg functions.  */
3028 if (!regno)
3029 return true;
3030 for (i = 0; i < REGPARM_MAX; i++)
3031 if (regno == x86_64_int_parameter_registers[i])
3032 return true;
3033 return false;
3034}
3035
3036/* Return true if we do not know how to pass TYPE solely in registers.  */
3037
3038static bool
3039ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3040{
3041 if (must_pass_in_stack_var_size_or_pad (mode, type))
3042 return true;
3043
3044 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3045 The layout_type routine is crafty and tries to trick us into passing
3046 currently unsupported vector types on the stack by using TImode. */
3047 return (!TARGET_64BIT && mode == TImode
3048 && type && TREE_CODE (type) != VECTOR_TYPE);
3049}
3050
3051/* Initialize a variable CUM of type CUMULATIVE_ARGS
3052 for a call to a function whose data type is FNTYPE.
3053 For a library call, FNTYPE is 0. */
3054
3055void
3056init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3057 tree fntype, /* tree ptr for function decl */
3058 rtx libname, /* SYMBOL_REF of library name or 0 */
3059 tree fndecl)
3060{
3061 static CUMULATIVE_ARGS zero_cum;
3062 tree param, next_param;
3063
3064 if (TARGET_DEBUG_ARG)
3065 {
3066 fprintf (stderr, "\ninit_cumulative_args (");
3067 if (fntype)
3068 fprintf (stderr, "fntype code = %s, ret code = %s",
3069 tree_code_name[(int) TREE_CODE (fntype)],
3070 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3071 else
3072 fprintf (stderr, "no fntype");
3073
3074 if (libname)
3075 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3076 }
3077
3078 *cum = zero_cum;
3079
3080 /* Set up the number of registers to use for passing arguments. */
3081 cum->nregs = ix86_regparm;
3082 if (TARGET_SSE)
3083 cum->sse_nregs = SSE_REGPARM_MAX;
3084 if (TARGET_MMX)
3085 cum->mmx_nregs = MMX_REGPARM_MAX;
3086 cum->warn_sse = true;
3087 cum->warn_mmx = true;
3088 cum->maybe_vaarg = false;
3089
3090 /* Use ecx and edx registers if function has fastcall attribute,
3091 else look for regparm information. */
3092 if (fntype && !TARGET_64BIT)
3093 {
3094 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3095 {
3096 cum->nregs = 2;
3097 cum->fastcall = 1;
3098 }
3099 else
3100 cum->nregs = ix86_function_regparm (fntype, fndecl);
3101 }
3102
3103 /* Set up the number of SSE registers used for passing SFmode
3104 and DFmode arguments. Warn for mismatching ABI. */
3105 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3106
3107 /* Determine if this function has variable arguments. This is
3108     indicated by the last argument being 'void_type_node' if there
3109 are no variable arguments. If there are variable arguments, then
3110 we won't pass anything in registers in 32-bit mode. */
3111
3112 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3113 {
3114 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3115 param != 0; param = next_param)
3116 {
3117 next_param = TREE_CHAIN (param);
3118 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3119 {
3120 if (!TARGET_64BIT)
3121 {
3122 cum->nregs = 0;
3123 cum->sse_nregs = 0;
3124 cum->mmx_nregs = 0;
3125 cum->warn_sse = 0;
3126 cum->warn_mmx = 0;
3127 cum->fastcall = 0;
3128 cum->float_in_sse = 0;
3129 }
3130 cum->maybe_vaarg = true;
3131 }
3132 }
3133 }
3134 if ((!fntype && !libname)
3135 || (fntype && !TYPE_ARG_TYPES (fntype)))
3136 cum->maybe_vaarg = true;
3137
3138 if (TARGET_DEBUG_ARG)
3139 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3140
3141 return;
3142}
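
/* Illustrative example, assuming GNU C attribute syntax: for

     int __attribute__ ((fastcall)) f (int a, int b);

   the code above sets nregs to 2, and function_arg below then places A in
   ECX and B in EDX; any further or variadic arguments stay on the stack.  */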
3143
3144/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3145 But in the case of vector types, it is some vector mode.
3146
3147 When we have only some of our vector isa extensions enabled, then there
3148 are some modes for which vector_mode_supported_p is false. For these
3149 modes, the generic vector support in gcc will choose some non-vector mode
3150 in order to implement the type. By computing the natural mode, we'll
3151 select the proper ABI location for the operand and not depend on whatever
3152 the middle-end decides to do with these vector types. */
3153
3154static enum machine_mode
3155type_natural_mode (tree type)
3156{
3157 enum machine_mode mode = TYPE_MODE (type);
3158
3159 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3160 {
3161 HOST_WIDE_INT size = int_size_in_bytes (type);
3162 if ((size == 8 || size == 16)
3163 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3164 && TYPE_VECTOR_SUBPARTS (type) > 1)
3165 {
3166 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3167
3168 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3169 mode = MIN_MODE_VECTOR_FLOAT;
3170 else
3171 mode = MIN_MODE_VECTOR_INT;
3172
3173 /* Get the mode which has this inner mode and number of units. */
3174 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3175 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3176 && GET_MODE_INNER (mode) == innermode)
3177 return mode;
3178
3179 gcc_unreachable ();
3180 }
3181 }
3182
3183 return mode;
3184}
3185
3186/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3187 this may not agree with the mode that the type system has chosen for the
3188 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3189 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3190
3191static rtx
3192gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3193 unsigned int regno)
3194{
3195 rtx tmp;
3196
3197 if (orig_mode != BLKmode)
3198 tmp = gen_rtx_REG (orig_mode, regno);
3199 else
3200 {
3201 tmp = gen_rtx_REG (mode, regno);
3202 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3203 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3204 }
3205
3206 return tmp;
3207}
3208
3209/* x86-64 register passing implementation.  See the x86-64 ABI for details.
3210   The goal of this code is to classify each 8 bytes of an incoming argument
3211   by register class and assign registers accordingly.  */
3212
3213/* Return the union class of CLASS1 and CLASS2.
3214 See the x86-64 PS ABI for details. */
3215
3216static enum x86_64_reg_class
3217merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3218{
3219 /* Rule #1: If both classes are equal, this is the resulting class. */
3220 if (class1 == class2)
3221 return class1;
3222
3223 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3224 the other class. */
3225 if (class1 == X86_64_NO_CLASS)
3226 return class2;
3227 if (class2 == X86_64_NO_CLASS)
3228 return class1;
3229
3230 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3231 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3232 return X86_64_MEMORY_CLASS;
3233
3234 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3235 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3236 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3237 return X86_64_INTEGERSI_CLASS;
3238 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3239 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3240 return X86_64_INTEGER_CLASS;
3241
3242 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3243 MEMORY is used. */
3244 if (class1 == X86_64_X87_CLASS
3245 || class1 == X86_64_X87UP_CLASS
3246 || class1 == X86_64_COMPLEX_X87_CLASS
3247 || class2 == X86_64_X87_CLASS
3248 || class2 == X86_64_X87UP_CLASS
3249 || class2 == X86_64_COMPLEX_X87_CLASS)
3250 return X86_64_MEMORY_CLASS;
3251
3252 /* Rule #6: Otherwise class SSE is used. */
3253 return X86_64_SSE_CLASS;
3254}
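
/* Illustrative examples of the rules above: merging INTEGER with SSE gives
   INTEGER (rule #4), merging SSESF with SSEDF gives SSE (rule #6), and any
   X87, X87UP or COMPLEX_X87 operand forces MEMORY (rule #5), which later
   pushes the whole argument onto the stack.  */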
3255
3256/* Classify the argument of type TYPE and mode MODE.
3257 CLASSES will be filled by the register class used to pass each word
3258 of the operand. The number of words is returned. In case the parameter
3259 should be passed in memory, 0 is returned. As a special case for zero
3260 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3261
3262   BIT_OFFSET is used internally for handling records and specifies the
3263   offset in bits modulo 256 to avoid overflow cases.
3264
3265 See the x86-64 PS ABI for details.
3266*/
3267
3268static int
3269classify_argument (enum machine_mode mode, tree type,
3270 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3271{
3272 HOST_WIDE_INT bytes =
3273 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3274 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3275
3276 /* Variable sized entities are always passed/returned in memory. */
3277 if (bytes < 0)
3278 return 0;
3279
3280 if (mode != VOIDmode
3281 && targetm.calls.must_pass_in_stack (mode, type))
3282 return 0;
3283
3284 if (type && AGGREGATE_TYPE_P (type))
3285 {
3286 int i;
3287 tree field;
3288 enum x86_64_reg_class subclasses[MAX_CLASSES];
3289
3290 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3291 if (bytes > 16)
3292 return 0;
3293
3294 for (i = 0; i < words; i++)
3295 classes[i] = X86_64_NO_CLASS;
3296
3297      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
3298	 signal the memory class, so handle it as a special case.  */
3299 if (!words)
3300 {
3301 classes[0] = X86_64_NO_CLASS;
3302 return 1;
3303 }
3304
3305 /* Classify each field of record and merge classes. */
3306 switch (TREE_CODE (type))
3307 {
3308 case RECORD_TYPE:
3309	  /* For classes, first merge in the fields of the base classes.  */
3310 if (TYPE_BINFO (type))
3311 {
3312 tree binfo, base_binfo;
3313 int basenum;
3314
3315 for (binfo = TYPE_BINFO (type), basenum = 0;
3316 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
3317 {
3318 int num;
3319 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
3320 tree type = BINFO_TYPE (base_binfo);
3321
3322 num = classify_argument (TYPE_MODE (type),
3323 type, subclasses,
3324 (offset + bit_offset) % 256);
3325 if (!num)
3326 return 0;
3327 for (i = 0; i < num; i++)
3328 {
3329 int pos = (offset + (bit_offset % 64)) / 8 / 8;
3330 classes[i + pos] =
3331 merge_classes (subclasses[i], classes[i + pos]);
3332 }
3333 }
3334 }
3335 /* And now merge the fields of structure. */
3336 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3337 {
3338 if (TREE_CODE (field) == FIELD_DECL)
3339 {
3340 int num;
3341
3342 if (TREE_TYPE (field) == error_mark_node)
3343 continue;
3344
3345 /* Bitfields are always classified as integer. Handle them
3346 early, since later code would consider them to be
3347 misaligned integers. */
3348 if (DECL_BIT_FIELD (field))
3349 {
3350 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3351 i < ((int_bit_position (field) + (bit_offset % 64))
3352 + tree_low_cst (DECL_SIZE (field), 0)
3353 + 63) / 8 / 8; i++)
3354 classes[i] =
3355 merge_classes (X86_64_INTEGER_CLASS,
3356 classes[i]);
3357 }
3358 else
3359 {
3360 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3361 TREE_TYPE (field), subclasses,
3362 (int_bit_position (field)
3363 + bit_offset) % 256);
3364 if (!num)
3365 return 0;
3366 for (i = 0; i < num; i++)
3367 {
3368 int pos =
3369 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3370 classes[i + pos] =
3371 merge_classes (subclasses[i], classes[i + pos]);
3372 }
3373 }
3374 }
3375 }
3376 break;
3377
3378 case ARRAY_TYPE:
3379 /* Arrays are handled as small records. */
3380 {
3381 int num;
3382 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3383 TREE_TYPE (type), subclasses, bit_offset);
3384 if (!num)
3385 return 0;
3386
3387 /* The partial classes are now full classes. */
3388 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3389 subclasses[0] = X86_64_SSE_CLASS;
3390 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3391 subclasses[0] = X86_64_INTEGER_CLASS;
3392
3393 for (i = 0; i < words; i++)
3394 classes[i] = subclasses[i % num];
3395
3396 break;
3397 }
3398 case UNION_TYPE:
3399 case QUAL_UNION_TYPE:
3400	  /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
3401
3402
3403 /* Unions are not derived. */
3404 gcc_assert (!TYPE_BINFO (type)
3405 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3406 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3407 {
3408 if (TREE_CODE (field) == FIELD_DECL)
3409 {
3410 int num;
3411
3412 if (TREE_TYPE (field) == error_mark_node)
3413 continue;
3414
3415 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3416 TREE_TYPE (field), subclasses,
3417 bit_offset);
3418 if (!num)
3419 return 0;
3420 for (i = 0; i < num; i++)
3421 classes[i] = merge_classes (subclasses[i], classes[i]);
3422 }
3423 }
3424 break;
3425
3426 default:
3427 gcc_unreachable ();
3428 }
3429
3430 /* Final merger cleanup. */
3431 for (i = 0; i < words; i++)
3432 {
3433 /* If one class is MEMORY, everything should be passed in
3434 memory. */
3435 if (classes[i] == X86_64_MEMORY_CLASS)
3436 return 0;
3437
3438	  /* The X86_64_SSEUP_CLASS should always be preceded by
3439	     X86_64_SSE_CLASS.  */
3440 if (classes[i] == X86_64_SSEUP_CLASS
3441 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3442 classes[i] = X86_64_SSE_CLASS;
3443
3444 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3445 if (classes[i] == X86_64_X87UP_CLASS
3446 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3447 classes[i] = X86_64_SSE_CLASS;
3448 }
3449 return words;
3450 }
3451
3452  /* Compute the alignment needed.  We align all types to natural boundaries,
3453     with the exception of XFmode that is aligned to 64 bits.  */
3454 if (mode != VOIDmode && mode != BLKmode)
3455 {
3456 int mode_alignment = GET_MODE_BITSIZE (mode);
3457
3458 if (mode == XFmode)
3459 mode_alignment = 128;
3460 else if (mode == XCmode)
3461 mode_alignment = 256;
3462 if (COMPLEX_MODE_P (mode))
3463 mode_alignment /= 2;
3464 /* Misaligned fields are always returned in memory. */
3465 if (bit_offset % mode_alignment)
3466 return 0;
3467 }
3468
3469 /* for V1xx modes, just use the base mode */
3470 if (VECTOR_MODE_P (mode)
3471 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3472 mode = GET_MODE_INNER (mode);
3473
3474 /* Classification of atomic types. */
3475 switch (mode)
3476 {
3477 case SDmode:
3478 case DDmode:
3479 classes[0] = X86_64_SSE_CLASS;
3480 return 1;
3481 case TDmode:
3482 classes[0] = X86_64_SSE_CLASS;
3483 classes[1] = X86_64_SSEUP_CLASS;
3484 return 2;
3485 case DImode:
3486 case SImode:
3487 case HImode:
3488 case QImode:
3489 case CSImode:
3490 case CHImode:
3491 case CQImode:
3492 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3493 classes[0] = X86_64_INTEGERSI_CLASS;
3494 else
3495 classes[0] = X86_64_INTEGER_CLASS;
3496 return 1;
3497 case CDImode:
3498 case TImode:
3499 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3500 return 2;
3501 case CTImode:
3502 return 0;
3503 case SFmode:
3504 if (!(bit_offset % 64))
3505 classes[0] = X86_64_SSESF_CLASS;
3506 else
3507 classes[0] = X86_64_SSE_CLASS;
3508 return 1;
3509 case DFmode:
3510 classes[0] = X86_64_SSEDF_CLASS;
3511 return 1;
3512 case XFmode:
3513 classes[0] = X86_64_X87_CLASS;
3514 classes[1] = X86_64_X87UP_CLASS;
3515 return 2;
3516 case TFmode:
3517 classes[0] = X86_64_SSE_CLASS;
3518 classes[1] = X86_64_SSEUP_CLASS;
3519 return 2;
3520 case SCmode:
3521 classes[0] = X86_64_SSE_CLASS;
3522 return 1;
3523 case DCmode:
3524 classes[0] = X86_64_SSEDF_CLASS;
3525 classes[1] = X86_64_SSEDF_CLASS;
3526 return 2;
3527 case XCmode:
3528 classes[0] = X86_64_COMPLEX_X87_CLASS;
3529 return 1;
3530 case TCmode:
3531      /* This mode is larger than 16 bytes.  */
3532 return 0;
3533 case V4SFmode:
3534 case V4SImode:
3535 case V16QImode:
3536 case V8HImode:
3537 case V2DFmode:
3538 case V2DImode:
3539 classes[0] = X86_64_SSE_CLASS;
3540 classes[1] = X86_64_SSEUP_CLASS;
3541 return 2;
3542 case V2SFmode:
3543 case V2SImode:
3544 case V4HImode:
3545 case V8QImode:
3546 classes[0] = X86_64_SSE_CLASS;
3547 return 1;
3548 case BLKmode:
3549 case VOIDmode:
3550 return 0;
3551 default:
3552 gcc_assert (VECTOR_MODE_P (mode));
3553
3554 if (bytes > 16)
3555 return 0;
3556
3557 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3558
3559 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3560 classes[0] = X86_64_INTEGERSI_CLASS;
3561 else
3562 classes[0] = X86_64_INTEGER_CLASS;
3563 classes[1] = X86_64_INTEGER_CLASS;
3564 return 1 + (bytes > 8);
3565 }
3566}
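
/* Illustrative example, assuming the SysV x86-64 psABI: the 16-byte struct

     struct pt { double d; int i; };

   is split into two eightbytes; the first is classified SSEDF (from the
   double) and the second INTEGER (from the int), so classify_argument
   returns 2 and the value travels in one SSE register plus one GPR.  A
   struct larger than 16 bytes returns 0 and goes to memory, and a struct
   holding a long double picks up X87 classes, which examine_argument below
   also rejects for argument passing.  */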
3567
3568/* Examine the argument and return the number of registers required in
3569   each class.  Return 0 iff the parameter should be passed in memory.  */
3570static int
3571examine_argument (enum machine_mode mode, tree type, int in_return,
3572 int *int_nregs, int *sse_nregs)
3573{
3574 enum x86_64_reg_class class[MAX_CLASSES];
3575 int n = classify_argument (mode, type, class, 0);
3576
3577 *int_nregs = 0;
3578 *sse_nregs = 0;
3579 if (!n)
3580 return 0;
3581 for (n--; n >= 0; n--)
3582 switch (class[n])
3583 {
3584 case X86_64_INTEGER_CLASS:
3585 case X86_64_INTEGERSI_CLASS:
3586 (*int_nregs)++;
3587 break;
3588 case X86_64_SSE_CLASS:
3589 case X86_64_SSESF_CLASS:
3590 case X86_64_SSEDF_CLASS:
3591 (*sse_nregs)++;
3592 break;
3593 case X86_64_NO_CLASS:
3594 case X86_64_SSEUP_CLASS:
3595 break;
3596 case X86_64_X87_CLASS:
3597 case X86_64_X87UP_CLASS:
3598 if (!in_return)
3599 return 0;
3600 break;
3601 case X86_64_COMPLEX_X87_CLASS:
3602 return in_return ? 2 : 0;
3603 case X86_64_MEMORY_CLASS:
3604 gcc_unreachable ();
3605 }
3606 return 1;
3607}
3608
3609/* Construct container for the argument used by GCC interface. See
3610 FUNCTION_ARG for the detailed description. */
3611
3612static rtx
3613construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3614 tree type, int in_return, int nintregs, int nsseregs,
3615 const int *intreg, int sse_regno)
3616{
3617 /* The following variables hold the static issued_error state. */
3618 static bool issued_sse_arg_error;
3619 static bool issued_sse_ret_error;
3620 static bool issued_x87_ret_error;
3621
3622 enum machine_mode tmpmode;
3623 int bytes =
3624 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3625 enum x86_64_reg_class class[MAX_CLASSES];
3626 int n;
3627 int i;
3628 int nexps = 0;
3629 int needed_sseregs, needed_intregs;
3630 rtx exp[MAX_CLASSES];
3631 rtx ret;
3632
3633 n = classify_argument (mode, type, class, 0);
3634 if (TARGET_DEBUG_ARG)
3635 {
3636 if (!n)
3637 fprintf (stderr, "Memory class\n");
3638 else
3639 {
3640 fprintf (stderr, "Classes:");
3641 for (i = 0; i < n; i++)
3642 {
3643 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3644 }
3645 fprintf (stderr, "\n");
3646 }
3647 }
3648 if (!n)
3649 return NULL;
3650 if (!examine_argument (mode, type, in_return, &needed_intregs,
3651 &needed_sseregs))
3652 return NULL;
3653 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3654 return NULL;
3655
3656 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3657 some less clueful developer tries to use floating-point anyway. */
3658 if (needed_sseregs && !TARGET_SSE)
3659 {
3660 if (in_return)
3661 {
3662 if (!issued_sse_ret_error)
3663 {
3664 error ("SSE register return with SSE disabled");
3665 issued_sse_ret_error = true;
3666 }
3667 }
3668 else if (!issued_sse_arg_error)
3669 {
3670 error ("SSE register argument with SSE disabled");
3671 issued_sse_arg_error = true;
3672 }
3673 return NULL;
3674 }
3675
3676 /* Likewise, error if the ABI requires us to return values in the
3677 x87 registers and the user specified -mno-80387. */
3678 if (!TARGET_80387 && in_return)
3679 for (i = 0; i < n; i++)
3680 if (class[i] == X86_64_X87_CLASS
3681 || class[i] == X86_64_X87UP_CLASS
3682 || class[i] == X86_64_COMPLEX_X87_CLASS)
3683 {
3684 if (!issued_x87_ret_error)
3685 {
3686 error ("x87 register return with x87 disabled");
3687 issued_x87_ret_error = true;
3688 }
3689 return NULL;
3690 }
3691
3692  /* First construct simple cases.  Avoid SCmode, since we want to use a
3693     single register to pass this type.  */
3694 if (n == 1 && mode != SCmode)
3695 switch (class[0])
3696 {
3697 case X86_64_INTEGER_CLASS:
3698 case X86_64_INTEGERSI_CLASS:
3699 return gen_rtx_REG (mode, intreg[0]);
3700 case X86_64_SSE_CLASS:
3701 case X86_64_SSESF_CLASS:
3702 case X86_64_SSEDF_CLASS:
3703 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3704 case X86_64_X87_CLASS:
3705 case X86_64_COMPLEX_X87_CLASS:
3706 return gen_rtx_REG (mode, FIRST_STACK_REG);
3707 case X86_64_NO_CLASS:
3708 /* Zero sized array, struct or class. */
3709 return NULL;
3710 default:
3711 gcc_unreachable ();
3712 }
3713 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3714 && mode != BLKmode)
3715 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3716 if (n == 2
3717 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3718 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3719 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3720 && class[1] == X86_64_INTEGER_CLASS
3721 && (mode == CDImode || mode == TImode || mode == TFmode)
3722 && intreg[0] + 1 == intreg[1])
3723 return gen_rtx_REG (mode, intreg[0]);
3724
3725 /* Otherwise figure out the entries of the PARALLEL. */
3726 for (i = 0; i < n; i++)
3727 {
3728 switch (class[i])
3729 {
3730 case X86_64_NO_CLASS:
3731 break;
3732 case X86_64_INTEGER_CLASS:
3733 case X86_64_INTEGERSI_CLASS:
3734 /* Merge TImodes on aligned occasions here too. */
3735 if (i * 8 + 8 > bytes)
3736 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3737 else if (class[i] == X86_64_INTEGERSI_CLASS)
3738 tmpmode = SImode;
3739 else
3740 tmpmode = DImode;
3741	  /* We've requested 24 bytes we don't have a mode for.  Use DImode.  */
3742 if (tmpmode == BLKmode)
3743 tmpmode = DImode;
3744 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3745 gen_rtx_REG (tmpmode, *intreg),
3746 GEN_INT (i*8));
3747 intreg++;
3748 break;
3749 case X86_64_SSESF_CLASS:
3750 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3751 gen_rtx_REG (SFmode,
3752 SSE_REGNO (sse_regno)),
3753 GEN_INT (i*8));
3754 sse_regno++;
3755 break;
3756 case X86_64_SSEDF_CLASS:
3757 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3758 gen_rtx_REG (DFmode,
3759 SSE_REGNO (sse_regno)),
3760 GEN_INT (i*8));
3761 sse_regno++;
3762 break;
3763 case X86_64_SSE_CLASS:
3764 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3765 tmpmode = TImode;
3766 else
3767 tmpmode = DImode;
3768 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3769 gen_rtx_REG (tmpmode,
3770 SSE_REGNO (sse_regno)),
3771 GEN_INT (i*8));
3772 if (tmpmode == TImode)
3773 i++;
3774 sse_regno++;
3775 break;
3776 default:
3777 gcc_unreachable ();
3778 }
3779 }
3780
3781 /* Empty aligned struct, union or class. */
3782 if (nexps == 0)
3783 return NULL;
3784
3785 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3786 for (i = 0; i < nexps; i++)
3787 XVECEXP (ret, 0, i) = exp [i];
3788 return ret;
3789}
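
/* Illustrative sketch, continuing the struct pt example above: for that
   type this function builds a two-entry PARALLEL, roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   telling the middle end that bytes 0-7 live in an SSE register and bytes
   8-15 in a general-purpose register.  */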
3790
3791/* Update the data in CUM to advance over an argument
3792 of mode MODE and data type TYPE.
3793 (TYPE is null for libcalls where that information may not be available.) */
3794
3795void
3796function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3797 tree type, int named)
3798{
3799 int bytes =
3800 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3801 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3802
3803 if (type)
3804 mode = type_natural_mode (type);
3805
3806 if (TARGET_DEBUG_ARG)
3807 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3808 "mode=%s, named=%d)\n\n",
3809 words, cum->words, cum->nregs, cum->sse_nregs,
3810 GET_MODE_NAME (mode), named);
3811
3812 if (TARGET_64BIT)
3813 {
3814 int int_nregs, sse_nregs;
3815 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3816 cum->words += words;
3817 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3818 {
3819 cum->nregs -= int_nregs;
3820 cum->sse_nregs -= sse_nregs;
3821 cum->regno += int_nregs;
3822 cum->sse_regno += sse_nregs;
3823 }
3824 else
3825 cum->words += words;
3826 }
3827 else
3828 {
3829 switch (mode)
3830 {
3831 default:
3832 break;
3833
3834 case BLKmode:
3835 if (bytes < 0)
3836 break;
3837 /* FALLTHRU */
3838
3839 case DImode:
3840 case SImode:
3841 case HImode:
3842 case QImode:
3843 cum->words += words;
3844 cum->nregs -= words;
3845 cum->regno += words;
3846
3847 if (cum->nregs <= 0)
3848 {
3849 cum->nregs = 0;
3850 cum->regno = 0;
3851 }
3852 break;
3853
3854 case DFmode:
3855 if (cum->float_in_sse < 2)
3856 break;
3857 case SFmode:
3858 if (cum->float_in_sse < 1)
3859 break;
3860 /* FALLTHRU */
3861
3862 case TImode:
3863 case V16QImode:
3864 case V8HImode:
3865 case V4SImode:
3866 case V2DImode:
3867 case V4SFmode:
3868 case V2DFmode:
3869 if (!type || !AGGREGATE_TYPE_P (type))
3870 {
3871 cum->sse_words += words;
3872 cum->sse_nregs -= 1;
3873 cum->sse_regno += 1;
3874 if (cum->sse_nregs <= 0)
3875 {
3876 cum->sse_nregs = 0;
3877 cum->sse_regno = 0;
3878 }
3879 }
3880 break;
3881
3882 case V8QImode:
3883 case V4HImode:
3884 case V2SImode:
3885 case V2SFmode:
3886 if (!type || !AGGREGATE_TYPE_P (type))
3887 {
3888 cum->mmx_words += words;
3889 cum->mmx_nregs -= 1;
3890 cum->mmx_regno += 1;
3891 if (cum->mmx_nregs <= 0)
3892 {
3893 cum->mmx_nregs = 0;
3894 cum->mmx_regno = 0;
3895 }
3896 }
3897 break;
3898 }
3899 }
3900}
3901
3902/* Define where to put the arguments to a function.
3903 Value is zero to push the argument on the stack,
3904 or a hard register in which to store the argument.
3905
3906 MODE is the argument's machine mode.
3907 TYPE is the data type of the argument (as a tree).
3908 This is null for libcalls where that information may
3909 not be available.
3910 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3911 the preceding args and about the function being called.
3912 NAMED is nonzero if this argument is a named parameter
3913 (otherwise it is an extra parameter matching an ellipsis). */
3914
3915rtx
3916function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3917 tree type, int named)
3918{
3919 enum machine_mode mode = orig_mode;
3920 rtx ret = NULL_RTX;
3921 int bytes =
3922 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3923 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3924 static bool warnedsse, warnedmmx;
3925
3926 /* To simplify the code below, represent vector types with a vector mode
3927 even if MMX/SSE are not active. */
3928 if (type && TREE_CODE (type) == VECTOR_TYPE)
3929 mode = type_natural_mode (type);
3930
3931  /* Handle a hidden AL argument containing the number of registers used
3932     for varargs x86-64 functions.  For the i386 ABI just return constm1_rtx
3933     to avoid any AL settings.  */
3934 if (mode == VOIDmode)
3935 {
3936 if (TARGET_64BIT)
3937 return GEN_INT (cum->maybe_vaarg
3938 ? (cum->sse_nregs < 0
3939 ? SSE_REGPARM_MAX
3940 : cum->sse_regno)
3941 : -1);
3942 else
3943 return constm1_rtx;
3944 }
3945 if (TARGET_64BIT)
3946 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3947 cum->sse_nregs,
3948 &x86_64_int_parameter_registers [cum->regno],
3949 cum->sse_regno);
3950 else
3951 switch (mode)
3952 {
3953 /* For now, pass fp/complex values on the stack. */
3954 default:
3955 break;
3956
3957 case BLKmode:
3958 if (bytes < 0)
3959 break;
3960 /* FALLTHRU */
3961 case DImode:
3962 case SImode:
3963 case HImode:
3964 case QImode:
3965 if (words <= cum->nregs)
3966 {
3967 int regno = cum->regno;
3968
3969 /* Fastcall allocates the first two DWORD (SImode) or
3970 smaller arguments to ECX and EDX. */
3971 if (cum->fastcall)
3972 {
3973 if (mode == BLKmode || mode == DImode)
3974 break;
3975
3976	    /* ECX, not EAX, is the first allocated register.  */
3977 if (regno == 0)
3978 regno = 2;
3979 }
3980 ret = gen_rtx_REG (mode, regno);
3981 }
3982 break;
3983 case DFmode:
3984 if (cum->float_in_sse < 2)
3985 break;
3986 case SFmode:
3987 if (cum->float_in_sse < 1)
3988 break;
3989 /* FALLTHRU */
3990 case TImode:
3991 case V16QImode:
3992 case V8HImode:
3993 case V4SImode:
3994 case V2DImode:
3995 case V4SFmode:
3996 case V2DFmode:
3997 if (!type || !AGGREGATE_TYPE_P (type))
3998 {
3999 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4000 {
4001 warnedsse = true;
4002 warning (0, "SSE vector argument without SSE enabled "
4003 "changes the ABI");
4004 }
4005 if (cum->sse_nregs)
4006 ret = gen_reg_or_parallel (mode, orig_mode,
4007 cum->sse_regno + FIRST_SSE_REG);
4008 }
4009 break;
4010 case V8QImode:
4011 case V4HImode:
4012 case V2SImode:
4013 case V2SFmode:
4014 if (!type || !AGGREGATE_TYPE_P (type))
4015 {
4016 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4017 {
4018 warnedmmx = true;
4019 warning (0, "MMX vector argument without MMX enabled "
4020 "changes the ABI");
4021 }
4022 if (cum->mmx_nregs)
4023 ret = gen_reg_or_parallel (mode, orig_mode,
4024 cum->mmx_regno + FIRST_MMX_REG);
4025 }
4026 break;
4027 }
4028
4029 if (TARGET_DEBUG_ARG)
4030 {
4031 fprintf (stderr,
4032 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4033 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4034
4035 if (ret)
4036 print_simple_rtl (stderr, ret);
4037 else
4038 fprintf (stderr, ", stack");
4039
4040 fprintf (stderr, " )\n");
4041 }
4042
4043 return ret;
4044}
4045
4046/* A C expression that indicates when an argument must be passed by
4047 reference. If nonzero for an argument, a copy of that argument is
4048 made in memory and a pointer to the argument is passed instead of
4049 the argument itself. The pointer is passed in whatever way is
4050 appropriate for passing a pointer to that type. */
4051
4052static bool
4053ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4054 enum machine_mode mode ATTRIBUTE_UNUSED,
4055 tree type, bool named ATTRIBUTE_UNUSED)
4056{
4057 if (!TARGET_64BIT)
4058 return 0;
4059
4060 if (type && int_size_in_bytes (type) == -1)
4061 {
4062 if (TARGET_DEBUG_ARG)
4063 fprintf (stderr, "function_arg_pass_by_reference\n");
4064 return 1;
4065 }
4066
4067 return 0;
4068}
4069
4070/* Return true when TYPE should be 128bit aligned for 32bit argument passing
4071 ABI. Only called if TARGET_SSE. */
4072static bool
4073contains_128bit_aligned_vector_p (tree type)
4074{
4075 enum machine_mode mode = TYPE_MODE (type);
4076 if (SSE_REG_MODE_P (mode)
4077 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4078 return true;
4079 if (TYPE_ALIGN (type) < 128)
4080 return false;
4081
4082 if (AGGREGATE_TYPE_P (type))
4083 {
4084 /* Walk the aggregates recursively. */
4085 switch (TREE_CODE (type))
4086 {
4087 case RECORD_TYPE:
4088 case UNION_TYPE:
4089 case QUAL_UNION_TYPE:
4090 {
4091 tree field;
4092
4093 if (TYPE_BINFO (type))
4094 {
4095 tree binfo, base_binfo;
4096 int i;
4097
4098 for (binfo = TYPE_BINFO (type), i = 0;
4099 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
4100 if (contains_128bit_aligned_vector_p
4101 (BINFO_TYPE (base_binfo)))
4102 return true;
4103 }
4104 /* And now merge the fields of structure. */
4105 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4106 {
4107 if (TREE_CODE (field) == FIELD_DECL
4108 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4109 return true;
4110 }
4111 break;
4112 }
4113
4114 case ARRAY_TYPE:
4115    case ARRAY_TYPE:
      /* Just for use if some languages pass arrays by value.  */
4116 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4117 return true;
4118 break;
4119
4120 default:
4121 gcc_unreachable ();
4122 }
4123 }
4124 return false;
4125}
4126
4127/* Gives the alignment boundary, in bits, of an argument with the
4128 specified mode and type. */
4129
4130int
4131ix86_function_arg_boundary (enum machine_mode mode, tree type)
4132{
4133 int align;
4134 if (type)
4135 align = TYPE_ALIGN (type);
4136 else
4137 align = GET_MODE_ALIGNMENT (mode);
4138 if (align < PARM_BOUNDARY)
4139 align = PARM_BOUNDARY;
4140 if (!TARGET_64BIT)
4141 {
4142 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4143 make an exception for SSE modes since these require 128bit
4144 alignment.
4145
4146 The handling here differs from field_alignment. ICC aligns MMX
4147 arguments to 4 byte boundaries, while structure fields are aligned
4148 to 8 byte boundaries. */
4149 if (!TARGET_SSE)
4150 align = PARM_BOUNDARY;
4151 else if (!type)
4152 {
4153 if (!SSE_REG_MODE_P (mode))
4154 align = PARM_BOUNDARY;
4155 }
4156 else
4157 {
4158 if (!contains_128bit_aligned_vector_p (type))
4159 align = PARM_BOUNDARY;
4160 }
4161 }
4162 if (align > 128)
4163 align = 128;
4164 return align;
4165}
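
/* Illustrative example: on ia32 an int or double argument keeps the 4-byte
   PARM_BOUNDARY, while a type containing a 128-bit vector (e.g. __m128,
   assuming <xmmintrin.h>) is placed at a 16-byte boundary when SSE is
   enabled.  On x86-64 the natural alignment is used, capped at 128 bits.  */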
4166
4167/* Return true if REGNO is a possible register number for a function value.  */
4168bool
4169ix86_function_value_regno_p (int regno)
4170{
4171 if (TARGET_MACHO)
4172 {
4173 if (!TARGET_64BIT)
4174 {
4175 return ((regno) == 0
4176 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4177 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4178 }
4179 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4180 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4181 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4182 }
4183 else
4184 {
4185 if (regno == 0
4186 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4187 || (regno == FIRST_SSE_REG && TARGET_SSE))
4188 return true;
4189
4190 if (!TARGET_64BIT
4191 && (regno == FIRST_MMX_REG && TARGET_MMX))
4192 return true;
4193
4194 return false;
4195 }
4196}
4197
4198/* Define how to find the value returned by a function.
4199 VALTYPE is the data type of the value (as a tree).
4200 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4201 otherwise, FUNC is 0. */
4202rtx
4203ix86_function_value (tree valtype, tree fntype_or_decl,
4204 bool outgoing ATTRIBUTE_UNUSED)
4205{
4206 enum machine_mode natmode = type_natural_mode (valtype);
4207
4208 if (TARGET_64BIT)
4209 {
4210 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4211 1, REGPARM_MAX, SSE_REGPARM_MAX,
4212 x86_64_int_return_registers, 0);
4213      /* For zero sized structures, construct_container returns NULL, but we need
4214	 to keep the rest of the compiler happy by returning a meaningful value.  */
4215 if (!ret)
4216 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4217 return ret;
4218 }
4219 else
4220 {
4221 tree fn = NULL_TREE, fntype;
4222 if (fntype_or_decl
4223 && DECL_P (fntype_or_decl))
4224 fn = fntype_or_decl;
4225 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4226 return gen_rtx_REG (TYPE_MODE (valtype),
4227 ix86_value_regno (natmode, fn, fntype));
4228 }
4229}
4230
4231/* Return true iff type is returned in memory. */
4232int
4233ix86_return_in_memory (tree type)
4234{
4235 int needed_intregs, needed_sseregs, size;
4236 enum machine_mode mode = type_natural_mode (type);
4237
4238 if (TARGET_64BIT)
4239 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4240
4241 if (mode == BLKmode)
4242 return 1;
4243
4244 size = int_size_in_bytes (type);
4245
4246 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4247 return 0;
4248
4249 if (VECTOR_MODE_P (mode) || mode == TImode)
4250 {
4251 /* User-created vectors small enough to fit in EAX. */
4252 if (size < 8)
4253 return 0;
4254
4255      /* MMX/3dNow values are returned in MM0,
4256	 except when it doesn't exist.  */
4257 if (size == 8)
4258 return (TARGET_MMX ? 0 : 1);
4259
4260 /* SSE values are returned in XMM0, except when it doesn't exist. */
4261 if (size == 16)
4262 return (TARGET_SSE ? 0 : 1);
4263 }
4264
4265 if (mode == XFmode)
4266 return 0;
4267
4268 if (mode == TDmode)
4269 return 1;
4270
4271 if (size > 12)
4272 return 1;
4273 return 0;
4274}
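
/* Illustrative examples of the checks above, for ia32: an 8-byte vector
   returns in MM0 when MMX is available, a 16-byte vector in XMM0 when SSE
   is available, and long double (XFmode) on the x87 stack, while BLKmode
   aggregates and anything over 12 bytes come back through a hidden memory
   argument.  */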
4275
4276/* When returning SSE vector types, we have a choice of either
4277   (1) being ABI incompatible with a -march switch, or
4278 (2) generating an error.
4279 Given no good solution, I think the safest thing is one warning.
4280 The user won't be able to use -Werror, but....
4281
4282 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4283 called in response to actually generating a caller or callee that
4284 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4285 via aggregate_value_p for general type probing from tree-ssa. */
4286
4287static rtx
4288ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4289{
4290 static bool warnedsse, warnedmmx;
4291
4292 if (type)
4293 {
4294 /* Look at the return type of the function, not the function type. */
4295 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4296
4297 if (!TARGET_SSE && !warnedsse)
4298 {
4299 if (mode == TImode
4300 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4301 {
4302 warnedsse = true;
4303 warning (0, "SSE vector return without SSE enabled "
4304 "changes the ABI");
4305 }
4306 }
4307
4308 if (!TARGET_MMX && !warnedmmx)
4309 {
4310 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4311 {
4312 warnedmmx = true;
4313 warning (0, "MMX vector return without MMX enabled "
4314 "changes the ABI");
4315 }
4316 }
4317 }
4318
4319 return NULL;
4320}
4321
4322/* Define how to find the value returned by a library function
4323 assuming the value has mode MODE. */
4324rtx
4325ix86_libcall_value (enum machine_mode mode)
4326{
4327 if (TARGET_64BIT)
4328 {
4329 switch (mode)
4330 {
4331 case SFmode:
4332 case SCmode:
4333 case DFmode:
4334 case DCmode:
4335 case TFmode:
4336 case SDmode:
4337 case DDmode:
4338 case TDmode:
4339 return gen_rtx_REG (mode, FIRST_SSE_REG);
4340 case XFmode:
4341 case XCmode:
4342 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4343 case TCmode:
4344 return NULL;
4345 default:
4346 return gen_rtx_REG (mode, 0);
4347 }
4348 }
4349 else
4350 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4351}
4352
4353/* Given a mode, return the register to use for a return value. */
4354
4355static int
4356ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4357{
4358 gcc_assert (!TARGET_64BIT);
4359
4360 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4361 we normally prevent this case when mmx is not available. However
4362 some ABIs may require the result to be returned like DImode. */
4363 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4364 return TARGET_MMX ? FIRST_MMX_REG : 0;
4365
4366 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4367 we prevent this case when sse is not available. However some ABIs
4368 may require the result to be returned like integer TImode. */
4369 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4370 return TARGET_SSE ? FIRST_SSE_REG : 0;
4371
4372 /* Decimal floating point values can go in %eax, unlike other float modes. */
4373 if (DECIMAL_FLOAT_MODE_P (mode))
4374 return 0;
4375
4376 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4377 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4378 return 0;
4379
4380 /* Floating point return values in %st(0), except for local functions when
4381 SSE math is enabled or for functions with sseregparm attribute. */
4382 if ((func || fntype)
4383 && (mode == SFmode || mode == DFmode))
4384 {
4385 int sse_level = ix86_function_sseregparm (fntype, func);
4386 if ((sse_level >= 1 && mode == SFmode)
4387 || (sse_level == 2 && mode == DFmode))
4388 return FIRST_SSE_REG;
4389 }
4390
4391 return FIRST_FLOAT_REG;
4392}
4393
4394/* Create the va_list data type. */
4395
4396static tree
4397ix86_build_builtin_va_list (void)
4398{
4399 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4400
4401  /* For i386 we use a plain pointer to the argument area.  */
4402 if (!TARGET_64BIT)
4403 return build_pointer_type (char_type_node);
4404
4405 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4406 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4407
4408 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4409 unsigned_type_node);
4410 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4411 unsigned_type_node);
4412 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4413 ptr_type_node);
4414 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4415 ptr_type_node);
4416
4417 va_list_gpr_counter_field = f_gpr;
4418 va_list_fpr_counter_field = f_fpr;
4419
4420 DECL_FIELD_CONTEXT (f_gpr) = record;
4421 DECL_FIELD_CONTEXT (f_fpr) = record;
4422 DECL_FIELD_CONTEXT (f_ovf) = record;
4423 DECL_FIELD_CONTEXT (f_sav) = record;
4424
4425 TREE_CHAIN (record) = type_decl;
4426 TYPE_NAME (record) = type_decl;
4427 TYPE_FIELDS (record) = f_gpr;
4428 TREE_CHAIN (f_gpr) = f_fpr;
4429 TREE_CHAIN (f_fpr) = f_ovf;
4430 TREE_CHAIN (f_ovf) = f_sav;
4431
4432 layout_type (record);
4433
4434 /* The correct type is an array type of one element. */
4435 return build_array_type (record, build_index_type (size_zero_node));
4436}
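
/* Illustrative sketch, assuming the SysV x86-64 psABI: the record built
   above corresponds to the C-level declaration

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];

   which is why a one-element array of the record is returned.  */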
4437
4438/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4439
4440static void
4441ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4442 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4443 int no_rtl)
4444{
4445 CUMULATIVE_ARGS next_cum;
4446 rtx save_area = NULL_RTX, mem;
4447 rtx label;
4448 rtx label_ref;
4449 rtx tmp_reg;
4450 rtx nsse_reg;
4451 int set;
4452 tree fntype;
4453 int stdarg_p;
4454 int i;
4455
4456 if (!TARGET_64BIT)
4457 return;
4458
4459 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4460 return;
4461
4462  /* Indicate to allocate space on the stack for the varargs save area.  */
4463 ix86_save_varrargs_registers = 1;
4464
4465 cfun->stack_alignment_needed = 128;
4466
4467 fntype = TREE_TYPE (current_function_decl);
4468 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4469 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4470 != void_type_node));
4471
4472 /* For varargs, we do not want to skip the dummy va_dcl argument.
4473 For stdargs, we do want to skip the last named argument. */
4474 next_cum = *cum;
4475 if (stdarg_p)
4476 function_arg_advance (&next_cum, mode, type, 1);
4477
4478 if (!no_rtl)
4479 save_area = frame_pointer_rtx;
4480
4481 set = get_varargs_alias_set ();
4482
4483 for (i = next_cum.regno;
4484 i < ix86_regparm
4485 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4486 i++)
4487 {
4488 mem = gen_rtx_MEM (Pmode,
4489 plus_constant (save_area, i * UNITS_PER_WORD));
4490 MEM_NOTRAP_P (mem) = 1;
4491 set_mem_alias_set (mem, set);
4492 emit_move_insn (mem, gen_rtx_REG (Pmode,
4493 x86_64_int_parameter_registers[i]));
4494 }
4495
4496 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4497 {
4498      /* Now emit code to save SSE registers.  The AX parameter contains the
4499	 number of SSE parameter registers used to call this function.  We use
4500	 the sse_prologue_save insn template, which produces a computed jump
4501	 across the SSE saves.  We need some preparation work to get this working.  */
4502
4503 label = gen_label_rtx ();
4504 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4505
4506      /* Compute the address to jump to:
4507	 label - 4*eax + nnamed_sse_arguments*4.  */
4508 tmp_reg = gen_reg_rtx (Pmode);
4509 nsse_reg = gen_reg_rtx (Pmode);
4510 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4511 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4512 gen_rtx_MULT (Pmode, nsse_reg,
4513 GEN_INT (4))));
4514 if (next_cum.sse_regno)
4515 emit_move_insn
4516 (nsse_reg,
4517 gen_rtx_CONST (DImode,
4518 gen_rtx_PLUS (DImode,
4519 label_ref,
4520 GEN_INT (next_cum.sse_regno * 4))));
4521 else
4522 emit_move_insn (nsse_reg, label_ref);
4523 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4524
4525      /* Compute the address of the memory block we save into.  We always use a
4526	 pointer pointing 127 bytes after the first byte to store; this is needed
4527	 to keep the instruction size limited to 4 bytes.  */
4528 tmp_reg = gen_reg_rtx (Pmode);
4529 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4530 plus_constant (save_area,
4531 8 * REGPARM_MAX + 127)));
4532 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4533 MEM_NOTRAP_P (mem) = 1;
4534 set_mem_alias_set (mem, set);
4535 set_mem_align (mem, BITS_PER_WORD);
4536
4537 /* And finally do the dirty job! */
4538 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4539 GEN_INT (next_cum.sse_regno), label));
4540 }
4541
4542}
4543
4544/* Implement va_start. */
4545
4546void
4547ix86_va_start (tree valist, rtx nextarg)
4548{
4549 HOST_WIDE_INT words, n_gpr, n_fpr;
4550 tree f_gpr, f_fpr, f_ovf, f_sav;
4551 tree gpr, fpr, ovf, sav, t;
4552 tree type;
4553
4554 /* Only 64bit target needs something special. */
4555 if (!TARGET_64BIT)
4556 {
4557 std_expand_builtin_va_start (valist, nextarg);
4558 return;
4559 }
4560
4561 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4562 f_fpr = TREE_CHAIN (f_gpr);
4563 f_ovf = TREE_CHAIN (f_fpr);
4564 f_sav = TREE_CHAIN (f_ovf);
4565
4566 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4567 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4568 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4569 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4570 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4571
4572 /* Count number of gp and fp argument registers used. */
4573 words = current_function_args_info.words;
4574 n_gpr = current_function_args_info.regno;
4575 n_fpr = current_function_args_info.sse_regno;
4576
4577 if (TARGET_DEBUG_ARG)
4578 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4579 (int) words, (int) n_gpr, (int) n_fpr);
4580
4581 if (cfun->va_list_gpr_size)
4582 {
4583 type = TREE_TYPE (gpr);
4584 t = build2 (MODIFY_EXPR, type, gpr,
4585 build_int_cst (type, n_gpr * 8));
4586 TREE_SIDE_EFFECTS (t) = 1;
4587 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4588 }
4589
4590 if (cfun->va_list_fpr_size)
4591 {
4592 type = TREE_TYPE (fpr);
4593 t = build2 (MODIFY_EXPR, type, fpr,
4594 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4595 TREE_SIDE_EFFECTS (t) = 1;
4596 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4597 }
4598
4599 /* Find the overflow area. */
4600 type = TREE_TYPE (ovf);
4601 t = make_tree (type, virtual_incoming_args_rtx);
4602 if (words != 0)
4603 t = build2 (PLUS_EXPR, type, t,
4604 build_int_cst (type, words * UNITS_PER_WORD));
4605 t = build2 (MODIFY_EXPR, type, ovf, t);
4606 TREE_SIDE_EFFECTS (t) = 1;
4607 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4608
4609 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4610 {
4611      /* Find the register save area.
4612	 The function prologue saves it right above the stack frame.  */
4613 type = TREE_TYPE (sav);
4614 t = make_tree (type, frame_pointer_rtx);
4615 t = build2 (MODIFY_EXPR, type, sav, t);
4616 TREE_SIDE_EFFECTS (t) = 1;
4617 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4618 }
4619}
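
/* Illustrative summary of the assignments above: after va_start,

     gp_offset          = 8 * <GPRs consumed by named arguments>
     fp_offset          = 8 * REGPARM_MAX + 16 * <SSE regs consumed>
     overflow_arg_area  = address of the first stack-passed argument
     reg_save_area      = base of the register save block in the frame

   so va_arg can decide whether the next value still fits in the save
   area.  */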
4620
4621/* Implement va_arg. */
4622
4623tree
4624ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4625{
4626 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4627 tree f_gpr, f_fpr, f_ovf, f_sav;
4628 tree gpr, fpr, ovf, sav, t;
4629 int size, rsize;
4630 tree lab_false, lab_over = NULL_TREE;
4631 tree addr, t2;
4632 rtx container;
4633 int indirect_p = 0;
4634 tree ptrtype;
4635 enum machine_mode nat_mode;
4636
4637 /* Only 64bit target needs something special. */
4638 if (!TARGET_64BIT)
4639 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4640
4641 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4642 f_fpr = TREE_CHAIN (f_gpr);
4643 f_ovf = TREE_CHAIN (f_fpr);
4644 f_sav = TREE_CHAIN (f_ovf);
4645
4646 valist = build_va_arg_indirect_ref (valist);
4647 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4648 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4649 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4650 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4651
4652 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4653 if (indirect_p)
4654 type = build_pointer_type (type);
4655 size = int_size_in_bytes (type);
4656 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4657
4658 nat_mode = type_natural_mode (type);
4659 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4660 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4661
4662 /* Pull the value out of the saved registers. */
4663
4664 addr = create_tmp_var (ptr_type_node, "addr");
4665 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4666
4667 if (container)
4668 {
4669 int needed_intregs, needed_sseregs;
4670 bool need_temp;
4671 tree int_addr, sse_addr;
4672
4673 lab_false = create_artificial_label ();
4674 lab_over = create_artificial_label ();
4675
4676 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4677
4678 need_temp = (!REG_P (container)
4679 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4680 || TYPE_ALIGN (type) > 128));
4681
4682      /* In case we are passing a structure, verify that it is a consecutive
4683	 block in the register save area.  If not, we need to do moves.  */
4684 if (!need_temp && !REG_P (container))
4685 {
4686	  /* Verify that all registers are strictly consecutive.  */
4687 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4688 {
4689 int i;
4690
4691 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4692 {
4693 rtx slot = XVECEXP (container, 0, i);
4694 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4695 || INTVAL (XEXP (slot, 1)) != i * 16)
4696 need_temp = 1;
4697 }
4698 }
4699 else
4700 {
4701 int i;
4702
4703 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4704 {
4705 rtx slot = XVECEXP (container, 0, i);
4706 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4707 || INTVAL (XEXP (slot, 1)) != i * 8)
4708 need_temp = 1;
4709 }
4710 }
4711 }
4712 if (!need_temp)
4713 {
4714 int_addr = addr;
4715 sse_addr = addr;
4716 }
4717 else
4718 {
4719 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4720 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4721 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4722 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4723 }
4724
4725 /* First ensure that we fit completely in registers. */
4726 if (needed_intregs)
4727 {
4728 t = build_int_cst (TREE_TYPE (gpr),
4729 (REGPARM_MAX - needed_intregs + 1) * 8);
4730 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4731 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4732 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4733 gimplify_and_add (t, pre_p);
4734 }
4735 if (needed_sseregs)
4736 {
4737 t = build_int_cst (TREE_TYPE (fpr),
4738 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4739 + REGPARM_MAX * 8);
4740 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4741 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4742 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4743 gimplify_and_add (t, pre_p);
4744 }
4745
4746 /* Compute index to start of area used for integer regs. */
4747 if (needed_intregs)
4748 {
4749 /* int_addr = gpr + sav; */
4750 t = fold_convert (ptr_type_node, gpr);
4751 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4752 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4753 gimplify_and_add (t, pre_p);
4754 }
4755 if (needed_sseregs)
4756 {
4757 /* sse_addr = fpr + sav; */
4758 t = fold_convert (ptr_type_node, fpr);
4759 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4760 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4761 gimplify_and_add (t, pre_p);
4762 }
4763 if (need_temp)
4764 {
4765 int i;
4766 tree temp = create_tmp_var (type, "va_arg_tmp");
4767
4768 /* addr = &temp; */
4769 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4770 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4771 gimplify_and_add (t, pre_p);
4772
4773 for (i = 0; i < XVECLEN (container, 0); i++)
4774 {
4775 rtx slot = XVECEXP (container, 0, i);
4776 rtx reg = XEXP (slot, 0);
4777 enum machine_mode mode = GET_MODE (reg);
4778 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4779 tree addr_type = build_pointer_type (piece_type);
4780 tree src_addr, src;
4781 int src_offset;
4782 tree dest_addr, dest;
4783
4784 if (SSE_REGNO_P (REGNO (reg)))
4785 {
4786 src_addr = sse_addr;
4787 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4788 }
4789 else
4790 {
4791 src_addr = int_addr;
4792 src_offset = REGNO (reg) * 8;
4793 }
4794 src_addr = fold_convert (addr_type, src_addr);
4795 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4796 size_int (src_offset)));
4797 src = build_va_arg_indirect_ref (src_addr);
4798
4799 dest_addr = fold_convert (addr_type, addr);
4800 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4801 size_int (INTVAL (XEXP (slot, 1)))));
4802 dest = build_va_arg_indirect_ref (dest_addr);
4803
4804 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4805 gimplify_and_add (t, pre_p);
4806 }
4807 }
4808
4809 if (needed_intregs)
4810 {
4811 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4812 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4813 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4814 gimplify_and_add (t, pre_p);
4815 }
4816 if (needed_sseregs)
4817 {
4818 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4819 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4820 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4821 gimplify_and_add (t, pre_p);
4822 }
4823
4824 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4825 gimplify_and_add (t, pre_p);
4826
4827 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4828 append_to_statement_list (t, pre_p);
4829 }
4830
4831 /* ... otherwise out of the overflow area. */
4832
4833 /* Care for on-stack alignment if needed. */
4834 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4835 || integer_zerop (TYPE_SIZE (type)))
4836 t = ovf;
4837 else
4838 {
4839 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4840 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4841 build_int_cst (TREE_TYPE (ovf), align - 1));
4842 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4843 build_int_cst (TREE_TYPE (t), -align));
4844 }
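  /* For instance, with a 16-byte boundary the else branch above computes
     t = (ovf + 15) & -16, rounding the overflow-area pointer up to the
     next 16-byte slot.  */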
4845 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4846
4847 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4848 gimplify_and_add (t2, pre_p);
4849
4850 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4851 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4852 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4853 gimplify_and_add (t, pre_p);
4854
4855 if (container)
4856 {
4857 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4858 append_to_statement_list (t, pre_p);
4859 }
4860
4861 ptrtype = build_pointer_type (type);
4862 addr = fold_convert (ptrtype, addr);
4863
4864 if (indirect_p)
4865 addr = build_va_arg_indirect_ref (addr);
4866 return build_va_arg_indirect_ref (addr);
4867}
4868
4869/* Return nonzero if OPNUM's MEM should be matched
4870 in movabs* patterns. */
4871
4872int
4873ix86_check_movabs (rtx insn, int opnum)
4874{
4875 rtx set, mem;
4876
4877 set = PATTERN (insn);
4878 if (GET_CODE (set) == PARALLEL)
4879 set = XVECEXP (set, 0, 0);
4880 gcc_assert (GET_CODE (set) == SET);
4881 mem = XEXP (set, opnum);
4882 while (GET_CODE (mem) == SUBREG)
4883 mem = SUBREG_REG (mem);
4884 gcc_assert (GET_CODE (mem) == MEM);
4885 return (volatile_ok || !MEM_VOLATILE_P (mem));
4886}
4887
4888/* Initialize the table of extra 80387 mathematical constants. */
4889
4890static void
4891init_ext_80387_constants (void)
4892{
4893 static const char * cst[5] =
4894 {
4895 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4896 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4897 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4898 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4899 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4900 };
4901 int i;
4902
4903 for (i = 0; i < 5; i++)
4904 {
4905 real_from_string (&ext_80387_constants_table[i], cst[i]);
4906 /* Ensure each constant is rounded to XFmode precision. */
4907 real_convert (&ext_80387_constants_table[i],
4908 XFmode, &ext_80387_constants_table[i]);
4909 }
4910
4911 ext_80387_constants_init = 1;
4912}
4913
4914/* Return true if the constant is something that can be loaded with
4915 a special instruction. */
4916
4917int
4918standard_80387_constant_p (rtx x)
4919{
4920 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4921 return -1;
4922
4923 if (x == CONST0_RTX (GET_MODE (x)))
4924 return 1;
4925 if (x == CONST1_RTX (GET_MODE (x)))
4926 return 2;
4927
4928 /* For XFmode constants, try to find a special 80387 instruction when
4929 optimizing for size or on those CPUs that benefit from them. */
4930 if (GET_MODE (x) == XFmode
4931 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4932 {
4933 REAL_VALUE_TYPE r;
4934 int i;
4935
4936 if (! ext_80387_constants_init)
4937 init_ext_80387_constants ();
4938
4939 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4940 for (i = 0; i < 5; i++)
4941 if (real_identical (&r, &ext_80387_constants_table[i]))
4942 return i + 3;
4943 }
4944
4945 return 0;
4946}
4947
4948/* Return the opcode of the special instruction to be used to load
4949 the constant X. */
4950
4951const char *
4952standard_80387_constant_opcode (rtx x)
4953{
4954 switch (standard_80387_constant_p (x))
4955 {
4956 case 1:
4957 return "fldz";
4958 case 2:
4959 return "fld1";
4960 case 3:
4961 return "fldlg2";
4962 case 4:
4963 return "fldln2";
4964 case 5:
4965 return "fldl2e";
4966 case 6:
4967 return "fldl2t";
4968 case 7:
4969 return "fldpi";
4970 default:
4971 gcc_unreachable ();
4972 }
4973}
4974
4975/* Return the CONST_DOUBLE representing the 80387 constant that is
4976 loaded by the specified special instruction. The argument IDX
4977 matches the return value from standard_80387_constant_p. */
4978
4979rtx
4980standard_80387_constant_rtx (int idx)
4981{
4982 int i;
4983
4984 if (! ext_80387_constants_init)
4985 init_ext_80387_constants ();
4986
4987 switch (idx)
4988 {
4989 case 3:
4990 case 4:
4991 case 5:
4992 case 6:
4993 case 7:
4994 i = idx - 3;
4995 break;
4996
4997 default:
4998 gcc_unreachable ();
4999 }
5000
5001 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5002 XFmode);
5003}
5004
5005/* Return 1 if mode is a valid mode for sse. */
5006static int
5007standard_sse_mode_p (enum machine_mode mode)
5008{
5009 switch (mode)
5010 {
5011 case V16QImode:
5012 case V8HImode:
5013 case V4SImode:
5014 case V2DImode:
5015 case V4SFmode:
5016 case V2DFmode:
5017 return 1;
5018
5019 default:
5020 return 0;
5021 }
5022}
5023
5024/* Return 1 if X is an FP constant we can load to an SSE register
5025   without using memory.  */
5026int
5027standard_sse_constant_p (rtx x)
5028{
5029 enum machine_mode mode = GET_MODE (x);
5030
5031 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5032 return 1;
5033 if (vector_all_ones_operand (x, mode)
5034 && standard_sse_mode_p (mode))
5035 return TARGET_SSE2 ? 2 : -1;
5036
5037 return 0;
5038}
5039
5040/* Return the opcode of the special instruction to be used to load
5041 the constant X. */
5042
5043const char *
5044standard_sse_constant_opcode (rtx insn, rtx x)
5045{
5046 switch (standard_sse_constant_p (x))
5047 {
5048 case 1:
5049 if (get_attr_mode (insn) == MODE_V4SF)
5050 return "xorps\t%0, %0";
5051 else if (get_attr_mode (insn) == MODE_V2DF)
5052 return "xorpd\t%0, %0";
5053 else
5054 return "pxor\t%0, %0";
5055 case 2:
5056 return "pcmpeqd\t%0, %0";
5057 }
5058 gcc_unreachable ();
5059}
5060
5061/* Returns 1 if OP contains a symbol reference */
5062
5063int
5064symbolic_reference_mentioned_p (rtx op)
5065{
5066 const char *fmt;
5067 int i;
5068
5069 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5070 return 1;
5071
5072 fmt = GET_RTX_FORMAT (GET_CODE (op));
5073 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5074 {
5075 if (fmt[i] == 'E')
5076 {
5077 int j;
5078
5079 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5080 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5081 return 1;
5082 }
5083
5084 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5085 return 1;
5086 }
5087
5088 return 0;
5089}
5090
5091/* Return 1 if it is appropriate to emit `ret' instructions in the
5092 body of a function. Do this only if the epilogue is simple, needing a
5093 couple of insns. Prior to reloading, we can't tell how many registers
5094 must be saved, so return 0 then. Return 0 if there is no frame
5095 marker to de-allocate. */
5096
5097int
5098ix86_can_use_return_insn_p (void)
5099{
5100 struct ix86_frame frame;
5101
5102 if (! reload_completed || frame_pointer_needed)
5103 return 0;
5104
5105  /* Don't allow more than 32768 bytes of popped arguments, since
5106     that's all we handle with a single return instruction here. */
5107 if (current_function_pops_args
5108 && current_function_args_size >= 32768)
5109 return 0;
5110
5111 ix86_compute_frame_layout (&frame);
5112 return frame.to_allocate == 0 && frame.nregs == 0;
5113}
5114
5115/* Value should be nonzero if functions must have frame pointers.
5116 Zero means the frame pointer need not be set up (and parms may
5117 be accessed via the stack pointer) in functions that seem suitable. */
5118
5119int
5120ix86_frame_pointer_required (void)
5121{
5122 /* If we accessed previous frames, then the generated code expects
5123 to be able to access the saved ebp value in our frame. */
5124 if (cfun->machine->accesses_prev_frame)
5125 return 1;
5126
5127  /* Several x86 OSes need a frame pointer for other reasons,
5128 usually pertaining to setjmp. */
5129 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5130 return 1;
5131
5132 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5133 the frame pointer by default. Turn it back on now if we've not
5134 got a leaf function. */
5135 if (TARGET_OMIT_LEAF_FRAME_POINTER
5136 && (!current_function_is_leaf
5137 || ix86_current_function_calls_tls_descriptor))
5138 return 1;
5139
5140 if (current_function_profile)
5141 return 1;
5142
5143 return 0;
5144}
5145
5146/* Record that the current function accesses previous call frames. */
5147
5148void
5149ix86_setup_frame_addresses (void)
5150{
5151 cfun->machine->accesses_prev_frame = 1;
5152}
5153
5154#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5155# define USE_HIDDEN_LINKONCE 1
5156#else
5157# define USE_HIDDEN_LINKONCE 0
5158#endif
5159
5160static int pic_labels_used;
5161
5162/* Fills in the label name that should be used for a pc thunk for
5163 the given register. */
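/* For example, when hidden linkonce sections are usable the thunk for
   %ebx is named "__i686.get_pc_thunk.bx".  */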
5164
5165static void
5166get_pc_thunk_name (char name[32], unsigned int regno)
5167{
5168 gcc_assert (!TARGET_64BIT);
5169
5170 if (USE_HIDDEN_LINKONCE)
5171 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5172 else
5173 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5174}
5175
5176
5177/* Emit, at the end of the file, the PC thunks used by -fpic code: each
5178   thunk loads its register with the caller's return address and returns. */
5179
5180void
5181ix86_file_end (void)
5182{
5183 rtx xops[2];
5184 int regno;
5185
5186 for (regno = 0; regno < 8; ++regno)
5187 {
5188 char name[32];
5189
5190 if (! ((pic_labels_used >> regno) & 1))
5191 continue;
5192
5193 get_pc_thunk_name (name, regno);
5194
5195#if TARGET_MACHO
5196 if (TARGET_MACHO)
5197 {
5198 switch_to_section (darwin_sections[text_coal_section]);
5199 fputs ("\t.weak_definition\t", asm_out_file);
5200 assemble_name (asm_out_file, name);
5201 fputs ("\n\t.private_extern\t", asm_out_file);
5202 assemble_name (asm_out_file, name);
5203 fputs ("\n", asm_out_file);
5204 ASM_OUTPUT_LABEL (asm_out_file, name);
5205 }
5206 else
5207#endif
5208 if (USE_HIDDEN_LINKONCE)
5209 {
5210 tree decl;
5211
5212 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5213 error_mark_node);
5214 TREE_PUBLIC (decl) = 1;
5215 TREE_STATIC (decl) = 1;
5216 DECL_ONE_ONLY (decl) = 1;
5217
5218 (*targetm.asm_out.unique_section) (decl, 0);
5219 switch_to_section (get_named_section (decl, NULL, 0));
5220
5221 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5222 fputs ("\t.hidden\t", asm_out_file);
5223 assemble_name (asm_out_file, name);
5224 fputc ('\n', asm_out_file);
5225 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5226 }
5227 else
5228 {
5229 switch_to_section (text_section);
5230 ASM_OUTPUT_LABEL (asm_out_file, name);
5231 }
5232
5233 xops[0] = gen_rtx_REG (SImode, regno);
5234 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5235 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5236 output_asm_insn ("ret", xops);
5237 }
5238
5239 if (NEED_INDICATE_EXEC_STACK)
5240 file_end_indicate_exec_stack ();
5241}
5242
5243/* Emit code for the SET_GOT patterns. */
5244
5245const char *
5246output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5247{
5248 rtx xops[3];
5249
5250 xops[0] = dest;
5251 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5252
5253 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5254 {
5255 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5256
5257 if (!flag_pic)
5258 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5259 else
5260 output_asm_insn ("call\t%a2", xops);
5261
5262#if TARGET_MACHO
5263 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5264 is what will be referenced by the Mach-O PIC subsystem. */
5265 if (!label)
5266 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5267#endif
5268
5269 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5270 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5271
5272 if (flag_pic)
5273 output_asm_insn ("pop{l}\t%0", xops);
5274 }
5275 else
5276 {
5277 char name[32];
5278 get_pc_thunk_name (name, REGNO (dest));
5279 pic_labels_used |= 1 << REGNO (dest);
5280
5281 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5282 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5283 output_asm_insn ("call\t%X2", xops);
5284 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5285 is what will be referenced by the Mach-O PIC subsystem. */
5286#if TARGET_MACHO
5287 if (!label)
5288 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5289 else
5290 targetm.asm_out.internal_label (asm_out_file, "L",
5291 CODE_LABEL_NUMBER (label));
5292#endif
5293 }
5294
5295 if (TARGET_MACHO)
5296 return "";
5297
5298 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5299 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5300 else
5301 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5302
5303 return "";
5304}
5305
5306/* Generate a "push" pattern for input ARG. */
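/* The RTL built here is the canonical form of a push,
   (set (mem (pre_dec sp)) arg), which is the form the i386 push
   patterns match.  */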
5307
5308static rtx
5309gen_push (rtx arg)
5310{
5311 return gen_rtx_SET (VOIDmode,
5312 gen_rtx_MEM (Pmode,
5313 gen_rtx_PRE_DEC (Pmode,
5314 stack_pointer_rtx)),
5315 arg);
5316}
5317
5318/* Return >= 0 if there is an unused call-clobbered register available
5319 for the entire function. */
5320
5321static unsigned int
5322ix86_select_alt_pic_regnum (void)
5323{
5324 if (current_function_is_leaf && !current_function_profile
5325 && !ix86_current_function_calls_tls_descriptor)
5326 {
5327 int i;
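      /* Hard registers 2, 1 and 0 are %ecx, %edx and %eax; all three are
         call-clobbered, so one that is unused for the whole function can
         hold the PIC base instead of the default %ebx.  */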
5328 for (i = 2; i >= 0; --i)
5329 if (!regs_ever_live[i])
5330 return i;
5331 }
5332
5333 return INVALID_REGNUM;
5334}
5335
5336/* Return 1 if we need to save REGNO. */
5337static int
5338ix86_save_reg (unsigned int regno, int maybe_eh_return)
5339{
5340 if (pic_offset_table_rtx
5341 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5342 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5343 || current_function_profile
5344 || current_function_calls_eh_return
5345 || current_function_uses_const_pool))
5346 {
5347 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5348 return 0;
5349 return 1;
5350 }
5351
5352 if (current_function_calls_eh_return && maybe_eh_return)
5353 {
5354 unsigned i;
5355 for (i = 0; ; i++)
5356 {
5357 unsigned test = EH_RETURN_DATA_REGNO (i);
5358 if (test == INVALID_REGNUM)
5359 break;
5360 if (test == regno)
5361 return 1;
5362 }
5363 }
5364
5365 if (cfun->machine->force_align_arg_pointer
5366 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5367 return 1;
5368
5369 return (regs_ever_live[regno]
5370 && !call_used_regs[regno]
5371 && !fixed_regs[regno]
5372 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5373}
5374
5375/* Return number of registers to be saved on the stack. */
5376
5377static int
5378ix86_nsaved_regs (void)
5379{
5380 int nregs = 0;
5381 int regno;
5382
5383 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5384 if (ix86_save_reg (regno, true))
5385 nregs++;
5386 return nregs;
5387}
5388
5389/* Return the offset between two registers, one to be eliminated, and the other
5390 its replacement, at the start of a routine. */
5391
5392HOST_WIDE_INT
5393ix86_initial_elimination_offset (int from, int to)
5394{
5395 struct ix86_frame frame;
5396 ix86_compute_frame_layout (&frame);
5397
5398 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5399 return frame.hard_frame_pointer_offset;
5400 else if (from == FRAME_POINTER_REGNUM
5401 && to == HARD_FRAME_POINTER_REGNUM)
5402 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5403 else
5404 {
5405 gcc_assert (to == STACK_POINTER_REGNUM);
5406
5407 if (from == ARG_POINTER_REGNUM)
5408 return frame.stack_pointer_offset;
5409
5410 gcc_assert (from == FRAME_POINTER_REGNUM);
5411 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5412 }
5413}
5414
5415/* Fill structure ix86_frame about frame of currently computed function. */
5416
5417static void
5418ix86_compute_frame_layout (struct ix86_frame *frame)
5419{
5420 HOST_WIDE_INT total_size;
5421 unsigned int stack_alignment_needed;
5422 HOST_WIDE_INT offset;
5423 unsigned int preferred_alignment;
5424 HOST_WIDE_INT size = get_frame_size ();
5425
5426 frame->nregs = ix86_nsaved_regs ();
5427 total_size = size;
5428
5429 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5430 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5431
5432  /* During reload the number of registers saved can change.
5433     Recompute the value as needed. Do not recompute when the number of
5434     registers didn't change, as reload makes multiple calls to this function
5435     and does not expect the decision to change within a single iteration. */
5436 if (!optimize_size
5437 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5438 {
5439 int count = frame->nregs;
5440
5441 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5442 /* The fast prologue uses move instead of push to save registers. This
5443 is significantly longer, but also executes faster as modern hardware
5444 can execute the moves in parallel, but can't do that for push/pop.
5445
5446	 Be careful about choosing which prologue to emit: when a function
5447	 takes many instructions to execute, we may as well use the slow
5448	 version, and likewise when the function is known to be outside a hot
5449	 spot (known with profile feedback only). Weight the size of the
5450	 function by the number of registers to save, as it is cheap to use
5451	 one or two push instructions but very slow to use many of them. */
5452 if (count)
5453 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5454 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5455 || (flag_branch_probabilities
5456 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5457 cfun->machine->use_fast_prologue_epilogue = false;
5458 else
5459 cfun->machine->use_fast_prologue_epilogue
5460 = !expensive_function_p (count);
5461 }
5462 if (TARGET_PROLOGUE_USING_MOVE
5463 && cfun->machine->use_fast_prologue_epilogue)
5464 frame->save_regs_using_mov = true;
5465 else
5466 frame->save_regs_using_mov = false;
5467
5468
5469 /* Skip return address and saved base pointer. */
5470 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5471
5472 frame->hard_frame_pointer_offset = offset;
5473
5474  /* Do some sanity checking of stack_alignment_needed and
5475     preferred_alignment, since the i386 port is the only one using these
5476     features, and they may break easily. */
5477
5478 gcc_assert (!size || stack_alignment_needed);
5479 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5480 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5481 gcc_assert (stack_alignment_needed
5482 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5483
5484 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5485 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5486
5487 /* Register save area */
5488 offset += frame->nregs * UNITS_PER_WORD;
5489
5490 /* Va-arg area */
5491 if (ix86_save_varrargs_registers)
5492 {
5493 offset += X86_64_VARARGS_SIZE;
5494 frame->va_arg_size = X86_64_VARARGS_SIZE;
5495 }
5496 else
5497 frame->va_arg_size = 0;
5498
5499 /* Align start of frame for local function. */
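  /* For instance, offset == 20 with a 16-byte alignment requirement gives
     padding1 == ((20 + 15) & -16) - 20 == 12.  */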
5500 frame->padding1 = ((offset + stack_alignment_needed - 1)
5501 & -stack_alignment_needed) - offset;
5502
5503 offset += frame->padding1;
5504
5505 /* Frame pointer points here. */
5506 frame->frame_pointer_offset = offset;
5507
5508 offset += size;
5509
5510  /* Add outgoing arguments area. Can be skipped if we eliminated
5511     all the function calls as dead code.
5512     Skipping is however impossible when the function calls alloca: the
5513     alloca expander assumes that the last current_function_outgoing_args_size
5514     bytes of the stack frame are unused. */
5515 if (ACCUMULATE_OUTGOING_ARGS
5516 && (!current_function_is_leaf || current_function_calls_alloca
5517 || ix86_current_function_calls_tls_descriptor))
5518 {
5519 offset += current_function_outgoing_args_size;
5520 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5521 }
5522 else
5523 frame->outgoing_arguments_size = 0;
5524
5525 /* Align stack boundary. Only needed if we're calling another function
5526 or using alloca. */
5527 if (!current_function_is_leaf || current_function_calls_alloca
5528 || ix86_current_function_calls_tls_descriptor)
5529 frame->padding2 = ((offset + preferred_alignment - 1)
5530 & -preferred_alignment) - offset;
5531 else
5532 frame->padding2 = 0;
5533
5534 offset += frame->padding2;
5535
5536 /* We've reached end of stack frame. */
5537 frame->stack_pointer_offset = offset;
5538
5539 /* Size prologue needs to allocate. */
5540 frame->to_allocate =
5541 (size + frame->padding1 + frame->padding2
5542 + frame->outgoing_arguments_size + frame->va_arg_size);
5543
5544 if ((!frame->to_allocate && frame->nregs <= 1)
5545 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5546 frame->save_regs_using_mov = false;
5547
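  /* The x86-64 ABI guarantees a 128-byte red zone below the stack pointer
     that leaf functions may use without adjusting %rsp, so whatever fits in
     it needs no explicit allocation in the prologue.  */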
5548 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5549 && current_function_is_leaf
5550 && !ix86_current_function_calls_tls_descriptor)
5551 {
5552 frame->red_zone_size = frame->to_allocate;
5553 if (frame->save_regs_using_mov)
5554 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5555 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5556 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5557 }
5558 else
5559 frame->red_zone_size = 0;
5560 frame->to_allocate -= frame->red_zone_size;
5561 frame->stack_pointer_offset -= frame->red_zone_size;
5562#if 0
5563 fprintf (stderr, "nregs: %i\n", frame->nregs);
5564 fprintf (stderr, "size: %i\n", size);
5565 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5566 fprintf (stderr, "padding1: %i\n", frame->padding1);
5567 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5568 fprintf (stderr, "padding2: %i\n", frame->padding2);
5569 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5570 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5571 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5572 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5573 frame->hard_frame_pointer_offset);
5574 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5575#endif
5576}
5577
5578/* Emit code to save registers in the prologue. */
5579
5580static void
5581ix86_emit_save_regs (void)
5582{
5583 unsigned int regno;
5584 rtx insn;
5585
5586 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5587 if (ix86_save_reg (regno, true))
5588 {
5589 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5590 RTX_FRAME_RELATED_P (insn) = 1;
5591 }
5592}
5593
5594/* Emit code to save registers using MOV insns.  The first register
5595   is saved at POINTER + OFFSET. */
5596static void
5597ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5598{
5599 unsigned int regno;
5600 rtx insn;
5601
5602 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5603 if (ix86_save_reg (regno, true))
5604 {
5605 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5606 Pmode, offset),
5607 gen_rtx_REG (Pmode, regno));
5608 RTX_FRAME_RELATED_P (insn) = 1;
5609 offset += UNITS_PER_WORD;
5610 }
5611}
5612
5613/* Expand prologue or epilogue stack adjustment.
5614   The pattern exists to put a dependency on all ebp-based memory accesses.
5615   STYLE should be negative if instructions should be marked as frame related,
5616   zero if the %r11 register is live and cannot be freely used, and positive
5617   otherwise. */
5618
5619static void
5620pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5621{
5622 rtx insn;
5623
5624 if (! TARGET_64BIT)
5625 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5626 else if (x86_64_immediate_operand (offset, DImode))
5627 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5628 else
5629 {
5630 rtx r11;
5631 /* r11 is used by indirect sibcall return as well, set before the
5632 epilogue and used after the epilogue. ATM indirect sibcall
5633 shouldn't be used together with huge frame sizes in one
5634 function because of the frame_size check in sibcall.c. */
5635 gcc_assert (style);
5636 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5637 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5638 if (style < 0)
5639 RTX_FRAME_RELATED_P (insn) = 1;
5640 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5641 offset));
5642 }
5643 if (style < 0)
5644 RTX_FRAME_RELATED_P (insn) = 1;
5645}
5646
5647/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5648
5649static rtx
5650ix86_internal_arg_pointer (void)
5651{
5652 bool has_force_align_arg_pointer =
5653 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5654 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5655 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5656 && DECL_NAME (current_function_decl)
5657 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5658 && DECL_FILE_SCOPE_P (current_function_decl))
5659 || ix86_force_align_arg_pointer
5660 || has_force_align_arg_pointer)
5661 {
5662 /* Nested functions can't realign the stack due to a register
5663 conflict. */
5664 if (DECL_CONTEXT (current_function_decl)
5665 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5666 {
5667 if (ix86_force_align_arg_pointer)
5668 warning (0, "-mstackrealign ignored for nested functions");
5669 if (has_force_align_arg_pointer)
5670 error ("%s not supported for nested functions",
5671 ix86_force_align_arg_pointer_string);
5672 return virtual_incoming_args_rtx;
5673 }
5674 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5675 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5676 }
5677 else
5678 return virtual_incoming_args_rtx;
5679}
5680
5681/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5682 This is called from dwarf2out.c to emit call frame instructions
5683 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5684static void
5685ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5686{
5687 rtx unspec = SET_SRC (pattern);
5688 gcc_assert (GET_CODE (unspec) == UNSPEC);
5689
5690 switch (index)
5691 {
5692 case UNSPEC_REG_SAVE:
5693 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5694 SET_DEST (pattern));
5695 break;
5696 case UNSPEC_DEF_CFA:
5697 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5698 INTVAL (XVECEXP (unspec, 0, 0)));
5699 break;
5700 default:
5701 gcc_unreachable ();
5702 }
5703}
5704
5705/* Expand the prologue into a bunch of separate insns. */
5706
5707void
5708ix86_expand_prologue (void)
5709{
5710 rtx insn;
5711 bool pic_reg_used;
5712 struct ix86_frame frame;
5713 HOST_WIDE_INT allocate;
5714
5715 ix86_compute_frame_layout (&frame);
5716
5717 if (cfun->machine->force_align_arg_pointer)
5718 {
5719 rtx x, y;
5720
5721 /* Grab the argument pointer. */
5722 x = plus_constant (stack_pointer_rtx, 4);
5723 y = cfun->machine->force_align_arg_pointer;
5724 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5725 RTX_FRAME_RELATED_P (insn) = 1;
5726
5727 /* The unwind info consists of two parts: install the fafp as the cfa,
5728 and record the fafp as the "save register" of the stack pointer.
5729	 The latter is there so that the unwinder can see where it
5730	 should restore the stack pointer across the and insn. */
5731 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5732 x = gen_rtx_SET (VOIDmode, y, x);
5733 RTX_FRAME_RELATED_P (x) = 1;
5734 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5735 UNSPEC_REG_SAVE);
5736 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5737 RTX_FRAME_RELATED_P (y) = 1;
5738 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5739 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5740 REG_NOTES (insn) = x;
5741
5742 /* Align the stack. */
5743 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5744 GEN_INT (-16)));
5745
5746 /* And here we cheat like madmen with the unwind info. We force the
5747 cfa register back to sp+4, which is exactly what it was at the
5748 start of the function. Re-pushing the return address results in
5749 the return at the same spot relative to the cfa, and thus is
5750 correct wrt the unwind info. */
5751 x = cfun->machine->force_align_arg_pointer;
5752 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5753 insn = emit_insn (gen_push (x));
5754 RTX_FRAME_RELATED_P (insn) = 1;
5755
5756 x = GEN_INT (4);
5757 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5758 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5759 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5760 REG_NOTES (insn) = x;
5761 }
5762
5763 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5764 slower on all targets. Also sdb doesn't like it. */
5765
5766 if (frame_pointer_needed)
5767 {
5768 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5769 RTX_FRAME_RELATED_P (insn) = 1;
5770
5771 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5772 RTX_FRAME_RELATED_P (insn) = 1;
5773 }
5774
5775 allocate = frame.to_allocate;
5776
5777 if (!frame.save_regs_using_mov)
5778 ix86_emit_save_regs ();
5779 else
5780 allocate += frame.nregs * UNITS_PER_WORD;
5781
5782  /* When using the red zone we may start saving registers before
5783     allocating the stack frame, saving one cycle of the prologue. */
5784 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5785 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5786 : stack_pointer_rtx,
5787 -frame.nregs * UNITS_PER_WORD);
5788
5789 if (allocate == 0)
5790 ;
5791 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5792 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5793 GEN_INT (-allocate), -1);
5794 else
5795 {
5796 /* Only valid for Win32. */
5797 rtx eax = gen_rtx_REG (SImode, 0);
5798 bool eax_live = ix86_eax_live_at_start_p ();
5799 rtx t;
5800
5801 gcc_assert (!TARGET_64BIT);
5802
5803 if (eax_live)
5804 {
5805 emit_insn (gen_push (eax));
5806 allocate -= 4;
5807 }
5808
5809 emit_move_insn (eax, GEN_INT (allocate));
5810
5811 insn = emit_insn (gen_allocate_stack_worker (eax));
5812 RTX_FRAME_RELATED_P (insn) = 1;
5813 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5814 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5815 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5816 t, REG_NOTES (insn));
5817
5818 if (eax_live)
5819 {
5820 if (frame_pointer_needed)
5821 t = plus_constant (hard_frame_pointer_rtx,
5822 allocate
5823 - frame.to_allocate
5824 - frame.nregs * UNITS_PER_WORD);
5825 else
5826 t = plus_constant (stack_pointer_rtx, allocate);
5827 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5828 }
5829 }
5830
5831 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5832 {
5833 if (!frame_pointer_needed || !frame.to_allocate)
5834 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5835 else
5836 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5837 -frame.nregs * UNITS_PER_WORD);
5838 }
5839
5840 pic_reg_used = false;
5841 if (pic_offset_table_rtx
5842 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5843 || current_function_profile))
5844 {
5845 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5846
5847 if (alt_pic_reg_used != INVALID_REGNUM)
5848 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5849
5850 pic_reg_used = true;
5851 }
5852
5853 if (pic_reg_used)
5854 {
5855 if (TARGET_64BIT)
5856 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5857 else
5858 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5859
5860 /* Even with accurate pre-reload life analysis, we can wind up
5861 deleting all references to the pic register after reload.
5862 Consider if cross-jumping unifies two sides of a branch
5863 controlled by a comparison vs the only read from a global.
5864 In which case, allow the set_got to be deleted, though we're
5865 too late to do anything about the ebx save in the prologue. */
5866 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5867 }
5868
5869  /* Prevent function calls from being scheduled before the call to mcount.
5870 In the pic_reg_used case, make sure that the got load isn't deleted. */
5871 if (current_function_profile)
5872 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5873}
5874
5875/* Emit code to restore saved registers using MOV insns. First register
5876 is restored from POINTER + OFFSET. */
5877static void
5878ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5879 int maybe_eh_return)
5880{
5881 int regno;
5882 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5883
5884 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5885 if (ix86_save_reg (regno, maybe_eh_return))
5886 {
5887 /* Ensure that adjust_address won't be forced to produce pointer
5888 out of range allowed by x86-64 instruction set. */
5889 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5890 {
5891 rtx r11;
5892
5893 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5894 emit_move_insn (r11, GEN_INT (offset));
5895 emit_insn (gen_adddi3 (r11, r11, pointer));
5896 base_address = gen_rtx_MEM (Pmode, r11);
5897 offset = 0;
5898 }
5899 emit_move_insn (gen_rtx_REG (Pmode, regno),
5900 adjust_address (base_address, Pmode, offset));
5901 offset += UNITS_PER_WORD;
5902 }
5903}
5904
5905/* Restore function stack, frame, and registers. */
5906
5907void
5908ix86_expand_epilogue (int style)
5909{
5910 int regno;
5911 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5912 struct ix86_frame frame;
5913 HOST_WIDE_INT offset;
5914
5915 ix86_compute_frame_layout (&frame);
5916
5917 /* Calculate start of saved registers relative to ebp. Special care
5918 must be taken for the normal return case of a function using
5919 eh_return: the eax and edx registers are marked as saved, but not
5920 restored along this path. */
5921 offset = frame.nregs;
5922 if (current_function_calls_eh_return && style != 2)
5923 offset -= 2;
5924 offset *= -UNITS_PER_WORD;
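  /* For example, three saved registers on ia32 give offset == -12, the
     frame-pointer-relative start of the register save area used by the
     mov-based restores below.  */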
5925
5926  /* If we're only restoring one register and sp is not valid then
5927     use a move instruction to restore the register, since it's
5928     less work than reloading sp and popping the register.
5929
5930     The default code results in a stack adjustment using an add/lea
5931     instruction, while this code results in a LEAVE instruction (or discrete
5932     equivalent), so it is profitable in some other cases as well, especially
5933     when there are no registers to restore. We also use this code when
5934     TARGET_USE_LEAVE is set and there is exactly one register to pop. This
5935     heuristic may need some tuning in the future. */
5936 if ((!sp_valid && frame.nregs <= 1)
5937 || (TARGET_EPILOGUE_USING_MOVE
5938 && cfun->machine->use_fast_prologue_epilogue
5939 && (frame.nregs > 1 || frame.to_allocate))
5940 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5941 || (frame_pointer_needed && TARGET_USE_LEAVE
5942 && cfun->machine->use_fast_prologue_epilogue
5943 && frame.nregs == 1)
5944 || current_function_calls_eh_return)
5945 {
5946 /* Restore registers. We can use ebp or esp to address the memory
5947 locations. If both are available, default to ebp, since offsets
5948 are known to be small. Only exception is esp pointing directly to the
5949 end of block of saved registers, where we may simplify addressing
5950 mode. */
5951
5952 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5953 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5954 frame.to_allocate, style == 2);
5955 else
5956 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5957 offset, style == 2);
5958
5959 /* eh_return epilogues need %ecx added to the stack pointer. */
5960 if (style == 2)
5961 {
5962 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5963
5964 if (frame_pointer_needed)
5965 {
5966 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5967 tmp = plus_constant (tmp, UNITS_PER_WORD);
5968 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5969
5970 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5971 emit_move_insn (hard_frame_pointer_rtx, tmp);
5972
5973 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5974 const0_rtx, style);
5975 }
5976 else
5977 {
5978 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5979 tmp = plus_constant (tmp, (frame.to_allocate
5980 + frame.nregs * UNITS_PER_WORD));
5981 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5982 }
5983 }
5984 else if (!frame_pointer_needed)
5985 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5986 GEN_INT (frame.to_allocate
5987 + frame.nregs * UNITS_PER_WORD),
5988 style);
5989 /* If not an i386, mov & pop is faster than "leave". */
5990 else if (TARGET_USE_LEAVE || optimize_size
5991 || !cfun->machine->use_fast_prologue_epilogue)
5992 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5993 else
5994 {
5995 pro_epilogue_adjust_stack (stack_pointer_rtx,
5996 hard_frame_pointer_rtx,
5997 const0_rtx, style);
5998 if (TARGET_64BIT)
5999 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6000 else
6001 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6002 }
6003 }
6004 else
6005 {
6006 /* First step is to deallocate the stack frame so that we can
6007 pop the registers. */
6008 if (!sp_valid)
6009 {
6010 gcc_assert (frame_pointer_needed);
6011 pro_epilogue_adjust_stack (stack_pointer_rtx,
6012 hard_frame_pointer_rtx,
6013 GEN_INT (offset), style);
6014 }
6015 else if (frame.to_allocate)
6016 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6017 GEN_INT (frame.to_allocate), style);
6018
6019 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6020 if (ix86_save_reg (regno, false))
6021 {
6022 if (TARGET_64BIT)
6023 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6024 else
6025 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6026 }
6027 if (frame_pointer_needed)
6028 {
6029 /* Leave results in shorter dependency chains on CPUs that are
6030 able to grok it fast. */
6031 if (TARGET_USE_LEAVE)
6032 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6033 else if (TARGET_64BIT)
6034 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6035 else
6036 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6037 }
6038 }
6039
6040 if (cfun->machine->force_align_arg_pointer)
6041 {
6042 emit_insn (gen_addsi3 (stack_pointer_rtx,
6043 cfun->machine->force_align_arg_pointer,
6044 GEN_INT (-4)));
6045 }
6046
6047 /* Sibcall epilogues don't want a return instruction. */
6048 if (style == 0)
6049 return;
6050
6051 if (current_function_pops_args && current_function_args_size)
6052 {
6053 rtx popc = GEN_INT (current_function_pops_args);
6054
6055 /* i386 can only pop 64K bytes. If asked to pop more, pop
6056 return address, do explicit add, and jump indirectly to the
6057 caller. */
6058
6059 if (current_function_pops_args >= 65536)
6060 {
6061 rtx ecx = gen_rtx_REG (SImode, 2);
6062
6063 /* There is no "pascal" calling convention in 64bit ABI. */
6064 gcc_assert (!TARGET_64BIT);
6065
6066 emit_insn (gen_popsi1 (ecx));
6067 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6068 emit_jump_insn (gen_return_indirect_internal (ecx));
6069 }
6070 else
6071 emit_jump_insn (gen_return_pop_internal (popc));
6072 }
6073 else
6074 emit_jump_insn (gen_return_internal ());
6075}
6076
6077/* Reset from the function's potential modifications. */
6078
6079static void
6080ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6081 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6082{
6083 if (pic_offset_table_rtx)
6084 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6085#if TARGET_MACHO
6086 /* Mach-O doesn't support labels at the end of objects, so if
6087 it looks like we might want one, insert a NOP. */
6088 {
6089 rtx insn = get_last_insn ();
6090 while (insn
6091 && NOTE_P (insn)
6092 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6093 insn = PREV_INSN (insn);
6094 if (insn
6095 && (LABEL_P (insn)
6096 || (NOTE_P (insn)
6097 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6098 fputs ("\tnop\n", file);
6099 }
6100#endif
6101
6102}
6103
6104/* Extract the parts of an RTL expression that is a valid memory address
6105 for an instruction. Return 0 if the structure of the address is
6106 grossly off. Return -1 if the address contains ASHIFT, so it is not
6107 strictly valid, but still used for computing length of lea instruction. */
6108
6109int
6110ix86_decompose_address (rtx addr, struct ix86_address *out)
6111{
6112 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6113 rtx base_reg, index_reg;
6114 HOST_WIDE_INT scale = 1;
6115 rtx scale_rtx = NULL_RTX;
6116 int retval = 1;
6117 enum ix86_address_seg seg = SEG_DEFAULT;
6118
6119 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
6120 base = addr;
6121 else if (GET_CODE (addr) == PLUS)
6122 {
6123 rtx addends[4], op;
6124 int n = 0, i;
6125
6126 op = addr;
6127 do
6128 {
6129 if (n >= 4)
6130 return 0;
6131 addends[n++] = XEXP (op, 1);
6132 op = XEXP (op, 0);
6133 }
6134 while (GET_CODE (op) == PLUS);
6135 if (n >= 4)
6136 return 0;
6137 addends[n] = op;
6138
6139 for (i = n; i >= 0; --i)
6140 {
6141 op = addends[i];
6142 switch (GET_CODE (op))
6143 {
6144 case MULT:
6145 if (index)
6146 return 0;
6147 index = XEXP (op, 0);
6148 scale_rtx = XEXP (op, 1);
6149 break;
6150
6151 case UNSPEC:
6152 if (XINT (op, 1) == UNSPEC_TP
6153 && TARGET_TLS_DIRECT_SEG_REFS
6154 && seg == SEG_DEFAULT)
6155 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6156 else
6157 return 0;
6158 break;
6159
6160 case REG:
6161 case SUBREG:
6162 if (!base)
6163 base = op;
6164 else if (!index)
6165 index = op;
6166 else
6167 return 0;
6168 break;
6169
6170 case CONST:
6171 case CONST_INT:
6172 case SYMBOL_REF:
6173 case LABEL_REF:
6174 if (disp)
6175 return 0;
6176 disp = op;
6177 break;
6178
6179 default:
6180 return 0;
6181 }
6182 }
6183 }
6184 else if (GET_CODE (addr) == MULT)
6185 {
6186 index = XEXP (addr, 0); /* index*scale */
6187 scale_rtx = XEXP (addr, 1);
6188 }
6189 else if (GET_CODE (addr) == ASHIFT)
6190 {
6191 rtx tmp;
6192
6193 /* We're called for lea too, which implements ashift on occasion. */
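      /* For instance, (ashift (reg) (const_int 3)) here stands for
         index*8 in an lea address.  */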
6194 index = XEXP (addr, 0);
6195 tmp = XEXP (addr, 1);
6196 if (GET_CODE (tmp) != CONST_INT)
6197 return 0;
6198 scale = INTVAL (tmp);
6199 if ((unsigned HOST_WIDE_INT) scale > 3)
6200 return 0;
6201 scale = 1 << scale;
6202 retval = -1;
6203 }
6204 else
6205 disp = addr; /* displacement */
6206
6207 /* Extract the integral value of scale. */
6208 if (scale_rtx)
6209 {
6210 if (GET_CODE (scale_rtx) != CONST_INT)
6211 return 0;
6212 scale = INTVAL (scale_rtx);
6213 }
6214
6215 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6216 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6217
6218  /* Allow arg pointer and stack pointer as index if there is no scaling. */
6219 if (base_reg && index_reg && scale == 1
6220 && (index_reg == arg_pointer_rtx
6221 || index_reg == frame_pointer_rtx
6222 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6223 {
6224 rtx tmp;
6225 tmp = base, base = index, index = tmp;
6226 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6227 }
6228
6229 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6230 if ((base_reg == hard_frame_pointer_rtx
6231 || base_reg == frame_pointer_rtx
6232 || base_reg == arg_pointer_rtx) && !disp)
6233 disp = const0_rtx;
6234
6235 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6236 Avoid this by transforming to [%esi+0]. */
6237 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6238 && base_reg && !index_reg && !disp
6239 && REG_P (base_reg)
6240 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6241 disp = const0_rtx;
6242
6243 /* Special case: encode reg+reg instead of reg*2. */
6244 if (!base && index && scale && scale == 2)
6245 base = index, base_reg = index_reg, scale = 1;
6246
6247 /* Special case: scaling cannot be encoded without base or displacement. */
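  /* E.g. index*4 with no base is encoded as disp32(,%reg,4), so a zero
     displacement is supplied below.  */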
6248 if (!base && !disp && index && scale != 1)
6249 disp = const0_rtx;
6250
6251 out->base = base;
6252 out->index = index;
6253 out->disp = disp;
6254 out->scale = scale;
6255 out->seg = seg;
6256
6257 return retval;
6258}
6259
6260/* Return cost of the memory address x.
6261 For i386, it is better to use a complex address than let gcc copy
6262 the address into a reg and make a new pseudo. But not if the address
6263   requires two regs - that would mean more pseudos with longer
6264 lifetimes. */
6265static int
6266ix86_address_cost (rtx x)
6267{
6268 struct ix86_address parts;
6269 int cost = 1;
6270 int ok = ix86_decompose_address (x, &parts);
6271
6272 gcc_assert (ok);
6273
6274 if (parts.base && GET_CODE (parts.base) == SUBREG)
6275 parts.base = SUBREG_REG (parts.base);
6276 if (parts.index && GET_CODE (parts.index) == SUBREG)
6277 parts.index = SUBREG_REG (parts.index);
6278
6279 /* More complex memory references are better. */
6280 if (parts.disp && parts.disp != const0_rtx)
6281 cost--;
6282 if (parts.seg != SEG_DEFAULT)
6283 cost--;
6284
6285 /* Attempt to minimize number of registers in the address. */
6286 if ((parts.base
6287 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6288 || (parts.index
6289 && (!REG_P (parts.index)
6290 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6291 cost++;
6292
6293 if (parts.base
6294 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6295 && parts.index
6296 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6297 && parts.base != parts.index)
6298 cost++;
6299
6300  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6301     since its predecode logic can't detect the length of instructions
6302     and it degenerates to vector decoding. Increase the cost of such
6303 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6304 to split such addresses or even refuse such addresses at all.
6305
6306 Following addressing modes are affected:
6307 [base+scale*index]
6308 [scale*index+disp]
6309 [base+index]
6310
6311     The first and last cases may be avoidable by explicitly coding the zero
6312     into the memory address, but I don't have an AMD-K6 machine handy to
6313     check this theory. */
6314
6315 if (TARGET_K6
6316 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6317 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6318 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6319 cost += 10;
6320
6321 return cost;
6322}
6323
6324/* If X is a machine specific address (i.e. a symbol or label being
6325 referenced as a displacement from the GOT implemented using an
6326 UNSPEC), then return the base term. Otherwise return X. */
6327
6328rtx
6329ix86_find_base_term (rtx x)
6330{
6331 rtx term;
6332
6333 if (TARGET_64BIT)
6334 {
6335 if (GET_CODE (x) != CONST)
6336 return x;
6337 term = XEXP (x, 0);
6338 if (GET_CODE (term) == PLUS
6339 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6340 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6341 term = XEXP (term, 0);
6342 if (GET_CODE (term) != UNSPEC
6343 || XINT (term, 1) != UNSPEC_GOTPCREL)
6344 return x;
6345
6346 term = XVECEXP (term, 0, 0);
6347
6348 if (GET_CODE (term) != SYMBOL_REF
6349 && GET_CODE (term) != LABEL_REF)
6350 return x;
6351
6352 return term;
6353 }
6354
6355 term = ix86_delegitimize_address (x);
6356
6357 if (GET_CODE (term) != SYMBOL_REF
6358 && GET_CODE (term) != LABEL_REF)
6359 return x;
6360
6361 return term;
6362}
6363
6364/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6365   this is used to form addresses to local data when -fPIC is in
6366 use. */
6367
6368static bool
6369darwin_local_data_pic (rtx disp)
6370{
6371 if (GET_CODE (disp) == MINUS)
6372 {
6373 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6374 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6375 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6376 {
6377 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6378 if (! strcmp (sym_name, "<pic base>"))
6379 return true;
6380 }
6381 }
6382
6383 return false;
6384}
6385
6386/* Determine if a given RTX is a valid constant. We already know this
6387 satisfies CONSTANT_P. */
6388
6389bool
6390legitimate_constant_p (rtx x)
6391{
6392 switch (GET_CODE (x))
6393 {
6394 case CONST:
6395 x = XEXP (x, 0);
6396
6397 if (GET_CODE (x) == PLUS)
6398 {
6399 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6400 return false;
6401 x = XEXP (x, 0);
6402 }
6403
6404 if (TARGET_MACHO && darwin_local_data_pic (x))
6405 return true;
6406
6407 /* Only some unspecs are valid as "constants". */
6408 if (GET_CODE (x) == UNSPEC)
6409 switch (XINT (x, 1))
6410 {
6411 case UNSPEC_GOTOFF:
6412 return TARGET_64BIT;
6413 case UNSPEC_TPOFF:
6414 case UNSPEC_NTPOFF:
6415 x = XVECEXP (x, 0, 0);
6416 return (GET_CODE (x) == SYMBOL_REF
6417 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6418 case UNSPEC_DTPOFF:
6419 x = XVECEXP (x, 0, 0);
6420 return (GET_CODE (x) == SYMBOL_REF
6421 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6422 default:
6423 return false;
6424 }
6425
6426 /* We must have drilled down to a symbol. */
6427 if (GET_CODE (x) == LABEL_REF)
6428 return true;
6429 if (GET_CODE (x) != SYMBOL_REF)
6430 return false;
6431 /* FALLTHRU */
6432
6433 case SYMBOL_REF:
6434 /* TLS symbols are never valid. */
6435 if (SYMBOL_REF_TLS_MODEL (x))
6436 return false;
6437 break;
6438
6439 case CONST_DOUBLE:
6440 if (GET_MODE (x) == TImode
6441 && x != CONST0_RTX (TImode)
6442 && !TARGET_64BIT)
6443 return false;
6444 break;
6445
6446 case CONST_VECTOR:
6447 if (x == CONST0_RTX (GET_MODE (x)))
6448 return true;
6449 return false;
6450
6451 default:
6452 break;
6453 }
6454
6455 /* Otherwise we handle everything else in the move patterns. */
6456 return true;
6457}
6458
6459/* Determine if it's legal to put X into the constant pool. This
6460 is not possible for the address of thread-local symbols, which
6461 is checked above. */
6462
6463static bool
6464ix86_cannot_force_const_mem (rtx x)
6465{
6466 /* We can always put integral constants and vectors in memory. */
6467 switch (GET_CODE (x))
6468 {
6469 case CONST_INT:
6470 case CONST_DOUBLE:
6471 case CONST_VECTOR:
6472 return false;
6473
6474 default:
6475 break;
6476 }
6477 return !legitimate_constant_p (x);
6478}
6479
6480/* Determine if a given RTX is a valid constant address. */
6481
6482bool
6483constant_address_p (rtx x)
6484{
6485 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6486}
6487
6488/* Nonzero if the constant value X is a legitimate general operand
6489 when generating PIC code. It is given that flag_pic is on and
6490 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6491
6492bool
6493legitimate_pic_operand_p (rtx x)
6494{
6495 rtx inner;
6496
6497 switch (GET_CODE (x))
6498 {
6499 case CONST:
6500 inner = XEXP (x, 0);
6501 if (GET_CODE (inner) == PLUS
6502 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6503 inner = XEXP (inner, 0);
6504
6505 /* Only some unspecs are valid as "constants". */
6506 if (GET_CODE (inner) == UNSPEC)
6507 switch (XINT (inner, 1))
6508 {
6509 case UNSPEC_GOTOFF:
6510 return TARGET_64BIT;
6511 case UNSPEC_TPOFF:
6512 x = XVECEXP (inner, 0, 0);
6513 return (GET_CODE (x) == SYMBOL_REF
6514 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6515 default:
6516 return false;
6517 }
6518 /* FALLTHRU */
6519
6520 case SYMBOL_REF:
6521 case LABEL_REF:
6522 return legitimate_pic_address_disp_p (x);
6523
6524 default:
6525 return true;
6526 }
6527}
6528
6529/* Determine if a given CONST RTX is a valid memory displacement
6530 in PIC mode. */
6531
6532int
6533legitimate_pic_address_disp_p (rtx disp)
6534{
6535 bool saw_plus;
6536
6537 /* In 64bit mode we can allow direct addresses of symbols and labels
6538 when they are not dynamic symbols. */
6539 if (TARGET_64BIT)
6540 {
6541 rtx op0 = disp, op1;
6542
6543 switch (GET_CODE (disp))
6544 {
6545 case LABEL_REF:
6546 return true;
6547
6548 case CONST:
6549 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6550 break;
6551 op0 = XEXP (XEXP (disp, 0), 0);
6552 op1 = XEXP (XEXP (disp, 0), 1);
6553 if (GET_CODE (op1) != CONST_INT
6554 || INTVAL (op1) >= 16*1024*1024
6555 || INTVAL (op1) < -16*1024*1024)
6556 break;
6557 if (GET_CODE (op0) == LABEL_REF)
6558 return true;
6559 if (GET_CODE (op0) != SYMBOL_REF)
6560 break;
6561 /* FALLTHRU */
6562
6563 case SYMBOL_REF:
6564 /* TLS references should always be enclosed in UNSPEC. */
6565 if (SYMBOL_REF_TLS_MODEL (op0))
6566 return false;
6567 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6568 return true;
6569 break;
6570
6571 default:
6572 break;
6573 }
6574 }
6575 if (GET_CODE (disp) != CONST)
6576 return 0;
6577 disp = XEXP (disp, 0);
6578
6579 if (TARGET_64BIT)
6580 {
6581      /* It is unsafe to allow PLUS expressions here; this limits the allowed
6582	 distance of GOT tables. We should not need these anyway. */
6583 if (GET_CODE (disp) != UNSPEC
6584 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6585 && XINT (disp, 1) != UNSPEC_GOTOFF))
6586 return 0;
6587
6588 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6589 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6590 return 0;
6591 return 1;
6592 }
6593
6594 saw_plus = false;
6595 if (GET_CODE (disp) == PLUS)
6596 {
6597 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6598 return 0;
6599 disp = XEXP (disp, 0);
6600 saw_plus = true;
6601 }
6602
6603 if (TARGET_MACHO && darwin_local_data_pic (disp))
6604 return 1;
6605
6606 if (GET_CODE (disp) != UNSPEC)
6607 return 0;
6608
6609 switch (XINT (disp, 1))
6610 {
6611 case UNSPEC_GOT:
6612 if (saw_plus)
6613 return false;
6614 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6615 case UNSPEC_GOTOFF:
6616 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6617	 While the ABI also specifies a 32bit relocation, we don't produce it
6618	 in the small PIC model at all. */
6619 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6620 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6621 && !TARGET_64BIT)
6622 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6623 return false;
6624 case UNSPEC_GOTTPOFF:
6625 case UNSPEC_GOTNTPOFF:
6626 case UNSPEC_INDNTPOFF:
6627 if (saw_plus)
6628 return false;
6629 disp = XVECEXP (disp, 0, 0);
6630 return (GET_CODE (disp) == SYMBOL_REF
6631 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6632 case UNSPEC_NTPOFF:
6633 disp = XVECEXP (disp, 0, 0);
6634 return (GET_CODE (disp) == SYMBOL_REF
6635 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6636 case UNSPEC_DTPOFF:
6637 disp = XVECEXP (disp, 0, 0);
6638 return (GET_CODE (disp) == SYMBOL_REF
6639 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6640 }
6641
6642 return 0;
6643}
6644
6645/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6646 memory address for an instruction. The MODE argument is the machine mode
6647 for the MEM expression that wants to use this address.
6648
6649   It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6650 convert common non-canonical forms to canonical form so that they will
6651 be recognized. */
6652
6653int
6654legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6655{
6656 struct ix86_address parts;
6657 rtx base, index, disp;
6658 HOST_WIDE_INT scale;
6659 const char *reason = NULL;
6660 rtx reason_rtx = NULL_RTX;
6661
6662 if (TARGET_DEBUG_ADDR)
6663 {
6664 fprintf (stderr,
6665 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6666 GET_MODE_NAME (mode), strict);
6667 debug_rtx (addr);
6668 }
6669
6670 if (ix86_decompose_address (addr, &parts) <= 0)
6671 {
6672 reason = "decomposition failed";
6673 goto report_error;
6674 }
6675
6676 base = parts.base;
6677 index = parts.index;
6678 disp = parts.disp;
6679 scale = parts.scale;
6680
6681 /* Validate base register.
6682
6683 Don't allow SUBREG's that span more than a word here. It can lead to spill
6684 failures when the base is one word out of a two word structure, which is
6685 represented internally as a DImode int. */
6686
6687 if (base)
6688 {
6689 rtx reg;
6690 reason_rtx = base;
6691
6692 if (REG_P (base))
6693 reg = base;
6694 else if (GET_CODE (base) == SUBREG
6695 && REG_P (SUBREG_REG (base))
6696 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6697 <= UNITS_PER_WORD)
6698 reg = SUBREG_REG (base);
6699 else
6700 {
6701 reason = "base is not a register";
6702 goto report_error;
6703 }
6704
6705 if (GET_MODE (base) != Pmode)
6706 {
6707 reason = "base is not in Pmode";
6708 goto report_error;
6709 }
6710
6711 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6712 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6713 {
6714 reason = "base is not valid";
6715 goto report_error;
6716 }
6717 }
6718
6719 /* Validate index register.
6720
6721 Don't allow SUBREG's that span more than a word here -- same as above. */
6722
6723 if (index)
6724 {
6725 rtx reg;
6726 reason_rtx = index;
6727
6728 if (REG_P (index))
6729 reg = index;
6730 else if (GET_CODE (index) == SUBREG
6731 && REG_P (SUBREG_REG (index))
6732 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6733 <= UNITS_PER_WORD)
6734 reg = SUBREG_REG (index);
6735 else
6736 {
6737 reason = "index is not a register";
6738 goto report_error;
6739 }
6740
6741 if (GET_MODE (index) != Pmode)
6742 {
6743 reason = "index is not in Pmode";
6744 goto report_error;
6745 }
6746
6747 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6748 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6749 {
6750 reason = "index is not valid";
6751 goto report_error;
6752 }
6753 }
6754
6755 /* Validate scale factor. */
6756 if (scale != 1)
6757 {
6758 reason_rtx = GEN_INT (scale);
6759 if (!index)
6760 {
6761 reason = "scale without index";
6762 goto report_error;
6763 }
6764
6765 if (scale != 2 && scale != 4 && scale != 8)
6766 {
6767 reason = "scale is not a valid multiplier";
6768 goto report_error;
6769 }
6770 }
6771
6772 /* Validate displacement. */
6773 if (disp)
6774 {
6775 reason_rtx = disp;
6776
6777 if (GET_CODE (disp) == CONST
6778 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6779 switch (XINT (XEXP (disp, 0), 1))
6780 {
6781	    /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6782	       used.  While the ABI also specifies 32bit relocations, we don't
6783	       produce them at all and use IP-relative addressing instead.  */
6784 case UNSPEC_GOT:
6785 case UNSPEC_GOTOFF:
6786 gcc_assert (flag_pic);
6787 if (!TARGET_64BIT)
6788 goto is_legitimate_pic;
6789 reason = "64bit address unspec";
6790 goto report_error;
6791
6792 case UNSPEC_GOTPCREL:
6793 gcc_assert (flag_pic);
6794 goto is_legitimate_pic;
6795
6796 case UNSPEC_GOTTPOFF:
6797 case UNSPEC_GOTNTPOFF:
6798 case UNSPEC_INDNTPOFF:
6799 case UNSPEC_NTPOFF:
6800 case UNSPEC_DTPOFF:
6801 break;
6802
6803 default:
6804 reason = "invalid address unspec";
6805 goto report_error;
6806 }
6807
6808 else if (SYMBOLIC_CONST (disp)
6809 && (flag_pic
6810 || (TARGET_MACHO
6811#if TARGET_MACHO
6812 && MACHOPIC_INDIRECT
6813 && !machopic_operand_p (disp)
6814#endif
6815 )))
6816 {
6817
6818 is_legitimate_pic:
6819 if (TARGET_64BIT && (index || base))
6820 {
6821 /* foo@dtpoff(%rX) is ok. */
6822 if (GET_CODE (disp) != CONST
6823 || GET_CODE (XEXP (disp, 0)) != PLUS
6824 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6825 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6826 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6827 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6828 {
6829 reason = "non-constant pic memory reference";
6830 goto report_error;
6831 }
6832 }
6833 else if (! legitimate_pic_address_disp_p (disp))
6834 {
6835 reason = "displacement is an invalid pic construct";
6836 goto report_error;
6837 }
6838
6839 /* This code used to verify that a symbolic pic displacement
6840 includes the pic_offset_table_rtx register.
6841
6842	     While this is a good idea, unfortunately these constructs may
6843	     be created by the "adds using lea" optimization for incorrect
6844	     code like:
6845
6846 int a;
6847 int foo(int i)
6848 {
6849 return *(&a+i);
6850 }
6851
6852	     This code is nonsensical, but results in addressing the
6853	     GOT table with a pic_offset_table_rtx base.  We can't
6854	     easily refuse it, since it gets matched by the
6855	     "addsi3" pattern, which is later split into an lea when the
6856	     output register differs from the input.  While this
6857	     could be handled by a separate addsi pattern for this case
6858	     that never results in an lea, disabling this test seems to be
6859	     the easier and correct fix for the crash.  */
6860 }
6861 else if (GET_CODE (disp) != LABEL_REF
6862 && GET_CODE (disp) != CONST_INT
6863 && (GET_CODE (disp) != CONST
6864 || !legitimate_constant_p (disp))
6865 && (GET_CODE (disp) != SYMBOL_REF
6866 || !legitimate_constant_p (disp)))
6867 {
6868 reason = "displacement is not constant";
6869 goto report_error;
6870 }
6871 else if (TARGET_64BIT
6872 && !x86_64_immediate_operand (disp, VOIDmode))
6873 {
6874 reason = "displacement is out of range";
6875 goto report_error;
6876 }
6877 }
6878
6879 /* Everything looks valid. */
6880 if (TARGET_DEBUG_ADDR)
6881 fprintf (stderr, "Success.\n");
6882 return TRUE;
6883
6884 report_error:
6885 if (TARGET_DEBUG_ADDR)
6886 {
6887 fprintf (stderr, "Error: %s\n", reason);
6888 debug_rtx (reason_rtx);
6889 }
6890 return FALSE;
6891}
6892
6893/* Return a unique alias set for the GOT. */
6894
6895static HOST_WIDE_INT
6896ix86_GOT_alias_set (void)
6897{
6898 static HOST_WIDE_INT set = -1;
6899 if (set == -1)
6900 set = new_alias_set ();
6901 return set;
6902}
6903
6904/* Return a legitimate reference for ORIG (an address) using the
6905 register REG. If REG is 0, a new pseudo is generated.
6906
6907 There are two types of references that must be handled:
6908
6909 1. Global data references must load the address from the GOT, via
6910 the PIC reg. An insn is emitted to do this load, and the reg is
6911 returned.
6912
6913 2. Static data references, constant pool addresses, and code labels
6914 compute the address as an offset from the GOT, whose base is in
6915 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6916 differentiate them from global data objects. The returned
6917 address is the PIC reg + an unspec constant.
6918
6919 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6920 reg also appears in the address. */
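
/* For illustration only (32bit ELF case): a global symbol is typically
   rewritten into a GOT slot load,
     (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT))))    i.e. movl sym@GOT(%ebx), %reg
   while a local symbol becomes a GOT-relative offset,
     (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF)))       i.e. leal sym@GOTOFF(%ebx), %reg
   The 64bit small PIC model uses sym@GOTPCREL(%rip) loads instead.  */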
6921
6922static rtx
6923legitimize_pic_address (rtx orig, rtx reg)
6924{
6925 rtx addr = orig;
6926 rtx new = orig;
6927 rtx base;
6928
6929#if TARGET_MACHO
6930 if (TARGET_MACHO && !TARGET_64BIT)
6931 {
6932 if (reg == 0)
6933 reg = gen_reg_rtx (Pmode);
6934 /* Use the generic Mach-O PIC machinery. */
6935 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6936 }
6937#endif
6938
6939 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6940 new = addr;
6941 else if (TARGET_64BIT
6942 && ix86_cmodel != CM_SMALL_PIC
6943 && local_symbolic_operand (addr, Pmode))
6944 {
6945 rtx tmpreg;
6946 /* This symbol may be referenced via a displacement from the PIC
6947 base address (@GOTOFF). */
6948
6949 if (reload_in_progress)
6950 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6951 if (GET_CODE (addr) == CONST)
6952 addr = XEXP (addr, 0);
6953 if (GET_CODE (addr) == PLUS)
6954 {
6955 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6956 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6957 }
6958 else
6959 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6960 new = gen_rtx_CONST (Pmode, new);
6961 if (!reg)
6962 tmpreg = gen_reg_rtx (Pmode);
6963 else
6964 tmpreg = reg;
6965 emit_move_insn (tmpreg, new);
6966
6967 if (reg != 0)
6968 {
6969 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6970 tmpreg, 1, OPTAB_DIRECT);
6971 new = reg;
6972 }
6973 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6974 }
6975 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6976 {
6977 /* This symbol may be referenced via a displacement from the PIC
6978 base address (@GOTOFF). */
6979
6980 if (reload_in_progress)
6981 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6982 if (GET_CODE (addr) == CONST)
6983 addr = XEXP (addr, 0);
6984 if (GET_CODE (addr) == PLUS)
6985 {
6986 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6987 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6988 }
6989 else
6990 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6991 new = gen_rtx_CONST (Pmode, new);
6992 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6993
6994 if (reg != 0)
6995 {
6996 emit_move_insn (reg, new);
6997 new = reg;
6998 }
6999 }
7000 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7001 {
7002 if (TARGET_64BIT)
7003 {
7004 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7005 new = gen_rtx_CONST (Pmode, new);
7006 new = gen_const_mem (Pmode, new);
7007 set_mem_alias_set (new, ix86_GOT_alias_set ());
7008
7009 if (reg == 0)
7010 reg = gen_reg_rtx (Pmode);
7011	      /* Use gen_movsi directly, otherwise the address is loaded
7012		 into a register for CSE.  We don't want to CSE these addresses;
7013		 instead we CSE the addresses loaded from the GOT table, so skip this.  */
7014 emit_insn (gen_movsi (reg, new));
7015 new = reg;
7016 }
7017 else
7018 {
7019 /* This symbol must be referenced via a load from the
7020 Global Offset Table (@GOT). */
7021
7022 if (reload_in_progress)
7023 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7024 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7025 new = gen_rtx_CONST (Pmode, new);
7026 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7027 new = gen_const_mem (Pmode, new);
7028 set_mem_alias_set (new, ix86_GOT_alias_set ());
7029
7030 if (reg == 0)
7031 reg = gen_reg_rtx (Pmode);
7032 emit_move_insn (reg, new);
7033 new = reg;
7034 }
7035 }
7036 else
7037 {
7038 if (GET_CODE (addr) == CONST_INT
7039 && !x86_64_immediate_operand (addr, VOIDmode))
7040 {
7041 if (reg)
7042 {
7043 emit_move_insn (reg, addr);
7044 new = reg;
7045 }
7046 else
7047 new = force_reg (Pmode, addr);
7048 }
7049 else if (GET_CODE (addr) == CONST)
7050 {
7051 addr = XEXP (addr, 0);
7052
7053	  /* We must match stuff we generated earlier.  Assume the only
7054	     unspecs that can get here are ours.  Not that we could do
7055	     anything with them anyway....  */
7056 if (GET_CODE (addr) == UNSPEC
7057 || (GET_CODE (addr) == PLUS
7058 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7059 return orig;
7060 gcc_assert (GET_CODE (addr) == PLUS);
7061 }
7062 if (GET_CODE (addr) == PLUS)
7063 {
7064 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7065
7066 /* Check first to see if this is a constant offset from a @GOTOFF
7067 symbol reference. */
7068 if (local_symbolic_operand (op0, Pmode)
7069 && GET_CODE (op1) == CONST_INT)
7070 {
7071 if (!TARGET_64BIT)
7072 {
7073 if (reload_in_progress)
7074 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7075 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7076 UNSPEC_GOTOFF);
7077 new = gen_rtx_PLUS (Pmode, new, op1);
7078 new = gen_rtx_CONST (Pmode, new);
7079 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7080
7081 if (reg != 0)
7082 {
7083 emit_move_insn (reg, new);
7084 new = reg;
7085 }
7086 }
7087 else
7088 {
7089 if (INTVAL (op1) < -16*1024*1024
7090 || INTVAL (op1) >= 16*1024*1024)
7091 {
7092 if (!x86_64_immediate_operand (op1, Pmode))
7093 op1 = force_reg (Pmode, op1);
7094 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7095 }
7096 }
7097 }
7098 else
7099 {
7100 base = legitimize_pic_address (XEXP (addr, 0), reg);
7101 new = legitimize_pic_address (XEXP (addr, 1),
7102 base == reg ? NULL_RTX : reg);
7103
7104 if (GET_CODE (new) == CONST_INT)
7105 new = plus_constant (base, INTVAL (new));
7106 else
7107 {
7108 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7109 {
7110 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7111 new = XEXP (new, 1);
7112 }
7113 new = gen_rtx_PLUS (Pmode, base, new);
7114 }
7115 }
7116 }
7117 }
7118 return new;
7119}
7120
7121/* Load the thread pointer. If TO_REG is true, force it into a register. */
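
/* Note: the UNSPEC_TP reference below is only a placeholder for the thread
   pointer; on GNU/Linux style targets it ultimately corresponds to the %gs
   segment base in 32bit mode and the %fs segment base in 64bit mode.  */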
7122
7123static rtx
7124get_thread_pointer (int to_reg)
7125{
7126 rtx tp, reg, insn;
7127
7128 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7129 if (!to_reg)
7130 return tp;
7131
7132 reg = gen_reg_rtx (Pmode);
7133 insn = gen_rtx_SET (VOIDmode, reg, tp);
7134 insn = emit_insn (insn);
7135
7136 return reg;
7137}
7138
7139/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7140 false if we expect this to be used for a memory address and true if
7141 we expect to load the address into a register. */
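
/* Rough sketch of the code sequences produced below (GNU/ELF, simplified):
   global-dynamic and local-dynamic call __tls_get_addr to obtain the
   address at run time; initial-exec loads the TP-relative offset from the
   GOT (@gottpoff / @indntpoff) and adds the thread pointer; local-exec
   folds a constant @tpoff / @ntpoff offset directly into the address.  */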
7142
7143static rtx
7144legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7145{
7146 rtx dest, base, off, pic, tp;
7147 int type;
7148
7149 switch (model)
7150 {
7151 case TLS_MODEL_GLOBAL_DYNAMIC:
7152 dest = gen_reg_rtx (Pmode);
7153 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7154
7155 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7156 {
7157 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7158
7159 start_sequence ();
7160 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7161 insns = get_insns ();
7162 end_sequence ();
7163
7164 emit_libcall_block (insns, dest, rax, x);
7165 }
7166 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7167 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7168 else
7169 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7170
7171 if (TARGET_GNU2_TLS)
7172 {
7173 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7174
7175 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7176 }
7177 break;
7178
7179 case TLS_MODEL_LOCAL_DYNAMIC:
7180 base = gen_reg_rtx (Pmode);
7181 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7182
7183 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7184 {
7185 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7186
7187 start_sequence ();
7188 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7189 insns = get_insns ();
7190 end_sequence ();
7191
7192 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7193 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7194 emit_libcall_block (insns, base, rax, note);
7195 }
7196 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7197 emit_insn (gen_tls_local_dynamic_base_64 (base));
7198 else
7199 emit_insn (gen_tls_local_dynamic_base_32 (base));
7200
7201 if (TARGET_GNU2_TLS)
7202 {
7203 rtx x = ix86_tls_module_base ();
7204
7205 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7206 gen_rtx_MINUS (Pmode, x, tp));
7207 }
7208
7209 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7210 off = gen_rtx_CONST (Pmode, off);
7211
7212 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7213
7214 if (TARGET_GNU2_TLS)
7215 {
7216 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7217
7218 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7219 }
7220
7221 break;
7222
7223 case TLS_MODEL_INITIAL_EXEC:
7224 if (TARGET_64BIT)
7225 {
7226 pic = NULL;
7227 type = UNSPEC_GOTNTPOFF;
7228 }
7229 else if (flag_pic)
7230 {
7231 if (reload_in_progress)
7232 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7233 pic = pic_offset_table_rtx;
7234 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7235 }
7236 else if (!TARGET_ANY_GNU_TLS)
7237 {
7238 pic = gen_reg_rtx (Pmode);
7239 emit_insn (gen_set_got (pic));
7240 type = UNSPEC_GOTTPOFF;
7241 }
7242 else
7243 {
7244 pic = NULL;
7245 type = UNSPEC_INDNTPOFF;
7246 }
7247
7248 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7249 off = gen_rtx_CONST (Pmode, off);
7250 if (pic)
7251 off = gen_rtx_PLUS (Pmode, pic, off);
7252 off = gen_const_mem (Pmode, off);
7253 set_mem_alias_set (off, ix86_GOT_alias_set ());
7254
7255 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7256 {
7257 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7258 off = force_reg (Pmode, off);
7259 return gen_rtx_PLUS (Pmode, base, off);
7260 }
7261 else
7262 {
7263 base = get_thread_pointer (true);
7264 dest = gen_reg_rtx (Pmode);
7265 emit_insn (gen_subsi3 (dest, base, off));
7266 }
7267 break;
7268
7269 case TLS_MODEL_LOCAL_EXEC:
7270 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7271 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7272 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7273 off = gen_rtx_CONST (Pmode, off);
7274
7275 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7276 {
7277 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7278 return gen_rtx_PLUS (Pmode, base, off);
7279 }
7280 else
7281 {
7282 base = get_thread_pointer (true);
7283 dest = gen_reg_rtx (Pmode);
7284 emit_insn (gen_subsi3 (dest, base, off));
7285 }
7286 break;
7287
7288 default:
7289 gcc_unreachable ();
7290 }
7291
7292 return dest;
7293}
7294
7295/* Try machine-dependent ways of modifying an illegitimate address
7296 to be legitimate. If we find one, return the new, valid address.
7297 This macro is used in only one place: `memory_address' in explow.c.
7298
7299 OLDX is the address as it was before break_out_memory_refs was called.
7300 In some cases it is useful to look at this to decide what needs to be done.
7301
7302 MODE and WIN are passed so that this macro can use
7303 GO_IF_LEGITIMATE_ADDRESS.
7304
7305 It is always safe for this macro to do nothing. It exists to recognize
7306 opportunities to optimize the output.
7307
7308 For the 80386, we handle X+REG by loading X into a register R and
7309 using R+REG. R will go in a general reg and indexing will be used.
7310 However, if REG is a broken-out memory address or multiplication,
7311 nothing needs to be done because REG can certainly go in a general reg.
7312
7313 When -fpic is used, special handling is needed for symbolic references.
7314 See comments by legitimize_pic_address in i386.c for details. */
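
/* Illustrative example: an index expressed as a shift, e.g.
   (plus (reg) (ashift (reg) (const_int 2))), is canonicalized below into
   the multiply form (plus (mult (reg) (const_int 4)) (reg)) so that it can
   be matched as a scaled-index address.  */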
7315
7316rtx
7317legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7318{
7319 int changed = 0;
7320 unsigned log;
7321
7322 if (TARGET_DEBUG_ADDR)
7323 {
7324 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7325 GET_MODE_NAME (mode));
7326 debug_rtx (x);
7327 }
7328
7329 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7330 if (log)
7331 return legitimize_tls_address (x, log, false);
7332 if (GET_CODE (x) == CONST
7333 && GET_CODE (XEXP (x, 0)) == PLUS
7334 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7335 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7336 {
7337 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7338 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7339 }
7340
7341 if (flag_pic && SYMBOLIC_CONST (x))
7342 return legitimize_pic_address (x, 0);
7343
7344  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7345 if (GET_CODE (x) == ASHIFT
7346 && GET_CODE (XEXP (x, 1)) == CONST_INT
7347 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7348 {
7349 changed = 1;
7350 log = INTVAL (XEXP (x, 1));
7351 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7352 GEN_INT (1 << log));
7353 }
7354
7355 if (GET_CODE (x) == PLUS)
7356 {
7357 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7358
7359 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7360 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7361 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7362 {
7363 changed = 1;
7364 log = INTVAL (XEXP (XEXP (x, 0), 1));
7365 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7366 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7367 GEN_INT (1 << log));
7368 }
7369
7370 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7371 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7372 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7373 {
7374 changed = 1;
7375 log = INTVAL (XEXP (XEXP (x, 1), 1));
7376 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7377 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7378 GEN_INT (1 << log));
7379 }
7380
7381 /* Put multiply first if it isn't already. */
7382 if (GET_CODE (XEXP (x, 1)) == MULT)
7383 {
7384 rtx tmp = XEXP (x, 0);
7385 XEXP (x, 0) = XEXP (x, 1);
7386 XEXP (x, 1) = tmp;
7387 changed = 1;
7388 }
7389
7390 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7391 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7392 created by virtual register instantiation, register elimination, and
7393 similar optimizations. */
7394 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7395 {
7396 changed = 1;
7397 x = gen_rtx_PLUS (Pmode,
7398 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7399 XEXP (XEXP (x, 1), 0)),
7400 XEXP (XEXP (x, 1), 1));
7401 }
7402
7403 /* Canonicalize
7404 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7405 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7406 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7407 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7408 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7409 && CONSTANT_P (XEXP (x, 1)))
7410 {
7411 rtx constant;
7412 rtx other = NULL_RTX;
7413
7414 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7415 {
7416 constant = XEXP (x, 1);
7417 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7418 }
7419 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7420 {
7421 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7422 other = XEXP (x, 1);
7423 }
7424 else
7425 constant = 0;
7426
7427 if (constant)
7428 {
7429 changed = 1;
7430 x = gen_rtx_PLUS (Pmode,
7431 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7432 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7433 plus_constant (other, INTVAL (constant)));
7434 }
7435 }
7436
7437 if (changed && legitimate_address_p (mode, x, FALSE))
7438 return x;
7439
7440 if (GET_CODE (XEXP (x, 0)) == MULT)
7441 {
7442 changed = 1;
7443 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7444 }
7445
7446 if (GET_CODE (XEXP (x, 1)) == MULT)
7447 {
7448 changed = 1;
7449 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7450 }
7451
7452 if (changed
7453 && GET_CODE (XEXP (x, 1)) == REG
7454 && GET_CODE (XEXP (x, 0)) == REG)
7455 return x;
7456
7457 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7458 {
7459 changed = 1;
7460 x = legitimize_pic_address (x, 0);
7461 }
7462
7463 if (changed && legitimate_address_p (mode, x, FALSE))
7464 return x;
7465
7466 if (GET_CODE (XEXP (x, 0)) == REG)
7467 {
7468 rtx temp = gen_reg_rtx (Pmode);
7469 rtx val = force_operand (XEXP (x, 1), temp);
7470 if (val != temp)
7471 emit_move_insn (temp, val);
7472
7473 XEXP (x, 1) = temp;
7474 return x;
7475 }
7476
7477 else if (GET_CODE (XEXP (x, 1)) == REG)
7478 {
7479 rtx temp = gen_reg_rtx (Pmode);
7480 rtx val = force_operand (XEXP (x, 0), temp);
7481 if (val != temp)
7482 emit_move_insn (temp, val);
7483
7484 XEXP (x, 0) = temp;
7485 return x;
7486 }
7487 }
7488
7489 return x;
7490}
7491
7492/* Print an integer constant expression in assembler syntax. Addition
7493 and subtraction are the only arithmetic that may appear in these
7494 expressions. FILE is the stdio stream to write to, X is the rtx, and
7495 CODE is the operand print code from the output string. */
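
/* Illustrative output (AT&T syntax): a UNSPEC_GOT reference prints as
   "sym@GOT", UNSPEC_GOTOFF as "sym@GOTOFF", and with code 'P' a non-local
   SYMBOL_REF additionally gets an "@PLT" suffix.  */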
7496
7497static void
7498output_pic_addr_const (FILE *file, rtx x, int code)
7499{
7500 char buf[256];
7501
7502 switch (GET_CODE (x))
7503 {
7504 case PC:
7505 gcc_assert (flag_pic);
7506 putc ('.', file);
7507 break;
7508
7509 case SYMBOL_REF:
7510 if (! TARGET_MACHO || TARGET_64BIT)
7511 output_addr_const (file, x);
7512 else
7513 {
7514 const char *name = XSTR (x, 0);
7515
7516 /* Mark the decl as referenced so that cgraph will output the function. */
7517 if (SYMBOL_REF_DECL (x))
7518 mark_decl_referenced (SYMBOL_REF_DECL (x));
7519
7520#if TARGET_MACHO
7521 if (MACHOPIC_INDIRECT
7522 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7523 name = machopic_indirection_name (x, /*stub_p=*/true);
7524#endif
7525 assemble_name (file, name);
7526 }
7527 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7528 fputs ("@PLT", file);
7529 break;
7530
7531 case LABEL_REF:
7532 x = XEXP (x, 0);
7533 /* FALLTHRU */
7534 case CODE_LABEL:
7535 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7536 assemble_name (asm_out_file, buf);
7537 break;
7538
7539 case CONST_INT:
7540 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7541 break;
7542
7543 case CONST:
7544 /* This used to output parentheses around the expression,
7545 but that does not work on the 386 (either ATT or BSD assembler). */
7546 output_pic_addr_const (file, XEXP (x, 0), code);
7547 break;
7548
7549 case CONST_DOUBLE:
7550 if (GET_MODE (x) == VOIDmode)
7551 {
7552 /* We can use %d if the number is <32 bits and positive. */
7553 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7554 fprintf (file, "0x%lx%08lx",
7555 (unsigned long) CONST_DOUBLE_HIGH (x),
7556 (unsigned long) CONST_DOUBLE_LOW (x));
7557 else
7558 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7559 }
7560 else
7561 /* We can't handle floating point constants;
7562 PRINT_OPERAND must handle them. */
7563 output_operand_lossage ("floating constant misused");
7564 break;
7565
7566 case PLUS:
7567 /* Some assemblers need integer constants to appear first. */
7568 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7569 {
7570 output_pic_addr_const (file, XEXP (x, 0), code);
7571 putc ('+', file);
7572 output_pic_addr_const (file, XEXP (x, 1), code);
7573 }
7574 else
7575 {
7576 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7577 output_pic_addr_const (file, XEXP (x, 1), code);
7578 putc ('+', file);
7579 output_pic_addr_const (file, XEXP (x, 0), code);
7580 }
7581 break;
7582
7583 case MINUS:
7584 if (!TARGET_MACHO)
7585 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7586 output_pic_addr_const (file, XEXP (x, 0), code);
7587 putc ('-', file);
7588 output_pic_addr_const (file, XEXP (x, 1), code);
7589 if (!TARGET_MACHO)
7590 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7591 break;
7592
7593 case UNSPEC:
7594 gcc_assert (XVECLEN (x, 0) == 1);
7595 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7596 switch (XINT (x, 1))
7597 {
7598 case UNSPEC_GOT:
7599 fputs ("@GOT", file);
7600 break;
7601 case UNSPEC_GOTOFF:
7602 fputs ("@GOTOFF", file);
7603 break;
7604 case UNSPEC_GOTPCREL:
7605 fputs ("@GOTPCREL(%rip)", file);
7606 break;
7607 case UNSPEC_GOTTPOFF:
7608 /* FIXME: This might be @TPOFF in Sun ld too. */
7609 fputs ("@GOTTPOFF", file);
7610 break;
7611 case UNSPEC_TPOFF:
7612 fputs ("@TPOFF", file);
7613 break;
7614 case UNSPEC_NTPOFF:
7615 if (TARGET_64BIT)
7616 fputs ("@TPOFF", file);
7617 else
7618 fputs ("@NTPOFF", file);
7619 break;
7620 case UNSPEC_DTPOFF:
7621 fputs ("@DTPOFF", file);
7622 break;
7623 case UNSPEC_GOTNTPOFF:
7624 if (TARGET_64BIT)
7625 fputs ("@GOTTPOFF(%rip)", file);
7626 else
7627 fputs ("@GOTNTPOFF", file);
7628 break;
7629 case UNSPEC_INDNTPOFF:
7630 fputs ("@INDNTPOFF", file);
7631 break;
7632 default:
7633 output_operand_lossage ("invalid UNSPEC as operand");
7634 break;
7635 }
7636 break;
7637
7638 default:
7639 output_operand_lossage ("invalid expression as operand");
7640 }
7641}
7642
7643/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7644 We need to emit DTP-relative relocations. */
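
/* For example (illustrative): with ASM_LONG being ".long", a 4-byte entry
   comes out as ".long sym@DTPOFF" and an 8-byte entry as
   ".long sym@DTPOFF, 0", i.e. the high half is simply padded with zero.  */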
7645
7646static void
7647i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7648{
7649 fputs (ASM_LONG, file);
7650 output_addr_const (file, x);
7651 fputs ("@DTPOFF", file);
7652 switch (size)
7653 {
7654 case 4:
7655 break;
7656 case 8:
7657 fputs (", 0", file);
7658 break;
7659 default:
7660 gcc_unreachable ();
7661 }
7662}
7663
7664/* In the name of slightly smaller debug output, and to cater to
7665 general assembler lossage, recognize PIC+GOTOFF and turn it back
7666 into a direct symbol reference.
7667
7668 On Darwin, this is necessary to avoid a crash, because Darwin
7669 has a different PIC label for each routine but the DWARF debugging
7670 information is not associated with any particular routine, so it's
7671 necessary to remove references to the PIC label from RTL stored by
7672 the DWARF output code. */
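
/* Illustrative example: (plus (reg %ebx) (const (unspec [foo] UNSPEC_GOTOFF)))
   is turned back into plain `foo' (plus any constant addend), and in 64bit
   mode a (mem (const (unspec [foo] UNSPEC_GOTPCREL))) load becomes `foo'.  */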
7673
7674static rtx
7675ix86_delegitimize_address (rtx orig_x)
7676{
7677 rtx x = orig_x;
7678 /* reg_addend is NULL or a multiple of some register. */
7679 rtx reg_addend = NULL_RTX;
7680 /* const_addend is NULL or a const_int. */
7681 rtx const_addend = NULL_RTX;
7682 /* This is the result, or NULL. */
7683 rtx result = NULL_RTX;
7684
7685 if (GET_CODE (x) == MEM)
7686 x = XEXP (x, 0);
7687
7688 if (TARGET_64BIT)
7689 {
7690 if (GET_CODE (x) != CONST
7691 || GET_CODE (XEXP (x, 0)) != UNSPEC
7692 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7693 || GET_CODE (orig_x) != MEM)
7694 return orig_x;
7695 return XVECEXP (XEXP (x, 0), 0, 0);
7696 }
7697
7698 if (GET_CODE (x) != PLUS
7699 || GET_CODE (XEXP (x, 1)) != CONST)
7700 return orig_x;
7701
7702 if (GET_CODE (XEXP (x, 0)) == REG
7703 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7704 /* %ebx + GOT/GOTOFF */
7705 ;
7706 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7707 {
7708 /* %ebx + %reg * scale + GOT/GOTOFF */
7709 reg_addend = XEXP (x, 0);
7710 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7711 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7712 reg_addend = XEXP (reg_addend, 1);
7713 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7714 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7715 reg_addend = XEXP (reg_addend, 0);
7716 else
7717 return orig_x;
7718 if (GET_CODE (reg_addend) != REG
7719 && GET_CODE (reg_addend) != MULT
7720 && GET_CODE (reg_addend) != ASHIFT)
7721 return orig_x;
7722 }
7723 else
7724 return orig_x;
7725
7726 x = XEXP (XEXP (x, 1), 0);
7727 if (GET_CODE (x) == PLUS
7728 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7729 {
7730 const_addend = XEXP (x, 1);
7731 x = XEXP (x, 0);
7732 }
7733
7734 if (GET_CODE (x) == UNSPEC
7735 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7736 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7737 result = XVECEXP (x, 0, 0);
7738
7739 if (TARGET_MACHO && darwin_local_data_pic (x)
7740 && GET_CODE (orig_x) != MEM)
7741 result = XEXP (x, 0);
7742
7743 if (! result)
7744 return orig_x;
7745
7746 if (const_addend)
7747 result = gen_rtx_PLUS (Pmode, result, const_addend);
7748 if (reg_addend)
7749 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7750 return result;
7751}
7752
7753static void
7754put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7755 int fp, FILE *file)
7756{
7757 const char *suffix;
7758
7759 if (mode == CCFPmode || mode == CCFPUmode)
7760 {
7761 enum rtx_code second_code, bypass_code;
7762 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7763 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7764 code = ix86_fp_compare_code_to_integer (code);
7765 mode = CCmode;
7766 }
7767 if (reverse)
7768 code = reverse_condition (code);
7769
7770 switch (code)
7771 {
7772 case EQ:
7773 suffix = "e";
7774 break;
7775 case NE:
7776 suffix = "ne";
7777 break;
7778 case GT:
7779 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7780 suffix = "g";
7781 break;
7782 case GTU:
7783 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7784 Those same assemblers have the same but opposite lossage on cmov. */
7785 gcc_assert (mode == CCmode);
7786 suffix = fp ? "nbe" : "a";
7787 break;
7788 case LT:
7789 switch (mode)
7790 {
7791 case CCNOmode:
7792 case CCGOCmode:
7793 suffix = "s";
7794 break;
7795
7796 case CCmode:
7797 case CCGCmode:
7798 suffix = "l";
7799 break;
7800
7801 default:
7802 gcc_unreachable ();
7803 }
7804 break;
7805 case LTU:
7806 gcc_assert (mode == CCmode);
7807 suffix = "b";
7808 break;
7809 case GE:
7810 switch (mode)
7811 {
7812 case CCNOmode:
7813 case CCGOCmode:
7814 suffix = "ns";
7815 break;
7816
7817 case CCmode:
7818 case CCGCmode:
7819 suffix = "ge";
7820 break;
7821
7822 default:
7823 gcc_unreachable ();
7824 }
7825 break;
7826 case GEU:
7827 /* ??? As above. */
7828 gcc_assert (mode == CCmode);
7829 suffix = fp ? "nb" : "ae";
7830 break;
7831 case LE:
7832 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7833 suffix = "le";
7834 break;
7835 case LEU:
7836 gcc_assert (mode == CCmode);
7837 suffix = "be";
7838 break;
7839 case UNORDERED:
7840 suffix = fp ? "u" : "p";
7841 break;
7842 case ORDERED:
7843 suffix = fp ? "nu" : "np";
7844 break;
7845 default:
7846 gcc_unreachable ();
7847 }
7848 fputs (suffix, file);
7849}
7850
7851/* Print the name of register X to FILE based on its machine mode and number.
7852 If CODE is 'w', pretend the mode is HImode.
7853 If CODE is 'b', pretend the mode is QImode.
7854 If CODE is 'k', pretend the mode is SImode.
7855 If CODE is 'q', pretend the mode is DImode.
7856 If CODE is 'h', pretend the reg is the 'high' byte register.
7857 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
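
/* For example (illustrative), with X being the ax register: 'b' prints
   "%al", 'h' prints "%ah", 'w' prints "%ax", 'k' prints "%eax" and 'q'
   prints "%rax" (the latter only meaningful on 64bit targets).  */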
7858
7859void
7860print_reg (rtx x, int code, FILE *file)
7861{
7862 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7863 && REGNO (x) != FRAME_POINTER_REGNUM
7864 && REGNO (x) != FLAGS_REG
7865 && REGNO (x) != FPSR_REG);
7866
7867 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7868 putc ('%', file);
7869
7870 if (code == 'w' || MMX_REG_P (x))
7871 code = 2;
7872 else if (code == 'b')
7873 code = 1;
7874 else if (code == 'k')
7875 code = 4;
7876 else if (code == 'q')
7877 code = 8;
7878 else if (code == 'y')
7879 code = 3;
7880 else if (code == 'h')
7881 code = 0;
7882 else
7883 code = GET_MODE_SIZE (GET_MODE (x));
7884
7885	  /* Irritatingly, the AMD extended registers use a different naming
7886	     convention from the normal registers.  */
7887 if (REX_INT_REG_P (x))
7888 {
7889 gcc_assert (TARGET_64BIT);
7890 switch (code)
7891 {
7892 case 0:
7893 error ("extended registers have no high halves");
7894 break;
7895 case 1:
7896 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7897 break;
7898 case 2:
7899 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7900 break;
7901 case 4:
7902 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7903 break;
7904 case 8:
7905 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7906 break;
7907 default:
7908 error ("unsupported operand size for extended register");
7909 break;
7910 }
7911 return;
7912 }
7913 switch (code)
7914 {
7915 case 3:
7916 if (STACK_TOP_P (x))
7917 {
7918 fputs ("st(0)", file);
7919 break;
7920 }
7921 /* FALLTHRU */
7922 case 8:
7923 case 4:
7924 case 12:
7925 if (! ANY_FP_REG_P (x))
7926 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7927 /* FALLTHRU */
7928 case 16:
7929 case 2:
7930 normal:
7931 fputs (hi_reg_name[REGNO (x)], file);
7932 break;
7933 case 1:
7934 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7935 goto normal;
7936 fputs (qi_reg_name[REGNO (x)], file);
7937 break;
7938 case 0:
7939 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7940 goto normal;
7941 fputs (qi_high_reg_name[REGNO (x)], file);
7942 break;
7943 default:
7944 gcc_unreachable ();
7945 }
7946}
7947
7948/* Locate some local-dynamic symbol still in use by this function
7949 so that we can print its name in some tls_local_dynamic_base
7950 pattern. */
7951
7952static const char *
7953get_some_local_dynamic_name (void)
7954{
7955 rtx insn;
7956
7957 if (cfun->machine->some_ld_name)
7958 return cfun->machine->some_ld_name;
7959
7960 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7961 if (INSN_P (insn)
7962 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7963 return cfun->machine->some_ld_name;
7964
7965 gcc_unreachable ();
7966}
7967
7968static int
7969get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7970{
7971 rtx x = *px;
7972
7973 if (GET_CODE (x) == SYMBOL_REF
7974 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7975 {
7976 cfun->machine->some_ld_name = XSTR (x, 0);
7977 return 1;
7978 }
7979
7980 return 0;
7981}
7982
7983/* Meaning of CODE:
7984 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7985 C -- print opcode suffix for set/cmov insn.
7986 c -- like C, but print reversed condition
7987 F,f -- likewise, but for floating-point.
7988 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7989 otherwise nothing
7990 R -- print the prefix for register names.
7991 z -- print the opcode suffix for the size of the current operand.
7992 * -- print a star (in certain assembler syntax)
7993 A -- print an absolute memory reference.
7994 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7995    s -- print a shift double count, followed by the assembler's argument
7996 delimiter.
7997 b -- print the QImode name of the register for the indicated operand.
7998 %b0 would print %al if operands[0] is reg 0.
7999 w -- likewise, print the HImode name of the register.
8000 k -- likewise, print the SImode name of the register.
8001 q -- likewise, print the DImode name of the register.
8002 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8003 y -- print "st(0)" instead of "st" as a register.
8004 D -- print condition for SSE cmp instruction.
8005 P -- if PIC, print an @PLT suffix.
8006 X -- don't print any sort of PIC '@' suffix for a symbol.
8007 & -- print some in-use local-dynamic symbol name.
8008 H -- print a memory address offset by 8; used for sse high-parts
8009 */
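
/* For instance (illustrative), in an output template such as
   "cmov%O2%C1\t{%2, %0|%0, %2}" the 'O' code adds a Sun-assembler size
   suffix when needed and 'C' prints the condition suffix, giving
   e.g. "cmovge".  */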
8010
8011void
8012print_operand (FILE *file, rtx x, int code)
8013{
8014 if (code)
8015 {
8016 switch (code)
8017 {
8018 case '*':
8019 if (ASSEMBLER_DIALECT == ASM_ATT)
8020 putc ('*', file);
8021 return;
8022
8023 case '&':
8024 assemble_name (file, get_some_local_dynamic_name ());
8025 return;
8026
8027 case 'A':
8028 switch (ASSEMBLER_DIALECT)
8029 {
8030 case ASM_ATT:
8031 putc ('*', file);
8032 break;
8033
8034 case ASM_INTEL:
8035	      /* Intel syntax.  For absolute addresses, registers should not
8036		 be surrounded by brackets.  */
8037 if (GET_CODE (x) != REG)
8038 {
8039 putc ('[', file);
8040 PRINT_OPERAND (file, x, 0);
8041 putc (']', file);
8042 return;
8043 }
8044 break;
8045
8046 default:
8047 gcc_unreachable ();
8048 }
8049
8050 PRINT_OPERAND (file, x, 0);
8051 return;
8052
8053
8054 case 'L':
8055 if (ASSEMBLER_DIALECT == ASM_ATT)
8056 putc ('l', file);
8057 return;
8058
8059 case 'W':
8060 if (ASSEMBLER_DIALECT == ASM_ATT)
8061 putc ('w', file);
8062 return;
8063
8064 case 'B':
8065 if (ASSEMBLER_DIALECT == ASM_ATT)
8066 putc ('b', file);
8067 return;
8068
8069 case 'Q':
8070 if (ASSEMBLER_DIALECT == ASM_ATT)
8071 putc ('l', file);
8072 return;
8073
8074 case 'S':
8075 if (ASSEMBLER_DIALECT == ASM_ATT)
8076 putc ('s', file);
8077 return;
8078
8079 case 'T':
8080 if (ASSEMBLER_DIALECT == ASM_ATT)
8081 putc ('t', file);
8082 return;
8083
8084 case 'z':
8085 /* 387 opcodes don't get size suffixes if the operands are
8086 registers. */
8087 if (STACK_REG_P (x))
8088 return;
8089
8090 /* Likewise if using Intel opcodes. */
8091 if (ASSEMBLER_DIALECT == ASM_INTEL)
8092 return;
8093
8094	  /* Derive the opcode size suffix from the size of the operand.  */
8095 switch (GET_MODE_SIZE (GET_MODE (x)))
8096 {
8097 case 2:
8098#ifdef HAVE_GAS_FILDS_FISTS
8099 putc ('s', file);
8100#endif
8101 return;
8102
8103 case 4:
8104 if (GET_MODE (x) == SFmode)
8105 {
8106 putc ('s', file);
8107 return;
8108 }
8109 else
8110 putc ('l', file);
8111 return;
8112
8113 case 12:
8114 case 16:
8115 putc ('t', file);
8116 return;
8117
8118 case 8:
8119 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8120 {
8121#ifdef GAS_MNEMONICS
8122 putc ('q', file);
8123#else
8124 putc ('l', file);
8125 putc ('l', file);
8126#endif
8127 }
8128 else
8129 putc ('l', file);
8130 return;
8131
8132 default:
8133 gcc_unreachable ();
8134 }
8135
8136 case 'b':
8137 case 'w':
8138 case 'k':
8139 case 'q':
8140 case 'h':
8141 case 'y':
8142 case 'X':
8143 case 'P':
8144 break;
8145
8146 case 's':
8147 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
8148 {
8149 PRINT_OPERAND (file, x, 0);
8150 putc (',', file);
8151 }
8152 return;
8153
8154 case 'D':
8155	  /* A little bit of brain damage here.  The SSE compare instructions
8156	     use completely different names for the comparisons than the
8157	     fp conditional moves do.  */
8158 switch (GET_CODE (x))
8159 {
8160 case EQ:
8161 case UNEQ:
8162 fputs ("eq", file);
8163 break;
8164 case LT:
8165 case UNLT:
8166 fputs ("lt", file);
8167 break;
8168 case LE:
8169 case UNLE:
8170 fputs ("le", file);
8171 break;
8172 case UNORDERED:
8173 fputs ("unord", file);
8174 break;
8175 case NE:
8176 case LTGT:
8177 fputs ("neq", file);
8178 break;
8179 case UNGE:
8180 case GE:
8181 fputs ("nlt", file);
8182 break;
8183 case UNGT:
8184 case GT:
8185 fputs ("nle", file);
8186 break;
8187 case ORDERED:
8188 fputs ("ord", file);
8189 break;
8190 default:
8191 gcc_unreachable ();
8192 }
8193 return;
8194 case 'O':
8195#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8196 if (ASSEMBLER_DIALECT == ASM_ATT)
8197 {
8198 switch (GET_MODE (x))
8199 {
8200 case HImode: putc ('w', file); break;
8201 case SImode:
8202 case SFmode: putc ('l', file); break;
8203 case DImode:
8204 case DFmode: putc ('q', file); break;
8205 default: gcc_unreachable ();
8206 }
8207 putc ('.', file);
8208 }
8209#endif
8210 return;
8211 case 'C':
8212 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8213 return;
8214 case 'F':
8215#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8216 if (ASSEMBLER_DIALECT == ASM_ATT)
8217 putc ('.', file);
8218#endif
8219 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8220 return;
8221
8222 /* Like above, but reverse condition */
8223 case 'c':
8224 /* Check to see if argument to %c is really a constant
8225 and not a condition code which needs to be reversed. */
8226 if (!COMPARISON_P (x))
8227 {
8228 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8229 return;
8230 }
8231 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8232 return;
8233 case 'f':
8234#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8235 if (ASSEMBLER_DIALECT == ASM_ATT)
8236 putc ('.', file);
8237#endif
8238 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8239 return;
8240
8241 case 'H':
8242 /* It doesn't actually matter what mode we use here, as we're
8243 only going to use this for printing. */
8244 x = adjust_address_nv (x, DImode, 8);
8245 break;
8246
8247 case '+':
8248 {
8249 rtx x;
8250
8251 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8252 return;
8253
8254 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8255 if (x)
8256 {
8257 int pred_val = INTVAL (XEXP (x, 0));
8258
8259 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8260 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8261 {
8262 int taken = pred_val > REG_BR_PROB_BASE / 2;
8263 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8264
8265		  /* Emit hints only in the cases where the default branch
8266		     prediction heuristics would fail.  */
8267 if (taken != cputaken)
8268 {
8269 /* We use 3e (DS) prefix for taken branches and
8270 2e (CS) prefix for not taken branches. */
8271 if (taken)
8272 fputs ("ds ; ", file);
8273 else
8274 fputs ("cs ; ", file);
8275 }
8276 }
8277 }
8278 return;
8279 }
8280 default:
8281 output_operand_lossage ("invalid operand code '%c'", code);
8282 }
8283 }
8284
8285 if (GET_CODE (x) == REG)
8286 print_reg (x, code, file);
8287
8288 else if (GET_CODE (x) == MEM)
8289 {
8290 /* No `byte ptr' prefix for call instructions. */
8291 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8292 {
8293 const char * size;
8294 switch (GET_MODE_SIZE (GET_MODE (x)))
8295 {
8296 case 1: size = "BYTE"; break;
8297 case 2: size = "WORD"; break;
8298 case 4: size = "DWORD"; break;
8299 case 8: size = "QWORD"; break;
8300 case 12: size = "XWORD"; break;
8301 case 16: size = "XMMWORD"; break;
8302 default:
8303 gcc_unreachable ();
8304 }
8305
8306 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8307 if (code == 'b')
8308 size = "BYTE";
8309 else if (code == 'w')
8310 size = "WORD";
8311 else if (code == 'k')
8312 size = "DWORD";
8313
8314 fputs (size, file);
8315 fputs (" PTR ", file);
8316 }
8317
8318 x = XEXP (x, 0);
8319 /* Avoid (%rip) for call operands. */
8320 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8321 && GET_CODE (x) != CONST_INT)
8322 output_addr_const (file, x);
8323 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8324 output_operand_lossage ("invalid constraints for operand");
8325 else
8326 output_address (x);
8327 }
8328
8329 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8330 {
8331 REAL_VALUE_TYPE r;
8332 long l;
8333
8334 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8335 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8336
8337 if (ASSEMBLER_DIALECT == ASM_ATT)
8338 putc ('$', file);
8339 fprintf (file, "0x%08lx", l);
8340 }
8341
8342 /* These float cases don't actually occur as immediate operands. */
8343 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8344 {
8345 char dstr[30];
8346
8347 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8348 fprintf (file, "%s", dstr);
8349 }
8350
8351 else if (GET_CODE (x) == CONST_DOUBLE
8352 && GET_MODE (x) == XFmode)
8353 {
8354 char dstr[30];
8355
8356 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8357 fprintf (file, "%s", dstr);
8358 }
8359
8360 else
8361 {
8362 /* We have patterns that allow zero sets of memory, for instance.
8363 In 64-bit mode, we should probably support all 8-byte vectors,
8364 since we can in fact encode that into an immediate. */
8365 if (GET_CODE (x) == CONST_VECTOR)
8366 {
8367 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8368 x = const0_rtx;
8369 }
8370
8371 if (code != 'P')
8372 {
8373 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8374 {
8375 if (ASSEMBLER_DIALECT == ASM_ATT)
8376 putc ('$', file);
8377 }
8378 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8379 || GET_CODE (x) == LABEL_REF)
8380 {
8381 if (ASSEMBLER_DIALECT == ASM_ATT)
8382 putc ('$', file);
8383 else
8384 fputs ("OFFSET FLAT:", file);
8385 }
8386 }
8387 if (GET_CODE (x) == CONST_INT)
8388 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8389 else if (flag_pic)
8390 output_pic_addr_const (file, x, code);
8391 else
8392 output_addr_const (file, x);
8393 }
8394}
8395
8396/* Print a memory operand whose address is ADDR. */
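
/* For example (illustrative), a base+index*scale+disp address prints as
   "-4(%ebp,%eax,4)" in AT&T syntax and as "[ebp+eax*4-4]" in Intel syntax,
   with a segment override such as "%gs:" prepended when parts.seg is set.  */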
8397
8398void
8399print_operand_address (FILE *file, rtx addr)
8400{
8401 struct ix86_address parts;
8402 rtx base, index, disp;
8403 int scale;
8404 int ok = ix86_decompose_address (addr, &parts);
8405
8406 gcc_assert (ok);
8407
8408 base = parts.base;
8409 index = parts.index;
8410 disp = parts.disp;
8411 scale = parts.scale;
8412
8413 switch (parts.seg)
8414 {
8415 case SEG_DEFAULT:
8416 break;
8417 case SEG_FS:
8418 case SEG_GS:
8419 if (USER_LABEL_PREFIX[0] == 0)
8420 putc ('%', file);
8421 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8422 break;
8423 default:
8424 gcc_unreachable ();
8425 }
8426
8427 if (!base && !index)
8428 {
8429      /* A displacement-only address requires special attention.  */
8430
8431 if (GET_CODE (disp) == CONST_INT)
8432 {
8433 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8434 {
8435 if (USER_LABEL_PREFIX[0] == 0)
8436 putc ('%', file);
8437 fputs ("ds:", file);
8438 }
8439 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8440 }
8441 else if (flag_pic)
8442 output_pic_addr_const (file, disp, 0);
8443 else
8444 output_addr_const (file, disp);
8445
8446 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8447 if (TARGET_64BIT)
8448 {
8449 if (GET_CODE (disp) == CONST
8450 && GET_CODE (XEXP (disp, 0)) == PLUS
8451 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8452 disp = XEXP (XEXP (disp, 0), 0);
8453 if (GET_CODE (disp) == LABEL_REF
8454 || (GET_CODE (disp) == SYMBOL_REF
8455 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8456 fputs ("(%rip)", file);
8457 }
8458 }
8459 else
8460 {
8461 if (ASSEMBLER_DIALECT == ASM_ATT)
8462 {
8463 if (disp)
8464 {
8465 if (flag_pic)
8466 output_pic_addr_const (file, disp, 0);
8467 else if (GET_CODE (disp) == LABEL_REF)
8468 output_asm_label (disp);
8469 else
8470 output_addr_const (file, disp);
8471 }
8472
8473 putc ('(', file);
8474 if (base)
8475 print_reg (base, 0, file);
8476 if (index)
8477 {
8478 putc (',', file);
8479 print_reg (index, 0, file);
8480 if (scale != 1)
8481 fprintf (file, ",%d", scale);
8482 }
8483 putc (')', file);
8484 }
8485 else
8486 {
8487 rtx offset = NULL_RTX;
8488
8489 if (disp)
8490 {
8491 /* Pull out the offset of a symbol; print any symbol itself. */
8492 if (GET_CODE (disp) == CONST
8493 && GET_CODE (XEXP (disp, 0)) == PLUS
8494 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8495 {
8496 offset = XEXP (XEXP (disp, 0), 1);
8497 disp = gen_rtx_CONST (VOIDmode,
8498 XEXP (XEXP (disp, 0), 0));
8499 }
8500
8501 if (flag_pic)
8502 output_pic_addr_const (file, disp, 0);
8503 else if (GET_CODE (disp) == LABEL_REF)
8504 output_asm_label (disp);
8505 else if (GET_CODE (disp) == CONST_INT)
8506 offset = disp;
8507 else
8508 output_addr_const (file, disp);
8509 }
8510
8511 putc ('[', file);
8512 if (base)
8513 {
8514 print_reg (base, 0, file);
8515 if (offset)
8516 {
8517 if (INTVAL (offset) >= 0)
8518 putc ('+', file);
8519 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8520 }
8521 }
8522 else if (offset)
8523 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8524 else
8525 putc ('0', file);
8526
8527 if (index)
8528 {
8529 putc ('+', file);
8530 print_reg (index, 0, file);
8531 if (scale != 1)
8532 fprintf (file, "*%d", scale);
8533 }
8534 putc (']', file);
8535 }
8536 }
8537}
8538
8539bool
8540output_addr_const_extra (FILE *file, rtx x)
8541{
8542 rtx op;
8543
8544 if (GET_CODE (x) != UNSPEC)
8545 return false;
8546
8547 op = XVECEXP (x, 0, 0);
8548 switch (XINT (x, 1))
8549 {
8550 case UNSPEC_GOTTPOFF:
8551 output_addr_const (file, op);
8552 /* FIXME: This might be @TPOFF in Sun ld. */
8553 fputs ("@GOTTPOFF", file);
8554 break;
8555 case UNSPEC_TPOFF:
8556 output_addr_const (file, op);
8557 fputs ("@TPOFF", file);
8558 break;
8559 case UNSPEC_NTPOFF:
8560 output_addr_const (file, op);
8561 if (TARGET_64BIT)
8562 fputs ("@TPOFF", file);
8563 else
8564 fputs ("@NTPOFF", file);
8565 break;
8566 case UNSPEC_DTPOFF:
8567 output_addr_const (file, op);
8568 fputs ("@DTPOFF", file);
8569 break;
8570 case UNSPEC_GOTNTPOFF:
8571 output_addr_const (file, op);
8572 if (TARGET_64BIT)
8573 fputs ("@GOTTPOFF(%rip)", file);
8574 else
8575 fputs ("@GOTNTPOFF", file);
8576 break;
8577 case UNSPEC_INDNTPOFF:
8578 output_addr_const (file, op);
8579 fputs ("@INDNTPOFF", file);
8580 break;
8581
8582 default:
8583 return false;
8584 }
8585
8586 return true;
8587}
8588
8589/* Split one or more DImode RTL references into pairs of SImode
8590 references. The RTL can be REG, offsettable MEM, integer constant, or
8591 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8592 split and "num" is its length. lo_half and hi_half are output arrays
8593 that parallel "operands". */
8594
8595void
8596split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8597{
8598 while (num--)
8599 {
8600 rtx op = operands[num];
8601
8602      /* simplify_subreg refuses to split volatile memory addresses,
8603	 but we still have to handle them.  */
8604 if (GET_CODE (op) == MEM)
8605 {
8606 lo_half[num] = adjust_address (op, SImode, 0);
8607 hi_half[num] = adjust_address (op, SImode, 4);
8608 }
8609 else
8610 {
8611 lo_half[num] = simplify_gen_subreg (SImode, op,
8612 GET_MODE (op) == VOIDmode
8613 ? DImode : GET_MODE (op), 0);
8614 hi_half[num] = simplify_gen_subreg (SImode, op,
8615 GET_MODE (op) == VOIDmode
8616 ? DImode : GET_MODE (op), 4);
8617 }
8618 }
8619}
8620/* Split one or more TImode RTL references into pairs of DImode
8621 references. The RTL can be REG, offsettable MEM, integer constant, or
8622   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
8623 split and "num" is its length. lo_half and hi_half are output arrays
8624 that parallel "operands". */
8625
8626void
8627split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8628{
8629 while (num--)
8630 {
8631 rtx op = operands[num];
8632
8633      /* simplify_subreg refuses to split volatile memory addresses, but we
8634	 still have to handle them.  */
8635 if (GET_CODE (op) == MEM)
8636 {
8637 lo_half[num] = adjust_address (op, DImode, 0);
8638 hi_half[num] = adjust_address (op, DImode, 8);
8639 }
8640 else
8641 {
8642 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8643 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8644 }
8645 }
8646}
8647
8648/* Output code to perform a 387 binary operation in INSN, one of PLUS,
8649 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8650 is the expression of the binary operation. The output may either be
8651 emitted here, or returned to the caller, like all output_* functions.
8652
8653 There is no guarantee that the operands are the same mode, as they
8654 might be within FLOAT or FLOAT_EXTEND expressions. */
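
/* Illustrative results: for an i387 addition with a memory operand this
   returns e.g. "fadd%z2\t%2", for a register-register case
   "fadd\t{%y2, %0|%0, %y2}", and for the SSE path e.g.
   "addsd\t{%2, %0|%0, %2}" (SFmode would use "addss").  */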
8655
8656#ifndef SYSV386_COMPAT
8657/* Set to 1 for compatibility with brain-damaged assemblers. No-one
8658 wants to fix the assemblers because that causes incompatibility
8659 with gcc. No-one wants to fix gcc because that causes
8660 incompatibility with assemblers... You can use the option of
8661 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8662#define SYSV386_COMPAT 1
8663#endif
8664
8665const char *
8666output_387_binary_op (rtx insn, rtx *operands)
8667{
8668 static char buf[30];
8669 const char *p;
8670 const char *ssep;
8671 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8672
8673#ifdef ENABLE_CHECKING
8674  /* Even if we do not want to check the inputs, this documents the input
8675     constraints, which helps in understanding the following code.  */
8676 if (STACK_REG_P (operands[0])
8677 && ((REG_P (operands[1])
8678 && REGNO (operands[0]) == REGNO (operands[1])
8679 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8680 || (REG_P (operands[2])
8681 && REGNO (operands[0]) == REGNO (operands[2])
8682 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8683 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8684 ; /* ok */
8685 else
8686 gcc_assert (is_sse);
8687#endif
8688
8689 switch (GET_CODE (operands[3]))
8690 {
8691 case PLUS:
8692 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8693 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8694 p = "fiadd";
8695 else
8696 p = "fadd";
8697 ssep = "add";
8698 break;
8699
8700 case MINUS:
8701 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8702 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8703 p = "fisub";
8704 else
8705 p = "fsub";
8706 ssep = "sub";
8707 break;
8708
8709 case MULT:
8710 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8711 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8712 p = "fimul";
8713 else
8714 p = "fmul";
8715 ssep = "mul";
8716 break;
8717
8718 case DIV:
8719 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8720 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8721 p = "fidiv";
8722 else
8723 p = "fdiv";
8724 ssep = "div";
8725 break;
8726
8727 default:
8728 gcc_unreachable ();
8729 }
8730
8731 if (is_sse)
8732 {
8733 strcpy (buf, ssep);
8734 if (GET_MODE (operands[0]) == SFmode)
8735 strcat (buf, "ss\t{%2, %0|%0, %2}");
8736 else
8737 strcat (buf, "sd\t{%2, %0|%0, %2}");
8738 return buf;
8739 }
8740 strcpy (buf, p);
8741
8742 switch (GET_CODE (operands[3]))
8743 {
8744 case MULT:
8745 case PLUS:
8746 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8747 {
8748 rtx temp = operands[2];
8749 operands[2] = operands[1];
8750 operands[1] = temp;
8751 }
8752
8753      /* We know operands[0] == operands[1].  */
8754
8755 if (GET_CODE (operands[2]) == MEM)
8756 {
8757 p = "%z2\t%2";
8758 break;
8759 }
8760
8761 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8762 {
8763 if (STACK_TOP_P (operands[0]))
8764 /* How is it that we are storing to a dead operand[2]?
8765 Well, presumably operands[1] is dead too. We can't
8766 store the result to st(0) as st(0) gets popped on this
8767 instruction. Instead store to operands[2] (which I
8768 think has to be st(1)). st(1) will be popped later.
8769 gcc <= 2.8.1 didn't have this check and generated
8770 assembly code that the Unixware assembler rejected. */
8771 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8772 else
8773 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8774 break;
8775 }
8776
8777 if (STACK_TOP_P (operands[0]))
8778 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8779 else
8780 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8781 break;
8782
8783 case MINUS:
8784 case DIV:
8785 if (GET_CODE (operands[1]) == MEM)
8786 {
8787 p = "r%z1\t%1";
8788 break;
8789 }
8790
8791 if (GET_CODE (operands[2]) == MEM)
8792 {
8793 p = "%z2\t%2";
8794 break;
8795 }
8796
8797 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8798 {
8799#if SYSV386_COMPAT
8800 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8801 derived assemblers, confusingly reverse the direction of
8802 the operation for fsub{r} and fdiv{r} when the
8803 destination register is not st(0). The Intel assembler
8804 doesn't have this brain damage. Read !SYSV386_COMPAT to
8805 figure out what the hardware really does. */
8806 if (STACK_TOP_P (operands[0]))
8807 p = "{p\t%0, %2|rp\t%2, %0}";
8808 else
8809 p = "{rp\t%2, %0|p\t%0, %2}";
8810#else
8811 if (STACK_TOP_P (operands[0]))
8812 /* As above for fmul/fadd, we can't store to st(0). */
8813 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8814 else
8815 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8816#endif
8817 break;
8818 }
8819
8820 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8821 {
8822#if SYSV386_COMPAT
8823 if (STACK_TOP_P (operands[0]))
8824 p = "{rp\t%0, %1|p\t%1, %0}";
8825 else
8826 p = "{p\t%1, %0|rp\t%0, %1}";
8827#else
8828 if (STACK_TOP_P (operands[0]))
8829 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8830 else
8831 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8832#endif
8833 break;
8834 }
8835
8836 if (STACK_TOP_P (operands[0]))
8837 {
8838 if (STACK_TOP_P (operands[1]))
8839 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8840 else
8841 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8842 break;
8843 }
8844 else if (STACK_TOP_P (operands[1]))
8845 {
8846#if SYSV386_COMPAT
8847 p = "{\t%1, %0|r\t%0, %1}";
8848#else
8849 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8850#endif
8851 }
8852 else
8853 {
8854#if SYSV386_COMPAT
8855 p = "{r\t%2, %0|\t%0, %2}";
8856#else
8857 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8858#endif
8859 }
8860 break;
8861
8862 default:
8863 gcc_unreachable ();
8864 }
8865
8866 strcat (buf, p);
8867 return buf;
8868}
8869
8870/* Return needed mode for entity in optimize_mode_switching pass. */
8871
8872int
8873ix86_mode_needed (int entity, rtx insn)
8874{
8875 enum attr_i387_cw mode;
8876
8877 /* The mode UNINITIALIZED is used to store the control word after a
8878 function call or ASM pattern. The mode ANY specifies that the function
8879 has no requirements on the control word and makes no changes to the
8880 bits we are interested in. */
8881
8882 if (CALL_P (insn)
8883 || (NONJUMP_INSN_P (insn)
8884 && (asm_noperands (PATTERN (insn)) >= 0
8885 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8886 return I387_CW_UNINITIALIZED;
8887
8888 if (recog_memoized (insn) < 0)
8889 return I387_CW_ANY;
8890
8891 mode = get_attr_i387_cw (insn);
8892
8893 switch (entity)
8894 {
8895 case I387_TRUNC:
8896 if (mode == I387_CW_TRUNC)
8897 return mode;
8898 break;
8899
8900 case I387_FLOOR:
8901 if (mode == I387_CW_FLOOR)
8902 return mode;
8903 break;
8904
8905 case I387_CEIL:
8906 if (mode == I387_CW_CEIL)
8907 return mode;
8908 break;
8909
8910 case I387_MASK_PM:
8911 if (mode == I387_CW_MASK_PM)
8912 return mode;
8913 break;
8914
8915 default:
8916 gcc_unreachable ();
8917 }
8918
8919 return I387_CW_ANY;
8920}
8921
8922/* Output code to initialize control word copies used by trunc?f?i and
8923 rounding patterns. MODE selects the required rounding mode; a copy of the
8924 current control word with the rounding bits updated is stored in a stack slot. */
8925
8926void
8927emit_i387_cw_initialization (int mode)
8928{
8929 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8930 rtx new_mode;
8931
8932 int slot;
8933
8934 rtx reg = gen_reg_rtx (HImode);
8935
8936 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8937 emit_move_insn (reg, stored_mode);
8938
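  /* In the i387 control word, bits 10-11 form the rounding-control field
     (00 = to nearest, 01 = down, 10 = up, 11 = toward zero) and bit 5 is
     the precision-exception mask, which is what the constants 0x0c00,
     0x0400, 0x0800 and 0x0020 below select; the gen_movsi_insv_1 path
     appears to write the same rounding-control value through the high
     byte of the word.  */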
8939 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8940 {
8941 switch (mode)
8942 {
8943 case I387_CW_TRUNC:
8944 /* round toward zero (truncate) */
8945 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8946 slot = SLOT_CW_TRUNC;
8947 break;
8948
8949 case I387_CW_FLOOR:
8950 /* round down toward -oo */
8951 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8952 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8953 slot = SLOT_CW_FLOOR;
8954 break;
8955
8956 case I387_CW_CEIL:
8957 /* round up toward +oo */
8958 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8959 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8960 slot = SLOT_CW_CEIL;
8961 break;
8962
8963 case I387_CW_MASK_PM:
8964 /* mask precision exception for nearbyint() */
8965 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8966 slot = SLOT_CW_MASK_PM;
8967 break;
8968
8969 default:
8970 gcc_unreachable ();
8971 }
8972 }
8973 else
8974 {
8975 switch (mode)
8976 {
8977 case I387_CW_TRUNC:
8978 /* round toward zero (truncate) */
8979 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8980 slot = SLOT_CW_TRUNC;
8981 break;
8982
8983 case I387_CW_FLOOR:
8984 /* round down toward -oo */
8985 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8986 slot = SLOT_CW_FLOOR;
8987 break;
8988
8989 case I387_CW_CEIL:
8990 /* round up toward +oo */
8991 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8992 slot = SLOT_CW_CEIL;
8993 break;
8994
8995 case I387_CW_MASK_PM:
8996 /* mask precision exception for nearbyint() */
8997 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8998 slot = SLOT_CW_MASK_PM;
8999 break;
9000
9001 default:
9002 gcc_unreachable ();
9003 }
9004 }
9005
9006 gcc_assert (slot < MAX_386_STACK_LOCALS);
9007
9008 new_mode = assign_386_stack_local (HImode, slot);
9009 emit_move_insn (new_mode, reg);
9010}
9011
9012/* Output code for INSN to convert a float to a signed int. OPERANDS
9013 are the insn operands. The output may be [HSD]Imode and the input
9014 operand may be [SDX]Fmode. */
9015
9016const char *
9017output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9018{
9019 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9020 int dimode_p = GET_MODE (operands[0]) == DImode;
9021 int round_mode = get_attr_i387_cw (insn);
9022
9023 /* Jump through a hoop or two for DImode, since the hardware has no
9024 non-popping instruction. We used to do this a different way, but
9025 that was somewhat fragile and broke with post-reload splitters. */
9026 if ((dimode_p || fisttp) && !stack_top_dies)
9027 output_asm_insn ("fld\t%y1", operands);
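    /* The fld duplicates the operand on top of the stack, so the popping
       fistp/fisttp emitted below does not destroy a value that is still
       live.  */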
9028
9029 gcc_assert (STACK_TOP_P (operands[1]));
9030 gcc_assert (GET_CODE (operands[0]) == MEM);
9031
9032 if (fisttp)
9033 output_asm_insn ("fisttp%z0\t%0", operands);
9034 else
9035 {
9036 if (round_mode != I387_CW_ANY)
9037 output_asm_insn ("fldcw\t%3", operands);
9038 if (stack_top_dies || dimode_p)
9039 output_asm_insn ("fistp%z0\t%0", operands);
9040 else
9041 output_asm_insn ("fist%z0\t%0", operands);
9042 if (round_mode != I387_CW_ANY)
9043 output_asm_insn ("fldcw\t%2", operands);
9044 }
9045
9046 return "";
9047}
9048
9049/* Output code for x87 ffreep insn. The OPNO argument, which may only
9050 have the values zero or one, indicates the ffreep insn's operand
9051 from the OPERANDS array. */
9052
9053static const char *
9054output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9055{
9056 if (TARGET_USE_FFREEP)
9057#if HAVE_AS_IX86_FFREEP
9058 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9059#else
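  /* Without assembler support for the mnemonic, emit the raw encoding:
     ffreep %st(i) is the two-byte opcode 0xdf 0xc0+i, stored in
     little-endian order by the .word directives below.  */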
9060 switch (REGNO (operands[opno]))
9061 {
9062 case FIRST_STACK_REG + 0: return ".word\t0xc0df";
9063 case FIRST_STACK_REG + 1: return ".word\t0xc1df";
9064 case FIRST_STACK_REG + 2: return ".word\t0xc2df";
9065 case FIRST_STACK_REG + 3: return ".word\t0xc3df";
9066 case FIRST_STACK_REG + 4: return ".word\t0xc4df";
9067 case FIRST_STACK_REG + 5: return ".word\t0xc5df";
9068 case FIRST_STACK_REG + 6: return ".word\t0xc6df";
9069 case FIRST_STACK_REG + 7: return ".word\t0xc7df";
9070 }
9071#endif
9072
9073 return opno ? "fstp\t%y1" : "fstp\t%y0";
9074}
9075
9076
9077/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9078 should be used. UNORDERED_P is true when fucom should be used. */
9079
9080const char *
9081output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9082{
9083 int stack_top_dies;
9084 rtx cmp_op0, cmp_op1;
9085 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9086
9087 if (eflags_p)
9088 {
9089 cmp_op0 = operands[0];
9090 cmp_op1 = operands[1];
9091 }
9092 else
9093 {
9094 cmp_op0 = operands[1];
9095 cmp_op1 = operands[2];
9096 }
9097
9098 if (is_sse)
9099 {
9100 if (GET_MODE (operands[0]) == SFmode)
9101 if (unordered_p)
9102 return "ucomiss\t{%1, %0|%0, %1}";
9103 else
9104 return "comiss\t{%1, %0|%0, %1}";
9105 else
9106 if (unordered_p)
9107 return "ucomisd\t{%1, %0|%0, %1}";
9108 else
9109 return "comisd\t{%1, %0|%0, %1}";
9110 }
9111
9112 gcc_assert (STACK_TOP_P (cmp_op0));
9113
9114 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9115
9116 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9117 {
9118 if (stack_top_dies)
9119 {
9120 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9121 return output_387_ffreep (operands, 1);
9122 }
9123 else
9124 return "ftst\n\tfnstsw\t%0";
9125 }
9126
9127 if (STACK_REG_P (cmp_op1)
9128 && stack_top_dies
9129 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9130 && REGNO (cmp_op1) != FIRST_STACK_REG)
9131 {
9132 /* If the top of the 387 stack dies, and the other operand is
9133 also a stack register that dies, then this must be an
9134 `fcompp' float compare. */
9135
9136 if (eflags_p)
9137 {
9138 /* There is no double popping fcomi variant. Fortunately,
9139 eflags is immune from the fstp's cc clobbering. */
9140 if (unordered_p)
9141 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9142 else
9143 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9144 return output_387_ffreep (operands, 0);
9145 }
9146 else
9147 {
9148 if (unordered_p)
9149 return "fucompp\n\tfnstsw\t%0";
9150 else
9151 return "fcompp\n\tfnstsw\t%0";
9152 }
9153 }
9154 else
9155 {
9156 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9157
9158 static const char * const alt[16] =
9159 {
9160 "fcom%z2\t%y2\n\tfnstsw\t%0",
9161 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9162 "fucom%z2\t%y2\n\tfnstsw\t%0",
9163 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9164
9165 "ficom%z2\t%y2\n\tfnstsw\t%0",
9166 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9167 NULL,
9168 NULL,
9169
9170 "fcomi\t{%y1, %0|%0, %y1}",
9171 "fcomip\t{%y1, %0|%0, %y1}",
9172 "fucomi\t{%y1, %0|%0, %y1}",
9173 "fucomip\t{%y1, %0|%0, %y1}",
9174
9175 NULL,
9176 NULL,
9177 NULL,
9178 NULL
9179 };
9180
9181 int mask;
9182 const char *ret;
9183
9184 mask = eflags_p << 3;
9185 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9186 mask |= unordered_p << 1;
9187 mask |= stack_top_dies;
9188
9189 gcc_assert (mask < 16);
9190 ret = alt[mask];
9191 gcc_assert (ret);
9192
9193 return ret;
9194 }
9195}
9196
9197void
9198ix86_output_addr_vec_elt (FILE *file, int value)
9199{
9200 const char *directive = ASM_LONG;
9201
9202#ifdef ASM_QUAD
9203 if (TARGET_64BIT)
9204 directive = ASM_QUAD;
9205#else
9206 gcc_assert (!TARGET_64BIT);
9207#endif
9208
9209 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9210}
9211
9212void
9213ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9214{
9215 if (TARGET_64BIT)
9216 fprintf (file, "%s%s%d-%s%d\n",
9217 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9218 else if (HAVE_AS_GOTOFF_IN_DATA)
9219 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9220#if TARGET_MACHO
9221 else if (TARGET_MACHO)
9222 {
9223 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9224 machopic_output_function_base_name (file);
9225 fprintf(file, "\n");
9226 }
9227#endif
9228 else
9229 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9230 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9231}
9232
9233/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9234 for the target. */
9235
9236void
9237ix86_expand_clear (rtx dest)
9238{
9239 rtx tmp;
9240
9241 /* We play register width games, which are only valid after reload. */
9242 gcc_assert (reload_completed);
9243
9244 /* Avoid HImode and its attendant prefix byte. */
9245 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9246 dest = gen_rtx_REG (SImode, REGNO (dest));
9247
9248 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9249
9250 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9251 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9252 {
9253 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9254 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9255 }
9256
9257 emit_insn (tmp);
9258}
9259
9260/* X is an unchanging MEM. If it is a constant pool reference, return
9261 the constant pool rtx, else NULL. */
9262
9263rtx
9264maybe_get_pool_constant (rtx x)
9265{
9266 x = ix86_delegitimize_address (XEXP (x, 0));
9267
9268 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9269 return get_pool_constant (x);
9270
9271 return NULL_RTX;
9272}
9273
9274void
9275ix86_expand_move (enum machine_mode mode, rtx operands[])
9276{
9277 int strict = (reload_in_progress || reload_completed);
9278 rtx op0, op1;
9279 enum tls_model model;
9280
9281 op0 = operands[0];
9282 op1 = operands[1];
9283
9284 if (GET_CODE (op1) == SYMBOL_REF)
9285 {
9286 model = SYMBOL_REF_TLS_MODEL (op1);
9287 if (model)
9288 {
9289 op1 = legitimize_tls_address (op1, model, true);
9290 op1 = force_operand (op1, op0);
9291 if (op1 == op0)
9292 return;
9293 }
9294 }
9295 else if (GET_CODE (op1) == CONST
9296 && GET_CODE (XEXP (op1, 0)) == PLUS
9297 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9298 {
9299 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9300 if (model)
9301 {
9302 rtx addend = XEXP (XEXP (op1, 0), 1);
9303 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9304 op1 = force_operand (op1, NULL);
9305 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9306 op0, 1, OPTAB_DIRECT);
9307 if (op1 == op0)
9308 return;
9309 }
9310 }
9311
9312 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9313 {
9314 if (TARGET_MACHO && !TARGET_64BIT)
9315 {
9316#if TARGET_MACHO
9317 if (MACHOPIC_PURE)
9318 {
9319 rtx temp = ((reload_in_progress
9320 || ((op0 && GET_CODE (op0) == REG)
9321 && mode == Pmode))
9322 ? op0 : gen_reg_rtx (Pmode));
9323 op1 = machopic_indirect_data_reference (op1, temp);
9324 op1 = machopic_legitimize_pic_address (op1, mode,
9325 temp == op1 ? 0 : temp);
9326 }
9327 else if (MACHOPIC_INDIRECT)
9328 op1 = machopic_indirect_data_reference (op1, 0);
9329 if (op0 == op1)
9330 return;
9331#endif
9332 }
9333 else
9334 {
9335 if (GET_CODE (op0) == MEM)
9336 op1 = force_reg (Pmode, op1);
9337 else
9338 op1 = legitimize_address (op1, op1, Pmode);
9339 }
9340 }
9341 else
9342 {
9343 if (GET_CODE (op0) == MEM
9344 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9345 || !push_operand (op0, mode))
9346 && GET_CODE (op1) == MEM)
9347 op1 = force_reg (mode, op1);
9348
9349 if (push_operand (op0, mode)
9350 && ! general_no_elim_operand (op1, mode))
9351 op1 = copy_to_mode_reg (mode, op1);
9352
9353 /* Force large constants in 64-bit compilation into a register
9354 to get them CSEd. */
9355 if (TARGET_64BIT && mode == DImode
9356 && immediate_operand (op1, mode)
9357 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9358 && !register_operand (op0, mode)
9359 && optimize && !reload_completed && !reload_in_progress)
9360 op1 = copy_to_mode_reg (mode, op1);
9361
9362 if (FLOAT_MODE_P (mode))
9363 {
9364 /* If we are loading a floating point constant into a register,
9365 force the value to memory now, since we'll get better code
9366 out of the back end. */
9367
9368 if (strict)
9369 ;
9370 else if (GET_CODE (op1) == CONST_DOUBLE)
9371 {
9372 op1 = validize_mem (force_const_mem (mode, op1));
9373 if (!register_operand (op0, mode))
9374 {
9375 rtx temp = gen_reg_rtx (mode);
9376 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9377 emit_move_insn (op0, temp);
9378 return;
9379 }
9380 }
9381 }
9382 }
9383
9384 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9385}
9386
9387void
9388ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9389{
9390 rtx op0 = operands[0], op1 = operands[1];
9391
9392 /* Force constants other than zero into memory. We do not know how
9393 the instructions used to build constants modify the upper 64 bits
9394 of the register; once we have that information, we may be able
9395 to handle some of them more efficiently. */
9396 if ((reload_in_progress | reload_completed) == 0
9397 && register_operand (op0, mode)
9398 && CONSTANT_P (op1)
9399 && standard_sse_constant_p (op1) <= 0)
9400 op1 = validize_mem (force_const_mem (mode, op1));
9401
9402 /* If neither operand is already a register, load operand1 into a register first. */
9403 if (!no_new_pseudos
9404 && !register_operand (op0, mode)
9405 && !register_operand (op1, mode))
9406 {
9407 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9408 return;
9409 }
9410
9411 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9412}
9413
9414/* Implement the movmisalign patterns for SSE. Non-SSE modes go
9415 straight to ix86_expand_vector_move. */
9416
9417void
9418ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9419{
9420 rtx op0, op1, m;
9421
9422 op0 = operands[0];
9423 op1 = operands[1];
9424
9425 if (MEM_P (op1))
9426 {
9427 /* If we're optimizing for size, movups is the smallest. */
9428 if (optimize_size)
9429 {
9430 op0 = gen_lowpart (V4SFmode, op0);
9431 op1 = gen_lowpart (V4SFmode, op1);
9432 emit_insn (gen_sse_movups (op0, op1));
9433 return;
9434 }
9435
9436 /* ??? If we have typed data, then it would appear that using
9437 movdqu is the only way to get unaligned data loaded with
9438 integer type. */
9439 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9440 {
9441 op0 = gen_lowpart (V16QImode, op0);
9442 op1 = gen_lowpart (V16QImode, op1);
9443 emit_insn (gen_sse2_movdqu (op0, op1));
9444 return;
9445 }
9446
9447 if (TARGET_SSE2 && mode == V2DFmode)
9448 {
9449 rtx zero;
9450
9451 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9452 {
9453 op0 = gen_lowpart (V2DFmode, op0);
9454 op1 = gen_lowpart (V2DFmode, op1);
9455 emit_insn (gen_sse2_movupd (op0, op1));
9456 return;
9457 }
9458
9459 /* When SSE registers are split into halves, we can avoid
9460 writing to the top half twice. */
9461 if (TARGET_SSE_SPLIT_REGS)
9462 {
9463 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9464 zero = op0;
9465 }
9466 else
9467 {
9468 /* ??? Not sure about the best option for the Intel chips.
9469 The following would seem to satisfy; the register is
9470 entirely cleared, breaking the dependency chain. We
9471 then store to the upper half, with a dependency depth
9472 of one. A rumor has it that Intel recommends two movsd
9473 followed by an unpacklpd, but this is unconfirmed. And
9474 given that the dependency depth of the unpacklpd would
9475 still be one, I'm not sure why this would be better. */
9476 zero = CONST0_RTX (V2DFmode);
9477 }
9478
9479 m = adjust_address (op1, DFmode, 0);
9480 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9481 m = adjust_address (op1, DFmode, 8);
9482 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9483 }
9484 else
9485 {
9486 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9487 {
9488 op0 = gen_lowpart (V4SFmode, op0);
9489 op1 = gen_lowpart (V4SFmode, op1);
9490 emit_insn (gen_sse_movups (op0, op1));
9491 return;
9492 }
9493
9494 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9495 emit_move_insn (op0, CONST0_RTX (mode));
9496 else
9497 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9498
9499 if (mode != V4SFmode)
9500 op0 = gen_lowpart (V4SFmode, op0);
9501 m = adjust_address (op1, V2SFmode, 0);
9502 emit_insn (gen_sse_loadlps (op0, op0, m));
9503 m = adjust_address (op1, V2SFmode, 8);
9504 emit_insn (gen_sse_loadhps (op0, op0, m));
9505 }
9506 }
9507 else if (MEM_P (op0))
9508 {
9509 /* If we're optimizing for size, movups is the smallest. */
9510 if (optimize_size)
9511 {
9512 op0 = gen_lowpart (V4SFmode, op0);
9513 op1 = gen_lowpart (V4SFmode, op1);
9514 emit_insn (gen_sse_movups (op0, op1));
9515 return;
9516 }
9517
9518 /* ??? Similar to the above, only less clear because of
9519 "typeless stores". */
9520 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9521 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9522 {
9523 op0 = gen_lowpart (V16QImode, op0);
9524 op1 = gen_lowpart (V16QImode, op1);
9525 emit_insn (gen_sse2_movdqu (op0, op1));
9526 return;
9527 }
9528
9529 if (TARGET_SSE2 && mode == V2DFmode)
9530 {
9531 m = adjust_address (op0, DFmode, 0);
9532 emit_insn (gen_sse2_storelpd (m, op1));
9533 m = adjust_address (op0, DFmode, 8);
9534 emit_insn (gen_sse2_storehpd (m, op1));
9535 }
9536 else
9537 {
9538 if (mode != V4SFmode)
9539 op1 = gen_lowpart (V4SFmode, op1);
9540 m = adjust_address (op0, V2SFmode, 0);
9541 emit_insn (gen_sse_storelps (m, op1));
9542 m = adjust_address (op0, V2SFmode, 8);
9543 emit_insn (gen_sse_storehps (m, op1));
9544 }
9545 }
9546 else
9547 gcc_unreachable ();
9548}
9549
9550/* Expand a push in MODE. This is some mode for which we do not support
9551 proper push instructions, at least from the registers that we expect
9552 the value to live in. */
9553
9554void
9555ix86_expand_push (enum machine_mode mode, rtx x)
9556{
9557 rtx tmp;
9558
9559 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9560 GEN_INT (-GET_MODE_SIZE (mode)),
9561 stack_pointer_rtx, 1, OPTAB_DIRECT);
9562 if (tmp != stack_pointer_rtx)
9563 emit_move_insn (stack_pointer_rtx, tmp);
9564
9565 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9566 emit_move_insn (tmp, x);
9567}
9568
9569/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9570 destination to use for the operation. If different from the true
9571 destination in operands[0], a copy operation will be required. */
9572
9573rtx
9574ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9575 rtx operands[])
9576{
9577 int matching_memory;
9578 rtx src1, src2, dst;
9579
9580 dst = operands[0];
9581 src1 = operands[1];
9582 src2 = operands[2];
9583
9584 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9585 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9586 && (rtx_equal_p (dst, src2)
9587 || immediate_operand (src1, mode)))
9588 {
9589 rtx temp = src1;
9590 src1 = src2;
9591 src2 = temp;
9592 }
9593
9594 /* If the destination is memory, and we do not have matching source
9595 operands, do things in registers. */
9596 matching_memory = 0;
9597 if (GET_CODE (dst) == MEM)
9598 {
9599 if (rtx_equal_p (dst, src1))
9600 matching_memory = 1;
9601 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9602 && rtx_equal_p (dst, src2))
9603 matching_memory = 2;
9604 else
9605 dst = gen_reg_rtx (mode);
9606 }
9607
9608 /* Both source operands cannot be in memory. */
9609 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9610 {
9611 if (matching_memory != 2)
9612 src2 = force_reg (mode, src2);
9613 else
9614 src1 = force_reg (mode, src1);
9615 }
9616
9617 /* If the operation is not commutative, source 1 cannot be a constant
9618 or non-matching memory. */
9619 if ((CONSTANT_P (src1)
9620 || (!matching_memory && GET_CODE (src1) == MEM))
9621 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9622 src1 = force_reg (mode, src1);
9623
9624 src1 = operands[1] = src1;
9625 src2 = operands[2] = src2;
9626 return dst;
9627}
9628
9629/* Similarly, but assume that the destination has already been
9630 set up properly. */
9631
9632void
9633ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9634 enum machine_mode mode, rtx operands[])
9635{
9636 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9637 gcc_assert (dst == operands[0]);
9638}
9639
9640/* Attempt to expand a binary operator. Make the expansion closer to the
9641 actual machine than just general_operand, which would allow 3 separate
9642 memory references (one output, two input) in a single insn. */
9643
9644void
9645ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9646 rtx operands[])
9647{
9648 rtx src1, src2, dst, op, clob;
9649
9650 dst = ix86_fixup_binary_operands (code, mode, operands);
9651 src1 = operands[1];
9652 src2 = operands[2];
9653
9654 /* Emit the instruction. */
9655
9656 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9657 if (reload_in_progress)
9658 {
9659 /* Reload doesn't know about the flags register, and doesn't know that
9660 it doesn't want to clobber it. We can only do this with PLUS. */
9661 gcc_assert (code == PLUS);
9662 emit_insn (op);
9663 }
9664 else
9665 {
9666 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9667 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9668 }
9669
9670 /* Fix up the destination if needed. */
9671 if (dst != operands[0])
9672 emit_move_insn (operands[0], dst);
9673}
9674
9675/* Return TRUE or FALSE depending on whether the binary operator meets the
9676 appropriate constraints. */
9677
9678int
9679ix86_binary_operator_ok (enum rtx_code code,
9680 enum machine_mode mode ATTRIBUTE_UNUSED,
9681 rtx operands[3])
9682{
9683 /* Both source operands cannot be in memory. */
9684 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9685 return 0;
9686 /* If the operation is not commutative, source 1 cannot be a constant. */
9687 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9688 return 0;
9689 /* If the destination is memory, we must have a matching source operand. */
9690 if (GET_CODE (operands[0]) == MEM
9691 && ! (rtx_equal_p (operands[0], operands[1])
9692 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9693 && rtx_equal_p (operands[0], operands[2]))))
9694 return 0;
9695 /* If the operation is not commutative and source 1 is memory, we must
9696 have a matching destination. */
9697 if (GET_CODE (operands[1]) == MEM
9698 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9699 && ! rtx_equal_p (operands[0], operands[1]))
9700 return 0;
9701 return 1;
9702}
9703
9704/* Attempt to expand a unary operator. Make the expansion closer to the
9705 actual machine than just general_operand, which would allow 2 separate
9706 memory references (one output, one input) in a single insn. */
9707
9708void
9709ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9710 rtx operands[])
9711{
9712 int matching_memory;
9713 rtx src, dst, op, clob;
9714
9715 dst = operands[0];
9716 src = operands[1];
9717
9718 /* If the destination is memory, and we do not have matching source
9719 operands, do things in registers. */
9720 matching_memory = 0;
9721 if (MEM_P (dst))
9722 {
9723 if (rtx_equal_p (dst, src))
9724 matching_memory = 1;
9725 else
9726 dst = gen_reg_rtx (mode);
9727 }
9728
9729 /* When source operand is memory, destination must match. */
9730 if (MEM_P (src) && !matching_memory)
9731 src = force_reg (mode, src);
9732
9733 /* Emit the instruction. */
9734
9735 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9736 if (reload_in_progress || code == NOT)
9737 {
9738 /* Reload doesn't know about the flags register, and doesn't know that
9739 it doesn't want to clobber it. */
9740 gcc_assert (code == NOT);
9741 emit_insn (op);
9742 }
9743 else
9744 {
9745 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9746 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9747 }
9748
9749 /* Fix up the destination if needed. */
9750 if (dst != operands[0])
9751 emit_move_insn (operands[0], dst);
9752}
9753
9754/* Return TRUE or FALSE depending on whether the unary operator meets the
9755 appropriate constraints. */
9756
9757int
9758ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9759 enum machine_mode mode ATTRIBUTE_UNUSED,
9760 rtx operands[2] ATTRIBUTE_UNUSED)
9761{
9762 /* If one of the operands is memory, the source and destination must match. */
9763 if ((GET_CODE (operands[0]) == MEM
9764 || GET_CODE (operands[1]) == MEM)
9765 && ! rtx_equal_p (operands[0], operands[1]))
9766 return FALSE;
9767 return TRUE;
9768}
9769
9770/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9771 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9772 true, then replicate the mask for all elements of the vector register.
9773 If INVERT is true, then create a mask excluding the sign bit. */
9774
9775rtx
9776ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9777{
9778 enum machine_mode vec_mode;
9779 HOST_WIDE_INT hi, lo;
9780 int shift = 63;
9781 rtvec v;
9782 rtx mask;
9783
9784 /* Find the sign bit, sign extended to 2*HWI. */
9785 if (mode == SFmode)
9786 lo = 0x80000000, hi = lo < 0;
9787 else if (HOST_BITS_PER_WIDE_INT >= 64)
9788 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9789 else
9790 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9791
9792 if (invert)
9793 lo = ~lo, hi = ~hi;
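  /* The sign bit is bit 31 for SFmode and bit 63 for DFmode; (LO, HI) is
     its sign-extended two-word representation (or the complement of it
     when INVERT), split according to the width of HOST_WIDE_INT.  */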
9794
9795 /* Force this value into the low part of a fp vector constant. */
9796 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9797 mask = gen_lowpart (mode, mask);
9798
9799 if (mode == SFmode)
9800 {
9801 if (vect)
9802 v = gen_rtvec (4, mask, mask, mask, mask);
9803 else
9804 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9805 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9806 vec_mode = V4SFmode;
9807 }
9808 else
9809 {
9810 if (vect)
9811 v = gen_rtvec (2, mask, mask);
9812 else
9813 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9814 vec_mode = V2DFmode;
9815 }
9816
9817 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9818}
9819
9820/* Generate code for floating point ABS or NEG. */
9821
9822void
9823ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9824 rtx operands[])
9825{
9826 rtx mask, set, use, clob, dst, src;
9827 bool matching_memory;
9828 bool use_sse = false;
9829 bool vector_mode = VECTOR_MODE_P (mode);
9830 enum machine_mode elt_mode = mode;
9831
9832 if (vector_mode)
9833 {
9834 elt_mode = GET_MODE_INNER (mode);
9835 use_sse = true;
9836 }
9837 else if (TARGET_SSE_MATH)
9838 use_sse = SSE_FLOAT_MODE_P (mode);
9839
9840 /* NEG and ABS performed with SSE use bitwise mask operations.
9841 Create the appropriate mask now. */
9842 if (use_sse)
9843 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9844 else
9845 mask = NULL_RTX;
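  /* With SSE, NEG is performed as an XOR with the sign-bit mask and ABS
     as an AND with the inverted mask, which is why the mask is built
     inverted when CODE is ABS.  */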
9846
9847 dst = operands[0];
9848 src = operands[1];
9849
9850 /* If the destination is memory, and we don't have matching source
9851 operands or we're using the x87, do things in registers. */
9852 matching_memory = false;
9853 if (MEM_P (dst))
9854 {
9855 if (use_sse && rtx_equal_p (dst, src))
9856 matching_memory = true;
9857 else
9858 dst = gen_reg_rtx (mode);
9859 }
9860 if (MEM_P (src) && !matching_memory)
9861 src = force_reg (mode, src);
9862
9863 if (vector_mode)
9864 {
9865 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9866 set = gen_rtx_SET (VOIDmode, dst, set);
9867 emit_insn (set);
9868 }
9869 else
9870 {
9871 set = gen_rtx_fmt_e (code, mode, src);
9872 set = gen_rtx_SET (VOIDmode, dst, set);
9873 if (mask)
9874 {
9875 use = gen_rtx_USE (VOIDmode, mask);
9876 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9877 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9878 gen_rtvec (3, set, use, clob)));
9879 }
9880 else
9881 emit_insn (set);
9882 }
9883
9884 if (dst != operands[0])
9885 emit_move_insn (operands[0], dst);
9886}
9887
9888/* Expand a copysign operation. Special case operand 0 being a constant. */
9889
9890void
9891ix86_expand_copysign (rtx operands[])
9892{
9893 enum machine_mode mode, vmode;
9894 rtx dest, op0, op1, mask, nmask;
9895
9896 dest = operands[0];
9897 op0 = operands[1];
9898 op1 = operands[2];
9899
9900 mode = GET_MODE (dest);
9901 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9902
9903 if (GET_CODE (op0) == CONST_DOUBLE)
9904 {
9905 rtvec v;
9906
9907 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9908 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9909
9910 if (op0 == CONST0_RTX (mode))
9911 op0 = CONST0_RTX (vmode);
9912 else
9913 {
9914 if (mode == SFmode)
9915 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9916 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9917 else
9918 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9919 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9920 }
9921
9922 mask = ix86_build_signbit_mask (mode, 0, 0);
9923
9924 if (mode == SFmode)
9925 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9926 else
9927 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9928 }
9929 else
9930 {
9931 nmask = ix86_build_signbit_mask (mode, 0, 1);
9932 mask = ix86_build_signbit_mask (mode, 0, 0);
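      /* copysign (op0, op1) = (op0 & ~SIGNBIT) | (op1 & SIGNBIT); NMASK and
         MASK supply those two bit patterns to the splitters below.  */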
9933
9934 if (mode == SFmode)
9935 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9936 else
9937 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9938 }
9939}
9940
9941/* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9942 be a constant, and so has already been expanded into a vector constant. */
9943
9944void
9945ix86_split_copysign_const (rtx operands[])
9946{
9947 enum machine_mode mode, vmode;
9948 rtx dest, op0, op1, mask, x;
9949
9950 dest = operands[0];
9951 op0 = operands[1];
9952 op1 = operands[2];
9953 mask = operands[3];
9954
9955 mode = GET_MODE (dest);
9956 vmode = GET_MODE (mask);
9957
9958 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9959 x = gen_rtx_AND (vmode, dest, mask);
9960 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9961
9962 if (op0 != CONST0_RTX (vmode))
9963 {
9964 x = gen_rtx_IOR (vmode, dest, op0);
9965 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9966 }
9967}
9968
9969/* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9970 so we have to do two masks. */
9971
9972void
9973ix86_split_copysign_var (rtx operands[])
9974{
9975 enum machine_mode mode, vmode;
9976 rtx dest, scratch, op0, op1, mask, nmask, x;
9977
9978 dest = operands[0];
9979 scratch = operands[1];
9980 op0 = operands[2];
9981 op1 = operands[3];
9982 nmask = operands[4];
9983 mask = operands[5];
9984
9985 mode = GET_MODE (dest);
9986 vmode = GET_MODE (mask);
9987
9988 if (rtx_equal_p (op0, op1))
9989 {
9990 /* Shouldn't happen often (it's useless, obviously), but when it does
9991 we'd generate incorrect code if we continue below. */
9992 emit_move_insn (dest, op0);
9993 return;
9994 }
9995
9996 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9997 {
9998 gcc_assert (REGNO (op1) == REGNO (scratch));
9999
10000 x = gen_rtx_AND (vmode, scratch, mask);
10001 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10002
10003 dest = mask;
10004 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10005 x = gen_rtx_NOT (vmode, dest);
10006 x = gen_rtx_AND (vmode, x, op0);
10007 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10008 }
10009 else
10010 {
10011 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10012 {
10013 x = gen_rtx_AND (vmode, scratch, mask);
10014 }
10015 else /* alternative 2,4 */
10016 {
10017 gcc_assert (REGNO (mask) == REGNO (scratch));
10018 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10019 x = gen_rtx_AND (vmode, scratch, op1);
10020 }
10021 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10022
10023 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10024 {
10025 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10026 x = gen_rtx_AND (vmode, dest, nmask);
10027 }
10028 else /* alternative 3,4 */
10029 {
10030 gcc_assert (REGNO (nmask) == REGNO (dest));
10031 dest = nmask;
10032 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10033 x = gen_rtx_AND (vmode, dest, op0);
10034 }
10035 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10036 }
10037
10038 x = gen_rtx_IOR (vmode, dest, scratch);
10039 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10040}
10041
10042/* Return TRUE or FALSE depending on whether the first SET in INSN
10043 has source and destination with matching CC modes, and that the
10044 CC mode is at least as constrained as REQ_MODE. */
10045
10046int
10047ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10048{
10049 rtx set;
10050 enum machine_mode set_mode;
10051
10052 set = PATTERN (insn);
10053 if (GET_CODE (set) == PARALLEL)
10054 set = XVECEXP (set, 0, 0);
10055 gcc_assert (GET_CODE (set) == SET);
10056 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10057
10058 set_mode = GET_MODE (SET_DEST (set));
10059 switch (set_mode)
10060 {
10061 case CCNOmode:
10062 if (req_mode != CCNOmode
10063 && (req_mode != CCmode
10064 || XEXP (SET_SRC (set), 1) != const0_rtx))
10065 return 0;
10066 break;
10067 case CCmode:
10068 if (req_mode == CCGCmode)
10069 return 0;
10070 /* FALLTHRU */
10071 case CCGCmode:
10072 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10073 return 0;
10074 /* FALLTHRU */
10075 case CCGOCmode:
10076 if (req_mode == CCZmode)
10077 return 0;
10078 /* FALLTHRU */
10079 case CCZmode:
10080 break;
10081
10082 default:
10083 gcc_unreachable ();
10084 }
10085
10086 return (GET_MODE (SET_SRC (set)) == set_mode);
10087}
10088
10089/* Generate insn patterns to do an integer compare of OPERANDS. */
10090
10091static rtx
10092ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10093{
10094 enum machine_mode cmpmode;
10095 rtx tmp, flags;
10096
10097 cmpmode = SELECT_CC_MODE (code, op0, op1);
10098 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10099
10100 /* This is very simple, but making the interface the same as in the
10101 FP case makes the rest of the code easier. */
10102 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10103 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10104
10105 /* Return the test that should be put into the flags user, i.e.
10106 the bcc, scc, or cmov instruction. */
10107 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10108}
10109
10110/* Figure out whether to use ordered or unordered fp comparisons.
10111 Return the appropriate mode to use. */
10112
10113enum machine_mode
10114ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10115{
10116 /* ??? In order to make all comparisons reversible, we do all comparisons
10117 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10118 between trapping and nontrapping forms of all comparisons, we can make
10119 inequality comparisons trapping again, since that results in better code
10120 when using FCOM based compares. */
10121 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10122}
10123
10124enum machine_mode
10125ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10126{
10127 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10128 return ix86_fp_compare_mode (code);
10129 switch (code)
10130 {
10131 /* Only zero flag is needed. */
10132 case EQ: /* ZF=0 */
10133 case NE: /* ZF!=0 */
10134 return CCZmode;
10135 /* Codes needing carry flag. */
10136 case GEU: /* CF=0 */
10137 case GTU: /* CF=0 & ZF=0 */
10138 case LTU: /* CF=1 */
10139 case LEU: /* CF=1 | ZF=1 */
10140 return CCmode;
10141 /* Codes possibly doable only with sign flag when
10142 comparing against zero. */
10143 case GE: /* SF=OF or SF=0 */
10144 case LT: /* SF<>OF or SF=1 */
10145 if (op1 == const0_rtx)
10146 return CCGOCmode;
10147 else
10148 /* For other cases Carry flag is not required. */
10149 return CCGCmode;
10150 /* Codes doable only with the sign flag when comparing
10151 against zero, but we lack a jump instruction for that,
10152 so we need to use relational tests against overflow,
10153 which thus needs to be zero. */
10154 case GT: /* ZF=0 & SF=OF */
10155 case LE: /* ZF=1 | SF<>OF */
10156 if (op1 == const0_rtx)
10157 return CCNOmode;
10158 else
10159 return CCGCmode;
10160 /* The strcmp pattern does a (use flags), and combine may ask us for the
10161 proper mode. */
10162 case USE:
10163 return CCmode;
10164 default:
10165 gcc_unreachable ();
10166 }
10167}
10168
10169/* Return the fixed registers used for condition codes. */
10170
10171static bool
10172ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10173{
10174 *p1 = FLAGS_REG;
10175 *p2 = FPSR_REG;
10176 return true;
10177}
10178
10179/* If two condition code modes are compatible, return a condition code
10180 mode which is compatible with both. Otherwise, return
10181 VOIDmode. */
10182
10183static enum machine_mode
10184ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10185{
10186 if (m1 == m2)
10187 return m1;
10188
10189 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10190 return VOIDmode;
10191
10192 if ((m1 == CCGCmode && m2 == CCGOCmode)
10193 || (m1 == CCGOCmode && m2 == CCGCmode))
10194 return CCGCmode;
10195
10196 switch (m1)
10197 {
10198 default:
10199 gcc_unreachable ();
10200
10201 case CCmode:
10202 case CCGCmode:
10203 case CCGOCmode:
10204 case CCNOmode:
10205 case CCZmode:
10206 switch (m2)
10207 {
10208 default:
10209 return VOIDmode;
10210
10211 case CCmode:
10212 case CCGCmode:
10213 case CCGOCmode:
10214 case CCNOmode:
10215 case CCZmode:
10216 return CCmode;
10217 }
10218
10219 case CCFPmode:
10220 case CCFPUmode:
10221 /* These are only compatible with themselves, which we already
10222 checked above. */
10223 return VOIDmode;
10224 }
10225}
10226
10227/* Return true if we should use an FCOMI instruction for this fp comparison. */
10228
10229int
10230ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10231{
10232 enum rtx_code swapped_code = swap_condition (code);
10233 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10234 || (ix86_fp_comparison_cost (swapped_code)
10235 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10236}
10237
10238/* Swap, force into registers, or otherwise massage the two operands
10239 to an fp comparison. The operands are updated in place; the new
10240 comparison code is returned. */
10241
10242static enum rtx_code
10243ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10244{
10245 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10246 rtx op0 = *pop0, op1 = *pop1;
10247 enum machine_mode op_mode = GET_MODE (op0);
10248 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10249
10250 /* All of the unordered compare instructions only work on registers.
10251 The same is true of the fcomi compare instructions. The XFmode
10252 compare instructions require registers except when comparing
10253 against zero or when converting operand 1 from fixed point to
10254 floating point. */
10255
10256 if (!is_sse
10257 && (fpcmp_mode == CCFPUmode
10258 || (op_mode == XFmode
10259 && ! (standard_80387_constant_p (op0) == 1
10260 || standard_80387_constant_p (op1) == 1)
10261 && GET_CODE (op1) != FLOAT)
10262 || ix86_use_fcomi_compare (code)))
10263 {
10264 op0 = force_reg (op_mode, op0);
10265 op1 = force_reg (op_mode, op1);
10266 }
10267 else
10268 {
10269 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10270 things around if they appear profitable, otherwise force op0
10271 into a register. */
10272
10273 if (standard_80387_constant_p (op0) == 0
10274 || (GET_CODE (op0) == MEM
10275 && ! (standard_80387_constant_p (op1) == 0
10276 || GET_CODE (op1) == MEM)))
10277 {
10278 rtx tmp;
10279 tmp = op0, op0 = op1, op1 = tmp;
10280 code = swap_condition (code);
10281 }
10282
10283 if (GET_CODE (op0) != REG)
10284 op0 = force_reg (op_mode, op0);
10285
10286 if (CONSTANT_P (op1))
10287 {
10288 int tmp = standard_80387_constant_p (op1);
10289 if (tmp == 0)
10290 op1 = validize_mem (force_const_mem (op_mode, op1));
10291 else if (tmp == 1)
10292 {
10293 if (TARGET_CMOVE)
10294 op1 = force_reg (op_mode, op1);
10295 }
10296 else
10297 op1 = force_reg (op_mode, op1);
10298 }
10299 }
10300
10301 /* Try to rearrange the comparison to make it cheaper. */
10302 if (ix86_fp_comparison_cost (code)
10303 > ix86_fp_comparison_cost (swap_condition (code))
10304 && (GET_CODE (op1) == REG || !no_new_pseudos))
10305 {
10306 rtx tmp;
10307 tmp = op0, op0 = op1, op1 = tmp;
10308 code = swap_condition (code);
10309 if (GET_CODE (op0) != REG)
10310 op0 = force_reg (op_mode, op0);
10311 }
10312
10313 *pop0 = op0;
10314 *pop1 = op1;
10315 return code;
10316}
10317
10318/* Convert the comparison codes we use to represent an FP comparison into
10319 the integer code that will result in a proper branch. Return UNKNOWN if
10320 no such code is available. */
10321
10322enum rtx_code
10323ix86_fp_compare_code_to_integer (enum rtx_code code)
10324{
10325 switch (code)
10326 {
10327 case GT:
10328 return GTU;
10329 case GE:
10330 return GEU;
10331 case ORDERED:
10332 case UNORDERED:
10333 return code;
10334 break;
10335 case UNEQ:
10336 return EQ;
10337 break;
10338 case UNLT:
10339 return LTU;
10340 break;
10341 case UNLE:
10342 return LEU;
10343 break;
10344 case LTGT:
10345 return NE;
10346 break;
10347 default:
10348 return UNKNOWN;
10349 }
10350}
10351
10352/* Split comparison code CODE into comparisons we can do using branch
10353 instructions. BYPASS_CODE is the comparison code for the branch that
10354 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
10355 is not required, its value is set to UNKNOWN.
10356 We never require more than two branches. */
10357
10358void
10359ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10360 enum rtx_code *first_code,
10361 enum rtx_code *second_code)
10362{
10363 *first_code = code;
10364 *bypass_code = UNKNOWN;
10365 *second_code = UNKNOWN;
10366
10367 /* The fcomi comparison sets flags as follows:
10368
10369 cmp ZF PF CF
10370 > 0 0 0
10371 < 0 0 1
10372 = 1 0 0
10373 un 1 1 1 */
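  /* fnstsw/sahf copies C0, C2 and C3 into CF, PF and ZF respectively, and
     fcomi sets the flags the same way, so the table above matches what an
     unsigned integer comparison would produce; the cases below therefore
     reuse the unsigned condition codes.  */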
10374
10375 switch (code)
10376 {
10377 case GT: /* GTU - CF=0 & ZF=0 */
10378 case GE: /* GEU - CF=0 */
10379 case ORDERED: /* PF=0 */
10380 case UNORDERED: /* PF=1 */
10381 case UNEQ: /* EQ - ZF=1 */
10382 case UNLT: /* LTU - CF=1 */
10383 case UNLE: /* LEU - CF=1 | ZF=1 */
10384 case LTGT: /* EQ - ZF=0 */
10385 break;
10386 case LT: /* LTU - CF=1 - fails on unordered */
10387 *first_code = UNLT;
10388 *bypass_code = UNORDERED;
10389 break;
10390 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10391 *first_code = UNLE;
10392 *bypass_code = UNORDERED;
10393 break;
10394 case EQ: /* EQ - ZF=1 - fails on unordered */
10395 *first_code = UNEQ;
10396 *bypass_code = UNORDERED;
10397 break;
10398 case NE: /* NE - ZF=0 - fails on unordered */
10399 *first_code = LTGT;
10400 *second_code = UNORDERED;
10401 break;
10402 case UNGE: /* GEU - CF=0 - fails on unordered */
10403 *first_code = GE;
10404 *second_code = UNORDERED;
10405 break;
10406 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10407 *first_code = GT;
10408 *second_code = UNORDERED;
10409 break;
10410 default:
10411 gcc_unreachable ();
10412 }
10413 if (!TARGET_IEEE_FP)
10414 {
10415 *second_code = UNKNOWN;
10416 *bypass_code = UNKNOWN;
10417 }
10418}
10419
10420/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10421 All of the following functions use the number of instructions as a cost metric.
10422 In the future this should be tweaked to compute bytes for optimize_size and
10423 take into account the performance of various instructions on various CPUs. */
10424static int
10425ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10426{
10427 if (!TARGET_IEEE_FP)
10428 return 4;
10429 /* The cost of code output by ix86_expand_fp_compare. */
10430 switch (code)
10431 {
10432 case UNLE:
10433 case UNLT:
10434 case LTGT:
10435 case GT:
10436 case GE:
10437 case UNORDERED:
10438 case ORDERED:
10439 case UNEQ:
10440 return 4;
10441 break;
10442 case LT:
10443 case NE:
10444 case EQ:
10445 case UNGE:
10446 return 5;
10447 break;
10448 case LE:
10449 case UNGT:
10450 return 6;
10451 break;
10452 default:
10453 gcc_unreachable ();
10454 }
10455}
10456
10457/* Return cost of comparison done using fcomi operation.
10458 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10459static int
10460ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10461{
10462 enum rtx_code bypass_code, first_code, second_code;
10463 /* Return arbitrarily high cost when instruction is not supported - this
10464 prevents gcc from using it. */
10465 if (!TARGET_CMOVE)
10466 return 1024;
10467 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10468 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10469}
10470
10471/* Return cost of comparison done using sahf operation.
10472 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10473static int
10474ix86_fp_comparison_sahf_cost (enum rtx_code code)
10475{
10476 enum rtx_code bypass_code, first_code, second_code;
10477 /* Return an arbitrarily high cost when the instruction is not preferred -
10478 this keeps gcc from using it. */
10479 if (!TARGET_USE_SAHF && !optimize_size)
10480 return 1024;
10481 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10482 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10483}
10484
10485/* Compute cost of the comparison done using any method.
10486 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10487static int
10488ix86_fp_comparison_cost (enum rtx_code code)
10489{
10490 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10491 int min;
10492
10493 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10494 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10495
10496 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10497 if (min > sahf_cost)
10498 min = sahf_cost;
10499 if (min > fcomi_cost)
10500 min = fcomi_cost;
10501 return min;
10502}
10503
10504/* Generate insn patterns to do a floating point compare of OPERANDS. */
10505
10506static rtx
10507ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10508 rtx *second_test, rtx *bypass_test)
10509{
10510 enum machine_mode fpcmp_mode, intcmp_mode;
10511 rtx tmp, tmp2;
10512 int cost = ix86_fp_comparison_cost (code);
10513 enum rtx_code bypass_code, first_code, second_code;
10514
10515 fpcmp_mode = ix86_fp_compare_mode (code);
10516 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10517
10518 if (second_test)
10519 *second_test = NULL_RTX;
10520 if (bypass_test)
10521 *bypass_test = NULL_RTX;
10522
10523 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10524
10525 /* Do fcomi/sahf based test when profitable. */
10526 if ((bypass_code == UNKNOWN || bypass_test)
10527 && (second_code == UNKNOWN || second_test)
10528 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10529 {
10530 if (TARGET_CMOVE)
10531 {
10532 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10533 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10534 tmp);
10535 emit_insn (tmp);
10536 }
10537 else
10538 {
10539 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10540 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10541 if (!scratch)
10542 scratch = gen_reg_rtx (HImode);
10543 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10544 emit_insn (gen_x86_sahf_1 (scratch));
10545 }
10546
10547 /* The FP codes work out to act like unsigned. */
10548 intcmp_mode = fpcmp_mode;
10549 code = first_code;
10550 if (bypass_code != UNKNOWN)
10551 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10552 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10553 const0_rtx);
10554 if (second_code != UNKNOWN)
10555 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10556 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10557 const0_rtx);
10558 }
10559 else
10560 {
10561 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10562 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10563 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10564 if (!scratch)
10565 scratch = gen_reg_rtx (HImode);
10566 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10567
10568 /* In the unordered case, we have to check C2 for NaN's, which
10569 doesn't happen to work out to anything nice combination-wise.
10570 So do some bit twiddling on the value we've got in AH to come
10571 up with an appropriate set of condition codes. */
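      /* After the fnstsw above, AH holds C0 in bit 0 (0x01), C2 in bit 2
         (0x04) and C3 in bit 6 (0x40); the masks below (0x45, 0x44, 0x05,
         etc.) are combinations of these condition-code bits.  */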
10572
10573 intcmp_mode = CCNOmode;
10574 switch (code)
10575 {
10576 case GT:
10577 case UNGT:
10578 if (code == GT || !TARGET_IEEE_FP)
10579 {
10580 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10581 code = EQ;
10582 }
10583 else
10584 {
10585 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10586 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10587 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10588 intcmp_mode = CCmode;
10589 code = GEU;
10590 }
10591 break;
10592 case LT:
10593 case UNLT:
10594 if (code == LT && TARGET_IEEE_FP)
10595 {
10596 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10597 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10598 intcmp_mode = CCmode;
10599 code = EQ;
10600 }
10601 else
10602 {
10603 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10604 code = NE;
10605 }
10606 break;
10607 case GE:
10608 case UNGE:
10609 if (code == GE || !TARGET_IEEE_FP)
10610 {
10611 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10612 code = EQ;
10613 }
10614 else
10615 {
10616 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10617 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10618 GEN_INT (0x01)));
10619 code = NE;
10620 }
10621 break;
10622 case LE:
10623 case UNLE:
10624 if (code == LE && TARGET_IEEE_FP)
10625 {
10626 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10627 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10628 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10629 intcmp_mode = CCmode;
10630 code = LTU;
10631 }
10632 else
10633 {
10634 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10635 code = NE;
10636 }
10637 break;
10638 case EQ:
10639 case UNEQ:
10640 if (code == EQ && TARGET_IEEE_FP)
10641 {
10642 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10643 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10644 intcmp_mode = CCmode;
10645 code = EQ;
10646 }
10647 else
10648 {
10649 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10650 code = NE;
10651 break;
10652 }
10653 break;
10654 case NE:
10655 case LTGT:
10656 if (code == NE && TARGET_IEEE_FP)
10657 {
10658 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10659 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10660 GEN_INT (0x40)));
10661 code = NE;
10662 }
10663 else
10664 {
10665 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10666 code = EQ;
10667 }
10668 break;
10669
10670 case UNORDERED:
10671 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10672 code = NE;
10673 break;
10674 case ORDERED:
10675 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10676 code = EQ;
10677 break;
10678
10679 default:
10680 gcc_unreachable ();
10681 }
10682 }
10683
10684 /* Return the test that should be put into the flags user, i.e.
10685 the bcc, scc, or cmov instruction. */
10686 return gen_rtx_fmt_ee (code, VOIDmode,
10687 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10688 const0_rtx);
10689}
10690
10691rtx
10692ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10693{
10694 rtx op0, op1, ret;
10695 op0 = ix86_compare_op0;
10696 op1 = ix86_compare_op1;
10697
10698 if (second_test)
10699 *second_test = NULL_RTX;
10700 if (bypass_test)
10701 *bypass_test = NULL_RTX;
10702
10703 if (ix86_compare_emitted)
10704 {
10705 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10706 ix86_compare_emitted = NULL_RTX;
10707 }
10708 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10709 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10710 second_test, bypass_test);
10711 else
10712 ret = ix86_expand_int_compare (code, op0, op1);
10713
10714 return ret;
10715}
10716
10717/* Return true if the CODE will result in nontrivial jump sequence. */
10718bool
10719ix86_fp_jump_nontrivial_p (enum rtx_code code)
10720{
10721 enum rtx_code bypass_code, first_code, second_code;
10722 if (!TARGET_CMOVE)
10723 return true;
10724 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10725 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10726}
10727
10728void
10729ix86_expand_branch (enum rtx_code code, rtx label)
10730{
10731 rtx tmp;
10732
10733 /* If we have emitted a compare insn, go straight to simple.
10734 ix86_expand_compare won't emit anything if ix86_compare_emitted
10735 is non-NULL. */
10736 if (ix86_compare_emitted)
10737 goto simple;
10738
10739 switch (GET_MODE (ix86_compare_op0))
10740 {
10741 case QImode:
10742 case HImode:
10743 case SImode:
10744 simple:
10745 tmp = ix86_expand_compare (code, NULL, NULL);
10746 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10747 gen_rtx_LABEL_REF (VOIDmode, label),
10748 pc_rtx);
10749 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10750 return;
10751
10752 case SFmode:
10753 case DFmode:
10754 case XFmode:
10755 {
10756 rtvec vec;
10757 int use_fcomi;
10758 enum rtx_code bypass_code, first_code, second_code;
10759
10760 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10761 &ix86_compare_op1);
10762
10763 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10764
10765 /* Check whether we will use the natural sequence with one jump. If
10766 so, we can expand the jump early. Otherwise delay expansion by
10767 creating a compound insn so as not to confuse the optimizers. */
10768 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10769 && TARGET_CMOVE)
10770 {
10771 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10772 gen_rtx_LABEL_REF (VOIDmode, label),
10773 pc_rtx, NULL_RTX, NULL_RTX);
10774 }
10775 else
10776 {
10777 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10778 ix86_compare_op0, ix86_compare_op1);
10779 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10780 gen_rtx_LABEL_REF (VOIDmode, label),
10781 pc_rtx);
10782 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10783
10784 use_fcomi = ix86_use_fcomi_compare (code);
10785 vec = rtvec_alloc (3 + !use_fcomi);
10786 RTVEC_ELT (vec, 0) = tmp;
10787 RTVEC_ELT (vec, 1)
10788 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10789 RTVEC_ELT (vec, 2)
10790 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10791 if (! use_fcomi)
10792 RTVEC_ELT (vec, 3)
10793 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10794
10795 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10796 }
10797 return;
10798 }
10799
10800 case DImode:
10801 if (TARGET_64BIT)
10802 goto simple;
10803 case TImode:
10804 /* Expand DImode branch into multiple compare+branch. */
10805 {
10806 rtx lo[2], hi[2], label2;
10807 enum rtx_code code1, code2, code3;
10808 enum machine_mode submode;
10809
10810 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10811 {
10812 tmp = ix86_compare_op0;
10813 ix86_compare_op0 = ix86_compare_op1;
10814 ix86_compare_op1 = tmp;
10815 code = swap_condition (code);
10816 }
10817 if (GET_MODE (ix86_compare_op0) == DImode)
10818 {
10819 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10820 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10821 submode = SImode;
10822 }
10823 else
10824 {
10825 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10826 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10827 submode = DImode;
10828 }
10829
10830 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10831 avoid two branches. This costs one extra insn, so disable when
10832 optimizing for size. */
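	  /* As a rough illustration (not the literal insns emitted; register
	     choice is left to the allocator), a 32-bit DImode test "a == b"
	     becomes something like:

		xorl  lo(b), lo(a)
		xorl  hi(b), hi(a)
		orl   hi(a), lo(a)
		je    label

	     i.e. a single branch on the IOR of the two XORed halves.  */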
10833
10834 if ((code == EQ || code == NE)
10835 && (!optimize_size
10836 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10837 {
10838 rtx xor0, xor1;
10839
10840 xor1 = hi[0];
10841 if (hi[1] != const0_rtx)
10842 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10843 NULL_RTX, 0, OPTAB_WIDEN);
10844
10845 xor0 = lo[0];
10846 if (lo[1] != const0_rtx)
10847 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10848 NULL_RTX, 0, OPTAB_WIDEN);
10849
10850 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10851 NULL_RTX, 0, OPTAB_WIDEN);
10852
10853 ix86_compare_op0 = tmp;
10854 ix86_compare_op1 = const0_rtx;
10855 ix86_expand_branch (code, label);
10856 return;
10857 }
10858
10859 	  /* Otherwise, if we are doing less-than or greater-or-equal-than,
10860 	     and op1 is a constant whose low word is zero, then we can just
10861 	     examine the high word.  */
10862
10863 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10864 switch (code)
10865 {
10866 case LT: case LTU: case GE: case GEU:
10867 ix86_compare_op0 = hi[0];
10868 ix86_compare_op1 = hi[1];
10869 ix86_expand_branch (code, label);
10870 return;
10871 default:
10872 break;
10873 }
10874
10875 /* Otherwise, we need two or three jumps. */
10876
10877 label2 = gen_label_rtx ();
10878
10879 code1 = code;
10880 code2 = swap_condition (code);
10881 code3 = unsigned_condition (code);
10882
10883 switch (code)
10884 {
10885 case LT: case GT: case LTU: case GTU:
10886 break;
10887
10888 case LE: code1 = LT; code2 = GT; break;
10889 case GE: code1 = GT; code2 = LT; break;
10890 case LEU: code1 = LTU; code2 = GTU; break;
10891 case GEU: code1 = GTU; code2 = LTU; break;
10892
10893 case EQ: code1 = UNKNOWN; code2 = NE; break;
10894 case NE: code2 = UNKNOWN; break;
10895
10896 default:
10897 gcc_unreachable ();
10898 }
10899
10900 /*
10901 * a < b =>
10902 * if (hi(a) < hi(b)) goto true;
10903 * if (hi(a) > hi(b)) goto false;
10904 * if (lo(a) < lo(b)) goto true;
10905 * false:
10906 */
10907
10908 ix86_compare_op0 = hi[0];
10909 ix86_compare_op1 = hi[1];
10910
10911 if (code1 != UNKNOWN)
10912 ix86_expand_branch (code1, label);
10913 if (code2 != UNKNOWN)
10914 ix86_expand_branch (code2, label2);
10915
10916 ix86_compare_op0 = lo[0];
10917 ix86_compare_op1 = lo[1];
10918 ix86_expand_branch (code3, label);
10919
10920 if (code2 != UNKNOWN)
10921 emit_label (label2);
10922 return;
10923 }
10924
10925 default:
10926 gcc_unreachable ();
10927 }
10928}
10929
10930/* Split branch based on floating point condition. */
10931void
10932ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10933 rtx target1, rtx target2, rtx tmp, rtx pushed)
10934{
10935 rtx second, bypass;
10936 rtx label = NULL_RTX;
10937 rtx condition;
10938 int bypass_probability = -1, second_probability = -1, probability = -1;
10939 rtx i;
10940
10941 if (target2 != pc_rtx)
10942 {
10943 rtx tmp = target2;
10944 code = reverse_condition_maybe_unordered (code);
10945 target2 = target1;
10946 target1 = tmp;
10947 }
10948
10949 condition = ix86_expand_fp_compare (code, op1, op2,
10950 tmp, &second, &bypass);
10951
10952 /* Remove pushed operand from stack. */
10953 if (pushed)
10954 ix86_free_from_memory (GET_MODE (pushed));
10955
10956 if (split_branch_probability >= 0)
10957 {
10958       /* Distribute the probabilities across the jumps.
10959 	 Assume that BYPASS and SECOND always test
10960 	 for UNORDERED.  */
10961 probability = split_branch_probability;
10962
10963       /* A value of 1 is low enough that the main probability does not
10964 	 need to be updated.  Later we may run some experiments and see
10965 	 whether unordered values are more frequent in practice.  */
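      /* Note: REG_BR_PROB values are scaled by REG_BR_PROB_BASE (10000),
	 so a probability of 1 marks the bypass/second branches as taken
	 roughly 0.01% of the time, i.e. essentially never.  */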
10966 if (bypass)
10967 bypass_probability = 1;
10968 if (second)
10969 second_probability = 1;
10970 }
10971 if (bypass != NULL_RTX)
10972 {
10973 label = gen_label_rtx ();
10974 i = emit_jump_insn (gen_rtx_SET
10975 (VOIDmode, pc_rtx,
10976 gen_rtx_IF_THEN_ELSE (VOIDmode,
10977 bypass,
10978 gen_rtx_LABEL_REF (VOIDmode,
10979 label),
10980 pc_rtx)));
10981 if (bypass_probability >= 0)
10982 REG_NOTES (i)
10983 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10984 GEN_INT (bypass_probability),
10985 REG_NOTES (i));
10986 }
10987 i = emit_jump_insn (gen_rtx_SET
10988 (VOIDmode, pc_rtx,
10989 gen_rtx_IF_THEN_ELSE (VOIDmode,
10990 condition, target1, target2)));
10991 if (probability >= 0)
10992 REG_NOTES (i)
10993 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10994 GEN_INT (probability),
10995 REG_NOTES (i));
10996 if (second != NULL_RTX)
10997 {
10998 i = emit_jump_insn (gen_rtx_SET
10999 (VOIDmode, pc_rtx,
11000 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11001 target2)));
11002 if (second_probability >= 0)
11003 REG_NOTES (i)
11004 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11005 GEN_INT (second_probability),
11006 REG_NOTES (i));
11007 }
11008 if (label != NULL_RTX)
11009 emit_label (label);
11010}
11011
11012int
11013ix86_expand_setcc (enum rtx_code code, rtx dest)
11014{
11015 rtx ret, tmp, tmpreg, equiv;
11016 rtx second_test, bypass_test;
11017
11018 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11019 return 0; /* FAIL */
11020
11021 gcc_assert (GET_MODE (dest) == QImode);
11022
11023 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11024 PUT_MODE (ret, QImode);
11025
11026 tmp = dest;
11027 tmpreg = dest;
11028
11029 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11030 if (bypass_test || second_test)
11031 {
11032 rtx test = second_test;
11033 int bypass = 0;
11034 rtx tmp2 = gen_reg_rtx (QImode);
11035 if (bypass_test)
11036 {
11037 gcc_assert (!second_test);
11038 test = bypass_test;
11039 bypass = 1;
11040 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11041 }
11042 PUT_MODE (test, QImode);
11043 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11044
11045 if (bypass)
11046 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11047 else
11048 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11049 }
11050
11051 /* Attach a REG_EQUAL note describing the comparison result. */
11052 if (ix86_compare_op0 && ix86_compare_op1)
11053 {
11054 equiv = simplify_gen_relational (code, QImode,
11055 GET_MODE (ix86_compare_op0),
11056 ix86_compare_op0, ix86_compare_op1);
11057 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11058 }
11059
11060 return 1; /* DONE */
11061}
11062
11063 /* Expand a comparison that sets or clears the carry flag.  Return true when
11064    successful and set *POP to the resulting comparison for the operation.  */
11065static bool
11066ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11067{
11068 enum machine_mode mode =
11069 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11070
11071   /* Do not handle double-word compares; they go through a special path.  */
11073 if (mode == (TARGET_64BIT ? TImode : DImode))
11074 return false;
11075 if (FLOAT_MODE_P (mode))
11076 {
11077 rtx second_test = NULL, bypass_test = NULL;
11078 rtx compare_op, compare_seq;
11079
11080       /* Shortcut: the following common codes never translate into carry flag compares.  */
11081 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11082 || code == ORDERED || code == UNORDERED)
11083 return false;
11084
11085       /* These comparisons require the zero flag; swap the operands so that they no longer do.  */
11086 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11087 && !TARGET_IEEE_FP)
11088 {
11089 rtx tmp = op0;
11090 op0 = op1;
11091 op1 = tmp;
11092 code = swap_condition (code);
11093 }
11094
11095       /* Try to expand the comparison and verify that we end up with a carry
11096 	 flag based comparison.  This fails to be true only when we decide to
11097 	 expand the comparison using arithmetic, which is not a common scenario.  */
11098 start_sequence ();
11099 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11100 &second_test, &bypass_test);
11101 compare_seq = get_insns ();
11102 end_sequence ();
11103
11104 if (second_test || bypass_test)
11105 return false;
11106 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11107 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11108 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11109 else
11110 code = GET_CODE (compare_op);
11111 if (code != LTU && code != GEU)
11112 return false;
11113 emit_insn (compare_seq);
11114 *pop = compare_op;
11115 return true;
11116 }
11117 if (!INTEGRAL_MODE_P (mode))
11118 return false;
11119 switch (code)
11120 {
11121 case LTU:
11122 case GEU:
11123 break;
11124
11125 /* Convert a==0 into (unsigned)a<1. */
11126 case EQ:
11127 case NE:
11128 if (op1 != const0_rtx)
11129 return false;
11130 op1 = const1_rtx;
11131 code = (code == EQ ? LTU : GEU);
11132 break;
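      /* For example, "a == 0" becomes "(unsigned) a < 1": the compare against 1
	 sets the carry flag exactly when a is zero, so a following sbb or adc
	 can consume the result directly.  */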
11133
11134     /* Convert a>b into b<a or a>=b+1.  */
11135 case GTU:
11136 case LEU:
11137 if (GET_CODE (op1) == CONST_INT)
11138 {
11139 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11140 	  /* Bail out on overflow.  We could still swap the operands, but that
11141 	     would force loading of the constant into a register.  */
11142 if (op1 == const0_rtx
11143 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11144 return false;
11145 code = (code == GTU ? GEU : LTU);
11146 }
11147 else
11148 {
11149 rtx tmp = op1;
11150 op1 = op0;
11151 op0 = tmp;
11152 code = (code == GTU ? LTU : GEU);
11153 }
11154 break;
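      /* For instance, with a constant operand unsigned "a > 5" becomes
	 "a >= 6" (GEU), and with a non-constant operand "a > b" becomes
	 "b < a" (LTU); both forms map directly onto the carry flag set by
	 a cmp instruction.  */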
11155
11156 /* Convert a>=0 into (unsigned)a<0x80000000. */
11157 case LT:
11158 case GE:
11159 if (mode == DImode || op1 != const0_rtx)
11160 return false;
11161 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11162 code = (code == LT ? GEU : LTU);
11163 break;
11164 case LE:
11165 case GT:
11166 if (mode == DImode || op1 != constm1_rtx)
11167 return false;
11168 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11169 code = (code == LE ? GEU : LTU);
11170 break;
11171
11172 default:
11173 return false;
11174 }
11175   /* Swapping the operands may cause a constant to appear as the first operand.  */
11176 if (!nonimmediate_operand (op0, VOIDmode))
11177 {
11178 if (no_new_pseudos)
11179 return false;
11180 op0 = force_reg (mode, op0);
11181 }
11182 ix86_compare_op0 = op0;
11183 ix86_compare_op1 = op1;
11184 *pop = ix86_expand_compare (code, NULL, NULL);
11185 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11186 return true;
11187}
11188
11189int
11190ix86_expand_int_movcc (rtx operands[])
11191{
11192 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11193 rtx compare_seq, compare_op;
11194 rtx second_test, bypass_test;
11195 enum machine_mode mode = GET_MODE (operands[0]);
11196   bool sign_bit_compare_p = false;
11197
11198 start_sequence ();
11199 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11200 compare_seq = get_insns ();
11201 end_sequence ();
11202
11203 compare_code = GET_CODE (compare_op);
11204
11205 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11206 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11207 sign_bit_compare_p = true;
11208
11209 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11210 HImode insns, we'd be swallowed in word prefix ops. */
11211
11212 if ((mode != HImode || TARGET_FAST_PREFIX)
11213 && (mode != (TARGET_64BIT ? TImode : DImode))
11214 && GET_CODE (operands[2]) == CONST_INT
11215 && GET_CODE (operands[3]) == CONST_INT)
11216 {
11217 rtx out = operands[0];
11218 HOST_WIDE_INT ct = INTVAL (operands[2]);
11219 HOST_WIDE_INT cf = INTVAL (operands[3]);
11220 HOST_WIDE_INT diff;
11221
11222 diff = ct - cf;
11223       /* Sign bit compares are better done using shifts than using sbb.  */
11225 if (sign_bit_compare_p
11226 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11227 ix86_compare_op1, &compare_op))
11228 {
11229 /* Detect overlap between destination and compare sources. */
11230 rtx tmp = out;
11231
11232 if (!sign_bit_compare_p)
11233 {
11234 bool fpcmp = false;
11235
11236 compare_code = GET_CODE (compare_op);
11237
11238 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11239 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11240 {
11241 fpcmp = true;
11242 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11243 }
11244
11245 	      /* To simplify the rest of the code, restrict to the GEU case.  */
11246 if (compare_code == LTU)
11247 {
11248 HOST_WIDE_INT tmp = ct;
11249 ct = cf;
11250 cf = tmp;
11251 compare_code = reverse_condition (compare_code);
11252 code = reverse_condition (code);
11253 }
11254 else
11255 {
11256 if (fpcmp)
11257 PUT_CODE (compare_op,
11258 reverse_condition_maybe_unordered
11259 (GET_CODE (compare_op)));
11260 else
11261 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11262 }
11263 diff = ct - cf;
11264
11265 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11266 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11267 tmp = gen_reg_rtx (mode);
11268
11269 if (mode == DImode)
11270 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11271 else
11272 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11273 }
11274 else
11275 {
11276 if (code == GT || code == GE)
11277 code = reverse_condition (code);
11278 else
11279 {
11280 HOST_WIDE_INT tmp = ct;
11281 ct = cf;
11282 cf = tmp;
11283 diff = ct - cf;
11284 }
11285 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11286 ix86_compare_op1, VOIDmode, 0, -1);
11287 }
11288
11289 if (diff == 1)
11290 {
11291 /*
11292 * cmpl op0,op1
11293 * sbbl dest,dest
11294 * [addl dest, ct]
11295 *
11296 * Size 5 - 8.
11297 */
11298 if (ct)
11299 tmp = expand_simple_binop (mode, PLUS,
11300 tmp, GEN_INT (ct),
11301 copy_rtx (tmp), 1, OPTAB_DIRECT);
11302 }
11303 else if (cf == -1)
11304 {
11305 /*
11306 * cmpl op0,op1
11307 * sbbl dest,dest
11308 * orl $ct, dest
11309 *
11310 * Size 8.
11311 */
11312 tmp = expand_simple_binop (mode, IOR,
11313 tmp, GEN_INT (ct),
11314 copy_rtx (tmp), 1, OPTAB_DIRECT);
11315 }
11316 else if (diff == -1 && ct)
11317 {
11318 /*
11319 * cmpl op0,op1
11320 * sbbl dest,dest
11321 * notl dest
11322 * [addl dest, cf]
11323 *
11324 * Size 8 - 11.
11325 */
11326 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11327 if (cf)
11328 tmp = expand_simple_binop (mode, PLUS,
11329 copy_rtx (tmp), GEN_INT (cf),
11330 copy_rtx (tmp), 1, OPTAB_DIRECT);
11331 }
11332 else
11333 {
11334 /*
11335 * cmpl op0,op1
11336 * sbbl dest,dest
11337 * [notl dest]
11338 * andl cf - ct, dest
11339 * [addl dest, ct]
11340 *
11341 * Size 8 - 11.
11342 */
11343
11344 if (cf == 0)
11345 {
11346 cf = ct;
11347 ct = 0;
11348 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11349 }
11350
11351 tmp = expand_simple_binop (mode, AND,
11352 copy_rtx (tmp),
11353 gen_int_mode (cf - ct, mode),
11354 copy_rtx (tmp), 1, OPTAB_DIRECT);
11355 if (ct)
11356 tmp = expand_simple_binop (mode, PLUS,
11357 copy_rtx (tmp), GEN_INT (ct),
11358 copy_rtx (tmp), 1, OPTAB_DIRECT);
11359 }
11360
11361 if (!rtx_equal_p (tmp, out))
11362 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11363
11364 return 1; /* DONE */
11365 }
11366
11367 if (diff < 0)
11368 {
11369 HOST_WIDE_INT tmp;
11370 tmp = ct, ct = cf, cf = tmp;
11371 diff = -diff;
11372 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11373 {
11374 	      /* We may be reversing an unordered compare to a normal compare, which
11375 		 is not valid in general (we might convert a non-trapping condition
11376 		 into a trapping one); however, on i386 we currently emit all
11377 		 comparisons unordered.  */
11378 compare_code = reverse_condition_maybe_unordered (compare_code);
11379 code = reverse_condition_maybe_unordered (code);
11380 }
11381 else
11382 {
11383 compare_code = reverse_condition (compare_code);
11384 code = reverse_condition (code);
11385 }
11386 }
11387
11388 compare_code = UNKNOWN;
11389 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11390 && GET_CODE (ix86_compare_op1) == CONST_INT)
11391 {
11392 if (ix86_compare_op1 == const0_rtx
11393 && (code == LT || code == GE))
11394 compare_code = code;
11395 else if (ix86_compare_op1 == constm1_rtx)
11396 {
11397 if (code == LE)
11398 compare_code = LT;
11399 else if (code == GT)
11400 compare_code = GE;
11401 }
11402 }
11403
11404 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11405 if (compare_code != UNKNOWN
11406 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11407 && (cf == -1 || ct == -1))
11408 {
11409 	      /* If the lea code below could be used, only optimize
11410 		 if it results in a 2-insn sequence.  */
11411
11412 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11413 || diff == 3 || diff == 5 || diff == 9)
11414 || (compare_code == LT && ct == -1)
11415 || (compare_code == GE && cf == -1))
11416 {
11417 /*
11418 * notl op1 (if necessary)
11419 * sarl $31, op1
11420 * orl cf, op1
11421 */
11422 if (ct != -1)
11423 {
11424 cf = ct;
11425 ct = -1;
11426 code = reverse_condition (code);
11427 }
11428
11429 out = emit_store_flag (out, code, ix86_compare_op0,
11430 ix86_compare_op1, VOIDmode, 0, -1);
11431
11432 out = expand_simple_binop (mode, IOR,
11433 out, GEN_INT (cf),
11434 out, 1, OPTAB_DIRECT);
11435 if (out != operands[0])
11436 emit_move_insn (operands[0], out);
11437
11438 return 1; /* DONE */
11439 }
11440 }
11441
11442
11443 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11444 || diff == 3 || diff == 5 || diff == 9)
11445 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11446 && (mode != DImode
11447 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11448 {
11449 /*
11450 * xorl dest,dest
11451 * cmpl op1,op2
11452 * setcc dest
11453 * lea cf(dest*(ct-cf)),dest
11454 *
11455 * Size 14.
11456 *
11457 * This also catches the degenerate setcc-only case.
11458 */
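	  /*
	   * For instance, with ct == 5 and cf == 2 (diff == 3), the 0/1 value
	   * produced by setcc is scaled and offset in one step, roughly:
	   *
	   *	leal	2(%eax,%eax,2), %eax
	   *
	   * yielding 5 when the condition holds and 2 otherwise.  This is only
	   * an illustration; the actual lea is assembled from the MULT and
	   * PLUS rtxes built below.
	   */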
11459
11460 rtx tmp;
11461 int nops;
11462
11463 out = emit_store_flag (out, code, ix86_compare_op0,
11464 ix86_compare_op1, VOIDmode, 0, 1);
11465
11466 nops = 0;
11467 	  /* On x86_64 the lea instruction operates on Pmode, so we need
11468 	     the arithmetic done in the proper mode to match.  */
11469 if (diff == 1)
11470 tmp = copy_rtx (out);
11471 else
11472 {
11473 rtx out1;
11474 out1 = copy_rtx (out);
11475 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11476 nops++;
11477 if (diff & 1)
11478 {
11479 tmp = gen_rtx_PLUS (mode, tmp, out1);
11480 nops++;
11481 }
11482 }
11483 if (cf != 0)
11484 {
11485 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11486 nops++;
11487 }
11488 if (!rtx_equal_p (tmp, out))
11489 {
11490 if (nops == 1)
11491 out = force_operand (tmp, copy_rtx (out));
11492 else
11493 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11494 }
11495 if (!rtx_equal_p (out, operands[0]))
11496 emit_move_insn (operands[0], copy_rtx (out));
11497
11498 return 1; /* DONE */
11499 }
11500
11501 /*
11502 * General case: Jumpful:
11503 * xorl dest,dest cmpl op1, op2
11504 * cmpl op1, op2 movl ct, dest
11505 * setcc dest jcc 1f
11506 * decl dest movl cf, dest
11507 * andl (cf-ct),dest 1:
11508 * addl ct,dest
11509 *
11510 * Size 20. Size 14.
11511 *
11512 * This is reasonably steep, but branch mispredict costs are
11513 * high on modern cpus, so consider failing only if optimizing
11514 * for space.
11515 */
11516
11517 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11518 && BRANCH_COST >= 2)
11519 {
11520 if (cf == 0)
11521 {
11522 cf = ct;
11523 ct = 0;
11524 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11525 	    /* We may be reversing an unordered compare to a normal compare,
11526 	       which is not valid in general (we might convert a non-trapping
11527 	       condition into a trapping one); however, on i386 we currently
11528 	       emit all comparisons unordered.  */
11529 code = reverse_condition_maybe_unordered (code);
11530 else
11531 {
11532 code = reverse_condition (code);
11533 if (compare_code != UNKNOWN)
11534 compare_code = reverse_condition (compare_code);
11535 }
11536 }
11537
11538 if (compare_code != UNKNOWN)
11539 {
11540 /* notl op1 (if needed)
11541 sarl $31, op1
11542 andl (cf-ct), op1
11543 addl ct, op1
11544
11545 For x < 0 (resp. x <= -1) there will be no notl,
11546 so if possible swap the constants to get rid of the
11547 complement.
11548 True/false will be -1/0 while code below (store flag
11549 followed by decrement) is 0/-1, so the constants need
11550 to be exchanged once more. */
11551
11552 if (compare_code == GE || !cf)
11553 {
11554 code = reverse_condition (code);
11555 compare_code = LT;
11556 }
11557 else
11558 {
11559 HOST_WIDE_INT tmp = cf;
11560 cf = ct;
11561 ct = tmp;
11562 }
11563
11564 out = emit_store_flag (out, code, ix86_compare_op0,
11565 ix86_compare_op1, VOIDmode, 0, -1);
11566 }
11567 else
11568 {
11569 out = emit_store_flag (out, code, ix86_compare_op0,
11570 ix86_compare_op1, VOIDmode, 0, 1);
11571
11572 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11573 copy_rtx (out), 1, OPTAB_DIRECT);
11574 }
11575
11576 out = expand_simple_binop (mode, AND, copy_rtx (out),
11577 gen_int_mode (cf - ct, mode),
11578 copy_rtx (out), 1, OPTAB_DIRECT);
11579 if (ct)
11580 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11581 copy_rtx (out), 1, OPTAB_DIRECT);
11582 if (!rtx_equal_p (out, operands[0]))
11583 emit_move_insn (operands[0], copy_rtx (out));
11584
11585 return 1; /* DONE */
11586 }
11587 }
11588
11589 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11590 {
11591 /* Try a few things more with specific constants and a variable. */
11592
11593 optab op;
11594 rtx var, orig_out, out, tmp;
11595
11596 if (BRANCH_COST <= 2)
11597 return 0; /* FAIL */
11598
11599       /* If one of the two operands is an interesting constant, turn the other
11600 	 arm into a 0/-1 mask (via the recursion below) and mask the variable in with AND or IOR.  */
11601
11602 if (GET_CODE (operands[2]) == CONST_INT)
11603 {
11604 var = operands[3];
11605 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11606 operands[3] = constm1_rtx, op = and_optab;
11607 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11608 operands[3] = const0_rtx, op = ior_optab;
11609 else
11610 return 0; /* FAIL */
11611 }
11612 else if (GET_CODE (operands[3]) == CONST_INT)
11613 {
11614 var = operands[2];
11615 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11616 operands[2] = constm1_rtx, op = and_optab;
11617 	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11618 operands[2] = const0_rtx, op = ior_optab;
11619 else
11620 return 0; /* FAIL */
11621 }
11622 else
11623 return 0; /* FAIL */
11624
11625 orig_out = operands[0];
11626 tmp = gen_reg_rtx (mode);
11627 operands[0] = tmp;
11628
11629 /* Recurse to get the constant loaded. */
11630 if (ix86_expand_int_movcc (operands) == 0)
11631 return 0; /* FAIL */
11632
11633 /* Mask in the interesting variable. */
11634 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11635 OPTAB_WIDEN);
11636 if (!rtx_equal_p (out, orig_out))
11637 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11638
11639 return 1; /* DONE */
11640 }
11641
11642 /*
11643 * For comparison with above,
11644 *
11645 * movl cf,dest
11646 * movl ct,tmp
11647 * cmpl op1,op2
11648 * cmovcc tmp,dest
11649 *
11650 * Size 15.
11651 */
11652
11653 if (! nonimmediate_operand (operands[2], mode))
11654 operands[2] = force_reg (mode, operands[2]);
11655 if (! nonimmediate_operand (operands[3], mode))
11656 operands[3] = force_reg (mode, operands[3]);
11657
11658 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11659 {
11660 rtx tmp = gen_reg_rtx (mode);
11661 emit_move_insn (tmp, operands[3]);
11662 operands[3] = tmp;
11663 }
11664 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11665 {
11666 rtx tmp = gen_reg_rtx (mode);
11667 emit_move_insn (tmp, operands[2]);
11668 operands[2] = tmp;
11669 }
11670
11671 if (! register_operand (operands[2], VOIDmode)
11672 && (mode == QImode
11673 || ! register_operand (operands[3], VOIDmode)))
11674 operands[2] = force_reg (mode, operands[2]);
11675
11676 if (mode == QImode
11677 && ! register_operand (operands[3], VOIDmode))
11678 operands[3] = force_reg (mode, operands[3]);
11679
11680 emit_insn (compare_seq);
11681 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11682 gen_rtx_IF_THEN_ELSE (mode,
11683 compare_op, operands[2],
11684 operands[3])));
11685 if (bypass_test)
11686 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11687 gen_rtx_IF_THEN_ELSE (mode,
11688 bypass_test,
11689 copy_rtx (operands[3]),
11690 copy_rtx (operands[0]))));
11691 if (second_test)
11692 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11693 gen_rtx_IF_THEN_ELSE (mode,
11694 second_test,
11695 copy_rtx (operands[2]),
11696 copy_rtx (operands[0]))));
11697
11698 return 1; /* DONE */
11699}
11700
11701/* Swap, force into registers, or otherwise massage the two operands
11702 to an sse comparison with a mask result. Thus we differ a bit from
11703 ix86_prepare_fp_compare_args which expects to produce a flags result.
11704
11705 The DEST operand exists to help determine whether to commute commutative
11706 operators. The POP0/POP1 operands are updated in place. The new
11707 comparison code is returned, or UNKNOWN if not implementable. */
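/* For example, the SSE compare instructions (cmpss/cmpps and friends) only
   encode the EQ/LT/LE/UNORDERED predicates and their negations, so GT, GE,
   UNLE and UNLT are handled here by swapping the operands and using the
   mirrored code; the commutative codes (EQ, NE, ORDERED, UNORDERED) are
   swapped only when doing so lets the destination double as the first
   source operand.  */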
11708
11709static enum rtx_code
11710ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11711 rtx *pop0, rtx *pop1)
11712{
11713 rtx tmp;
11714
11715 switch (code)
11716 {
11717 case LTGT:
11718 case UNEQ:
11719 /* We have no LTGT as an operator. We could implement it with
11720 NE & ORDERED, but this requires an extra temporary. It's
11721 not clear that it's worth it. */
11722 return UNKNOWN;
11723
11724 case LT:
11725 case LE:
11726 case UNGT:
11727 case UNGE:
11728 /* These are supported directly. */
11729 break;
11730
11731 case EQ:
11732 case NE:
11733 case UNORDERED:
11734 case ORDERED:
11735 /* For commutative operators, try to canonicalize the destination
11736 operand to be first in the comparison - this helps reload to
11737 avoid extra moves. */
11738 if (!dest || !rtx_equal_p (dest, *pop1))
11739 break;
11740 /* FALLTHRU */
11741
11742 case GE:
11743 case GT:
11744 case UNLE:
11745 case UNLT:
11746 /* These are not supported directly. Swap the comparison operands
11747 to transform into something that is supported. */
11748 tmp = *pop0;
11749 *pop0 = *pop1;
11750 *pop1 = tmp;
11751 code = swap_condition (code);
11752 break;
11753
11754 default:
11755 gcc_unreachable ();
11756 }
11757
11758 return code;
11759}
11760
11761/* Detect conditional moves that exactly match min/max operational
11762 semantics. Note that this is IEEE safe, as long as we don't
11763 interchange the operands.
11764
11765 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11766 and TRUE if the operation is successful and instructions are emitted. */
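/* Concretely, "a < b ? a : b" is recognized as a MIN and "a < b ? b : a" as
   a MAX; a UNGE comparison is also accepted because swapping the two arms of
   the conditional turns it back into the LT form.  The operand order is
   preserved because the scalar/vector min and max instructions are not
   commutative when a NaN or signed zeros are involved.  */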
11767
11768static bool
11769ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11770 rtx cmp_op1, rtx if_true, rtx if_false)
11771{
11772 enum machine_mode mode;
11773 bool is_min;
11774 rtx tmp;
11775
11776 if (code == LT)
11777 ;
11778 else if (code == UNGE)
11779 {
11780 tmp = if_true;
11781 if_true = if_false;
11782 if_false = tmp;
11783 }
11784 else
11785 return false;
11786
11787 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11788 is_min = true;
11789 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11790 is_min = false;
11791 else
11792 return false;
11793
11794 mode = GET_MODE (dest);
11795
11796 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11797 but MODE may be a vector mode and thus not appropriate. */
11798 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11799 {
11800 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11801 rtvec v;
11802
11803 if_true = force_reg (mode, if_true);
11804 v = gen_rtvec (2, if_true, if_false);
11805 tmp = gen_rtx_UNSPEC (mode, v, u);
11806 }
11807 else
11808 {
11809 code = is_min ? SMIN : SMAX;
11810 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11811 }
11812
11813 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11814 return true;
11815}
11816
11817/* Expand an sse vector comparison. Return the register with the result. */
11818
11819static rtx
11820ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11821 rtx op_true, rtx op_false)
11822{
11823 enum machine_mode mode = GET_MODE (dest);
11824 rtx x;
11825
11826 cmp_op0 = force_reg (mode, cmp_op0);
11827 if (!nonimmediate_operand (cmp_op1, mode))
11828 cmp_op1 = force_reg (mode, cmp_op1);
11829
11830 if (optimize
11831 || reg_overlap_mentioned_p (dest, op_true)
11832 || reg_overlap_mentioned_p (dest, op_false))
11833 dest = gen_reg_rtx (mode);
11834
11835 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11836 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11837
11838 return dest;
11839}
11840
11841/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11842 operations. This is used for both scalar and vector conditional moves. */
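/* The expansion relies on CMP being the all-ones / all-zeros mask produced
   by an SSE compare, so the select reduces to the bitwise identity

	DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE)

   with the corresponding AND dropped when one of the arms is the zero
   constant.  */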
11843
11844static void
11845ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11846{
11847 enum machine_mode mode = GET_MODE (dest);
11848 rtx t2, t3, x;
11849
11850 if (op_false == CONST0_RTX (mode))
11851 {
11852 op_true = force_reg (mode, op_true);
11853 x = gen_rtx_AND (mode, cmp, op_true);
11854 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11855 }
11856 else if (op_true == CONST0_RTX (mode))
11857 {
11858 op_false = force_reg (mode, op_false);
11859 x = gen_rtx_NOT (mode, cmp);
11860 x = gen_rtx_AND (mode, x, op_false);
11861 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11862 }
11863 else
11864 {
11865 op_true = force_reg (mode, op_true);
11866 op_false = force_reg (mode, op_false);
11867
11868 t2 = gen_reg_rtx (mode);
11869 if (optimize)
11870 t3 = gen_reg_rtx (mode);
11871 else
11872 t3 = dest;
11873
11874 x = gen_rtx_AND (mode, op_true, cmp);
11875 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11876
11877 x = gen_rtx_NOT (mode, cmp);
11878 x = gen_rtx_AND (mode, x, op_false);
11879 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11880
11881 x = gen_rtx_IOR (mode, t3, t2);
11882 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11883 }
11884}
11885
11886/* Expand a floating-point conditional move. Return true if successful. */
11887
11888int
11889ix86_expand_fp_movcc (rtx operands[])
11890{
11891 enum machine_mode mode = GET_MODE (operands[0]);
11892 enum rtx_code code = GET_CODE (operands[1]);
11893 rtx tmp, compare_op, second_test, bypass_test;
11894
11895 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11896 {
11897 enum machine_mode cmode;
11898
11899 /* Since we've no cmove for sse registers, don't force bad register
11900 allocation just to gain access to it. Deny movcc when the
11901 comparison mode doesn't match the move mode. */
11902 cmode = GET_MODE (ix86_compare_op0);
11903 if (cmode == VOIDmode)
11904 cmode = GET_MODE (ix86_compare_op1);
11905 if (cmode != mode)
11906 return 0;
11907
11908 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11909 &ix86_compare_op0,
11910 &ix86_compare_op1);
11911 if (code == UNKNOWN)
11912 return 0;
11913
11914 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11915 ix86_compare_op1, operands[2],
11916 operands[3]))
11917 return 1;
11918
11919 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11920 ix86_compare_op1, operands[2], operands[3]);
11921 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11922 return 1;
11923 }
11924
11925 /* The floating point conditional move instructions don't directly
11926 support conditions resulting from a signed integer comparison. */
11927
11928 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11929
11933 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11934 {
11935 gcc_assert (!second_test && !bypass_test);
11936 tmp = gen_reg_rtx (QImode);
11937 ix86_expand_setcc (code, tmp);
11938 code = NE;
11939 ix86_compare_op0 = tmp;
11940 ix86_compare_op1 = const0_rtx;
11941 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11942 }
11943 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11944 {
11945 tmp = gen_reg_rtx (mode);
11946 emit_move_insn (tmp, operands[3]);
11947 operands[3] = tmp;
11948 }
11949 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11950 {
11951 tmp = gen_reg_rtx (mode);
11952 emit_move_insn (tmp, operands[2]);
11953 operands[2] = tmp;
11954 }
11955
11956 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11957 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11958 operands[2], operands[3])));
11959 if (bypass_test)
11960 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11961 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11962 operands[3], operands[0])));
11963 if (second_test)
11964 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11965 gen_rtx_IF_THEN_ELSE (mode, second_test,
11966 operands[2], operands[0])));
11967
11968 return 1;
11969}
11970
11971/* Expand a floating-point vector conditional move; a vcond operation
11972 rather than a movcc operation. */
11973
11974bool
11975ix86_expand_fp_vcond (rtx operands[])
11976{
11977 enum rtx_code code = GET_CODE (operands[3]);
11978 rtx cmp;
11979
11980 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11981 &operands[4], &operands[5]);
11982 if (code == UNKNOWN)
11983 return false;
11984
11985 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11986 operands[5], operands[1], operands[2]))
11987 return true;
11988
11989 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11990 operands[1], operands[2]);
11991 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11992 return true;
11993}
11994
11995/* Expand a signed integral vector conditional move. */
11996
11997bool
11998ix86_expand_int_vcond (rtx operands[])
11999{
12000 enum machine_mode mode = GET_MODE (operands[0]);
12001 enum rtx_code code = GET_CODE (operands[3]);
12002 bool negate = false;
12003 rtx x, cop0, cop1;
12004
12005 cop0 = operands[4];
12006 cop1 = operands[5];
12007
12008 /* Canonicalize the comparison to EQ, GT, GTU. */
12009 switch (code)
12010 {
12011 case EQ:
12012 case GT:
12013 case GTU:
12014 break;
12015
12016 case NE:
12017 case LE:
12018 case LEU:
12019 code = reverse_condition (code);
12020 negate = true;
12021 break;
12022
12023 case GE:
12024 case GEU:
12025 code = reverse_condition (code);
12026 negate = true;
12027 /* FALLTHRU */
12028
12029 case LT:
12030 case LTU:
12031 code = swap_condition (code);
12032 x = cop0, cop0 = cop1, cop1 = x;
12033 break;
12034
12035 default:
12036 gcc_unreachable ();
12037 }
12038
12039 /* Unsigned parallel compare is not supported by the hardware. Play some
12040 tricks to turn this into a signed comparison against 0. */
12041 if (code == GTU)
12042 {
12043 cop0 = force_reg (mode, cop0);
12044
12045 switch (mode)
12046 {
12047 case V4SImode:
12048 {
12049 rtx t1, t2, mask;
12050
12051 /* Perform a parallel modulo subtraction. */
12052 t1 = gen_reg_rtx (mode);
12053 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12054
12055 /* Extract the original sign bit of op0. */
12056 mask = GEN_INT (-0x80000000);
12057 mask = gen_rtx_CONST_VECTOR (mode,
12058 gen_rtvec (4, mask, mask, mask, mask));
12059 mask = force_reg (mode, mask);
12060 t2 = gen_reg_rtx (mode);
12061 emit_insn (gen_andv4si3 (t2, cop0, mask));
12062
12063 /* XOR it back into the result of the subtraction. This results
12064 in the sign bit set iff we saw unsigned underflow. */
12065 x = gen_reg_rtx (mode);
12066 emit_insn (gen_xorv4si3 (x, t1, t2));
12067
12068 code = GT;
12069 }
12070 break;
12071
12072 case V16QImode:
12073 case V8HImode:
12074 /* Perform a parallel unsigned saturating subtraction. */
12075 x = gen_reg_rtx (mode);
12076 emit_insn (gen_rtx_SET (VOIDmode, x,
12077 gen_rtx_US_MINUS (mode, cop0, cop1)));
12078
12079 code = EQ;
12080 negate = !negate;
12081 break;
12082
12083 default:
12084 gcc_unreachable ();
12085 }
12086
12087 cop0 = x;
12088 cop1 = CONST0_RTX (mode);
12089 }
12090
12091 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12092 operands[1+negate], operands[2-negate]);
12093
12094 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12095 operands[2-negate]);
12096 return true;
12097}
12098
12099 /* Expand a conditional increment or decrement using adc/sbb instructions.
12100    The default case using setcc followed by the conditional move can be
12101    done by generic code.  */
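/* For example, an unsigned "dest = src + (a < b)" is emitted as a compare
   that leaves the carry flag equal to (a < b) followed by an add-with-carry
   of zero into dest ("adcl $0, dest" style).  This is illustrative; the
   exact insn chosen below depends on the mode and on whether an increment
   or a decrement was requested.  */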
12102int
12103ix86_expand_int_addcc (rtx operands[])
12104{
12105 enum rtx_code code = GET_CODE (operands[1]);
12106 rtx compare_op;
12107 rtx val = const0_rtx;
12108 bool fpcmp = false;
12109 enum machine_mode mode = GET_MODE (operands[0]);
12110
12111 if (operands[3] != const1_rtx
12112 && operands[3] != constm1_rtx)
12113 return 0;
12114 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12115 ix86_compare_op1, &compare_op))
12116 return 0;
12117 code = GET_CODE (compare_op);
12118
12119 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12120 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12121 {
12122 fpcmp = true;
12123 code = ix86_fp_compare_code_to_integer (code);
12124 }
12125
12126 if (code != LTU)
12127 {
12128 val = constm1_rtx;
12129 if (fpcmp)
12130 PUT_CODE (compare_op,
12131 reverse_condition_maybe_unordered
12132 (GET_CODE (compare_op)));
12133 else
12134 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12135 }
12136 PUT_MODE (compare_op, mode);
12137
12138 /* Construct either adc or sbb insn. */
12139 if ((code == LTU) == (operands[3] == constm1_rtx))
12140 {
12141 switch (GET_MODE (operands[0]))
12142 {
12143 case QImode:
12144 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12145 break;
12146 case HImode:
12147 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12148 break;
12149 case SImode:
12150 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12151 break;
12152 case DImode:
12153 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12154 break;
12155 default:
12156 gcc_unreachable ();
12157 }
12158 }
12159 else
12160 {
12161 switch (GET_MODE (operands[0]))
12162 {
12163 case QImode:
12164 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12165 break;
12166 case HImode:
12167 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12168 break;
12169 case SImode:
12170 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12171 break;
12172 case DImode:
12173 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12174 break;
12175 default:
12176 gcc_unreachable ();
12177 }
12178 }
12179 return 1; /* DONE */
12180}
12181
12182
12183 /* Split OPERAND into word-sized parts and store them in PARTS.  Similar to
12184    split_di, but works for floating-point parameters and non-offsettable
12185    memories.  For pushes, it returns just stack offsets; the values will be
12186    saved in the right order.  At most three parts are generated.  */
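/* For instance, on a 32-bit target an XFmode operand yields three SImode
   parts and a DFmode or DImode operand yields two, while on a 64-bit target
   XFmode and TFmode yield a DImode part plus one upper SImode or DImode
   part.  The return value is the number of parts produced.  */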
12187
12188static int
12189ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12190{
12191 int size;
12192
12193 if (!TARGET_64BIT)
12194 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12195 else
12196 size = (GET_MODE_SIZE (mode) + 4) / 8;
12197
12198 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
12199 gcc_assert (size >= 2 && size <= 3);
12200
12201   /* Optimize constant pool references to immediates.  This is used by fp
12202      moves that force all constants to memory to allow combining.  */
12203 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
12204 {
12205 rtx tmp = maybe_get_pool_constant (operand);
12206 if (tmp)
12207 operand = tmp;
12208 }
12209
12210 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
12211 {
12212       /* The only non-offsettable memories we handle are pushes.  */
12213 int ok = push_operand (operand, VOIDmode);
12214
12215 gcc_assert (ok);
12216
12217 operand = copy_rtx (operand);
12218 PUT_MODE (operand, Pmode);
12219 parts[0] = parts[1] = parts[2] = operand;
12220 return size;
12221 }
12222
12223 if (GET_CODE (operand) == CONST_VECTOR)
12224 {
12225 enum machine_mode imode = int_mode_for_mode (mode);
12226 /* Caution: if we looked through a constant pool memory above,
12227 the operand may actually have a different mode now. That's
12228 ok, since we want to pun this all the way back to an integer. */
12229 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12230 gcc_assert (operand != NULL);
12231 mode = imode;
12232 }
12233
12234 if (!TARGET_64BIT)
12235 {
12236 if (mode == DImode)
12237 split_di (&operand, 1, &parts[0], &parts[1]);
12238 else
12239 {
12240 if (REG_P (operand))
12241 {
12242 gcc_assert (reload_completed);
12243 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12244 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12245 if (size == 3)
12246 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12247 }
12248 else if (offsettable_memref_p (operand))
12249 {
12250 operand = adjust_address (operand, SImode, 0);
12251 parts[0] = operand;
12252 parts[1] = adjust_address (operand, SImode, 4);
12253 if (size == 3)
12254 parts[2] = adjust_address (operand, SImode, 8);
12255 }
12256 else if (GET_CODE (operand) == CONST_DOUBLE)
12257 {
12258 REAL_VALUE_TYPE r;
12259 long l[4];
12260
12261 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12262 switch (mode)
12263 {
12264 case XFmode:
12265 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12266 parts[2] = gen_int_mode (l[2], SImode);
12267 break;
12268 case DFmode:
12269 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12270 break;
12271 default:
12272 gcc_unreachable ();
12273 }
12274 parts[1] = gen_int_mode (l[1], SImode);
12275 parts[0] = gen_int_mode (l[0], SImode);
12276 }
12277 else
12278 gcc_unreachable ();
12279 }
12280 }
12281 else
12282 {
12283 if (mode == TImode)
12284 split_ti (&operand, 1, &parts[0], &parts[1]);
12285 if (mode == XFmode || mode == TFmode)
12286 {
12287 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12288 if (REG_P (operand))
12289 {
12290 gcc_assert (reload_completed);
12291 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12292 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12293 }
12294 else if (offsettable_memref_p (operand))
12295 {
12296 operand = adjust_address (operand, DImode, 0);
12297 parts[0] = operand;
12298 parts[1] = adjust_address (operand, upper_mode, 8);
12299 }
12300 else if (GET_CODE (operand) == CONST_DOUBLE)
12301 {
12302 REAL_VALUE_TYPE r;
12303 long l[4];
12304
12305 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12306 real_to_target (l, &r, mode);
12307
12308 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12309 if (HOST_BITS_PER_WIDE_INT >= 64)
12310 parts[0]
12311 = gen_int_mode
12312 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12313 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12314 DImode);
12315 else
12316 parts[0] = immed_double_const (l[0], l[1], DImode);
12317
12318 if (upper_mode == SImode)
12319 parts[1] = gen_int_mode (l[2], SImode);
12320 else if (HOST_BITS_PER_WIDE_INT >= 64)
12321 parts[1]
12322 = gen_int_mode
12323 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12324 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12325 DImode);
12326 else
12327 parts[1] = immed_double_const (l[2], l[3], DImode);
12328 }
12329 else
12330 gcc_unreachable ();
12331 }
12332 }
12333
12334 return size;
12335}
12336
12337 /* Emit insns to perform a move or push of DI, DF, and XF values.
12338    Operands 2-4 receive the destination parts in the correct order;
12339    operands 5-7 receive the corresponding source parts.  */
12341
12342void
12343ix86_split_long_move (rtx operands[])
12344{
12345 rtx part[2][3];
12346 int nparts;
12347 int push = 0;
12348 int collisions = 0;
12349 enum machine_mode mode = GET_MODE (operands[0]);
12350
12351   /* The DFmode expanders may ask us to move a double.
12352      For a 64-bit target this is a single move.  By hiding that fact
12353      here we simplify the i386.md splitters.  */
12354 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12355 {
12356 /* Optimize constant pool reference to immediates. This is used by
12357 fp moves, that force all constants to memory to allow combining. */
12358
12359 if (GET_CODE (operands[1]) == MEM
12360 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12361 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12362 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12363 if (push_operand (operands[0], VOIDmode))
12364 {
12365 operands[0] = copy_rtx (operands[0]);
12366 PUT_MODE (operands[0], Pmode);
12367 }
12368 else
12369 operands[0] = gen_lowpart (DImode, operands[0]);
12370 operands[1] = gen_lowpart (DImode, operands[1]);
12371 emit_move_insn (operands[0], operands[1]);
12372 return;
12373 }
12374
12375 /* The only non-offsettable memory we handle is push. */
12376 if (push_operand (operands[0], VOIDmode))
12377 push = 1;
12378 else
12379 gcc_assert (GET_CODE (operands[0]) != MEM
12380 || offsettable_memref_p (operands[0]));
12381
12382 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12383 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12384
12385   /* When emitting a push, be careful with source operands that live on the stack.  */
12386 if (push && GET_CODE (operands[1]) == MEM
12387 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12388 {
12389 if (nparts == 3)
12390 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12391 XEXP (part[1][2], 0));
12392 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12393 XEXP (part[1][1], 0));
12394 }
12395
12396   /* We need to do the copy in the right order in case an address register
12397      of the source overlaps the destination.  */
12398 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12399 {
12400 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12401 collisions++;
12402 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12403 collisions++;
12404 if (nparts == 3
12405 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12406 collisions++;
12407
12408 /* Collision in the middle part can be handled by reordering. */
12409 if (collisions == 1 && nparts == 3
12410 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12411 {
12412 rtx tmp;
12413 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12414 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12415 }
12416
12417 /* If there are more collisions, we can't handle it by reordering.
12418 Do an lea to the last part and use only one colliding move. */
12419 else if (collisions > 1)
12420 {
12421 rtx base;
12422
12423 collisions = 1;
12424
12425 base = part[0][nparts - 1];
12426
12427 /* Handle the case when the last part isn't valid for lea.
12428 Happens in 64-bit mode storing the 12-byte XFmode. */
12429 if (GET_MODE (base) != Pmode)
12430 base = gen_rtx_REG (Pmode, REGNO (base));
12431
12432 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12433 part[1][0] = replace_equiv_address (part[1][0], base);
12434 part[1][1] = replace_equiv_address (part[1][1],
12435 plus_constant (base, UNITS_PER_WORD));
12436 if (nparts == 3)
12437 part[1][2] = replace_equiv_address (part[1][2],
12438 plus_constant (base, 8));
12439 }
12440 }
12441
12442 if (push)
12443 {
12444 if (!TARGET_64BIT)
12445 {
12446 if (nparts == 3)
12447 {
12448 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12449 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12450 emit_move_insn (part[0][2], part[1][2]);
12451 }
12452 }
12453 else
12454 {
12455 	  /* In 64-bit mode we don't have a 32-bit push available.  If the operand
12456 	     is a register, that is OK - we just use the larger counterpart.  We also
12457 	     retype memory - such operands come from an attempt to avoid a REX prefix
12458 	     when moving the second half of a TFmode value.  */
12459 if (GET_MODE (part[1][1]) == SImode)
12460 {
12461 switch (GET_CODE (part[1][1]))
12462 {
12463 case MEM:
12464 part[1][1] = adjust_address (part[1][1], DImode, 0);
12465 break;
12466
12467 case REG:
12468 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12469 break;
12470
12471 default:
12472 gcc_unreachable ();
12473 }
12474
12475 if (GET_MODE (part[1][0]) == SImode)
12476 part[1][0] = part[1][1];
12477 }
12478 }
12479 emit_move_insn (part[0][1], part[1][1]);
12480 emit_move_insn (part[0][0], part[1][0]);
12481 return;
12482 }
12483
12484   /* Choose the correct order so as not to overwrite the source before it is copied.  */
12485 if ((REG_P (part[0][0])
12486 && REG_P (part[1][1])
12487 && (REGNO (part[0][0]) == REGNO (part[1][1])
12488 || (nparts == 3
12489 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12490 || (collisions > 0
12491 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12492 {
12493 if (nparts == 3)
12494 {
12495 operands[2] = part[0][2];
12496 operands[3] = part[0][1];
12497 operands[4] = part[0][0];
12498 operands[5] = part[1][2];
12499 operands[6] = part[1][1];
12500 operands[7] = part[1][0];
12501 }
12502 else
12503 {
12504 operands[2] = part[0][1];
12505 operands[3] = part[0][0];
12506 operands[5] = part[1][1];
12507 operands[6] = part[1][0];
12508 }
12509 }
12510 else
12511 {
12512 if (nparts == 3)
12513 {
12514 operands[2] = part[0][0];
12515 operands[3] = part[0][1];
12516 operands[4] = part[0][2];
12517 operands[5] = part[1][0];
12518 operands[6] = part[1][1];
12519 operands[7] = part[1][2];
12520 }
12521 else
12522 {
12523 operands[2] = part[0][0];
12524 operands[3] = part[0][1];
12525 operands[5] = part[1][0];
12526 operands[6] = part[1][1];
12527 }
12528 }
12529
12530 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12531 if (optimize_size)
12532 {
12533 if (GET_CODE (operands[5]) == CONST_INT
12534 && operands[5] != const0_rtx
12535 && REG_P (operands[2]))
12536 {
12537 if (GET_CODE (operands[6]) == CONST_INT
12538 && INTVAL (operands[6]) == INTVAL (operands[5]))
12539 operands[6] = operands[2];
12540
12541 if (nparts == 3
12542 && GET_CODE (operands[7]) == CONST_INT
12543 && INTVAL (operands[7]) == INTVAL (operands[5]))
12544 operands[7] = operands[2];
12545 }
12546
12547 if (nparts == 3
12548 && GET_CODE (operands[6]) == CONST_INT
12549 && operands[6] != const0_rtx
12550 && REG_P (operands[3])
12551 && GET_CODE (operands[7]) == CONST_INT
12552 && INTVAL (operands[7]) == INTVAL (operands[6]))
12553 operands[7] = operands[3];
12554 }
12555
12556 emit_move_insn (operands[2], operands[5]);
12557 emit_move_insn (operands[3], operands[6]);
12558 if (nparts == 3)
12559 emit_move_insn (operands[4], operands[7]);
12560
12561 return;
12562}
12563
12564/* Helper function of ix86_split_ashl used to generate an SImode/DImode
12565 left shift by a constant, either using a single shift or
12566 a sequence of add instructions. */
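/* For example, when adds are cheap a left shift by 2 may be emitted as two
   "addl %reg, %reg" instructions instead of a single "sall $2, %reg"; the
   choice is made below by comparing COUNT * ix86_cost->add against
   ix86_cost->shift_const.  */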
12567
12568static void
12569ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12570{
12571 if (count == 1)
12572 {
12573 emit_insn ((mode == DImode
12574 ? gen_addsi3
12575 : gen_adddi3) (operand, operand, operand));
12576 }
12577 else if (!optimize_size
12578 && count * ix86_cost->add <= ix86_cost->shift_const)
12579 {
12580 int i;
12581 for (i=0; i<count; i++)
12582 {
12583 emit_insn ((mode == DImode
12584 ? gen_addsi3
12585 : gen_adddi3) (operand, operand, operand));
12586 }
12587 }
12588 else
12589 emit_insn ((mode == DImode
12590 ? gen_ashlsi3
12591 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12592}
12593
12594void
12595ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12596{
12597 rtx low[2], high[2];
12598 int count;
12599 const int single_width = mode == DImode ? 32 : 64;
12600
12601 if (GET_CODE (operands[2]) == CONST_INT)
12602 {
12603 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12604 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12605
12606 if (count >= single_width)
12607 {
12608 emit_move_insn (high[0], low[1]);
12609 emit_move_insn (low[0], const0_rtx);
12610
12611 if (count > single_width)
12612 ix86_expand_ashl_const (high[0], count - single_width, mode);
12613 }
12614 else
12615 {
12616 if (!rtx_equal_p (operands[0], operands[1]))
12617 emit_move_insn (operands[0], operands[1]);
12618 emit_insn ((mode == DImode
12619 ? gen_x86_shld_1
12620 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12621 ix86_expand_ashl_const (low[0], count, mode);
12622 }
12623 return;
12624 }
12625
12626 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12627
12628 if (operands[1] == const1_rtx)
12629 {
12630       /* Assuming we've chosen QImode-capable registers, 1 << N
12631 	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
12632 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12633 {
12634 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12635
12636 ix86_expand_clear (low[0]);
12637 ix86_expand_clear (high[0]);
12638 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12639
12640 d = gen_lowpart (QImode, low[0]);
12641 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12642 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12643 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12644
12645 d = gen_lowpart (QImode, high[0]);
12646 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12647 s = gen_rtx_NE (QImode, flags, const0_rtx);
12648 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12649 }
12650
12651 /* Otherwise, we can get the same results by manually performing
12652 a bit extract operation on bit 5/6, and then performing the two
12653 shifts. The two methods of getting 0/1 into low/high are exactly
12654 the same size. Avoiding the shift in the bit extract case helps
12655 pentium4 a bit; no one else seems to care much either way. */
12656 else
12657 {
12658 rtx x;
12659
12660 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12661 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12662 else
12663 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12664 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12665
12666 emit_insn ((mode == DImode
12667 ? gen_lshrsi3
12668 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12669 emit_insn ((mode == DImode
12670 ? gen_andsi3
12671 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12672 emit_move_insn (low[0], high[0]);
12673 emit_insn ((mode == DImode
12674 ? gen_xorsi3
12675 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12676 }
12677
12678 emit_insn ((mode == DImode
12679 ? gen_ashlsi3
12680 : gen_ashldi3) (low[0], low[0], operands[2]));
12681 emit_insn ((mode == DImode
12682 ? gen_ashlsi3
12683 : gen_ashldi3) (high[0], high[0], operands[2]));
12684 return;
12685 }
12686
12687 if (operands[1] == constm1_rtx)
12688 {
12689 /* For -1 << N, we can avoid the shld instruction, because we
12690 know that we're shifting 0...31/63 ones into a -1. */
12691 emit_move_insn (low[0], constm1_rtx);
12692 if (optimize_size)
12693 emit_move_insn (high[0], low[0]);
12694 else
12695 emit_move_insn (high[0], constm1_rtx);
12696 }
12697 else
12698 {
12699 if (!rtx_equal_p (operands[0], operands[1]))
12700 emit_move_insn (operands[0], operands[1]);
12701
12702 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12703 emit_insn ((mode == DImode
12704 ? gen_x86_shld_1
12705 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12706 }
12707
12708 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12709
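  /* The shld/shift pair above is only correct when the variable count is
     below the word size.  The adjustment below fixes up the result when the
     count is 32..63 (64..127 for TImode): with cmov it conditionally moves
     the low word into the high word and clears the low word, and without
     cmov the x86_shift_adj_2 expander does the same thing with a conditional
     jump.  This describes the intent of the md patterns, not their exact
     rtl.  */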
12710 if (TARGET_CMOVE && scratch)
12711 {
12712 ix86_expand_clear (scratch);
12713 emit_insn ((mode == DImode
12714 ? gen_x86_shift_adj_1
12715 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12716 }
12717 else
12718 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12719}
12720
12721void
12722ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12723{
12724 rtx low[2], high[2];
12725 int count;
12726 const int single_width = mode == DImode ? 32 : 64;
12727
12728 if (GET_CODE (operands[2]) == CONST_INT)
12729 {
12730 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12731 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12732
12733 if (count == single_width * 2 - 1)
12734 {
12735 emit_move_insn (high[0], high[1]);
12736 emit_insn ((mode == DImode
12737 ? gen_ashrsi3
12738 : gen_ashrdi3) (high[0], high[0],
12739 GEN_INT (single_width - 1)));
12740 emit_move_insn (low[0], high[0]);
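          /* Both halves now hold just the sign of the input, which is the
             correct result of an arithmetic shift right by 2*width - 1 bits.  */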
12741
12742 }
12743 else if (count >= single_width)
12744 {
12745 emit_move_insn (low[0], high[1]);
12746 emit_move_insn (high[0], low[0]);
12747 emit_insn ((mode == DImode
12748 ? gen_ashrsi3
12749 : gen_ashrdi3) (high[0], high[0],
12750 GEN_INT (single_width - 1)));
12751 if (count > single_width)
12752 emit_insn ((mode == DImode
12753 ? gen_ashrsi3
12754 : gen_ashrdi3) (low[0], low[0],
12755 GEN_INT (count - single_width)));
12756 }
12757 else
12758 {
12759 if (!rtx_equal_p (operands[0], operands[1]))
12760 emit_move_insn (operands[0], operands[1]);
12761 emit_insn ((mode == DImode
12762 ? gen_x86_shrd_1
12763 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12764 emit_insn ((mode == DImode
12765 ? gen_ashrsi3
12766 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12767 }
12768 }
12769 else
12770 {
12771 if (!rtx_equal_p (operands[0], operands[1]))
12772 emit_move_insn (operands[0], operands[1]);
12773
12774 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12775
12776 emit_insn ((mode == DImode
12777 ? gen_x86_shrd_1
12778 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12779 emit_insn ((mode == DImode
12780 ? gen_ashrsi3
12781 : gen_ashrdi3) (high[0], high[0], operands[2]));
12782
12783 if (TARGET_CMOVE && scratch)
12784 {
12785 emit_move_insn (scratch, high[0]);
12786 emit_insn ((mode == DImode
12787 ? gen_ashrsi3
12788 : gen_ashrdi3) (scratch, scratch,
12789 GEN_INT (single_width - 1)));
12790 emit_insn ((mode == DImode
12791 ? gen_x86_shift_adj_1
12792 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12793 scratch));
12794 }
12795 else
12796 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12797 }
12798}
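
#if 0
/* Illustrative sketch only (dw_ashr_ge32 is a hypothetical name): the value
   produced by the constant-count branch above when count >= 32 for DImode,
   assuming '>>' on a signed int is an arithmetic shift.  */
static void
dw_ashr_ge32 (int *lo, int *hi, unsigned int count)
{
  *lo = *hi >> (count - 32);    /* old high half, further shifted */
  *hi = *hi >> 31;              /* replicated sign bit */
}
#endif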
12799
12800void
12801ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12802{
12803 rtx low[2], high[2];
12804 int count;
12805 const int single_width = mode == DImode ? 32 : 64;
12806
12807 if (GET_CODE (operands[2]) == CONST_INT)
12808 {
12809 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12810 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12811
12812 if (count >= single_width)
12813 {
12814 emit_move_insn (low[0], high[1]);
12815 ix86_expand_clear (high[0]);
12816
12817 if (count > single_width)
12818 emit_insn ((mode == DImode
12819 ? gen_lshrsi3
12820 : gen_lshrdi3) (low[0], low[0],
12821 GEN_INT (count - single_width)));
12822 }
12823 else
12824 {
12825 if (!rtx_equal_p (operands[0], operands[1]))
12826 emit_move_insn (operands[0], operands[1]);
12827 emit_insn ((mode == DImode
12828 ? gen_x86_shrd_1
12829 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12830 emit_insn ((mode == DImode
12831 ? gen_lshrsi3
12832 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12833 }
12834 }
12835 else
12836 {
12837 if (!rtx_equal_p (operands[0], operands[1]))
12838 emit_move_insn (operands[0], operands[1]);
12839
12840 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12841
12842 emit_insn ((mode == DImode
12843 ? gen_x86_shrd_1
12844 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12845 emit_insn ((mode == DImode
12846 ? gen_lshrsi3
12847 : gen_lshrdi3) (high[0], high[0], operands[2]));
12848
12849 /* Heh. By reversing the arguments, we can reuse this pattern. */
12850 if (TARGET_CMOVE && scratch)
12851 {
12852 ix86_expand_clear (scratch);
12853 emit_insn ((mode == DImode
12854 ? gen_x86_shift_adj_1
12855 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12856 scratch));
12857 }
12858 else
12859 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12860 }
12861}
12862
12863/* Helper function for the string operations below. Test whether the bits
12864 of VARIABLE given by VALUE are clear; if so, jump to the returned label. */
12865static rtx
12866ix86_expand_aligntest (rtx variable, int value)
12867{
12868 rtx label = gen_label_rtx ();
12869 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12870 if (GET_MODE (variable) == DImode)
12871 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12872 else
12873 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12874 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12875 1, label);
12876 return label;
12877}
12878
12879/* Decrement COUNTREG by VALUE. */
12880static void
12881ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12882{
12883 if (GET_MODE (countreg) == DImode)
12884 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12885 else
12886 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12887}
12888
12889/* Zero extend possibly SImode EXP to Pmode register. */
12890rtx
12891ix86_zero_extend_to_Pmode (rtx exp)
12892{
12893 rtx r;
12894 if (GET_MODE (exp) == VOIDmode)
12895 return force_reg (Pmode, exp);
12896 if (GET_MODE (exp) == Pmode)
12897 return copy_to_mode_reg (Pmode, exp);
12898 r = gen_reg_rtx (Pmode);
12899 emit_insn (gen_zero_extendsidi2 (r, exp));
12900 return r;
12901}
12902
12903/* Expand string move (memcpy) operation. Use i386 string operations when
12904 profitable. ix86_expand_clrmem contains similar code. */
12905int
12906ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12907{
12908 rtx srcreg, destreg, countreg, srcexp, destexp;
12909 enum machine_mode counter_mode;
12910 HOST_WIDE_INT align = 0;
12911 unsigned HOST_WIDE_INT count = 0;
12912
12913 if (GET_CODE (align_exp) == CONST_INT)
12914 align = INTVAL (align_exp);
12915
12916 /* Can't use any of this if the user has appropriated esi or edi. */
12917 if (global_regs[4] || global_regs[5])
12918 return 0;
12919
12920 /* This simple hack avoids all inlining code and simplifies code below. */
12921 if (!TARGET_ALIGN_STRINGOPS)
12922 align = 64;
12923
12924 if (GET_CODE (count_exp) == CONST_INT)
12925 {
12926 count = INTVAL (count_exp);
12927 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12928 return 0;
12929 }
12930
12931 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
12932 for 64 bits use SImode when possible, otherwise DImode.
12933 Set count to the number of bytes copied when known at compile time. */
12934 if (!TARGET_64BIT
12935 || GET_MODE (count_exp) == SImode
12936 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12937 counter_mode = SImode;
12938 else
12939 counter_mode = DImode;
12940
12941 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12942
12943 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12944 if (destreg != XEXP (dst, 0))
12945 dst = replace_equiv_address_nv (dst, destreg);
12946 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12947 if (srcreg != XEXP (src, 0))
12948 src = replace_equiv_address_nv (src, srcreg);
12949
12950 /* When optimizing for size, emit a simple rep ; movsb instruction for
12951 counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12952 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12953 The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12954 count / 4 + (count & 3) bytes, while the other sequence is either 4 or 7 bytes,
12955 but we don't know whether the upper 24 (resp. 56) bits of %ecx will be
12956 known to be zero or not. The rep; movsb sequence causes higher
12957 register pressure though, so take that into account. */
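      /* For example, with count == 11 the unrolled sequence is two movsl, one
         movsw and one movsb, i.e. 2 + 2 + 1 = 5 bytes of code, versus 7 bytes
         for movl $11, %ecx; rep; movsb.  */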
12958
12959 if ((!optimize || optimize_size)
12960 && (count == 0
12961 || ((count & 0x03)
12962 && (!optimize_size
12963 || count > 5 * 4
12964 || (count & 3) + count / 4 > 6))))
12965 {
12966 emit_insn (gen_cld ());
12967 countreg = ix86_zero_extend_to_Pmode (count_exp);
12968 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12969 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12970 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12971 destexp, srcexp));
12972 }
12973
12974 /* For constant aligned (or small unaligned) copies use rep movsl
12975 followed by code copying the rest. For PentiumPro ensure 8 byte
12976 alignment to allow rep movsl acceleration. */
12977
12978 else if (count != 0
12979 && (align >= 8
12980 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12981 || optimize_size || count < (unsigned int) 64))
12982 {
12983 unsigned HOST_WIDE_INT offset = 0;
12984 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12985 rtx srcmem, dstmem;
12986
12987 emit_insn (gen_cld ());
12988 if (count & ~(size - 1))
12989 {
12990 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12991 {
12992 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12993
12994 while (offset < (count & ~(size - 1)))
12995 {
12996 srcmem = adjust_automodify_address_nv (src, movs_mode,
12997 srcreg, offset);
12998 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12999 destreg, offset);
13000 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13001 offset += size;
13002 }
13003 }
13004 else
13005 {
13006 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
13007 & (TARGET_64BIT ? -1 : 0x3fffffff));
13008 countreg = copy_to_mode_reg (counter_mode, countreg);
13009 countreg = ix86_zero_extend_to_Pmode (countreg);
13010
13011 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13012 GEN_INT (size == 4 ? 2 : 3));
13013 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13014 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13015
13016 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13017 countreg, destexp, srcexp));
13018 offset = count & ~(size - 1);
13019 }
13020 }
13021 if (size == 8 && (count & 0x04))
13022 {
13023 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
13024 offset);
13025 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
13026 offset);
13027 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13028 offset += 4;
13029 }
13030 if (count & 0x02)
13031 {
13032 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
13033 offset);
13034 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
13035 offset);
13036 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13037 offset += 2;
13038 }
13039 if (count & 0x01)
13040 {
13041 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
13042 offset);
13043 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
13044 offset);
13045 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13046 }
13047 }
13048 /* The generic code based on the glibc implementation:
13049 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
13050 allowing accelerated copying there)
13051 - copy the data using rep movsl
13052 - copy the rest. */
13053 else
13054 {
13055 rtx countreg2;
13056 rtx label = NULL;
13057 rtx srcmem, dstmem;
13058 int desired_alignment = (TARGET_PENTIUMPRO
13059 && (count == 0 || count >= (unsigned int) 260)
13060 ? 8 : UNITS_PER_WORD);
13061 /* Get rid of MEM_OFFSETs, they won't be accurate. */
13062 dst = change_address (dst, BLKmode, destreg);
13063 src = change_address (src, BLKmode, srcreg);
13064
13065 /* In case we don't know anything about the alignment, default to the
13066 library version, since it is usually equally fast and results in
13067 shorter code.
13068
13069 Also emit a call when we know that the count is large and the call
13070 overhead will not be important. */
13071 if (!TARGET_INLINE_ALL_STRINGOPS
13072 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13073 return 0;
13074
13075 if (TARGET_SINGLE_STRINGOP)
13076 emit_insn (gen_cld ());
13077
13078 countreg2 = gen_reg_rtx (Pmode);
13079 countreg = copy_to_mode_reg (counter_mode, count_exp);
13080
13081 /* We don't use loops to align destination and to copy parts smaller
13082 than 4 bytes, because gcc is able to optimize such code better (in
13083 the case the destination or the count really is aligned, gcc is often
13084 able to predict the branches) and also it is friendlier to the
13085 hardware branch prediction.
13086
13087 Using loops is beneficial for the generic case, because we can
13088 handle small counts using the loops. Many CPUs (such as Athlon)
13089 have large REP prefix setup costs.
13090
13091 This is quite costly. Maybe we can revisit this decision later or
13092 add some customizability to this code. */
13093
13094 if (count == 0 && align < desired_alignment)
13095 {
13096 label = gen_label_rtx ();
13097 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13098 LEU, 0, counter_mode, 1, label);
13099 }
13100 if (align <= 1)
13101 {
13102 rtx label = ix86_expand_aligntest (destreg, 1);
13103 srcmem = change_address (src, QImode, srcreg);
13104 dstmem = change_address (dst, QImode, destreg);
13105 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13106 ix86_adjust_counter (countreg, 1);
13107 emit_label (label);
13108 LABEL_NUSES (label) = 1;
13109 }
13110 if (align <= 2)
13111 {
13112 rtx label = ix86_expand_aligntest (destreg, 2);
13113 srcmem = change_address (src, HImode, srcreg);
13114 dstmem = change_address (dst, HImode, destreg);
13115 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13116 ix86_adjust_counter (countreg, 2);
13117 emit_label (label);
13118 LABEL_NUSES (label) = 1;
13119 }
13120 if (align <= 4 && desired_alignment > 4)
13121 {
13122 rtx label = ix86_expand_aligntest (destreg, 4);
13123 srcmem = change_address (src, SImode, srcreg);
13124 dstmem = change_address (dst, SImode, destreg);
13125 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13126 ix86_adjust_counter (countreg, 4);
13127 emit_label (label);
13128 LABEL_NUSES (label) = 1;
13129 }
13130
13131 if (label && desired_alignment > 4 && !TARGET_64BIT)
13132 {
13133 emit_label (label);
13134 LABEL_NUSES (label) = 1;
13135 label = NULL_RTX;
13136 }
13137 if (!TARGET_SINGLE_STRINGOP)
13138 emit_insn (gen_cld ());
13139 if (TARGET_64BIT)
13140 {
13141 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13142 GEN_INT (3)));
13143 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13144 }
13145 else
13146 {
13147 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13148 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13149 }
13150 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13151 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13152 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13153 countreg2, destexp, srcexp));
13154
13155 if (label)
13156 {
13157 emit_label (label);
13158 LABEL_NUSES (label) = 1;
13159 }
13160 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13161 {
13162 srcmem = change_address (src, SImode, srcreg);
13163 dstmem = change_address (dst, SImode, destreg);
13164 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13165 }
13166 if ((align <= 4 || count == 0) && TARGET_64BIT)
13167 {
13168 rtx label = ix86_expand_aligntest (countreg, 4);
13169 srcmem = change_address (src, SImode, srcreg);
13170 dstmem = change_address (dst, SImode, destreg);
13171 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13172 emit_label (label);
13173 LABEL_NUSES (label) = 1;
13174 }
13175 if (align > 2 && count != 0 && (count & 2))
13176 {
13177 srcmem = change_address (src, HImode, srcreg);
13178 dstmem = change_address (dst, HImode, destreg);
13179 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13180 }
13181 if (align <= 2 || count == 0)
13182 {
13183 rtx label = ix86_expand_aligntest (countreg, 2);
13184 srcmem = change_address (src, HImode, srcreg);
13185 dstmem = change_address (dst, HImode, destreg);
13186 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13187 emit_label (label);
13188 LABEL_NUSES (label) = 1;
13189 }
13190 if (align > 1 && count != 0 && (count & 1))
13191 {
13192 srcmem = change_address (src, QImode, srcreg);
13193 dstmem = change_address (dst, QImode, destreg);
13194 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13195 }
13196 if (align <= 1 || count == 0)
13197 {
13198 rtx label = ix86_expand_aligntest (countreg, 1);
13199 srcmem = change_address (src, QImode, srcreg);
13200 dstmem = change_address (dst, QImode, destreg);
13201 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13202 emit_label (label);
13203 LABEL_NUSES (label) = 1;
13204 }
13205 }
13206
13207 return 1;
13208}
13209
13210/* Expand string clear operation (bzero). Use i386 string operations when
13211 profitable. ix86_expand_movmem contains similar code. */
13212int
13213ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
13214{
13215 rtx destreg, zeroreg, countreg, destexp;
13216 enum machine_mode counter_mode;
13217 HOST_WIDE_INT align = 0;
13218 unsigned HOST_WIDE_INT count = 0;
13219
13220 if (GET_CODE (align_exp) == CONST_INT)
13221 align = INTVAL (align_exp);
13222
13223 /* Can't use any of this if the user has appropriated esi. */
13224 if (global_regs[4])
13225 return 0;
13226
13227 /* This simple hack avoids all inlining code and simplifies code below. */
13228 if (!TARGET_ALIGN_STRINGOPS)
13229 align = 32;
13230
13231 if (GET_CODE (count_exp) == CONST_INT)
13232 {
13233 count = INTVAL (count_exp);
13234 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13235 return 0;
13236 }
13237 /* Figure out the proper mode for the counter. For 32 bits it is always SImode;
13238 for 64 bits use SImode when possible, otherwise DImode.
13239 Set count to the number of bytes cleared when known at compile time. */
13240 if (!TARGET_64BIT
13241 || GET_MODE (count_exp) == SImode
13242 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13243 counter_mode = SImode;
13244 else
13245 counter_mode = DImode;
13246
13247 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13248 if (destreg != XEXP (dst, 0))
13249 dst = replace_equiv_address_nv (dst, destreg);
13250
13251
13252 /* When optimizing for size, emit a simple rep ; stosb instruction for
13253 counts not divisible by 4. The movl $N, %ecx; rep; stosb
13254 sequence is 7 bytes long, so if optimizing for size and the count is
13255 small enough that some stosl, stosw and stosb instructions without
13256 rep are shorter, fall through to the next if. */
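      /* For example, with count == 11 the non-rep sequence is two stosl, one
         stosw and one stosb, i.e. 2 + 2 + 1 = 5 bytes, which beats the 7-byte
         movl $11, %ecx; rep; stosb sequence, so we fall through.  */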
13257
13258 if ((!optimize || optimize_size)
13259 && (count == 0
13260 || ((count & 0x03)
13261 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13262 {
13263 emit_insn (gen_cld ());
13264
13265 countreg = ix86_zero_extend_to_Pmode (count_exp);
13266 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13267 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13268 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13269 }
13270 else if (count != 0
13271 && (align >= 8
13272 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13273 || optimize_size || count < (unsigned int) 64))
13274 {
13275 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13276 unsigned HOST_WIDE_INT offset = 0;
13277
13278 emit_insn (gen_cld ());
13279
13280 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13281 if (count & ~(size - 1))
13282 {
13283 unsigned HOST_WIDE_INT repcount;
13284 unsigned int max_nonrep;
13285
13286 repcount = count >> (size == 4 ? 2 : 3);
13287 if (!TARGET_64BIT)
13288 repcount &= 0x3fffffff;
13289
13290 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13291 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13292 bytes. In both cases the latter seems to be faster for small
13293 values of N. */
13294 max_nonrep = size == 4 ? 7 : 4;
13295 if (!optimize_size)
13296 switch (ix86_tune)
13297 {
13298 case PROCESSOR_PENTIUM4:
13299 case PROCESSOR_NOCONA:
13300 max_nonrep = 3;
13301 break;
13302 default:
13303 break;
13304 }
13305
13306 if (repcount <= max_nonrep)
13307 while (repcount-- > 0)
13308 {
13309 rtx mem = adjust_automodify_address_nv (dst,
13310 GET_MODE (zeroreg),
13311 destreg, offset);
13312 emit_insn (gen_strset (destreg, mem, zeroreg));
13313 offset += size;
13314 }
13315 else
13316 {
13317 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13318 countreg = ix86_zero_extend_to_Pmode (countreg);
13319 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13320 GEN_INT (size == 4 ? 2 : 3));
13321 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13322 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13323 destexp));
13324 offset = count & ~(size - 1);
13325 }
13326 }
13327 if (size == 8 && (count & 0x04))
13328 {
13329 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13330 offset);
13331 emit_insn (gen_strset (destreg, mem,
13332 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13333 offset += 4;
13334 }
13335 if (count & 0x02)
13336 {
13337 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13338 offset);
13339 emit_insn (gen_strset (destreg, mem,
13340 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13341 offset += 2;
13342 }
13343 if (count & 0x01)
13344 {
13345 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13346 offset);
13347 emit_insn (gen_strset (destreg, mem,
13348 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13349 }
13350 }
13351 else
13352 {
13353 rtx countreg2;
13354 rtx label = NULL;
13355 /* Compute desired alignment of the string operation. */
13356 int desired_alignment = (TARGET_PENTIUMPRO
13357 && (count == 0 || count >= (unsigned int) 260)
13358 ? 8 : UNITS_PER_WORD);
13359
13360 /* In case we don't know anything about the alignment, default to the
13361 library version, since it is usually equally fast and results in
13362 shorter code.
13363
13364 Also emit a call when we know that the count is large and the call
13365 overhead will not be important. */
13366 if (!TARGET_INLINE_ALL_STRINGOPS
13367 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13368 return 0;
13369
13370 if (TARGET_SINGLE_STRINGOP)
13371 emit_insn (gen_cld ());
13372
13373 countreg2 = gen_reg_rtx (Pmode);
13374 countreg = copy_to_mode_reg (counter_mode, count_exp);
13375 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13376 /* Get rid of MEM_OFFSET, it won't be accurate. */
13377 dst = change_address (dst, BLKmode, destreg);
13378
13379 if (count == 0 && align < desired_alignment)
13380 {
13381 label = gen_label_rtx ();
13382 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13383 LEU, 0, counter_mode, 1, label);
13384 }
13385 if (align <= 1)
13386 {
13387 rtx label = ix86_expand_aligntest (destreg, 1);
13388 emit_insn (gen_strset (destreg, dst,
13389 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13390 ix86_adjust_counter (countreg, 1);
13391 emit_label (label);
13392 LABEL_NUSES (label) = 1;
13393 }
13394 if (align <= 2)
13395 {
13396 rtx label = ix86_expand_aligntest (destreg, 2);
13397 emit_insn (gen_strset (destreg, dst,
13398 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13399 ix86_adjust_counter (countreg, 2);
13400 emit_label (label);
13401 LABEL_NUSES (label) = 1;
13402 }
13403 if (align <= 4 && desired_alignment > 4)
13404 {
13405 rtx label = ix86_expand_aligntest (destreg, 4);
13406 emit_insn (gen_strset (destreg, dst,
13407 (TARGET_64BIT
13408 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13409 : zeroreg)));
13410 ix86_adjust_counter (countreg, 4);
13411 emit_label (label);
13412 LABEL_NUSES (label) = 1;
13413 }
13414
13415 if (label && desired_alignment > 4 && !TARGET_64BIT)
13416 {
13417 emit_label (label);
13418 LABEL_NUSES (label) = 1;
13419 label = NULL_RTX;
13420 }
13421
13422 if (!TARGET_SINGLE_STRINGOP)
13423 emit_insn (gen_cld ());
13424 if (TARGET_64BIT)
13425 {
13426 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13427 GEN_INT (3)));
13428 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13429 }
13430 else
13431 {
13432 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13433 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13434 }
13435 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13436 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13437
13438 if (label)
13439 {
13440 emit_label (label);
13441 LABEL_NUSES (label) = 1;
13442 }
13443
13444 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13445 emit_insn (gen_strset (destreg, dst,
13446 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13447 if (TARGET_64BIT && (align <= 4 || count == 0))
13448 {
13449 rtx label = ix86_expand_aligntest (countreg, 4);
13450 emit_insn (gen_strset (destreg, dst,
13451 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13452 emit_label (label);
13453 LABEL_NUSES (label) = 1;
13454 }
13455 if (align > 2 && count != 0 && (count & 2))
13456 emit_insn (gen_strset (destreg, dst,
13457 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13458 if (align <= 2 || count == 0)
13459 {
13460 rtx label = ix86_expand_aligntest (countreg, 2);
13461 emit_insn (gen_strset (destreg, dst,
13462 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13463 emit_label (label);
13464 LABEL_NUSES (label) = 1;
13465 }
13466 if (align > 1 && count != 0 && (count & 1))
13467 emit_insn (gen_strset (destreg, dst,
13468 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13469 if (align <= 1 || count == 0)
13470 {
13471 rtx label = ix86_expand_aligntest (countreg, 1);
13472 emit_insn (gen_strset (destreg, dst,
13473 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13474 emit_label (label);
13475 LABEL_NUSES (label) = 1;
13476 }
13477 }
13478 return 1;
13479}
13480
13481/* Expand strlen. */
13482int
13483ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13484{
13485 rtx addr, scratch1, scratch2, scratch3, scratch4;
13486
13487 /* The generic case of the strlen expander is long. Avoid expanding it
13488 unless TARGET_INLINE_ALL_STRINGOPS. */
13489
13490 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13491 && !TARGET_INLINE_ALL_STRINGOPS
13492 && !optimize_size
13493 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13494 return 0;
13495
13496 addr = force_reg (Pmode, XEXP (src, 0));
13497 scratch1 = gen_reg_rtx (Pmode);
13498
13499 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13500 && !optimize_size)
13501 {
13502 /* Well it seems that some optimizer does not combine a call like
13503 foo(strlen(bar), strlen(bar));
13504 when the move and the subtraction are done here. It does calculate
13505 the length just once when these instructions are done inside of
13506 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13507 often used and I use one fewer register for the lifetime of
13508 output_strlen_unroll() this is better. */
13509
13510 emit_move_insn (out, addr);
13511
13512 ix86_expand_strlensi_unroll_1 (out, src, align);
13513
13514 /* strlensi_unroll_1 returns the address of the zero at the end of
13515 the string, like memchr(), so compute the length by subtracting
13516 the start address. */
13517 if (TARGET_64BIT)
13518 emit_insn (gen_subdi3 (out, out, addr));
13519 else
13520 emit_insn (gen_subsi3 (out, out, addr));
13521 }
13522 else
13523 {
13524 rtx unspec;
13525 scratch2 = gen_reg_rtx (Pmode);
13526 scratch3 = gen_reg_rtx (Pmode);
13527 scratch4 = force_reg (Pmode, constm1_rtx);
13528
13529 emit_move_insn (scratch3, addr);
13530 eoschar = force_reg (QImode, eoschar);
13531
13532 emit_insn (gen_cld ());
13533 src = replace_equiv_address_nv (src, scratch3);
13534
13535 /* If .md starts supporting :P, this can be done in .md. */
13536 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13537 scratch4), UNSPEC_SCAS);
13538 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13539 if (TARGET_64BIT)
13540 {
13541 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13542 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13543 }
13544 else
13545 {
13546 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13547 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13548 }
13549 }
13550 return 1;
13551}
13552
13553/* Expand the appropriate insns for doing strlen if not just doing
13554 repnz; scasb
13555
13556 out = result, initialized with the start address
13557 align_rtx = alignment of the address.
13558 scratch = scratch register, initialized with the start address when
13559 not aligned, otherwise undefined
13560
13561 This is just the body. It needs the initializations mentioned above and
13562 some address computing at the end. These things are done in i386.md. */
13563
13564static void
13565ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13566{
13567 int align;
13568 rtx tmp;
13569 rtx align_2_label = NULL_RTX;
13570 rtx align_3_label = NULL_RTX;
13571 rtx align_4_label = gen_label_rtx ();
13572 rtx end_0_label = gen_label_rtx ();
13573 rtx mem;
13574 rtx tmpreg = gen_reg_rtx (SImode);
13575 rtx scratch = gen_reg_rtx (SImode);
13576 rtx cmp;
13577
13578 align = 0;
13579 if (GET_CODE (align_rtx) == CONST_INT)
13580 align = INTVAL (align_rtx);
13581
13582 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13583
13584 /* Is there a known alignment and is it less than 4? */
13585 if (align < 4)
13586 {
13587 rtx scratch1 = gen_reg_rtx (Pmode);
13588 emit_move_insn (scratch1, out);
13589 /* Is there a known alignment and is it not 2? */
13590 if (align != 2)
13591 {
13592 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13593 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13594
13595 /* Leave just the 3 lower bits. */
13596 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13597 NULL_RTX, 0, OPTAB_WIDEN);
13598
13599 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13600 Pmode, 1, align_4_label);
13601 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13602 Pmode, 1, align_2_label);
13603 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13604 Pmode, 1, align_3_label);
13605 }
13606 else
13607 {
13608 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13609 check whether it is aligned to a 4-byte boundary. */
13610
13611 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13612 NULL_RTX, 0, OPTAB_WIDEN);
13613
13614 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13615 Pmode, 1, align_4_label);
13616 }
13617
13618 mem = change_address (src, QImode, out);
13619
13620 /* Now compare the bytes. */
13621
13622 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
13623 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13624 QImode, 1, end_0_label);
13625
13626 /* Increment the address. */
13627 if (TARGET_64BIT)
13628 emit_insn (gen_adddi3 (out, out, const1_rtx));
13629 else
13630 emit_insn (gen_addsi3 (out, out, const1_rtx));
13631
13632 /* Not needed with an alignment of 2 */
13633 if (align != 2)
13634 {
13635 emit_label (align_2_label);
13636
13637 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13638 end_0_label);
13639
13640 if (TARGET_64BIT)
13641 emit_insn (gen_adddi3 (out, out, const1_rtx));
13642 else
13643 emit_insn (gen_addsi3 (out, out, const1_rtx));
13644
13645 emit_label (align_3_label);
13646 }
13647
13648 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13649 end_0_label);
13650
13651 if (TARGET_64BIT)
13652 emit_insn (gen_adddi3 (out, out, const1_rtx));
13653 else
13654 emit_insn (gen_addsi3 (out, out, const1_rtx));
13655 }
13656
13657 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
13658 align this loop: doing so only enlarges the code and does not help to
13659 speed it up. */
13660 emit_label (align_4_label);
13661
13662 mem = change_address (src, SImode, out);
13663 emit_move_insn (scratch, mem);
13664 if (TARGET_64BIT)
13665 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13666 else
13667 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13668
13669 /* This formula yields a nonzero result iff one of the bytes is zero.
13670 This saves three branches inside the loop and many cycles. */
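      /* The value computed below is (x - 0x01010101) & ~x & 0x80808080: a zero
         byte wraps to 0xff (or 0xfe) in the subtraction, so its top bit passes
         the ~x mask; bytes whose top bit was already set are masked out, and a
         borrow into a higher byte can only arise when a zero byte already
         exists below it, so the result is nonzero exactly when x contains a
         zero byte.  */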
13671
13672 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13673 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13674 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13675 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13676 gen_int_mode (0x80808080, SImode)));
13677 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13678 align_4_label);
13679
13680 if (TARGET_CMOVE)
13681 {
13682 rtx reg = gen_reg_rtx (SImode);
13683 rtx reg2 = gen_reg_rtx (Pmode);
13684 emit_move_insn (reg, tmpreg);
13685 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13686
13687 /* If zero is not in the first two bytes, move two bytes forward. */
13688 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13689 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13690 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13691 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13692 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13693 reg,
13694 tmpreg)));
13695 /* Emit lea manually to avoid clobbering of flags. */
13696 emit_insn (gen_rtx_SET (SImode, reg2,
13697 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13698
13699 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13700 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13701 emit_insn (gen_rtx_SET (VOIDmode, out,
13702 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13703 reg2,
13704 out)));
13705
13706 }
13707 else
13708 {
13709 rtx end_2_label = gen_label_rtx ();
13710 /* Is zero in the first two bytes? */
13711
13712 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13713 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13714 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13715 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13716 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13717 pc_rtx);
13718 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13719 JUMP_LABEL (tmp) = end_2_label;
13720
13721 /* Not in the first two. Move two bytes forward. */
13722 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13723 if (TARGET_64BIT)
13724 emit_insn (gen_adddi3 (out, out, const2_rtx));
13725 else
13726 emit_insn (gen_addsi3 (out, out, const2_rtx));
13727
13728 emit_label (end_2_label);
13729
13730 }
13731
13732 /* Avoid branch in fixing the byte. */
13733 tmpreg = gen_lowpart (QImode, tmpreg);
13734 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13735 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13736 if (TARGET_64BIT)
13737 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13738 else
13739 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13740
13741 emit_label (end_0_label);
13742}
13743
13744void
13745ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13746 rtx callarg2 ATTRIBUTE_UNUSED,
13747 rtx pop, int sibcall)
13748{
13749 rtx use = NULL, call;
13750
13751 if (pop == const0_rtx)
13752 pop = NULL;
13753 gcc_assert (!TARGET_64BIT || !pop);
13754
13755 if (TARGET_MACHO && !TARGET_64BIT)
13756 {
13757#if TARGET_MACHO
13758 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13759 fnaddr = machopic_indirect_call_target (fnaddr);
13760#endif
13761 }
13762 else
13763 {
13764 /* Static functions and indirect calls don't need the pic register. */
13765 if (! TARGET_64BIT && flag_pic
13766 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13767 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13768 use_reg (&use, pic_offset_table_rtx);
13769 }
13770
13771 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13772 {
13773 rtx al = gen_rtx_REG (QImode, 0);
13774 emit_move_insn (al, callarg2);
13775 use_reg (&use, al);
13776 }
13777
13778 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13779 {
13780 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13781 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13782 }
13783 if (sibcall && TARGET_64BIT
13784 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13785 {
13786 rtx addr;
13787 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13788 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13789 emit_move_insn (fnaddr, addr);
13790 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13791 }
13792
13793 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13794 if (retval)
13795 call = gen_rtx_SET (VOIDmode, retval, call);
13796 if (pop)
13797 {
13798 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13799 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13800 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13801 }
13802
13803 call = emit_call_insn (call);
13804 if (use)
13805 CALL_INSN_FUNCTION_USAGE (call) = use;
13806}
13807
13808
13809/* Clear stack slot assignments remembered from previous functions.
13810 This is called from INIT_EXPANDERS once before RTL is emitted for each
13811 function. */
13812
13813static struct machine_function *
13814ix86_init_machine_status (void)
13815{
13816 struct machine_function *f;
13817
13818 f = ggc_alloc_cleared (sizeof (struct machine_function));
13819 f->use_fast_prologue_epilogue_nregs = -1;
13820 f->tls_descriptor_call_expanded_p = 0;
13821
13822 return f;
13823}
13824
13825/* Return a MEM corresponding to a stack slot with mode MODE.
13826 Allocate a new slot if necessary.
13827
13828 The RTL for a function can have several slots available: N is
13829 which slot to use. */
13830
13831rtx
13832assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13833{
13834 struct stack_local_entry *s;
13835
13836 gcc_assert (n < MAX_386_STACK_LOCALS);
13837
13838 /* Virtual slot is valid only before vregs are instantiated. */
13839 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13840
13841 for (s = ix86_stack_locals; s; s = s->next)
13842 if (s->mode == mode && s->n == n)
13843 return s->rtl;
13844
13845 s = (struct stack_local_entry *)
13846 ggc_alloc (sizeof (struct stack_local_entry));
13847 s->n = n;
13848 s->mode = mode;
13849 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13850
13851 s->next = ix86_stack_locals;
13852 ix86_stack_locals = s;
13853 return s->rtl;
13854}
13855
13856/* Construct the SYMBOL_REF for the tls_get_addr function. */
13857
13858static GTY(()) rtx ix86_tls_symbol;
13859rtx
13860ix86_tls_get_addr (void)
13861{
13862
13863 if (!ix86_tls_symbol)
13864 {
13865 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13866 (TARGET_ANY_GNU_TLS
13867 && !TARGET_64BIT)
13868 ? "___tls_get_addr"
13869 : "__tls_get_addr");
13870 }
13871
13872 return ix86_tls_symbol;
13873}
13874
13875/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13876
13877static GTY(()) rtx ix86_tls_module_base_symbol;
13878rtx
13879ix86_tls_module_base (void)
13880{
13881
13882 if (!ix86_tls_module_base_symbol)
13883 {
13884 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13885 "_TLS_MODULE_BASE_");
13886 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13887 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13888 }
13889
13890 return ix86_tls_module_base_symbol;
13891}
13892
13893/* Calculate the length of the memory address in the instruction
13894 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13895
13896int
13897memory_address_length (rtx addr)
13898{
13899 struct ix86_address parts;
13900 rtx base, index, disp;
13901 int len;
13902 int ok;
13903
13904 if (GET_CODE (addr) == PRE_DEC
13905 || GET_CODE (addr) == POST_INC
13906 || GET_CODE (addr) == PRE_MODIFY
13907 || GET_CODE (addr) == POST_MODIFY)
13908 return 0;
13909
13910 ok = ix86_decompose_address (addr, &parts);
13911 gcc_assert (ok);
13912
13913 if (parts.base && GET_CODE (parts.base) == SUBREG)
13914 parts.base = SUBREG_REG (parts.base);
13915 if (parts.index && GET_CODE (parts.index) == SUBREG)
13916 parts.index = SUBREG_REG (parts.index);
13917
13918 base = parts.base;
13919 index = parts.index;
13920 disp = parts.disp;
13921 len = 0;
13922
13923 /* Rule of thumb:
13924 - esp as the base always wants an index,
13925 - ebp as the base always wants a displacement. */
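      /* For example, (%ebp) must be encoded with a zero disp8 (len 1), (%esp)
         needs a SIB byte (len 1), and 8(%esp) needs both (len 2).  */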
13926
13927 /* Register Indirect. */
13928 if (base && !index && !disp)
13929 {
13930 /* esp (for its index) and ebp (for its displacement) need
13931 the two-byte modrm form. */
13932 if (addr == stack_pointer_rtx
13933 || addr == arg_pointer_rtx
13934 || addr == frame_pointer_rtx
13935 || addr == hard_frame_pointer_rtx)
13936 len = 1;
13937 }
13938
13939 /* Direct Addressing. */
13940 else if (disp && !base && !index)
13941 len = 4;
13942
13943 else
13944 {
13945 /* Find the length of the displacement constant. */
13946 if (disp)
13947 {
13948 if (base && satisfies_constraint_K (disp))
13949 len = 1;
13950 else
13951 len = 4;
13952 }
13953 /* ebp always wants a displacement. */
13954 else if (base == hard_frame_pointer_rtx)
13955 len = 1;
13956
13957 /* An index requires the two-byte modrm form.... */
13958 if (index
13959 /* ...like esp, which always wants an index. */
13960 || base == stack_pointer_rtx
13961 || base == arg_pointer_rtx
13962 || base == frame_pointer_rtx)
13963 len += 1;
13964 }
13965
13966 return len;
13967}
13968
13969/* Compute the default value for the "length_immediate" attribute. When SHORTFORM
13970 is set, expect that the insn has an 8-bit immediate alternative. */
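/* For example, addl $4, %eax can use the sign-extended 8-bit immediate form
   (length 1), while addl $1000, %eax needs a full 32-bit immediate (length 4). */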
13971int
13972ix86_attr_length_immediate_default (rtx insn, int shortform)
13973{
13974 int len = 0;
13975 int i;
13976 extract_insn_cached (insn);
13977 for (i = recog_data.n_operands - 1; i >= 0; --i)
13978 if (CONSTANT_P (recog_data.operand[i]))
13979 {
13980 gcc_assert (!len);
13981 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13982 len = 1;
13983 else
13984 {
13985 switch (get_attr_mode (insn))
13986 {
13987 case MODE_QI:
13988 len+=1;
13989 break;
13990 case MODE_HI:
13991 len+=2;
13992 break;
13993 case MODE_SI:
13994 len+=4;
13995 break;
13996 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
13997 case MODE_DI:
13998 len+=4;
13999 break;
14000 default:
14001 fatal_insn ("unknown insn mode", insn);
14002 }
14003 }
14004 }
14005 return len;
14006}
14007/* Compute default value for "length_address" attribute. */
14008int
14009ix86_attr_length_address_default (rtx insn)
14010{
14011 int i;
14012
14013 if (get_attr_type (insn) == TYPE_LEA)
14014 {
14015 rtx set = PATTERN (insn);
14016
14017 if (GET_CODE (set) == PARALLEL)
14018 set = XVECEXP (set, 0, 0);
14019
14020 gcc_assert (GET_CODE (set) == SET);
14021
14022 return memory_address_length (SET_SRC (set));
14023 }
14024
14025 extract_insn_cached (insn);
14026 for (i = recog_data.n_operands - 1; i >= 0; --i)
14027 if (GET_CODE (recog_data.operand[i]) == MEM)
14028 {
14029 return memory_address_length (XEXP (recog_data.operand[i], 0));
14031 }
14032 return 0;
14033}
14034
14035/* Return the maximum number of instructions a cpu can issue. */
14036
14037static int
14038ix86_issue_rate (void)
14039{
14040 switch (ix86_tune)
14041 {
14042 case PROCESSOR_PENTIUM:
14043 case PROCESSOR_K6:
14044 return 2;
14045
14046 case PROCESSOR_PENTIUMPRO:
14047 case PROCESSOR_PENTIUM4:
14048 case PROCESSOR_ATHLON:
14049 case PROCESSOR_K8:
14050 case PROCESSOR_AMDFAM10:
14051 case PROCESSOR_NOCONA:
14052 case PROCESSOR_GENERIC32:
14053 case PROCESSOR_GENERIC64:
14054 return 3;
14055
14056 case PROCESSOR_CORE2:
14057 return 4;
14058
14059 default:
14060 return 1;
14061 }
14062}
14063
14064/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
14065 by DEP_INSN and nothing else set by DEP_INSN. */
14066
14067static int
14068ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14069{
14070 rtx set, set2;
14071
14072 /* Simplify the test for uninteresting insns. */
14073 if (insn_type != TYPE_SETCC
14074 && insn_type != TYPE_ICMOV
14075 && insn_type != TYPE_FCMOV
14076 && insn_type != TYPE_IBR)
14077 return 0;
14078
14079 if ((set = single_set (dep_insn)) != 0)
14080 {
14081 set = SET_DEST (set);
14082 set2 = NULL_RTX;
14083 }
14084 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
14085 && XVECLEN (PATTERN (dep_insn), 0) == 2
14086 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
14087 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
14088 {
14089 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
14090 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
14091 }
14092 else
14093 return 0;
14094
14095 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
14096 return 0;
14097
14098 /* This test is true if the dependent insn reads the flags but
14099 not any other potentially set register. */
14100 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
14101 return 0;
14102
14103 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
14104 return 0;
14105
14106 return 1;
14107}
14108
14109/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14110 address with operands set by DEP_INSN. */
14111
14112static int
14113ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14114{
14115 rtx addr;
14116
14117 if (insn_type == TYPE_LEA
14118 && TARGET_PENTIUM)
14119 {
14120 addr = PATTERN (insn);
14121
14122 if (GET_CODE (addr) == PARALLEL)
14123 addr = XVECEXP (addr, 0, 0);
14124
14125 gcc_assert (GET_CODE (addr) == SET);
14126
14127 addr = SET_SRC (addr);
14128 }
14129 else
14130 {
14131 int i;
14132 extract_insn_cached (insn);
14133 for (i = recog_data.n_operands - 1; i >= 0; --i)
14134 if (GET_CODE (recog_data.operand[i]) == MEM)
14135 {
14136 addr = XEXP (recog_data.operand[i], 0);
14137 goto found;
14138 }
14139 return 0;
14140 found:;
14141 }
14142
14143 return modified_in_p (addr, dep_insn);
14144}
14145
14146static int
14147ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
14148{
14149 enum attr_type insn_type, dep_insn_type;
14150 enum attr_memory memory;
14151 rtx set, set2;
14152 int dep_insn_code_number;
14153
14154 /* Anti and output dependencies have zero cost on all CPUs. */
14155 if (REG_NOTE_KIND (link) != 0)
14156 return 0;
14157
14158 dep_insn_code_number = recog_memoized (dep_insn);
14159
14160 /* If we can't recognize the insns, we can't really do anything. */
14161 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
14162 return cost;
14163
14164 insn_type = get_attr_type (insn);
14165 dep_insn_type = get_attr_type (dep_insn);
14166
14167 switch (ix86_tune)
14168 {
14169 case PROCESSOR_PENTIUM:
14170 /* Address Generation Interlock adds a cycle of latency. */
14171 if (ix86_agi_dependent (insn, dep_insn, insn_type))
14172 cost += 1;
14173
14174 /* ??? Compares pair with jump/setcc. */
14175 if (ix86_flags_dependent (insn, dep_insn, insn_type))
14176 cost = 0;
14177
14178 /* Floating point stores require value to be ready one cycle earlier. */
14179 if (insn_type == TYPE_FMOV
14180 && get_attr_memory (insn) == MEMORY_STORE
14181 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14182 cost += 1;
14183 break;
14184
14185 case PROCESSOR_PENTIUMPRO:
14186 memory = get_attr_memory (insn);
14187
14188 /* INT->FP conversion is expensive. */
14189 if (get_attr_fp_int_src (dep_insn))
14190 cost += 5;
14191
14192 /* There is one cycle extra latency between an FP op and a store. */
14193 if (insn_type == TYPE_FMOV
14194 && (set = single_set (dep_insn)) != NULL_RTX
14195 && (set2 = single_set (insn)) != NULL_RTX
14196 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
14197 && GET_CODE (SET_DEST (set2)) == MEM)
14198 cost += 1;
14199
14200 /* Show ability of reorder buffer to hide latency of load by executing
14201 in parallel with previous instruction in case
14202 previous instruction is not needed to compute the address. */
14203 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14204 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14205 {
14206 /* Claim moves to take one cycle, as the core can issue one load
14207 at a time and the next load can start a cycle later. */
14208 if (dep_insn_type == TYPE_IMOV
14209 || dep_insn_type == TYPE_FMOV)
14210 cost = 1;
14211 else if (cost > 1)
14212 cost--;
14213 }
14214 break;
14215
14216 case PROCESSOR_K6:
14217 memory = get_attr_memory (insn);
14218
14219 /* The esp dependency is resolved before the instruction is really
14220 finished. */
14221 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14222 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14223 return 1;
14224
14225 /* INT->FP conversion is expensive. */
14226 if (get_attr_fp_int_src (dep_insn))
14227 cost += 5;
14228
14229 /* Show ability of reorder buffer to hide latency of load by executing
14230 in parallel with previous instruction in case
14231 previous instruction is not needed to compute the address. */
14232 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14233 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14234 {
14235 /* Claim moves to take one cycle, as the core can issue one load
14236 at a time and the next load can start a cycle later. */
14237 if (dep_insn_type == TYPE_IMOV
14238 || dep_insn_type == TYPE_FMOV)
14239 cost = 1;
14240 else if (cost > 2)
14241 cost -= 2;
14242 else
14243 cost = 1;
14244 }
14245 break;
14246
14247 case PROCESSOR_ATHLON:
14248 case PROCESSOR_K8:
14249 case PROCESSOR_AMDFAM10:
14250 case PROCESSOR_GENERIC32:
14251 case PROCESSOR_GENERIC64:
14252 memory = get_attr_memory (insn);
14253
14254 /* Show ability of reorder buffer to hide latency of load by executing
14255 in parallel with previous instruction in case
14256 previous instruction is not needed to compute the address. */
14257 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14258 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14259 {
14260 enum attr_unit unit = get_attr_unit (insn);
14261 int loadcost = 3;
14262
14263 /* Because of the difference between the length of integer and
14264 floating unit pipeline preparation stages, the memory operands
14265 for floating point are cheaper.
14266
14267 ??? For Athlon the difference is most probably 2. */
14268 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14269 loadcost = 3;
14270 else
14271 loadcost = TARGET_ATHLON ? 2 : 0;
14272
14273 if (cost >= loadcost)
14274 cost -= loadcost;
14275 else
14276 cost = 0;
14277 }
14278
14279 default:
14280 break;
14281 }
14282
14283 return cost;
14284}
14285
14286/* How many alternative schedules to try. This should be as wide as the
14287 scheduling freedom in the DFA, but no wider. Making this value too
14288 large results in extra work for the scheduler. */
14289
14290static int
14291ia32_multipass_dfa_lookahead (void)
14292{
14293 if (ix86_tune == PROCESSOR_PENTIUM)
14294 return 2;
14295
14296 if (ix86_tune == PROCESSOR_PENTIUMPRO
14297 || ix86_tune == PROCESSOR_K6)
14298 return 1;
14299
14300 else
14301 return 0;
14302}
14303
14304
14305/* Compute the alignment given to a constant that is being placed in memory.
14306 EXP is the constant and ALIGN is the alignment that the object would
14307 ordinarily have.
14308 The value of this function is used instead of that alignment to align
14309 the object. */
14310
14311int
14312ix86_constant_alignment (tree exp, int align)
14313{
14314 if (TREE_CODE (exp) == REAL_CST)
14315 {
14316 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14317 return 64;
14318 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14319 return 128;
14320 }
14321 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14322 && !TARGET_NO_ALIGN_LONG_STRINGS
14323 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14324 return BITS_PER_WORD;
14325
14326 return align;
14327}
14328
14329/* Compute the alignment for a static variable.
14330 TYPE is the data type, and ALIGN is the alignment that
14331 the object would ordinarily have. The value of this function is used
14332 instead of that alignment to align the object. */
14333
14334int
14335ix86_data_alignment (tree type, int align)
14336{
14337 int max_align = optimize_size ? BITS_PER_WORD : 256;
14338
14339 if (AGGREGATE_TYPE_P (type)
14340 && TYPE_SIZE (type)
14341 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14342 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14343 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14344 && align < max_align)
14345 align = max_align;
14346
14347 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
14348 to a 16-byte boundary. */
14349 if (TARGET_64BIT)
14350 {
14351 if (AGGREGATE_TYPE_P (type)
14352 && TYPE_SIZE (type)
14353 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14354 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14355 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14356 return 128;
14357 }
14358
14359 if (TREE_CODE (type) == ARRAY_TYPE)
14360 {
14361 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14362 return 64;
14363 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14364 return 128;
14365 }
14366 else if (TREE_CODE (type) == COMPLEX_TYPE)
14367 {
14368
14369 if (TYPE_MODE (type) == DCmode && align < 64)
14370 return 64;
14371 if (TYPE_MODE (type) == XCmode && align < 128)
14372 return 128;
14373 }
14374 else if ((TREE_CODE (type) == RECORD_TYPE
14375 || TREE_CODE (type) == UNION_TYPE
14376 || TREE_CODE (type) == QUAL_UNION_TYPE)
14377 && TYPE_FIELDS (type))
14378 {
14379 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14380 return 64;
14381 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14382 return 128;
14383 }
14384 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14385 || TREE_CODE (type) == INTEGER_TYPE)
14386 {
14387 if (TYPE_MODE (type) == DFmode && align < 64)
14388 return 64;
14389 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14390 return 128;
14391 }
14392
14393 return align;
14394}
14395
14396/* Compute the alignment for a local variable.
14397 TYPE is the data type, and ALIGN is the alignment that
14398 the object would ordinarily have. The value of this macro is used
14399 instead of that alignment to align the object. */
14400
14401int
14402ix86_local_alignment (tree type, int align)
14403{
14404 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
14405 to a 16-byte boundary. */
14406 if (TARGET_64BIT)
14407 {
14408 if (AGGREGATE_TYPE_P (type)
14409 && TYPE_SIZE (type)
14410 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14411 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14412 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14413 return 128;
14414 }
14415 if (TREE_CODE (type) == ARRAY_TYPE)
14416 {
14417 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14418 return 64;
14419 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14420 return 128;
14421 }
14422 else if (TREE_CODE (type) == COMPLEX_TYPE)
14423 {
14424 if (TYPE_MODE (type) == DCmode && align < 64)
14425 return 64;
14426 if (TYPE_MODE (type) == XCmode && align < 128)
14427 return 128;
14428 }
14429 else if ((TREE_CODE (type) == RECORD_TYPE
14430 || TREE_CODE (type) == UNION_TYPE
14431 || TREE_CODE (type) == QUAL_UNION_TYPE)
14432 && TYPE_FIELDS (type))
14433 {
14434 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14435 return 64;
14436 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14437 return 128;
14438 }
14439 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14440 || TREE_CODE (type) == INTEGER_TYPE)
14441 {
14442
14443 if (TYPE_MODE (type) == DFmode && align < 64)
14444 return 64;
14445 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14446 return 128;
14447 }
14448 return align;
14449}
14450
14451/* Emit RTL insns to initialize the variable parts of a trampoline.
14452 FNADDR is an RTX for the address of the function's pure code.
14453 CXT is an RTX for the static chain value for the function. */
14454void
14455x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14456{
14457 if (!TARGET_64BIT)
14458 {
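      /* The 32-bit trampoline is two instructions, 10 bytes in total:
           b9 <cxt>   movl  $cxt, %ecx
           e9 <disp>  jmp   fnaddr  */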
14459 /* Compute offset from the end of the jmp to the target function. */
14460 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14461 plus_constant (tramp, 10),
14462 NULL_RTX, 1, OPTAB_DIRECT);
14463 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14464 gen_int_mode (0xb9, QImode));
14465 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14466 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14467 gen_int_mode (0xe9, QImode));
14468 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14469 }
14470 else
14471 {
14472 int offset = 0;
14473 /* Try to load address using shorter movl instead of movabs.
14474 We may want to support movq for kernel mode, but kernel does not use
14475 trampolines at the moment. */
14476 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14477 {
14478 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14479 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14480 gen_int_mode (0xbb41, HImode));
14481 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14482 gen_lowpart (SImode, fnaddr));
14483 offset += 6;
14484 }
14485 else
14486 {
14487 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14488 gen_int_mode (0xbb49, HImode));
14489 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14490 fnaddr);
14491 offset += 10;
14492 }
14493 /* Load static chain using movabs to r10. */
14494 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14495 gen_int_mode (0xba49, HImode));
14496 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14497 cxt);
14498 offset += 10;
14499 /* Jump to r11. */
14500 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14501 gen_int_mode (0xff49, HImode));
14502 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14503 gen_int_mode (0xe3, QImode));
14504 offset += 3;
14505 gcc_assert (offset <= TRAMPOLINE_SIZE);
14506 }
14507
14508#ifdef ENABLE_EXECUTE_STACK
14509 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14510 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14511#endif
14512}
14513
14514/* Codes for all the SSE/MMX builtins. */
14515enum ix86_builtins
14516{
14517 IX86_BUILTIN_ADDPS,
14518 IX86_BUILTIN_ADDSS,
14519 IX86_BUILTIN_DIVPS,
14520 IX86_BUILTIN_DIVSS,
14521 IX86_BUILTIN_MULPS,
14522 IX86_BUILTIN_MULSS,
14523 IX86_BUILTIN_SUBPS,
14524 IX86_BUILTIN_SUBSS,
14525
14526 IX86_BUILTIN_CMPEQPS,
14527 IX86_BUILTIN_CMPLTPS,
14528 IX86_BUILTIN_CMPLEPS,
14529 IX86_BUILTIN_CMPGTPS,
14530 IX86_BUILTIN_CMPGEPS,
14531 IX86_BUILTIN_CMPNEQPS,
14532 IX86_BUILTIN_CMPNLTPS,
14533 IX86_BUILTIN_CMPNLEPS,
14534 IX86_BUILTIN_CMPNGTPS,
14535 IX86_BUILTIN_CMPNGEPS,
14536 IX86_BUILTIN_CMPORDPS,
14537 IX86_BUILTIN_CMPUNORDPS,
14538 IX86_BUILTIN_CMPEQSS,
14539 IX86_BUILTIN_CMPLTSS,
14540 IX86_BUILTIN_CMPLESS,
14541 IX86_BUILTIN_CMPNEQSS,
14542 IX86_BUILTIN_CMPNLTSS,
14543 IX86_BUILTIN_CMPNLESS,
14544 IX86_BUILTIN_CMPNGTSS,
14545 IX86_BUILTIN_CMPNGESS,
14546 IX86_BUILTIN_CMPORDSS,
14547 IX86_BUILTIN_CMPUNORDSS,
14548
14549 IX86_BUILTIN_COMIEQSS,
14550 IX86_BUILTIN_COMILTSS,
14551 IX86_BUILTIN_COMILESS,
14552 IX86_BUILTIN_COMIGTSS,
14553 IX86_BUILTIN_COMIGESS,
14554 IX86_BUILTIN_COMINEQSS,
14555 IX86_BUILTIN_UCOMIEQSS,
14556 IX86_BUILTIN_UCOMILTSS,
14557 IX86_BUILTIN_UCOMILESS,
14558 IX86_BUILTIN_UCOMIGTSS,
14559 IX86_BUILTIN_UCOMIGESS,
14560 IX86_BUILTIN_UCOMINEQSS,
14561
14562 IX86_BUILTIN_CVTPI2PS,
14563 IX86_BUILTIN_CVTPS2PI,
14564 IX86_BUILTIN_CVTSI2SS,
14565 IX86_BUILTIN_CVTSI642SS,
14566 IX86_BUILTIN_CVTSS2SI,
14567 IX86_BUILTIN_CVTSS2SI64,
14568 IX86_BUILTIN_CVTTPS2PI,
14569 IX86_BUILTIN_CVTTSS2SI,
14570 IX86_BUILTIN_CVTTSS2SI64,
14571
14572 IX86_BUILTIN_MAXPS,
14573 IX86_BUILTIN_MAXSS,
14574 IX86_BUILTIN_MINPS,
14575 IX86_BUILTIN_MINSS,
14576
14577 IX86_BUILTIN_LOADUPS,
14578 IX86_BUILTIN_STOREUPS,
14579 IX86_BUILTIN_MOVSS,
14580
14581 IX86_BUILTIN_MOVHLPS,
14582 IX86_BUILTIN_MOVLHPS,
14583 IX86_BUILTIN_LOADHPS,
14584 IX86_BUILTIN_LOADLPS,
14585 IX86_BUILTIN_STOREHPS,
14586 IX86_BUILTIN_STORELPS,
14587
14588 IX86_BUILTIN_MASKMOVQ,
14589 IX86_BUILTIN_MOVMSKPS,
14590 IX86_BUILTIN_PMOVMSKB,
14591
14592 IX86_BUILTIN_MOVNTPS,
14593 IX86_BUILTIN_MOVNTQ,
14594
14595 IX86_BUILTIN_LOADDQU,
14596 IX86_BUILTIN_STOREDQU,
14597
14598 IX86_BUILTIN_PACKSSWB,
14599 IX86_BUILTIN_PACKSSDW,
14600 IX86_BUILTIN_PACKUSWB,
14601
14602 IX86_BUILTIN_PADDB,
14603 IX86_BUILTIN_PADDW,
14604 IX86_BUILTIN_PADDD,
14605 IX86_BUILTIN_PADDQ,
14606 IX86_BUILTIN_PADDSB,
14607 IX86_BUILTIN_PADDSW,
14608 IX86_BUILTIN_PADDUSB,
14609 IX86_BUILTIN_PADDUSW,
14610 IX86_BUILTIN_PSUBB,
14611 IX86_BUILTIN_PSUBW,
14612 IX86_BUILTIN_PSUBD,
14613 IX86_BUILTIN_PSUBQ,
14614 IX86_BUILTIN_PSUBSB,
14615 IX86_BUILTIN_PSUBSW,
14616 IX86_BUILTIN_PSUBUSB,
14617 IX86_BUILTIN_PSUBUSW,
14618
14619 IX86_BUILTIN_PAND,
14620 IX86_BUILTIN_PANDN,
14621 IX86_BUILTIN_POR,
14622 IX86_BUILTIN_PXOR,
14623
14624 IX86_BUILTIN_PAVGB,
14625 IX86_BUILTIN_PAVGW,
14626
14627 IX86_BUILTIN_PCMPEQB,
14628 IX86_BUILTIN_PCMPEQW,
14629 IX86_BUILTIN_PCMPEQD,
14630 IX86_BUILTIN_PCMPGTB,
14631 IX86_BUILTIN_PCMPGTW,
14632 IX86_BUILTIN_PCMPGTD,
14633
14634 IX86_BUILTIN_PMADDWD,
14635
14636 IX86_BUILTIN_PMAXSW,
14637 IX86_BUILTIN_PMAXUB,
14638 IX86_BUILTIN_PMINSW,
14639 IX86_BUILTIN_PMINUB,
14640
14641 IX86_BUILTIN_PMULHUW,
14642 IX86_BUILTIN_PMULHW,
14643 IX86_BUILTIN_PMULLW,
14644
14645 IX86_BUILTIN_PSADBW,
14646 IX86_BUILTIN_PSHUFW,
14647
14648 IX86_BUILTIN_PSLLW,
14649 IX86_BUILTIN_PSLLD,
14650 IX86_BUILTIN_PSLLQ,
14651 IX86_BUILTIN_PSRAW,
14652 IX86_BUILTIN_PSRAD,
14653 IX86_BUILTIN_PSRLW,
14654 IX86_BUILTIN_PSRLD,
14655 IX86_BUILTIN_PSRLQ,
14656 IX86_BUILTIN_PSLLWI,
14657 IX86_BUILTIN_PSLLDI,
14658 IX86_BUILTIN_PSLLQI,
14659 IX86_BUILTIN_PSRAWI,
14660 IX86_BUILTIN_PSRADI,
14661 IX86_BUILTIN_PSRLWI,
14662 IX86_BUILTIN_PSRLDI,
14663 IX86_BUILTIN_PSRLQI,
14664
14665 IX86_BUILTIN_PUNPCKHBW,
14666 IX86_BUILTIN_PUNPCKHWD,
14667 IX86_BUILTIN_PUNPCKHDQ,
14668 IX86_BUILTIN_PUNPCKLBW,
14669 IX86_BUILTIN_PUNPCKLWD,
14670 IX86_BUILTIN_PUNPCKLDQ,
14671
14672 IX86_BUILTIN_SHUFPS,
14673
14674 IX86_BUILTIN_RCPPS,
14675 IX86_BUILTIN_RCPSS,
14676 IX86_BUILTIN_RSQRTPS,
14677 IX86_BUILTIN_RSQRTSS,
14678 IX86_BUILTIN_SQRTPS,
14679 IX86_BUILTIN_SQRTSS,
14680
14681 IX86_BUILTIN_UNPCKHPS,
14682 IX86_BUILTIN_UNPCKLPS,
14683
14684 IX86_BUILTIN_ANDPS,
14685 IX86_BUILTIN_ANDNPS,
14686 IX86_BUILTIN_ORPS,
14687 IX86_BUILTIN_XORPS,
14688
14689 IX86_BUILTIN_EMMS,
14690 IX86_BUILTIN_LDMXCSR,
14691 IX86_BUILTIN_STMXCSR,
14692 IX86_BUILTIN_SFENCE,
14693
14694 /* 3DNow! Original */
14695 IX86_BUILTIN_FEMMS,
14696 IX86_BUILTIN_PAVGUSB,
14697 IX86_BUILTIN_PF2ID,
14698 IX86_BUILTIN_PFACC,
14699 IX86_BUILTIN_PFADD,
14700 IX86_BUILTIN_PFCMPEQ,
14701 IX86_BUILTIN_PFCMPGE,
14702 IX86_BUILTIN_PFCMPGT,
14703 IX86_BUILTIN_PFMAX,
14704 IX86_BUILTIN_PFMIN,
14705 IX86_BUILTIN_PFMUL,
14706 IX86_BUILTIN_PFRCP,
14707 IX86_BUILTIN_PFRCPIT1,
14708 IX86_BUILTIN_PFRCPIT2,
14709 IX86_BUILTIN_PFRSQIT1,
14710 IX86_BUILTIN_PFRSQRT,
14711 IX86_BUILTIN_PFSUB,
14712 IX86_BUILTIN_PFSUBR,
14713 IX86_BUILTIN_PI2FD,
14714 IX86_BUILTIN_PMULHRW,
14715
14716 /* 3DNow! Athlon Extensions */
14717 IX86_BUILTIN_PF2IW,
14718 IX86_BUILTIN_PFNACC,
14719 IX86_BUILTIN_PFPNACC,
14720 IX86_BUILTIN_PI2FW,
14721 IX86_BUILTIN_PSWAPDSI,
14722 IX86_BUILTIN_PSWAPDSF,
14723
14724 /* SSE2 */
14725 IX86_BUILTIN_ADDPD,
14726 IX86_BUILTIN_ADDSD,
14727 IX86_BUILTIN_DIVPD,
14728 IX86_BUILTIN_DIVSD,
14729 IX86_BUILTIN_MULPD,
14730 IX86_BUILTIN_MULSD,
14731 IX86_BUILTIN_SUBPD,
14732 IX86_BUILTIN_SUBSD,
14733
14734 IX86_BUILTIN_CMPEQPD,
14735 IX86_BUILTIN_CMPLTPD,
14736 IX86_BUILTIN_CMPLEPD,
14737 IX86_BUILTIN_CMPGTPD,
14738 IX86_BUILTIN_CMPGEPD,
14739 IX86_BUILTIN_CMPNEQPD,
14740 IX86_BUILTIN_CMPNLTPD,
14741 IX86_BUILTIN_CMPNLEPD,
14742 IX86_BUILTIN_CMPNGTPD,
14743 IX86_BUILTIN_CMPNGEPD,
14744 IX86_BUILTIN_CMPORDPD,
14745 IX86_BUILTIN_CMPUNORDPD,
14746 IX86_BUILTIN_CMPNEPD,
14747 IX86_BUILTIN_CMPEQSD,
14748 IX86_BUILTIN_CMPLTSD,
14749 IX86_BUILTIN_CMPLESD,
14750 IX86_BUILTIN_CMPNEQSD,
14751 IX86_BUILTIN_CMPNLTSD,
14752 IX86_BUILTIN_CMPNLESD,
14753 IX86_BUILTIN_CMPORDSD,
14754 IX86_BUILTIN_CMPUNORDSD,
14755 IX86_BUILTIN_CMPNESD,
14756
14757 IX86_BUILTIN_COMIEQSD,
14758 IX86_BUILTIN_COMILTSD,
14759 IX86_BUILTIN_COMILESD,
14760 IX86_BUILTIN_COMIGTSD,
14761 IX86_BUILTIN_COMIGESD,
14762 IX86_BUILTIN_COMINEQSD,
14763 IX86_BUILTIN_UCOMIEQSD,
14764 IX86_BUILTIN_UCOMILTSD,
14765 IX86_BUILTIN_UCOMILESD,
14766 IX86_BUILTIN_UCOMIGTSD,
14767 IX86_BUILTIN_UCOMIGESD,
14768 IX86_BUILTIN_UCOMINEQSD,
14769
14770 IX86_BUILTIN_MAXPD,
14771 IX86_BUILTIN_MAXSD,
14772 IX86_BUILTIN_MINPD,
14773 IX86_BUILTIN_MINSD,
14774
14775 IX86_BUILTIN_ANDPD,
14776 IX86_BUILTIN_ANDNPD,
14777 IX86_BUILTIN_ORPD,
14778 IX86_BUILTIN_XORPD,
14779
14780 IX86_BUILTIN_SQRTPD,
14781 IX86_BUILTIN_SQRTSD,
14782
14783 IX86_BUILTIN_UNPCKHPD,
14784 IX86_BUILTIN_UNPCKLPD,
14785
14786 IX86_BUILTIN_SHUFPD,
14787
14788 IX86_BUILTIN_LOADUPD,
14789 IX86_BUILTIN_STOREUPD,
14790 IX86_BUILTIN_MOVSD,
14791
14792 IX86_BUILTIN_LOADHPD,
14793 IX86_BUILTIN_LOADLPD,
14794
14795 IX86_BUILTIN_CVTDQ2PD,
14796 IX86_BUILTIN_CVTDQ2PS,
14797
14798 IX86_BUILTIN_CVTPD2DQ,
14799 IX86_BUILTIN_CVTPD2PI,
14800 IX86_BUILTIN_CVTPD2PS,
14801 IX86_BUILTIN_CVTTPD2DQ,
14802 IX86_BUILTIN_CVTTPD2PI,
14803
14804 IX86_BUILTIN_CVTPI2PD,
14805 IX86_BUILTIN_CVTSI2SD,
14806 IX86_BUILTIN_CVTSI642SD,
14807
14808 IX86_BUILTIN_CVTSD2SI,
14809 IX86_BUILTIN_CVTSD2SI64,
14810 IX86_BUILTIN_CVTSD2SS,
14811 IX86_BUILTIN_CVTSS2SD,
14812 IX86_BUILTIN_CVTTSD2SI,
14813 IX86_BUILTIN_CVTTSD2SI64,
14814
14815 IX86_BUILTIN_CVTPS2DQ,
14816 IX86_BUILTIN_CVTPS2PD,
14817 IX86_BUILTIN_CVTTPS2DQ,
14818
14819 IX86_BUILTIN_MOVNTI,
14820 IX86_BUILTIN_MOVNTPD,
14821 IX86_BUILTIN_MOVNTDQ,
14822
14823 /* SSE2 MMX */
14824 IX86_BUILTIN_MASKMOVDQU,
14825 IX86_BUILTIN_MOVMSKPD,
14826 IX86_BUILTIN_PMOVMSKB128,
14827
14828 IX86_BUILTIN_PACKSSWB128,
14829 IX86_BUILTIN_PACKSSDW128,
14830 IX86_BUILTIN_PACKUSWB128,
14831
14832 IX86_BUILTIN_PADDB128,
14833 IX86_BUILTIN_PADDW128,
14834 IX86_BUILTIN_PADDD128,
14835 IX86_BUILTIN_PADDQ128,
14836 IX86_BUILTIN_PADDSB128,
14837 IX86_BUILTIN_PADDSW128,
14838 IX86_BUILTIN_PADDUSB128,
14839 IX86_BUILTIN_PADDUSW128,
14840 IX86_BUILTIN_PSUBB128,
14841 IX86_BUILTIN_PSUBW128,
14842 IX86_BUILTIN_PSUBD128,
14843 IX86_BUILTIN_PSUBQ128,
14844 IX86_BUILTIN_PSUBSB128,
14845 IX86_BUILTIN_PSUBSW128,
14846 IX86_BUILTIN_PSUBUSB128,
14847 IX86_BUILTIN_PSUBUSW128,
14848
14849 IX86_BUILTIN_PAND128,
14850 IX86_BUILTIN_PANDN128,
14851 IX86_BUILTIN_POR128,
14852 IX86_BUILTIN_PXOR128,
14853
14854 IX86_BUILTIN_PAVGB128,
14855 IX86_BUILTIN_PAVGW128,
14856
14857 IX86_BUILTIN_PCMPEQB128,
14858 IX86_BUILTIN_PCMPEQW128,
14859 IX86_BUILTIN_PCMPEQD128,
14860 IX86_BUILTIN_PCMPGTB128,
14861 IX86_BUILTIN_PCMPGTW128,
14862 IX86_BUILTIN_PCMPGTD128,
14863
14864 IX86_BUILTIN_PMADDWD128,
14865
14866 IX86_BUILTIN_PMAXSW128,
14867 IX86_BUILTIN_PMAXUB128,
14868 IX86_BUILTIN_PMINSW128,
14869 IX86_BUILTIN_PMINUB128,
14870
14871 IX86_BUILTIN_PMULUDQ,
14872 IX86_BUILTIN_PMULUDQ128,
14873 IX86_BUILTIN_PMULHUW128,
14874 IX86_BUILTIN_PMULHW128,
14875 IX86_BUILTIN_PMULLW128,
14876
14877 IX86_BUILTIN_PSADBW128,
14878 IX86_BUILTIN_PSHUFHW,
14879 IX86_BUILTIN_PSHUFLW,
14880 IX86_BUILTIN_PSHUFD,
14881
14882 IX86_BUILTIN_PSLLW128,
14883 IX86_BUILTIN_PSLLD128,
14884 IX86_BUILTIN_PSLLQ128,
14885 IX86_BUILTIN_PSRAW128,
14886 IX86_BUILTIN_PSRAD128,
14887 IX86_BUILTIN_PSRLW128,
14888 IX86_BUILTIN_PSRLD128,
14889 IX86_BUILTIN_PSRLQ128,
14890 IX86_BUILTIN_PSLLDQI128,
14891 IX86_BUILTIN_PSLLWI128,
14892 IX86_BUILTIN_PSLLDI128,
14893 IX86_BUILTIN_PSLLQI128,
14894 IX86_BUILTIN_PSRAWI128,
14895 IX86_BUILTIN_PSRADI128,
14896 IX86_BUILTIN_PSRLDQI128,
14897 IX86_BUILTIN_PSRLWI128,
14898 IX86_BUILTIN_PSRLDI128,
14899 IX86_BUILTIN_PSRLQI128,
14900
14901 IX86_BUILTIN_PUNPCKHBW128,
14902 IX86_BUILTIN_PUNPCKHWD128,
14903 IX86_BUILTIN_PUNPCKHDQ128,
14904 IX86_BUILTIN_PUNPCKHQDQ128,
14905 IX86_BUILTIN_PUNPCKLBW128,
14906 IX86_BUILTIN_PUNPCKLWD128,
14907 IX86_BUILTIN_PUNPCKLDQ128,
14908 IX86_BUILTIN_PUNPCKLQDQ128,
14909
14910 IX86_BUILTIN_CLFLUSH,
14911 IX86_BUILTIN_MFENCE,
14912 IX86_BUILTIN_LFENCE,
14913
14914 /* Prescott New Instructions. */
14915 IX86_BUILTIN_ADDSUBPS,
14916 IX86_BUILTIN_HADDPS,
14917 IX86_BUILTIN_HSUBPS,
14918 IX86_BUILTIN_MOVSHDUP,
14919 IX86_BUILTIN_MOVSLDUP,
14920 IX86_BUILTIN_ADDSUBPD,
14921 IX86_BUILTIN_HADDPD,
14922 IX86_BUILTIN_HSUBPD,
14923 IX86_BUILTIN_LDDQU,
14924
14925 IX86_BUILTIN_MONITOR,
14926 IX86_BUILTIN_MWAIT,
14927
14928 /* SSSE3. */
14929 IX86_BUILTIN_PHADDW,
14930 IX86_BUILTIN_PHADDD,
14931 IX86_BUILTIN_PHADDSW,
14932 IX86_BUILTIN_PHSUBW,
14933 IX86_BUILTIN_PHSUBD,
14934 IX86_BUILTIN_PHSUBSW,
14935 IX86_BUILTIN_PMADDUBSW,
14936 IX86_BUILTIN_PMULHRSW,
14937 IX86_BUILTIN_PSHUFB,
14938 IX86_BUILTIN_PSIGNB,
14939 IX86_BUILTIN_PSIGNW,
14940 IX86_BUILTIN_PSIGND,
14941 IX86_BUILTIN_PALIGNR,
14942 IX86_BUILTIN_PABSB,
14943 IX86_BUILTIN_PABSW,
14944 IX86_BUILTIN_PABSD,
14945
14946 IX86_BUILTIN_PHADDW128,
14947 IX86_BUILTIN_PHADDD128,
14948 IX86_BUILTIN_PHADDSW128,
14949 IX86_BUILTIN_PHSUBW128,
14950 IX86_BUILTIN_PHSUBD128,
14951 IX86_BUILTIN_PHSUBSW128,
14952 IX86_BUILTIN_PMADDUBSW128,
14953 IX86_BUILTIN_PMULHRSW128,
14954 IX86_BUILTIN_PSHUFB128,
14955 IX86_BUILTIN_PSIGNB128,
14956 IX86_BUILTIN_PSIGNW128,
14957 IX86_BUILTIN_PSIGND128,
14958 IX86_BUILTIN_PALIGNR128,
14959 IX86_BUILTIN_PABSB128,
14960 IX86_BUILTIN_PABSW128,
14961 IX86_BUILTIN_PABSD128,
14962
14963 /* AMDFAM10 - SSE4A New Instructions. */
14964 IX86_BUILTIN_MOVNTSD,
14965 IX86_BUILTIN_MOVNTSS,
14966 IX86_BUILTIN_EXTRQI,
14967 IX86_BUILTIN_EXTRQ,
14968 IX86_BUILTIN_INSERTQI,
14969 IX86_BUILTIN_INSERTQ,
14970
14971 IX86_BUILTIN_VEC_INIT_V2SI,
14972 IX86_BUILTIN_VEC_INIT_V4HI,
14973 IX86_BUILTIN_VEC_INIT_V8QI,
14974 IX86_BUILTIN_VEC_EXT_V2DF,
14975 IX86_BUILTIN_VEC_EXT_V2DI,
14976 IX86_BUILTIN_VEC_EXT_V4SF,
14977 IX86_BUILTIN_VEC_EXT_V4SI,
14978 IX86_BUILTIN_VEC_EXT_V8HI,
14979 IX86_BUILTIN_VEC_EXT_V16QI,
14980 IX86_BUILTIN_VEC_EXT_V2SI,
14981 IX86_BUILTIN_VEC_EXT_V4HI,
14982 IX86_BUILTIN_VEC_SET_V8HI,
14983 IX86_BUILTIN_VEC_SET_V4HI,
14984
14985 IX86_BUILTIN_MAX
14986};
14987
14988#define def_builtin(MASK, NAME, TYPE, CODE) \
14989do { \
14990 if ((MASK) & target_flags \
14991 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14992 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14993 NULL, NULL_TREE); \
14994} while (0)
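/* Illustrative use (the function type name below is just a placeholder for
   one of the type trees built in ix86_init_mmx_sse_builtins):

     def_builtin (MASK_SSE, "__builtin_ia32_loadups",
		  v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);

   This registers the builtin only when SSE is enabled, and a builtin whose
   mask includes MASK_64BIT only when compiling for 64-bit.  */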
14995
14996/* Bits for builtin_description.flag. */
14997
14998 /* Set when we don't support the comparison natively, and should
14999 swap the operands of the comparison in order to support it. */
15000#define BUILTIN_DESC_SWAP_OPERANDS 1
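/* For example, __builtin_ia32_cmpgtps in bdesc_2arg below is described
   with comparison code LT plus BUILTIN_DESC_SWAP_OPERANDS: a > b is
   expanded as b < a, since the SSE compare patterns only provide the
   eq/lt/le/unord family directly.  */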
15001
15002struct builtin_description
15003{
15004 const unsigned int mask;		/* target_flags bits the builtin needs.  */
15005 const enum insn_code icode;	/* insn pattern used to expand it.  */
15006 const char *const name;		/* builtin name, or 0 if registered separately.  */
15007 const enum ix86_builtins code;	/* IX86_BUILTIN_* code.  */
15008 const enum rtx_code comparison;	/* comparison code for the compare builtins.  */
15009 const unsigned int flag;		/* BUILTIN_DESC_* bits, see above.  */
15010};
15011
15012static const struct builtin_description bdesc_comi[] =
15013{
15014 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
15015 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
15016 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
15017 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
15018 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
15019 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
15020 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
15021 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
15022 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
15023 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
15024 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
15025 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
15026 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
15027 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
15028 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
15029 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
15030 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
15031 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
15032 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
15033 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
15034 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
15035 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
15036 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
15037 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
15038};
15039
15040static const struct builtin_description bdesc_2arg[] =
15041{
15042 /* SSE */
15043 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
15044 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
15045 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
15046 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
15047 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
15048 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
15049 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
15050 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
15051
15052 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
15053 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
15054 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
15055 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
15056 BUILTIN_DESC_SWAP_OPERANDS },
15057 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
15058 BUILTIN_DESC_SWAP_OPERANDS },
15059 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
15060 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
15061 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
15062 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
15063 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
15064 BUILTIN_DESC_SWAP_OPERANDS },
15065 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
15066 BUILTIN_DESC_SWAP_OPERANDS },
15067 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
15068 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
15069 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
15070 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
15071 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
15072 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
15073 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
15074 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
15075 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
15076 BUILTIN_DESC_SWAP_OPERANDS },
15077 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
15078 BUILTIN_DESC_SWAP_OPERANDS },
15079 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
15080
15081 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
15082 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
15083 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
15084 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
15085
15086 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
15087 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
15088 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
15089 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
15090
15091 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
15092 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
15093 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
15094 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
15095 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
15096
15097 /* MMX */
15098 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
15099 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
15100 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
15101 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
15102 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
15103 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
15104 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
15105 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
15106
15107 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
15108 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
15109 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
15110 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
15111 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
15112 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
15113 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
15114 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
15115
15116 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
15117 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
15118 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
15119
15120 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
15121 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
15122 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
15123 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
15124
15125 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
15126 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
15127
15128 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
15129 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
15130 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
15131 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
15132 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
15133 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
15134
15135 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
15136 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
15137 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
15138 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
15139
15140 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
15141 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
15142 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
15143 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
15144 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
15145 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
15146
15147 /* Special. */
15148 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
15149 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
15150 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
15151
15152 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
15153 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
15154 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
15155
15156 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
15157 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
15158 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
15159 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
15160 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
15161 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
15162
15163 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
15164 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
15165 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
15166 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
15167 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
15168 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
15169
15170 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
15171 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
15172 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
15173 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
15174
15175 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
15176 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
15177
15178 /* SSE2 */
15179 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
15180 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
15181 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
15182 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
15183 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
15184 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
15185 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
15186 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
15187
15188 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
15189 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
15190 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
15191 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
15192 BUILTIN_DESC_SWAP_OPERANDS },
15193 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
15194 BUILTIN_DESC_SWAP_OPERANDS },
15195 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
15196 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
15197 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
15198 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
15199 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
15200 BUILTIN_DESC_SWAP_OPERANDS },
15201 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
15202 BUILTIN_DESC_SWAP_OPERANDS },
15203 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
15204 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
15205 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
15206 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
15207 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
15208 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
15209 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
15210 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
15211 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
15212
15213 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
15214 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
15215 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
15216 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
15217
15218 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
15219 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
15220 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
15221 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
15222
15223 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15224 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15225 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15226
15227 /* SSE2 MMX */
15228 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15229 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15230 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15231 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15232 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15233 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15234 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15235 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15236
15237 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15238 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15239 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15240 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15241 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15242 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15243 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15244 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15245
15246 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15247 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15248
15249 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15250 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15251 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15252 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15253
15254 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15255 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15256
15257 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15258 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15259 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15260 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15261 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15262 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15263
15264 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15265 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15266 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15267 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15268
15269 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15270 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15271 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15272 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15273 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15274 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15275 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15276 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15277
15278 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15279 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15280 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15281
15282 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15283 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15284
15285 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15286 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15287
15288 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15289 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15290 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15291
15292 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15293 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15294 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15295
15296 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15297 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15298
15299 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15300
15301 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15302 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15303 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15304 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15305
15306 /* SSE3 MMX */
15307 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15308 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15309 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15310 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15311 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15312 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15313
15314 /* SSSE3 */
15315 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15316 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15317 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15318 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15319 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15320 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15321 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15322 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15323 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15324 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15325 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15326 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15327 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15328 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15329 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15330 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15331 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15332 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15333 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15334 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15335 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15336 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15337 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15338 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15339};
15340
15341static const struct builtin_description bdesc_1arg[] =
15342{
15343 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15344 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15345
15346 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15347 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15348 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15349
15350 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15351 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15352 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15353 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15354 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15355 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15356
15357 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15358 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15359
15360 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15361
15362 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15363 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15364
15365 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15366 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15367 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15368 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15369 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15370
15371 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15372
15373 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15374 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15375 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15376 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15377
15378 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15379 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15380 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15381
15382 /* SSE3 */
15383 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15384 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15385
15386 /* SSSE3 */
15387 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15388 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15389 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15390 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15391 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15392 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15393};
15394
15395static void
15396ix86_init_builtins (void)
15397{
15398 if (TARGET_MMX)
15399 ix86_init_mmx_sse_builtins ();
15400}
15401
15402/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15403 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
15404 builtins. */
15405static void
15406ix86_init_mmx_sse_builtins (void)
15407{
15408 const struct builtin_description * d;
15409 size_t i;
15410
452 COSTS_N_INSNS (5), /* HI */
453 COSTS_N_INSNS (5), /* SI */
454 COSTS_N_INSNS (5), /* DI */
455 COSTS_N_INSNS (5)}, /* other */
456 0, /* cost of multiply per each bit set */
457 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
458 COSTS_N_INSNS (26), /* HI */
459 COSTS_N_INSNS (42), /* SI */
460 COSTS_N_INSNS (74), /* DI */
461 COSTS_N_INSNS (74)}, /* other */
462 COSTS_N_INSNS (1), /* cost of movsx */
463 COSTS_N_INSNS (1), /* cost of movzx */
464 8, /* "large" insn */
465 9, /* MOVE_RATIO */
466 4, /* cost for loading QImode using movzbl */
467 {3, 4, 3}, /* cost of loading integer registers
468 in QImode, HImode and SImode.
469 Relative to reg-reg move (2). */
470 {3, 4, 3}, /* cost of storing integer registers */
471 4, /* cost of reg,reg fld/fst */
472 {4, 4, 12}, /* cost of loading fp registers
473 in SFmode, DFmode and XFmode */
474 {6, 6, 8}, /* cost of storing fp registers
475 in SFmode, DFmode and XFmode */
476 2, /* cost of moving MMX register */
477 {4, 4}, /* cost of loading MMX registers
478 in SImode and DImode */
479 {4, 4}, /* cost of storing MMX registers
480 in SImode and DImode */
481 2, /* cost of moving SSE register */
482 {4, 4, 6}, /* cost of loading SSE registers
483 in SImode, DImode and TImode */
484 {4, 4, 5}, /* cost of storing SSE registers
485 in SImode, DImode and TImode */
486 5, /* MMX or SSE register to integer */
487 64, /* size of prefetch block */
488 6, /* number of parallel prefetches */
489 5, /* Branch cost */
490 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
491 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
492 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
493 COSTS_N_INSNS (2), /* cost of FABS instruction. */
494 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
495 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
496};
497
498static const
499struct processor_costs k8_cost = {
500 COSTS_N_INSNS (1), /* cost of an add instruction */
501 COSTS_N_INSNS (2), /* cost of a lea instruction */
502 COSTS_N_INSNS (1), /* variable shift costs */
503 COSTS_N_INSNS (1), /* constant shift costs */
504 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
505 COSTS_N_INSNS (4), /* HI */
506 COSTS_N_INSNS (3), /* SI */
507 COSTS_N_INSNS (4), /* DI */
508 COSTS_N_INSNS (5)}, /* other */
509 0, /* cost of multiply per each bit set */
510 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
511 COSTS_N_INSNS (26), /* HI */
512 COSTS_N_INSNS (42), /* SI */
513 COSTS_N_INSNS (74), /* DI */
514 COSTS_N_INSNS (74)}, /* other */
515 COSTS_N_INSNS (1), /* cost of movsx */
516 COSTS_N_INSNS (1), /* cost of movzx */
517 8, /* "large" insn */
518 9, /* MOVE_RATIO */
519 4, /* cost for loading QImode using movzbl */
520 {3, 4, 3}, /* cost of loading integer registers
521 in QImode, HImode and SImode.
522 Relative to reg-reg move (2). */
523 {3, 4, 3}, /* cost of storing integer registers */
524 4, /* cost of reg,reg fld/fst */
525 {4, 4, 12}, /* cost of loading fp registers
526 in SFmode, DFmode and XFmode */
527 {6, 6, 8}, /* cost of storing fp registers
528 in SFmode, DFmode and XFmode */
529 2, /* cost of moving MMX register */
530 {3, 3}, /* cost of loading MMX registers
531 in SImode and DImode */
532 {4, 4}, /* cost of storing MMX registers
533 in SImode and DImode */
534 2, /* cost of moving SSE register */
535 {4, 3, 6}, /* cost of loading SSE registers
536 in SImode, DImode and TImode */
537 {4, 4, 5}, /* cost of storing SSE registers
538 in SImode, DImode and TImode */
539 5, /* MMX or SSE register to integer */
540 64, /* size of prefetch block */
541 6, /* number of parallel prefetches */
542 5, /* Branch cost */
543 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
544 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
545 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
546 COSTS_N_INSNS (2), /* cost of FABS instruction. */
547 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
548 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
549};
550
551struct processor_costs amdfam10_cost = {
552 COSTS_N_INSNS (1), /* cost of an add instruction */
553 COSTS_N_INSNS (2), /* cost of a lea instruction */
554 COSTS_N_INSNS (1), /* variable shift costs */
555 COSTS_N_INSNS (1), /* constant shift costs */
556 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
557 COSTS_N_INSNS (4), /* HI */
558 COSTS_N_INSNS (3), /* SI */
559 COSTS_N_INSNS (4), /* DI */
560 COSTS_N_INSNS (5)}, /* other */
561 0, /* cost of multiply per each bit set */
562 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
563 COSTS_N_INSNS (35), /* HI */
564 COSTS_N_INSNS (51), /* SI */
565 COSTS_N_INSNS (83), /* DI */
566 COSTS_N_INSNS (83)}, /* other */
567 COSTS_N_INSNS (1), /* cost of movsx */
568 COSTS_N_INSNS (1), /* cost of movzx */
569 8, /* "large" insn */
570 9, /* MOVE_RATIO */
571 4, /* cost for loading QImode using movzbl */
572 {3, 4, 3}, /* cost of loading integer registers
573 in QImode, HImode and SImode.
574 Relative to reg-reg move (2). */
575 {3, 4, 3}, /* cost of storing integer registers */
576 4, /* cost of reg,reg fld/fst */
577 {4, 4, 12}, /* cost of loading fp registers
578 in SFmode, DFmode and XFmode */
579 {6, 6, 8}, /* cost of storing fp registers
580 in SFmode, DFmode and XFmode */
581 2, /* cost of moving MMX register */
582 {3, 3}, /* cost of loading MMX registers
583 in SImode and DImode */
584 {4, 4}, /* cost of storing MMX registers
585 in SImode and DImode */
586 2, /* cost of moving SSE register */
587 {4, 4, 3}, /* cost of loading SSE registers
588 in SImode, DImode and TImode */
589 {4, 4, 5}, /* cost of storing SSE registers
590 in SImode, DImode and TImode */
591 3, /* MMX or SSE register to integer */
592 /* On K8
593 MOVD reg64, xmmreg Double FSTORE 4
594 MOVD reg32, xmmreg Double FSTORE 4
595 On AMDFAM10
596 MOVD reg64, xmmreg Double FADD 3
597 1/1 1/1
598 MOVD reg32, xmmreg Double FADD 3
599 1/1 1/1 */
600 64, /* size of prefetch block */
601 /* New AMD processors never drop prefetches; if they cannot be performed
602 immediately, they are queued. We set the number of simultaneous prefetches
603 to a large constant to reflect this (it is probably not a good idea to
604 leave the number of prefetches completely unlimited, as their execution also
605 takes some time). */
606 100, /* number of parallel prefetches */
607 5, /* Branch cost */
608 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
609 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
610 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
611 COSTS_N_INSNS (2), /* cost of FABS instruction. */
612 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
613 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
614};
615
616static const
617struct processor_costs pentium4_cost = {
618 COSTS_N_INSNS (1), /* cost of an add instruction */
619 COSTS_N_INSNS (3), /* cost of a lea instruction */
620 COSTS_N_INSNS (4), /* variable shift costs */
621 COSTS_N_INSNS (4), /* constant shift costs */
622 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
623 COSTS_N_INSNS (15), /* HI */
624 COSTS_N_INSNS (15), /* SI */
625 COSTS_N_INSNS (15), /* DI */
626 COSTS_N_INSNS (15)}, /* other */
627 0, /* cost of multiply per each bit set */
628 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
629 COSTS_N_INSNS (56), /* HI */
630 COSTS_N_INSNS (56), /* SI */
631 COSTS_N_INSNS (56), /* DI */
632 COSTS_N_INSNS (56)}, /* other */
633 COSTS_N_INSNS (1), /* cost of movsx */
634 COSTS_N_INSNS (1), /* cost of movzx */
635 16, /* "large" insn */
636 6, /* MOVE_RATIO */
637 2, /* cost for loading QImode using movzbl */
638 {4, 5, 4}, /* cost of loading integer registers
639 in QImode, HImode and SImode.
640 Relative to reg-reg move (2). */
641 {2, 3, 2}, /* cost of storing integer registers */
642 2, /* cost of reg,reg fld/fst */
643 {2, 2, 6}, /* cost of loading fp registers
644 in SFmode, DFmode and XFmode */
645 {4, 4, 6}, /* cost of storing fp registers
646 in SFmode, DFmode and XFmode */
647 2, /* cost of moving MMX register */
648 {2, 2}, /* cost of loading MMX registers
649 in SImode and DImode */
650 {2, 2}, /* cost of storing MMX registers
651 in SImode and DImode */
652 12, /* cost of moving SSE register */
653 {12, 12, 12}, /* cost of loading SSE registers
654 in SImode, DImode and TImode */
655 {2, 2, 8}, /* cost of storing SSE registers
656 in SImode, DImode and TImode */
657 10, /* MMX or SSE register to integer */
658 64, /* size of prefetch block */
659 6, /* number of parallel prefetches */
660 2, /* Branch cost */
661 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
662 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
663 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
664 COSTS_N_INSNS (2), /* cost of FABS instruction. */
665 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
666 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
667};
668
669static const
670struct processor_costs nocona_cost = {
671 COSTS_N_INSNS (1), /* cost of an add instruction */
672 COSTS_N_INSNS (1), /* cost of a lea instruction */
673 COSTS_N_INSNS (1), /* variable shift costs */
674 COSTS_N_INSNS (1), /* constant shift costs */
675 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
676 COSTS_N_INSNS (10), /* HI */
677 COSTS_N_INSNS (10), /* SI */
678 COSTS_N_INSNS (10), /* DI */
679 COSTS_N_INSNS (10)}, /* other */
680 0, /* cost of multiply per each bit set */
681 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
682 COSTS_N_INSNS (66), /* HI */
683 COSTS_N_INSNS (66), /* SI */
684 COSTS_N_INSNS (66), /* DI */
685 COSTS_N_INSNS (66)}, /* other */
686 COSTS_N_INSNS (1), /* cost of movsx */
687 COSTS_N_INSNS (1), /* cost of movzx */
688 16, /* "large" insn */
689 17, /* MOVE_RATIO */
690 4, /* cost for loading QImode using movzbl */
691 {4, 4, 4}, /* cost of loading integer registers
692 in QImode, HImode and SImode.
693 Relative to reg-reg move (2). */
694 {4, 4, 4}, /* cost of storing integer registers */
695 3, /* cost of reg,reg fld/fst */
696 {12, 12, 12}, /* cost of loading fp registers
697 in SFmode, DFmode and XFmode */
698 {4, 4, 4}, /* cost of storing fp registers
699 in SFmode, DFmode and XFmode */
700 6, /* cost of moving MMX register */
701 {12, 12}, /* cost of loading MMX registers
702 in SImode and DImode */
703 {12, 12}, /* cost of storing MMX registers
704 in SImode and DImode */
705 6, /* cost of moving SSE register */
706 {12, 12, 12}, /* cost of loading SSE registers
707 in SImode, DImode and TImode */
708 {12, 12, 12}, /* cost of storing SSE registers
709 in SImode, DImode and TImode */
710 8, /* MMX or SSE register to integer */
711 128, /* size of prefetch block */
712 8, /* number of parallel prefetches */
713 1, /* Branch cost */
714 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
715 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
716 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
717 COSTS_N_INSNS (3), /* cost of FABS instruction. */
718 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
719 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
720};
721
722static const
723struct processor_costs core2_cost = {
724 COSTS_N_INSNS (1), /* cost of an add instruction */
725 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
726 COSTS_N_INSNS (1), /* variable shift costs */
727 COSTS_N_INSNS (1), /* constant shift costs */
728 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
729 COSTS_N_INSNS (3), /* HI */
730 COSTS_N_INSNS (3), /* SI */
731 COSTS_N_INSNS (3), /* DI */
732 COSTS_N_INSNS (3)}, /* other */
733 0, /* cost of multiply per each bit set */
734 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
735 COSTS_N_INSNS (22), /* HI */
736 COSTS_N_INSNS (22), /* SI */
737 COSTS_N_INSNS (22), /* DI */
738 COSTS_N_INSNS (22)}, /* other */
739 COSTS_N_INSNS (1), /* cost of movsx */
740 COSTS_N_INSNS (1), /* cost of movzx */
741 8, /* "large" insn */
742 16, /* MOVE_RATIO */
743 2, /* cost for loading QImode using movzbl */
744 {6, 6, 6}, /* cost of loading integer registers
745 in QImode, HImode and SImode.
746 Relative to reg-reg move (2). */
747 {4, 4, 4}, /* cost of storing integer registers */
748 2, /* cost of reg,reg fld/fst */
749 {6, 6, 6}, /* cost of loading fp registers
750 in SFmode, DFmode and XFmode */
751 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
752 2, /* cost of moving MMX register */
753 {6, 6}, /* cost of loading MMX registers
754 in SImode and DImode */
755 {4, 4}, /* cost of storing MMX registers
756 in SImode and DImode */
757 2, /* cost of moving SSE register */
758 {6, 6, 6}, /* cost of loading SSE registers
759 in SImode, DImode and TImode */
760 {4, 4, 4}, /* cost of storing SSE registers
761 in SImode, DImode and TImode */
762 2, /* MMX or SSE register to integer */
763 128, /* size of prefetch block */
764 8, /* number of parallel prefetches */
765 3, /* Branch cost */
766 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
767 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
768 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
769 COSTS_N_INSNS (1), /* cost of FABS instruction. */
770 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
771 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
772};
773
774/* Generic64 should produce code tuned for Nocona and K8. */
775static const
776struct processor_costs generic64_cost = {
777 COSTS_N_INSNS (1), /* cost of an add instruction */
778 /* On all chips taken into consideration, lea takes 2 cycles or more. With
779 this cost, however, our current implementation of synth_mult results in
780 the use of unnecessary temporary registers, causing regressions on several
781 SPECfp benchmarks. */
782 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
783 COSTS_N_INSNS (1), /* variable shift costs */
784 COSTS_N_INSNS (1), /* constant shift costs */
785 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
786 COSTS_N_INSNS (4), /* HI */
787 COSTS_N_INSNS (3), /* SI */
788 COSTS_N_INSNS (4), /* DI */
789 COSTS_N_INSNS (2)}, /* other */
790 0, /* cost of multiply per each bit set */
791 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
792 COSTS_N_INSNS (26), /* HI */
793 COSTS_N_INSNS (42), /* SI */
794 COSTS_N_INSNS (74), /* DI */
795 COSTS_N_INSNS (74)}, /* other */
796 COSTS_N_INSNS (1), /* cost of movsx */
797 COSTS_N_INSNS (1), /* cost of movzx */
798 8, /* "large" insn */
799 17, /* MOVE_RATIO */
800 4, /* cost for loading QImode using movzbl */
801 {4, 4, 4}, /* cost of loading integer registers
802 in QImode, HImode and SImode.
803 Relative to reg-reg move (2). */
804 {4, 4, 4}, /* cost of storing integer registers */
805 4, /* cost of reg,reg fld/fst */
806 {12, 12, 12}, /* cost of loading fp registers
807 in SFmode, DFmode and XFmode */
808 {6, 6, 8}, /* cost of storing fp registers
809 in SFmode, DFmode and XFmode */
810 2, /* cost of moving MMX register */
811 {8, 8}, /* cost of loading MMX registers
812 in SImode and DImode */
813 {8, 8}, /* cost of storing MMX registers
814 in SImode and DImode */
815 2, /* cost of moving SSE register */
816 {8, 8, 8}, /* cost of loading SSE registers
817 in SImode, DImode and TImode */
818 {8, 8, 8}, /* cost of storing SSE registers
819 in SImode, DImode and TImode */
820 5, /* MMX or SSE register to integer */
821 64, /* size of prefetch block */
822 6, /* number of parallel prefetches */
823 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
824 is increased to the perhaps more appropriate value of 5. */
825 3, /* Branch cost */
826 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
827 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
828 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
829 COSTS_N_INSNS (8), /* cost of FABS instruction. */
830 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
831 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
832};
833
834/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
835static const
836struct processor_costs generic32_cost = {
837 COSTS_N_INSNS (1), /* cost of an add instruction */
838 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
839 COSTS_N_INSNS (1), /* variable shift costs */
840 COSTS_N_INSNS (1), /* constant shift costs */
841 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
842 COSTS_N_INSNS (4), /* HI */
843 COSTS_N_INSNS (3), /* SI */
844 COSTS_N_INSNS (4), /* DI */
845 COSTS_N_INSNS (2)}, /* other */
846 0, /* cost of multiply per each bit set */
847 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
848 COSTS_N_INSNS (26), /* HI */
849 COSTS_N_INSNS (42), /* SI */
850 COSTS_N_INSNS (74), /* DI */
851 COSTS_N_INSNS (74)}, /* other */
852 COSTS_N_INSNS (1), /* cost of movsx */
853 COSTS_N_INSNS (1), /* cost of movzx */
854 8, /* "large" insn */
855 17, /* MOVE_RATIO */
856 4, /* cost for loading QImode using movzbl */
857 {4, 4, 4}, /* cost of loading integer registers
858 in QImode, HImode and SImode.
859 Relative to reg-reg move (2). */
860 {4, 4, 4}, /* cost of storing integer registers */
861 4, /* cost of reg,reg fld/fst */
862 {12, 12, 12}, /* cost of loading fp registers
863 in SFmode, DFmode and XFmode */
864 {6, 6, 8}, /* cost of storing fp registers
865 in SFmode, DFmode and XFmode */
866 2, /* cost of moving MMX register */
867 {8, 8}, /* cost of loading MMX registers
868 in SImode and DImode */
869 {8, 8}, /* cost of storing MMX registers
870 in SImode and DImode */
871 2, /* cost of moving SSE register */
872 {8, 8, 8}, /* cost of loading SSE registers
873 in SImode, DImode and TImode */
874 {8, 8, 8}, /* cost of storing SSE registers
875 in SImode, DImode and TImode */
876 5, /* MMX or SSE register to integer */
877 64, /* size of prefetch block */
878 6, /* number of parallel prefetches */
879 3, /* Branch cost */
880 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
881 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
882 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
883 COSTS_N_INSNS (8), /* cost of FABS instruction. */
884 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
885 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
886};
887
888const struct processor_costs *ix86_cost = &pentium_cost;
889
890/* Processor feature/optimization bitmasks. */
891#define m_386 (1<<PROCESSOR_I386)
892#define m_486 (1<<PROCESSOR_I486)
893#define m_PENT (1<<PROCESSOR_PENTIUM)
894#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
895#define m_GEODE (1<<PROCESSOR_GEODE)
896#define m_K6_GEODE (m_K6 | m_GEODE)
897#define m_K6 (1<<PROCESSOR_K6)
898#define m_ATHLON (1<<PROCESSOR_ATHLON)
899#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
900#define m_K8 (1<<PROCESSOR_K8)
901#define m_ATHLON_K8 (m_K8 | m_ATHLON)
902#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
903#define m_NOCONA (1<<PROCESSOR_NOCONA)
904#define m_CORE2 (1<<PROCESSOR_CORE2)
905#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
906#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
907#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
908#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
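/* Illustrative sketch: each x86_* tuning variable below is a bitmask over the
   m_* processor bits above.  Target macros in i386.h test the mask against
   the bit of the CPU currently being tuned for, roughly along the lines of

       #define TUNEMASK (1 << ix86_tune)
       #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   (the macro names shown here are only indicative; see i386.h for the actual
   definitions).  A feature is thus enabled for a CPU exactly when that CPU's
   m_* bit is set in the mask.  */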
909
910 /* Generic instruction choice should be a common subset of the supported CPUs
911 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
912
913 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
914 Generic64 seems like a good code size tradeoff. We can't enable it for 32bit
915 generic because it does not work well with PPro based chips. */
916const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
917 | m_GENERIC64;
918const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
919 | m_NOCONA | m_CORE2 | m_GENERIC;
920const int x86_zero_extend_with_and = m_486 | m_PENT;
921 /* Enable zero-extending integer registers to avoid partial-register dependencies. */
922const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
923 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
924const int x86_double_with_add = ~m_386;
925const int x86_use_bit_test = m_386;
926const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
927 | m_K6 | m_CORE2 | m_GENERIC;
928const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
929 | m_NOCONA;
930const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
931const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
932 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
933 /* Branch hints were put in the P4 based on simulation results, but
934 after the P4 was released, no performance benefit was observed from
935 branch hints; they also increase code size. As a result,
936 icc never generates branch hints. */
937const int x86_branch_hints = 0;
938const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
939 /*m_GENERIC | m_ATHLON_K8 ? */
940 /* We probably ought to watch for partial register stalls on the Generic32
941 compilation setting as well. However, in the current implementation the
942 partial register stalls are not eliminated very well - they can
943 be introduced via subregs synthesized by combine and can happen
944 in caller/callee saving sequences.
945 Because this option pays back little on PPro based chips and conflicts
946 with the partial reg. dependencies used by Athlon/P4 based chips, it is better
947 to leave it off for generic32 for now. */
948const int x86_partial_reg_stall = m_PPRO;
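/* A classic partial register stall on PPro-class cores is a narrow write
   followed by a wide read of the same register, for example (illustrative
   AT&T syntax):

       movb  $1, %al         # writes only AL
       addl  %eax, %ebx      # reads all of EAX -> stalls waiting for the merge

   Zero-extending the narrow write (movzbl) avoids the stall, which is what
   x86_movx above is about.  */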
949const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
950const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
951const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
952 | m_CORE2 | m_GENERIC);
953const int x86_use_mov0 = m_K6;
954const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
955const int x86_read_modify_write = ~m_PENT;
956const int x86_read_modify = ~(m_PENT | m_PPRO);
957const int x86_split_long_moves = m_PPRO;
958const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
959 | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
960 /* m_PENT4 ? */
961const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
962const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
963const int x86_qimode_math = ~(0);
964const int x86_promote_qi_regs = 0;
965 /* On PPro this flag is meant to avoid partial register stalls. Just like
966 x86_partial_reg_stall, this option might be considered for Generic32
967 if our scheme for avoiding partial stalls were more effective. */
968const int x86_himode_math = ~(m_PPRO);
969const int x86_promote_hi_regs = m_PPRO;
970/* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
971const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
972 | m_CORE2 | m_GENERIC;
973const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
974 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
975const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
976 | m_CORE2 | m_GENERIC;
977const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
978 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
979/* Enable if integer moves are preferred for DFmode copies */
980const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
981 | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
982const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
983 | m_CORE2 | m_GENERIC;
984const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
985 | m_CORE2 | m_GENERIC;
986/* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
987 for outgoing arguments will be computed and placed into the variable
988 `current_function_outgoing_args_size'. No space will be pushed onto the stack
989 for each call; instead, the function prologue should increase the stack frame
990 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
991 not proper. */
992const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
993 | m_NOCONA | m_PPRO | m_CORE2
994 | m_GENERIC;
995const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
996const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
997const int x86_shift1 = ~m_486;
998const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
999 | m_ATHLON_K8_AMDFAM10 | m_PENT4
1000 | m_NOCONA | m_CORE2 | m_GENERIC;
1001 /* In the Generic model there is a conflict between PPro/Pentium4 based chips,
1002 which treat 128bit SSE registers as single units, and K8 based chips, which
1003 divide SSE registers into two 64bit halves.
1004 x86_sse_partial_reg_dependency promotes all store destinations to 128bit
1005 to allow register renaming on 128bit SSE units, but usually results in one
1006 extra micro-op on 64bit SSE units. Experimental results show that disabling
1007 this option on P4 brings over a 20% SPECfp regression, while enabling it on
1008 K8 brings roughly a 2.4% regression that can be partly masked by careful scheduling
1009 of moves. */
1010const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1011 | m_GENERIC | m_AMDFAM10;
1012 /* Set for machines where the type and dependencies are resolved on SSE
1013 register parts instead of whole registers, so we may maintain just the
1014 lower part of scalar values in the proper format, leaving the upper part
1015 undefined. */
1016const int x86_sse_split_regs = m_ATHLON_K8;
1017/* Code generation for scalar reg-reg moves of single and double precision data:
1018 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
1019 movaps reg, reg
1020 else
1021 movss reg, reg
1022 if (x86_sse_partial_reg_dependency == true)
1023 movapd reg, reg
1024 else
1025 movsd reg, reg
1026
1027 Code generation for scalar loads of double precision data:
1028 if (x86_sse_split_regs == true)
1029 movlpd mem, reg (gas syntax)
1030 else
1031 movsd mem, reg
1032
1033 Code generation for unaligned packed loads of single precision data
1034 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1035 if (x86_sse_unaligned_move_optimal)
1036 movups mem, reg
1037
1038 if (x86_sse_partial_reg_dependency == true)
1039 {
1040 xorps reg, reg
1041 movlps mem, reg
1042 movhps mem+8, reg
1043 }
1044 else
1045 {
1046 movlps mem, reg
1047 movhps mem+8, reg
1048 }
1049
1050 Code generation for unaligned packed loads of double precision data
1051 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1052 if (x86_sse_unaligned_move_optimal)
1053 movupd mem, reg
1054
1055 if (x86_sse_split_regs == true)
1056 {
1057 movlpd mem, reg
1058 movhpd mem+8, reg
1059 }
1060 else
1061 {
1062 movsd mem, reg
1063 movhpd mem+8, reg
1064 }
1065 */
1066const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
1067const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
1068const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1069const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
1070const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
1071const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1072
1073 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
1074 integer data in xmm registers, which results in pretty abysmal code. */
1075const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
1076
1077const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
1078 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1079/* Some CPU cores are not able to predict more than 4 branch instructions in
1080 the 16 byte window. */
1081const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1082 | m_NOCONA | m_CORE2 | m_GENERIC;
1083const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
1084 | m_CORE2 | m_GENERIC;
1085const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
1086/* Compare and exchange was added for 80486. */
1087const int x86_cmpxchg = ~m_386;
1088 /* Compare and exchange 8 bytes was added for the Pentium. */
1089const int x86_cmpxchg8b = ~(m_386 | m_486);
1090/* Exchange and add was added for 80486. */
1091const int x86_xadd = ~m_386;
1092/* Byteswap was added for 80486. */
1093const int x86_bswap = ~m_386;
1094const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1095
1096 /* If the average insn count for a single function invocation is
1097 lower than this constant, emit fast (but longer) prologue and
1098 epilogue code. */
1099#define FAST_PROLOGUE_INSN_COUNT 20
1100
1101/* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1102static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1103static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1104static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1105
1106/* Array of the smallest class containing reg number REGNO, indexed by
1107 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1108
1109enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1110{
1111 /* ax, dx, cx, bx */
1112 AREG, DREG, CREG, BREG,
1113 /* si, di, bp, sp */
1114 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1115 /* FP registers */
1116 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1117 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1118 /* arg pointer */
1119 NON_Q_REGS,
1120 /* flags, fpsr, dirflag, frame */
1121 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1122 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1123 SSE_REGS, SSE_REGS,
1124 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1125 MMX_REGS, MMX_REGS,
1126 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1127 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1128 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1129 SSE_REGS, SSE_REGS,
1130};
1131
1132/* The "default" register map used in 32bit mode. */
1133
1134int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1135{
1136 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1137 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1138 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1139 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1140 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1141 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1142 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1143};
1144
1145static int const x86_64_int_parameter_registers[6] =
1146{
1147 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1148 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1149};
1150
1151static int const x86_64_int_return_registers[4] =
1152{
1153 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1154};
1155
1156/* The "default" register map used in 64bit mode. */
1157int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1158{
1159 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1160 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1161 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1162 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1163 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1164 8,9,10,11,12,13,14,15, /* extended integer registers */
1165 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1166};
1167
1168/* Define the register numbers to be used in Dwarf debugging information.
1169 The SVR4 reference port C compiler uses the following register numbers
1170 in its Dwarf output code:
1171 0 for %eax (gcc regno = 0)
1172 1 for %ecx (gcc regno = 2)
1173 2 for %edx (gcc regno = 1)
1174 3 for %ebx (gcc regno = 3)
1175 4 for %esp (gcc regno = 7)
1176 5 for %ebp (gcc regno = 6)
1177 6 for %esi (gcc regno = 4)
1178 7 for %edi (gcc regno = 5)
1179 The following three DWARF register numbers are never generated by
1180 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1181 believes these numbers have these meanings.
1182 8 for %eip (no gcc equivalent)
1183 9 for %eflags (gcc regno = 17)
1184 10 for %trapno (no gcc equivalent)
1185 It is not at all clear how we should number the FP stack registers
1186 for the x86 architecture. If the version of SDB on x86/svr4 were
1187 a bit less brain dead with respect to floating-point then we would
1188 have a precedent to follow with respect to DWARF register numbers
1189 for x86 FP registers, but the SDB on x86/svr4 is so completely
1190 broken with respect to FP registers that it is hardly worth thinking
1191 of it as something to strive for compatibility with.
1192 The version of x86/svr4 SDB I have at the moment does (partially)
1193 seem to believe that DWARF register number 11 is associated with
1194 the x86 register %st(0), but that's about all. Higher DWARF
1195 register numbers don't seem to be associated with anything in
1196 particular, and even for DWARF regno 11, SDB only seems to under-
1197 stand that it should say that a variable lives in %st(0) (when
1198 asked via an `=' command) if we said it was in DWARF regno 11,
1199 but SDB still prints garbage when asked for the value of the
1200 variable in question (via a `/' command).
1201 (Also note that the labels SDB prints for various FP stack regs
1202 when doing an `x' command are all wrong.)
1203 Note that these problems generally don't affect the native SVR4
1204 C compiler because it doesn't allow the use of -O with -g and
1205 because when it is *not* optimizing, it allocates a memory
1206 location for each floating-point variable, and the memory
1207 location is what gets described in the DWARF AT_location
1208 attribute for the variable in question.
1209 Regardless of the severe mental illness of the x86/svr4 SDB, we
1210 do something sensible here and we use the following DWARF
1211 register numbers. Note that these are all stack-top-relative
1212 numbers.
1213 11 for %st(0) (gcc regno = 8)
1214 12 for %st(1) (gcc regno = 9)
1215 13 for %st(2) (gcc regno = 10)
1216 14 for %st(3) (gcc regno = 11)
1217 15 for %st(4) (gcc regno = 12)
1218 16 for %st(5) (gcc regno = 13)
1219 17 for %st(6) (gcc regno = 14)
1220 18 for %st(7) (gcc regno = 15)
1221*/
1222int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1223{
1224 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1225 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1226 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
1227 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1228 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1230 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1231};
1232
1233/* Test and compare insns in i386.md store the information needed to
1234 generate branch and scc insns here. */
1235
1236rtx ix86_compare_op0 = NULL_RTX;
1237rtx ix86_compare_op1 = NULL_RTX;
1238rtx ix86_compare_emitted = NULL_RTX;
1239
1240/* Size of the register save area. */
1241#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
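/* Worked example: assuming the usual 64bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8, the register save area is
   6*8 + 8*16 = 176 bytes, matching the x86-64 psABI va_list layout.  */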
1242
1243/* Define the structure for the machine field in struct function. */
1244
1245struct stack_local_entry GTY(())
1246{
1247 unsigned short mode;
1248 unsigned short n;
1249 rtx rtl;
1250 struct stack_local_entry *next;
1251};
1252
1253/* Structure describing stack frame layout.
1254 Stack grows downward:
1255
1256 [arguments]
1257 <- ARG_POINTER
1258 saved pc
1259
1260 saved frame pointer if frame_pointer_needed
1261 <- HARD_FRAME_POINTER
1262 [saved regs]
1263
1264 [padding1] \
1265 )
1266 [va_arg registers] (
1267 > to_allocate <- FRAME_POINTER
1268 [frame] (
1269 )
1270 [padding2] /
1271 */
1272struct ix86_frame
1273{
1274 int nregs;
1275 int padding1;
1276 int va_arg_size;
1277 HOST_WIDE_INT frame;
1278 int padding2;
1279 int outgoing_arguments_size;
1280 int red_zone_size;
1281
1282 HOST_WIDE_INT to_allocate;
1283 /* The offsets relative to ARG_POINTER. */
1284 HOST_WIDE_INT frame_pointer_offset;
1285 HOST_WIDE_INT hard_frame_pointer_offset;
1286 HOST_WIDE_INT stack_pointer_offset;
1287
1288 /* When save_regs_using_mov is set, emit prologue using
1289 move instead of push instructions. */
1290 bool save_regs_using_mov;
1291};
1292
1293/* Code model option. */
1294enum cmodel ix86_cmodel;
1295/* Asm dialect. */
1296enum asm_dialect ix86_asm_dialect = ASM_ATT;
1297/* TLS dialects. */
1298enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1299
1300/* Which unit we are generating floating point math for. */
1301enum fpmath_unit ix86_fpmath;
1302
1303 /* Which CPU we are scheduling for. */
1304enum processor_type ix86_tune;
1305/* Which instruction set architecture to use. */
1306enum processor_type ix86_arch;
1307
1308/* true if sse prefetch instruction is not NOOP. */
1309int x86_prefetch_sse;
1310
1311/* true if cmpxchg16b is supported. */
1312int x86_cmpxchg16b;
1313
1314/* ix86_regparm_string as a number */
1315static int ix86_regparm;
1316
1317/* -mstackrealign option */
1318extern int ix86_force_align_arg_pointer;
1319static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1320
1321/* Preferred alignment for stack boundary in bits. */
1322unsigned int ix86_preferred_stack_boundary;
1323
1324/* Values 1-5: see jump.c */
1325int ix86_branch_cost;
1326
1327/* Variables which are this size or smaller are put in the data/bss
1328 or ldata/lbss sections. */
1329
1330int ix86_section_threshold = 65536;
1331
1332/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1333char internal_label_prefix[16];
1334int internal_label_prefix_len;
1335
1336static bool ix86_handle_option (size_t, const char *, int);
1337static void output_pic_addr_const (FILE *, rtx, int);
1338static void put_condition_code (enum rtx_code, enum machine_mode,
1339 int, int, FILE *);
1340static const char *get_some_local_dynamic_name (void);
1341static int get_some_local_dynamic_name_1 (rtx *, void *);
1342static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1343static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1344 rtx *);
1345static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1346static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1347 enum machine_mode);
1348static rtx get_thread_pointer (int);
1349static rtx legitimize_tls_address (rtx, enum tls_model, int);
1350static void get_pc_thunk_name (char [32], unsigned int);
1351static rtx gen_push (rtx);
1352static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1353static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1354static struct machine_function * ix86_init_machine_status (void);
1355static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1356static int ix86_nsaved_regs (void);
1357static void ix86_emit_save_regs (void);
1358static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1359static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1360static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1361static HOST_WIDE_INT ix86_GOT_alias_set (void);
1362static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1363static rtx ix86_expand_aligntest (rtx, int);
1364static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1365static int ix86_issue_rate (void);
1366static int ix86_adjust_cost (rtx, rtx, rtx, int);
1367static int ia32_multipass_dfa_lookahead (void);
1368static void ix86_init_mmx_sse_builtins (void);
1369static rtx x86_this_parameter (tree);
1370static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1371 HOST_WIDE_INT, tree);
1372static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1373static void x86_file_start (void);
1374static void ix86_reorg (void);
1375static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1376static tree ix86_build_builtin_va_list (void);
1377static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1378 tree, int *, int);
1379static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1380static bool ix86_scalar_mode_supported_p (enum machine_mode);
1381static bool ix86_vector_mode_supported_p (enum machine_mode);
1382
1383static int ix86_address_cost (rtx);
1384static bool ix86_cannot_force_const_mem (rtx);
1385static rtx ix86_delegitimize_address (rtx);
1386
1387static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1388
1389struct builtin_description;
1390static rtx ix86_expand_sse_comi (const struct builtin_description *,
1391 tree, rtx);
1392static rtx ix86_expand_sse_compare (const struct builtin_description *,
1393 tree, rtx);
1394static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1395static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1396static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1397static rtx ix86_expand_store_builtin (enum insn_code, tree);
1398static rtx safe_vector_operand (rtx, enum machine_mode);
1399static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1400static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1401static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1402static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1403static int ix86_fp_comparison_cost (enum rtx_code code);
1404static unsigned int ix86_select_alt_pic_regnum (void);
1405static int ix86_save_reg (unsigned int, int);
1406static void ix86_compute_frame_layout (struct ix86_frame *);
1407static int ix86_comp_type_attributes (tree, tree);
1408static int ix86_function_regparm (tree, tree);
1409const struct attribute_spec ix86_attribute_table[];
1410static bool ix86_function_ok_for_sibcall (tree, tree);
1411static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1412static int ix86_value_regno (enum machine_mode, tree, tree);
1413static bool contains_128bit_aligned_vector_p (tree);
1414static rtx ix86_struct_value_rtx (tree, int);
1415static bool ix86_ms_bitfield_layout_p (tree);
1416static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1417static int extended_reg_mentioned_1 (rtx *, void *);
1418static bool ix86_rtx_costs (rtx, int, int, int *);
1419static int min_insn_size (rtx);
1420static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1421static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1422static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1423 tree, bool);
1424static void ix86_init_builtins (void);
1425static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1426static const char *ix86_mangle_fundamental_type (tree);
1427static tree ix86_stack_protect_fail (void);
1428static rtx ix86_internal_arg_pointer (void);
1429static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1430
1431/* This function is only used on Solaris. */
1432static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1433 ATTRIBUTE_UNUSED;
1434
1435 /* Register class used for passing a given 64bit part of the argument.
1436 These represent classes as documented by the psABI, with the exception
1437 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1438 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1439
1440 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1441 whenever possible (the upper half then contains padding).
1442 */
1443enum x86_64_reg_class
1444 {
1445 X86_64_NO_CLASS,
1446 X86_64_INTEGER_CLASS,
1447 X86_64_INTEGERSI_CLASS,
1448 X86_64_SSE_CLASS,
1449 X86_64_SSESF_CLASS,
1450 X86_64_SSEDF_CLASS,
1451 X86_64_SSEUP_CLASS,
1452 X86_64_X87_CLASS,
1453 X86_64_X87UP_CLASS,
1454 X86_64_COMPLEX_X87_CLASS,
1455 X86_64_MEMORY_CLASS
1456 };
1457static const char * const x86_64_reg_class_name[] = {
1458 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1459 "sseup", "x87", "x87up", "cplx87", "no"
1460};
1461
1462#define MAX_CLASSES 4
1463
1464/* Table of constants used by fldpi, fldln2, etc.... */
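/* (Presumably log10(2), ln(2), log2(e), log2(10) and pi, matching the x87
   load-constant instructions fldlg2, fldln2, fldl2e, fldl2t and fldpi;
   see init_ext_80387_constants.)  */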
1465static REAL_VALUE_TYPE ext_80387_constants_table [5];
1466static bool ext_80387_constants_init = 0;
1467static void init_ext_80387_constants (void);
1468static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1469static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1470static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1471static section *x86_64_elf_select_section (tree decl, int reloc,
1472 unsigned HOST_WIDE_INT align)
1473 ATTRIBUTE_UNUSED;
1474
1475/* Initialize the GCC target structure. */
1476#undef TARGET_ATTRIBUTE_TABLE
1477#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1478#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1479# undef TARGET_MERGE_DECL_ATTRIBUTES
1480# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1481#endif
1482
1483#undef TARGET_COMP_TYPE_ATTRIBUTES
1484#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1485
1486#undef TARGET_INIT_BUILTINS
1487#define TARGET_INIT_BUILTINS ix86_init_builtins
1488#undef TARGET_EXPAND_BUILTIN
1489#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1490
1491#undef TARGET_ASM_FUNCTION_EPILOGUE
1492#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1493
1494#undef TARGET_ENCODE_SECTION_INFO
1495#ifndef SUBTARGET_ENCODE_SECTION_INFO
1496#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1497#else
1498#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1499#endif
1500
1501#undef TARGET_ASM_OPEN_PAREN
1502#define TARGET_ASM_OPEN_PAREN ""
1503#undef TARGET_ASM_CLOSE_PAREN
1504#define TARGET_ASM_CLOSE_PAREN ""
1505
1506#undef TARGET_ASM_ALIGNED_HI_OP
1507#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1508#undef TARGET_ASM_ALIGNED_SI_OP
1509#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1510#ifdef ASM_QUAD
1511#undef TARGET_ASM_ALIGNED_DI_OP
1512#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1513#endif
1514
1515#undef TARGET_ASM_UNALIGNED_HI_OP
1516#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1517#undef TARGET_ASM_UNALIGNED_SI_OP
1518#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1519#undef TARGET_ASM_UNALIGNED_DI_OP
1520#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1521
1522#undef TARGET_SCHED_ADJUST_COST
1523#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1524#undef TARGET_SCHED_ISSUE_RATE
1525#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1526#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1527#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1528 ia32_multipass_dfa_lookahead
1529
1530#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1531#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1532
1533#ifdef HAVE_AS_TLS
1534#undef TARGET_HAVE_TLS
1535#define TARGET_HAVE_TLS true
1536#endif
1537#undef TARGET_CANNOT_FORCE_CONST_MEM
1538#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1539#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1540#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1541
1542#undef TARGET_DELEGITIMIZE_ADDRESS
1543#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1544
1545#undef TARGET_MS_BITFIELD_LAYOUT_P
1546#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1547
1548#if TARGET_MACHO
1549#undef TARGET_BINDS_LOCAL_P
1550#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1551#endif
1552
1553#undef TARGET_ASM_OUTPUT_MI_THUNK
1554#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1555#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1556#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1557
1558#undef TARGET_ASM_FILE_START
1559#define TARGET_ASM_FILE_START x86_file_start
1560
1561#undef TARGET_DEFAULT_TARGET_FLAGS
1562#define TARGET_DEFAULT_TARGET_FLAGS \
1563 (TARGET_DEFAULT \
1564 | TARGET_64BIT_DEFAULT \
1565 | TARGET_SUBTARGET_DEFAULT \
1566 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1567
1568#undef TARGET_HANDLE_OPTION
1569#define TARGET_HANDLE_OPTION ix86_handle_option
1570
1571#undef TARGET_RTX_COSTS
1572#define TARGET_RTX_COSTS ix86_rtx_costs
1573#undef TARGET_ADDRESS_COST
1574#define TARGET_ADDRESS_COST ix86_address_cost
1575
1576#undef TARGET_FIXED_CONDITION_CODE_REGS
1577#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1578#undef TARGET_CC_MODES_COMPATIBLE
1579#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1580
1581#undef TARGET_MACHINE_DEPENDENT_REORG
1582#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1583
1584#undef TARGET_BUILD_BUILTIN_VA_LIST
1585#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1586
1587#undef TARGET_MD_ASM_CLOBBERS
1588#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1589
1590#undef TARGET_PROMOTE_PROTOTYPES
1591#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1592#undef TARGET_STRUCT_VALUE_RTX
1593#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1594#undef TARGET_SETUP_INCOMING_VARARGS
1595#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1596#undef TARGET_MUST_PASS_IN_STACK
1597#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1598#undef TARGET_PASS_BY_REFERENCE
1599#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1600#undef TARGET_INTERNAL_ARG_POINTER
1601#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1602#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1603#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1604
1605#undef TARGET_GIMPLIFY_VA_ARG_EXPR
1606#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1607
1608#undef TARGET_SCALAR_MODE_SUPPORTED_P
1609#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1610
1611#undef TARGET_VECTOR_MODE_SUPPORTED_P
1612#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1613
1614#ifdef HAVE_AS_TLS
1615#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1616#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1617#endif
1618
1619#ifdef SUBTARGET_INSERT_ATTRIBUTES
1620#undef TARGET_INSERT_ATTRIBUTES
1621#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1622#endif
1623
1624#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1625#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1626
1627#undef TARGET_STACK_PROTECT_FAIL
1628#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1629
1630#undef TARGET_FUNCTION_VALUE
1631#define TARGET_FUNCTION_VALUE ix86_function_value
1632
1633struct gcc_target targetm = TARGET_INITIALIZER;
1634
1635
1636/* The svr4 ABI for the i386 says that records and unions are returned
1637 in memory. */
1638#ifndef DEFAULT_PCC_STRUCT_RETURN
1639#define DEFAULT_PCC_STRUCT_RETURN 1
1640#endif
1641
1642/* Implement TARGET_HANDLE_OPTION. */
1643
1644static bool
1645ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1646{
1647 switch (code)
1648 {
1649 case OPT_m3dnow:
1650 if (!value)
1651 {
1652 target_flags &= ~MASK_3DNOW_A;
1653 target_flags_explicit |= MASK_3DNOW_A;
1654 }
1655 return true;
1656
1657 case OPT_mmmx:
1658 if (!value)
1659 {
1660 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1661 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1662 }
1663 return true;
1664
1665 case OPT_msse:
1666 if (!value)
1667 {
1668 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1669 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1670 }
1671 return true;
1672
1673 case OPT_msse2:
1674 if (!value)
1675 {
1676 target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1677 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1678 }
1679 return true;
1680
1681 case OPT_msse3:
1682 if (!value)
1683 {
1684 target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
1685 target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A;
1686 }
1687 return true;
1688
1689 case OPT_maes:
1690 if (!value)
1691 {
1692 target_flags &= ~MASK_AES;
1693 target_flags_explicit |= MASK_AES;
1694 }
1695 return true;
1696
1697 default:
1698 return true;
1699 }
1700}
1701
1702/* Sometimes certain combinations of command options do not make
1703 sense on a particular target machine. You can define a macro
1704 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1705 defined, is executed once just after all the command options have
1706 been parsed.
1707
1708 Don't use this macro to turn on various extra optimizations for
1709 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1710
1711void
1712override_options (void)
1713{
1714 int i;
1715 int ix86_tune_defaulted = 0;
1716
1717 /* Comes from final.c -- no real reason to change it. */
1718#define MAX_CODE_ALIGN 16
1719
1720 static struct ptt
1721 {
1722 const struct processor_costs *cost; /* Processor costs */
1723 const int target_enable; /* Target flags to enable. */
1724 const int target_disable; /* Target flags to disable. */
1725 const int align_loop; /* Default alignments. */
1726 const int align_loop_max_skip;
1727 const int align_jump;
1728 const int align_jump_max_skip;
1729 const int align_func;
1730 }
1731 const processor_target_table[PROCESSOR_max] =
1732 {
1733 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1734 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1735 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1736 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1737 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1738 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1739 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1740 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1741 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1742 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1743 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1744 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1745 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1746 {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32}
1747 };
1748
1749 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1750 static struct pta
1751 {
1752 const char *const name; /* processor name or nickname. */
1753 const enum processor_type processor;
1754 const enum pta_flags
1755 {
1756 PTA_SSE = 1,
1757 PTA_SSE2 = 2,
1758 PTA_SSE3 = 4,
1759 PTA_MMX = 8,
1760 PTA_PREFETCH_SSE = 16,
1761 PTA_3DNOW = 32,
1762 PTA_3DNOW_A = 64,
1763 PTA_64BIT = 128,
1764 PTA_SSSE3 = 256,
1765 PTA_CX16 = 512,
1766 PTA_POPCNT = 1024,
1767 PTA_ABM = 2048,
1768 PTA_SSE4A = 4096
1769 } flags;
1770 }
1771 const processor_alias_table[] =
1772 {
1773 {"i386", PROCESSOR_I386, 0},
1774 {"i486", PROCESSOR_I486, 0},
1775 {"i586", PROCESSOR_PENTIUM, 0},
1776 {"pentium", PROCESSOR_PENTIUM, 0},
1777 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1778 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1779 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1780 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1781 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1782 {"i686", PROCESSOR_PENTIUMPRO, 0},
1783 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1784 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1785 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1786 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1787 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1788 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1789 | PTA_MMX | PTA_PREFETCH_SSE},
1790 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1791 | PTA_MMX | PTA_PREFETCH_SSE},
1792 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1793 | PTA_MMX | PTA_PREFETCH_SSE},
1794 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1795 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1796 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1797 | PTA_64BIT | PTA_MMX
1798 | PTA_PREFETCH_SSE | PTA_CX16},
1799 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1800 | PTA_3DNOW_A},
1801 {"k6", PROCESSOR_K6, PTA_MMX},
1802 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1803 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1804 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1805 | PTA_3DNOW_A},
1806 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1807 | PTA_3DNOW | PTA_3DNOW_A},
1808 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1809 | PTA_3DNOW_A | PTA_SSE},
1810 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1811 | PTA_3DNOW_A | PTA_SSE},
1812 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1813 | PTA_3DNOW_A | PTA_SSE},
1814 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1815 | PTA_SSE | PTA_SSE2 },
1816 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1817 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1818 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1819 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1820 | PTA_SSE3 },
1821 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1822 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1823 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1824 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1825 | PTA_SSE3 },
1826 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1827 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1828 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1829 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1830 | PTA_SSE3 },
1831 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1832 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1833 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1834 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1835 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1836 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1837 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1838 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1839 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1840 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1841 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1842 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1843 };
1844
1845 int const pta_size = ARRAY_SIZE (processor_alias_table);
1846
1847#ifdef SUBTARGET_OVERRIDE_OPTIONS
1848 SUBTARGET_OVERRIDE_OPTIONS;
1849#endif
1850
1851#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1852 SUBSUBTARGET_OVERRIDE_OPTIONS;
1853#endif
1854
1855  /* -fPIC is the default for 64-bit Mach-O.  */
1856 if (TARGET_MACHO && TARGET_64BIT)
1857 flag_pic = 2;
1858
1859 /* Set the default values for switches whose default depends on TARGET_64BIT
1860 in case they weren't overwritten by command line options. */
1861 if (TARGET_64BIT)
1862 {
1863 /* Mach-O doesn't support omitting the frame pointer for now. */
1864 if (flag_omit_frame_pointer == 2)
1865 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1866 if (flag_asynchronous_unwind_tables == 2)
1867 flag_asynchronous_unwind_tables = 1;
1868 if (flag_pcc_struct_return == 2)
1869 flag_pcc_struct_return = 0;
1870 }
1871 else
1872 {
1873 if (flag_omit_frame_pointer == 2)
1874 flag_omit_frame_pointer = 0;
1875 if (flag_asynchronous_unwind_tables == 2)
1876 flag_asynchronous_unwind_tables = 0;
1877 if (flag_pcc_struct_return == 2)
1878 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1879 }
1880
1881 /* Need to check -mtune=generic first. */
1882 if (ix86_tune_string)
1883 {
1884 if (!strcmp (ix86_tune_string, "generic")
1885 || !strcmp (ix86_tune_string, "i686")
1886 /* As special support for cross compilers we read -mtune=native
1887 as -mtune=generic. With native compilers we won't see the
1888 -mtune=native, as it was changed by the driver. */
1889 || !strcmp (ix86_tune_string, "native"))
1890 {
1891 if (TARGET_64BIT)
1892 ix86_tune_string = "generic64";
1893 else
1894 ix86_tune_string = "generic32";
1895 }
1896 else if (!strncmp (ix86_tune_string, "generic", 7))
1897 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1898 }
1899 else
1900 {
1901 if (ix86_arch_string)
1902 ix86_tune_string = ix86_arch_string;
1903 if (!ix86_tune_string)
1904 {
1905 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1906 ix86_tune_defaulted = 1;
1907 }
1908
1909 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1910 need to use a sensible tune option. */
1911 if (!strcmp (ix86_tune_string, "generic")
1912 || !strcmp (ix86_tune_string, "x86-64")
1913 || !strcmp (ix86_tune_string, "i686"))
1914 {
1915 if (TARGET_64BIT)
1916 ix86_tune_string = "generic64";
1917 else
1918 ix86_tune_string = "generic32";
1919 }
1920 }
1921 if (!strcmp (ix86_tune_string, "x86-64"))
1922 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1923 "-mtune=generic instead as appropriate.");
1924
1925 if (!ix86_arch_string)
1926 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i486";
1927 if (!strcmp (ix86_arch_string, "generic"))
1928 error ("generic CPU can be used only for -mtune= switch");
1929 if (!strncmp (ix86_arch_string, "generic", 7))
1930 error ("bad value (%s) for -march= switch", ix86_arch_string);
1931
1932 if (ix86_cmodel_string != 0)
1933 {
1934 if (!strcmp (ix86_cmodel_string, "small"))
1935 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1936 else if (!strcmp (ix86_cmodel_string, "medium"))
1937 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1938 else if (flag_pic)
1939 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1940 else if (!strcmp (ix86_cmodel_string, "32"))
1941 ix86_cmodel = CM_32;
1942 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1943 ix86_cmodel = CM_KERNEL;
1944 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1945 ix86_cmodel = CM_LARGE;
1946 else
1947 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1948 }
1949 else
1950 {
1951 ix86_cmodel = CM_32;
1952 if (TARGET_64BIT)
1953 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1954 }
1955 if (ix86_asm_string != 0)
1956 {
1957 if (! TARGET_MACHO
1958 && !strcmp (ix86_asm_string, "intel"))
1959 ix86_asm_dialect = ASM_INTEL;
1960 else if (!strcmp (ix86_asm_string, "att"))
1961 ix86_asm_dialect = ASM_ATT;
1962 else
1963 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1964 }
1965 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1966 error ("code model %qs not supported in the %s bit mode",
1967 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1968 if (ix86_cmodel == CM_LARGE)
1969 sorry ("code model %<large%> not supported yet");
1970 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1971 sorry ("%i-bit mode not compiled in",
1972 (target_flags & MASK_64BIT) ? 64 : 32);
1973
1974 for (i = 0; i < pta_size; i++)
1975 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1976 {
1977 ix86_arch = processor_alias_table[i].processor;
1978 /* Default cpu tuning to the architecture. */
1979 ix86_tune = ix86_arch;
1980 if (processor_alias_table[i].flags & PTA_MMX
1981 && !(target_flags_explicit & MASK_MMX))
1982 target_flags |= MASK_MMX;
1983 if (processor_alias_table[i].flags & PTA_3DNOW
1984 && !(target_flags_explicit & MASK_3DNOW))
1985 target_flags |= MASK_3DNOW;
1986 if (processor_alias_table[i].flags & PTA_3DNOW_A
1987 && !(target_flags_explicit & MASK_3DNOW_A))
1988 target_flags |= MASK_3DNOW_A;
1989 if (processor_alias_table[i].flags & PTA_SSE
1990 && !(target_flags_explicit & MASK_SSE))
1991 target_flags |= MASK_SSE;
1992 if (processor_alias_table[i].flags & PTA_SSE2
1993 && !(target_flags_explicit & MASK_SSE2))
1994 target_flags |= MASK_SSE2;
1995 if (processor_alias_table[i].flags & PTA_SSE3
1996 && !(target_flags_explicit & MASK_SSE3))
1997 target_flags |= MASK_SSE3;
1998 if (processor_alias_table[i].flags & PTA_SSSE3
1999 && !(target_flags_explicit & MASK_SSSE3))
2000 target_flags |= MASK_SSSE3;
2001 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2002 x86_prefetch_sse = true;
2003 if (processor_alias_table[i].flags & PTA_CX16)
2004 x86_cmpxchg16b = true;
2005 if (processor_alias_table[i].flags & PTA_POPCNT
2006 && !(target_flags_explicit & MASK_POPCNT))
2007 target_flags |= MASK_POPCNT;
2008 if (processor_alias_table[i].flags & PTA_ABM
2009 && !(target_flags_explicit & MASK_ABM))
2010 target_flags |= MASK_ABM;
2011 if (processor_alias_table[i].flags & PTA_SSE4A
2012 && !(target_flags_explicit & MASK_SSE4A))
2013 target_flags |= MASK_SSE4A;
2014 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2015 error ("CPU you selected does not support x86-64 "
2016 "instruction set");
2017 break;
2018 }
2019
2020 if (i == pta_size)
2021 error ("bad value (%s) for -march= switch", ix86_arch_string);
2022
2023 for (i = 0; i < pta_size; i++)
2024 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2025 {
2026 ix86_tune = processor_alias_table[i].processor;
2027 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2028 {
2029 if (ix86_tune_defaulted)
2030 {
2031 ix86_tune_string = "x86-64";
2032 for (i = 0; i < pta_size; i++)
2033 if (! strcmp (ix86_tune_string,
2034 processor_alias_table[i].name))
2035 break;
2036 ix86_tune = processor_alias_table[i].processor;
2037 }
2038 else
2039 error ("CPU you selected does not support x86-64 "
2040 "instruction set");
2041 }
2042 /* Intel CPUs have always interpreted SSE prefetch instructions as
2043 NOPs; so, we can enable SSE prefetch instructions even when
2044 -mtune (rather than -march) points us to a processor that has them.
2045 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2046 higher processors. */
2047 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2048 x86_prefetch_sse = true;
2049 break;
2050 }
2051 if (i == pta_size)
2052 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2053
2054 if (optimize_size)
2055 ix86_cost = &size_cost;
2056 else
2057 ix86_cost = processor_target_table[ix86_tune].cost;
2058 target_flags |= processor_target_table[ix86_tune].target_enable;
2059 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2060
2061 /* Arrange to set up i386_stack_locals for all functions. */
2062 init_machine_status = ix86_init_machine_status;
2063
2064 /* Validate -mregparm= value. */
2065 if (ix86_regparm_string)
2066 {
2067 i = atoi (ix86_regparm_string);
2068 if (i < 0 || i > REGPARM_MAX)
2069 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2070 else
2071 ix86_regparm = i;
2072 }
2073 else
2074 if (TARGET_64BIT)
2075 ix86_regparm = REGPARM_MAX;
2076
2077 /* If the user has provided any of the -malign-* options,
2078 warn and use that value only if -falign-* is not set.
2079 Remove this code in GCC 3.2 or later. */
2080 if (ix86_align_loops_string)
2081 {
2082 warning (0, "-malign-loops is obsolete, use -falign-loops");
2083 if (align_loops == 0)
2084 {
2085 i = atoi (ix86_align_loops_string);
2086 if (i < 0 || i > MAX_CODE_ALIGN)
2087 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2088 else
2089 align_loops = 1 << i;
2090 }
2091 }
2092
2093 if (ix86_align_jumps_string)
2094 {
2095 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2096 if (align_jumps == 0)
2097 {
2098 i = atoi (ix86_align_jumps_string);
2099 if (i < 0 || i > MAX_CODE_ALIGN)
2100	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2101 else
2102 align_jumps = 1 << i;
2103 }
2104 }
2105
2106 if (ix86_align_funcs_string)
2107 {
2108 warning (0, "-malign-functions is obsolete, use -falign-functions");
2109 if (align_functions == 0)
2110 {
2111 i = atoi (ix86_align_funcs_string);
2112 if (i < 0 || i > MAX_CODE_ALIGN)
2113	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2114 else
2115 align_functions = 1 << i;
2116 }
2117 }
2118
2119 /* Default align_* from the processor table. */
2120 if (align_loops == 0)
2121 {
2122 align_loops = processor_target_table[ix86_tune].align_loop;
2123 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2124 }
2125 if (align_jumps == 0)
2126 {
2127 align_jumps = processor_target_table[ix86_tune].align_jump;
2128 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2129 }
2130 if (align_functions == 0)
2131 {
2132 align_functions = processor_target_table[ix86_tune].align_func;
2133 }
2134
2135 /* Validate -mbranch-cost= value, or provide default. */
2136 ix86_branch_cost = ix86_cost->branch_cost;
2137 if (ix86_branch_cost_string)
2138 {
2139 i = atoi (ix86_branch_cost_string);
2140 if (i < 0 || i > 5)
2141 error ("-mbranch-cost=%d is not between 0 and 5", i);
2142 else
2143 ix86_branch_cost = i;
2144 }
2145 if (ix86_section_threshold_string)
2146 {
2147 i = atoi (ix86_section_threshold_string);
2148 if (i < 0)
2149 error ("-mlarge-data-threshold=%d is negative", i);
2150 else
2151 ix86_section_threshold = i;
2152 }
2153
2154 if (ix86_tls_dialect_string)
2155 {
2156 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2157 ix86_tls_dialect = TLS_DIALECT_GNU;
2158 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2159 ix86_tls_dialect = TLS_DIALECT_GNU2;
2160 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2161 ix86_tls_dialect = TLS_DIALECT_SUN;
2162 else
2163 error ("bad value (%s) for -mtls-dialect= switch",
2164 ix86_tls_dialect_string);
2165 }
2166
2167 /* Keep nonleaf frame pointers. */
2168 if (flag_omit_frame_pointer)
2169 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2170 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2171 flag_omit_frame_pointer = 1;
2172
2173 /* If we're doing fast math, we don't care about comparison order
2174 wrt NaNs. This lets us use a shorter comparison sequence. */
2175 if (flag_finite_math_only)
2176 target_flags &= ~MASK_IEEE_FP;
2177
2178 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2179 since the insns won't need emulation. */
2180 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2181 target_flags &= ~MASK_NO_FANCY_MATH_387;
2182
2183 /* Likewise, if the target doesn't have a 387, or we've specified
2184 software floating point, don't use 387 inline intrinsics. */
2185 if (!TARGET_80387)
2186 target_flags |= MASK_NO_FANCY_MATH_387;
2187
2188 /* Turn on SSE3 builtins for -mssse3. */
2189 if (TARGET_SSSE3)
2190 target_flags |= MASK_SSE3;
2191
2192 /* Turn on SSE3 builtins for -msse4a. */
2193 if (TARGET_SSE4A)
2194 target_flags |= MASK_SSE3;
2195
2196 /* Turn on SSE2 builtins for -msse3. */
2197 if (TARGET_SSE3)
2198 target_flags |= MASK_SSE2;
2199
2200 /* Turn on SSE2 builtins for -maes. */
2201 if (TARGET_AES)
2202 target_flags |= MASK_SSE2;
2203
2204 /* Turn on SSE builtins for -msse2. */
2205 if (TARGET_SSE2)
2206 target_flags |= MASK_SSE;
2207
2208 /* Turn on MMX builtins for -msse. */
2209 if (TARGET_SSE)
2210 {
2211 target_flags |= MASK_MMX & ~target_flags_explicit;
2212 x86_prefetch_sse = true;
2213 }
2214
2215 /* Turn on MMX builtins for 3Dnow. */
2216 if (TARGET_3DNOW)
2217 target_flags |= MASK_MMX;
2218
2219 /* Turn on POPCNT builtins for -mabm. */
2220 if (TARGET_ABM)
2221 target_flags |= MASK_POPCNT;
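  /* Taken together, the cascade above means each ISA option implies its
     predecessors: e.g. -mssse3 also enables SSE3, SSE2 and SSE (and MMX,
     unless MMX was explicitly disabled on the command line).  */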
2222
2223 if (TARGET_64BIT)
2224 {
2225 if (TARGET_ALIGN_DOUBLE)
2226 error ("-malign-double makes no sense in the 64bit mode");
2227 if (TARGET_RTD)
2228 error ("-mrtd calling convention not supported in the 64bit mode");
2229
2230 /* Enable by default the SSE and MMX builtins. Do allow the user to
2231 explicitly disable any of these. In particular, disabling SSE and
2232 MMX for kernel code is extremely useful. */
2233 target_flags
2234 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2235 & ~target_flags_explicit);
2236 }
2237 else
2238 {
2239      /* The i386 ABI does not specify a red zone.  It still makes sense to use
2240         one when the programmer takes care to keep the stack from being destroyed.  */
2241 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2242 target_flags |= MASK_NO_RED_ZONE;
2243 }
2244
2245  /* Validate -mpreferred-stack-boundary= value, or provide default.
2246     The default of 128 bits is for Pentium III's SSE __m128.  We can't
2247     lower it even when optimizing for size; otherwise we could not mix
2248     object files compiled with -Os and -On.  */
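  /* For example, -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
     = 128 bits, i.e. the default 16 byte stack alignment.  */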
2249 ix86_preferred_stack_boundary = 128;
2250 if (ix86_preferred_stack_boundary_string)
2251 {
2252 i = atoi (ix86_preferred_stack_boundary_string);
2253 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2254 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2255 TARGET_64BIT ? 4 : 2);
2256 else
2257 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2258 }
2259
2260 /* Accept -msseregparm only if at least SSE support is enabled. */
2261 if (TARGET_SSEREGPARM
2262 && ! TARGET_SSE)
2263 error ("-msseregparm used without SSE enabled");
2264
2265 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2266
2267 if (ix86_fpmath_string != 0)
2268 {
2269 if (! strcmp (ix86_fpmath_string, "387"))
2270 ix86_fpmath = FPMATH_387;
2271 else if (! strcmp (ix86_fpmath_string, "sse"))
2272 {
2273 if (!TARGET_SSE)
2274 {
2275 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2276 ix86_fpmath = FPMATH_387;
2277 }
2278 else
2279 ix86_fpmath = FPMATH_SSE;
2280 }
2281 else if (! strcmp (ix86_fpmath_string, "387,sse")
2282 || ! strcmp (ix86_fpmath_string, "sse,387"))
2283 {
2284 if (!TARGET_SSE)
2285 {
2286 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2287 ix86_fpmath = FPMATH_387;
2288 }
2289 else if (!TARGET_80387)
2290 {
2291 warning (0, "387 instruction set disabled, using SSE arithmetics");
2292 ix86_fpmath = FPMATH_SSE;
2293 }
2294 else
2295 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2296 }
2297 else
2298 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2299 }
2300
2301 /* If the i387 is disabled, then do not return values in it. */
2302 if (!TARGET_80387)
2303 target_flags &= ~MASK_FLOAT_RETURNS;
2304
2305 if ((x86_accumulate_outgoing_args & TUNEMASK)
2306 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2307 && !optimize_size)
2308 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2309
2310 /* ??? Unwind info is not correct around the CFG unless either a frame
2311 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2312 unwind info generation to be aware of the CFG and propagating states
2313 around edges. */
2314 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2315 || flag_exceptions || flag_non_call_exceptions)
2316 && flag_omit_frame_pointer
2317 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2318 {
2319 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2320 warning (0, "unwind tables currently require either a frame pointer "
2321 "or -maccumulate-outgoing-args for correctness");
2322 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2323 }
2324
2325 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2326 {
2327 char *p;
2328 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2329 p = strchr (internal_label_prefix, 'X');
2330 internal_label_prefix_len = p - internal_label_prefix;
2331 *p = '\0';
2332 }
2333
2334 /* When scheduling description is not available, disable scheduler pass
2335 so it won't slow down the compilation and make x87 code slower. */
2336 if (!TARGET_SCHEDULE)
2337 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2338}
2339
2340/* Switch to the appropriate section for output of DECL.
2341 DECL is either a `VAR_DECL' node or a constant of some sort.
2342 RELOC indicates whether forming the initial value of DECL requires
2343 link-time relocations. */
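/* For example, with -mcmodel=medium a writable object that
   ix86_in_large_data_p considers large is placed in .ldata rather than
   .data, and a read-only one in .lrodata rather than .rodata.  */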
2344
2345static section *
2346x86_64_elf_select_section (tree decl, int reloc,
2347 unsigned HOST_WIDE_INT align)
2348{
2349 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2350 && ix86_in_large_data_p (decl))
2351 {
2352 const char *sname = NULL;
2353 unsigned int flags = SECTION_WRITE;
2354 switch (categorize_decl_for_section (decl, reloc))
2355 {
2356 case SECCAT_DATA:
2357 sname = ".ldata";
2358 break;
2359 case SECCAT_DATA_REL:
2360 sname = ".ldata.rel";
2361 break;
2362 case SECCAT_DATA_REL_LOCAL:
2363 sname = ".ldata.rel.local";
2364 break;
2365 case SECCAT_DATA_REL_RO:
2366 sname = ".ldata.rel.ro";
2367 break;
2368 case SECCAT_DATA_REL_RO_LOCAL:
2369 sname = ".ldata.rel.ro.local";
2370 break;
2371 case SECCAT_BSS:
2372 sname = ".lbss";
2373 flags |= SECTION_BSS;
2374 break;
2375 case SECCAT_RODATA:
2376 case SECCAT_RODATA_MERGE_STR:
2377 case SECCAT_RODATA_MERGE_STR_INIT:
2378 case SECCAT_RODATA_MERGE_CONST:
2379 sname = ".lrodata";
2380 flags = 0;
2381 break;
2382 case SECCAT_SRODATA:
2383 case SECCAT_SDATA:
2384 case SECCAT_SBSS:
2385 gcc_unreachable ();
2386 case SECCAT_TEXT:
2387 case SECCAT_TDATA:
2388 case SECCAT_TBSS:
2389	  /* We don't split these for the medium model.  Place them into
2390	     default sections and hope for the best.  */
2391 break;
2392 }
2393 if (sname)
2394 {
2395 /* We might get called with string constants, but get_named_section
2396 doesn't like them as they are not DECLs. Also, we need to set
2397 flags in that case. */
2398 if (!DECL_P (decl))
2399 return get_section (sname, flags, NULL);
2400 return get_named_section (decl, sname, reloc);
2401 }
2402 }
2403 return default_elf_select_section (decl, reloc, align);
2404}
2405
2406/* Build up a unique section name, expressed as a
2407 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2408 RELOC indicates whether the initial value of EXP requires
2409 link-time relocations. */
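/* For example, a large-data variable "foo" placed in writable data gets a
   section named ".ldata.foo", or ".gnu.linkonce.ld.foo" when only
   .gnu.linkonce (and not COMDAT groups) is available.  */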
2410
2411static void
2412x86_64_elf_unique_section (tree decl, int reloc)
2413{
2414 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2415 && ix86_in_large_data_p (decl))
2416 {
2417 const char *prefix = NULL;
2418 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2419 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2420
2421 switch (categorize_decl_for_section (decl, reloc))
2422 {
2423 case SECCAT_DATA:
2424 case SECCAT_DATA_REL:
2425 case SECCAT_DATA_REL_LOCAL:
2426 case SECCAT_DATA_REL_RO:
2427 case SECCAT_DATA_REL_RO_LOCAL:
2428 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2429 break;
2430 case SECCAT_BSS:
2431 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2432 break;
2433 case SECCAT_RODATA:
2434 case SECCAT_RODATA_MERGE_STR:
2435 case SECCAT_RODATA_MERGE_STR_INIT:
2436 case SECCAT_RODATA_MERGE_CONST:
2437 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2438 break;
2439 case SECCAT_SRODATA:
2440 case SECCAT_SDATA:
2441 case SECCAT_SBSS:
2442 gcc_unreachable ();
2443 case SECCAT_TEXT:
2444 case SECCAT_TDATA:
2445 case SECCAT_TBSS:
2446	  /* We don't split these for the medium model.  Place them into
2447	     default sections and hope for the best.  */
2448 break;
2449 }
2450 if (prefix)
2451 {
2452 const char *name;
2453 size_t nlen, plen;
2454 char *string;
2455 plen = strlen (prefix);
2456
2457 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2458 name = targetm.strip_name_encoding (name);
2459 nlen = strlen (name);
2460
2461 string = alloca (nlen + plen + 1);
2462 memcpy (string, prefix, plen);
2463 memcpy (string + plen, name, nlen + 1);
2464
2465 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2466 return;
2467 }
2468 }
2469 default_unique_section (decl, reloc);
2470}
2471
2472#ifdef COMMON_ASM_OP
2473/* This says how to output assembler code to declare an
2474 uninitialized external linkage data object.
2475
2476   For the medium model on x86-64 we need to use the .largecomm directive
2477   for large objects.  */
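/* For example, with -mcmodel=medium a common object whose size exceeds the
   -mlarge-data-threshold limit is emitted roughly as
       .largecomm  name,size,alignment
   instead of the usual .comm directive.  */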
2478void
2479x86_elf_aligned_common (FILE *file,
2480 const char *name, unsigned HOST_WIDE_INT size,
2481 int align)
2482{
2483 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2484 && size > (unsigned int)ix86_section_threshold)
2485 fprintf (file, ".largecomm\t");
2486 else
2487 fprintf (file, "%s", COMMON_ASM_OP);
2488 assemble_name (file, name);
2489 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2490 size, align / BITS_PER_UNIT);
2491}
2492
2493/* Utility function for targets to use in implementing
2494 ASM_OUTPUT_ALIGNED_BSS. */
2495
2496void
2497x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2498 const char *name, unsigned HOST_WIDE_INT size,
2499 int align)
2500{
2501 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2502 && size > (unsigned int)ix86_section_threshold)
2503 switch_to_section (get_named_section (decl, ".lbss", 0));
2504 else
2505 switch_to_section (bss_section);
2506 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2507#ifdef ASM_DECLARE_OBJECT_NAME
2508 last_assemble_variable_decl = decl;
2509 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2510#else
2511 /* Standard thing is just output label for the object. */
2512 ASM_OUTPUT_LABEL (file, name);
2513#endif /* ASM_DECLARE_OBJECT_NAME */
2514 ASM_OUTPUT_SKIP (file, size ? size : 1);
2515}
2516#endif
2517
2518void
2519optimization_options (int level, int size ATTRIBUTE_UNUSED)
2520{
2521 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2522 make the problem with not enough registers even worse. */
2523#ifdef INSN_SCHEDULING
2524 if (level > 1)
2525 flag_schedule_insns = 0;
2526#endif
2527
2528 if (TARGET_MACHO)
2529 /* The Darwin libraries never set errno, so we might as well
2530 avoid calling them when that's the only reason we would. */
2531 flag_errno_math = 0;
2532
2533  /* The default values of these switches depend on TARGET_64BIT,
2534     which is not known at this point.  Mark these values with 2 and
2535     let the user override them.  If there is no command line option
2536     specifying them, we will set the defaults in override_options.  */
2537 if (optimize >= 1)
2538 flag_omit_frame_pointer = 2;
2539 flag_pcc_struct_return = 2;
2540 flag_asynchronous_unwind_tables = 2;
2541#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2542 SUBTARGET_OPTIMIZATION_OPTIONS;
2543#endif
2544}
2545
2546/* Table of valid machine attributes. */
2547const struct attribute_spec ix86_attribute_table[] =
2548{
2549 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2550 /* Stdcall attribute says callee is responsible for popping arguments
2551 if they are not variable. */
2552 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2553 /* Fastcall attribute says callee is responsible for popping arguments
2554 if they are not variable. */
2555 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2556 /* Cdecl attribute says the callee is a normal C declaration */
2557 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2558 /* Regparm attribute specifies how many integer arguments are to be
2559 passed in registers. */
2560 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2561 /* Sseregparm attribute says we are using x86_64 calling conventions
2562 for FP arguments. */
2563 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2564 /* force_align_arg_pointer says this function realigns the stack at entry. */
2565 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2566 false, true, true, ix86_handle_cconv_attribute },
2567#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2568 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2569 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2570 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2571#endif
2572 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2573 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2574#ifdef SUBTARGET_ATTRIBUTE_TABLE
2575 SUBTARGET_ATTRIBUTE_TABLE,
2576#endif
2577 { NULL, 0, 0, false, false, false, NULL }
2578};
2579
2580/* Decide whether we can make a sibling call to a function. DECL is the
2581 declaration of the function being targeted by the call and EXP is the
2582 CALL_EXPR representing the call. */
2583
2584static bool
2585ix86_function_ok_for_sibcall (tree decl, tree exp)
2586{
2587 tree func;
2588 rtx a, b;
2589
2590 /* If we are generating position-independent code, we cannot sibcall
2591 optimize any indirect call, or a direct call to a global function,
2592 as the PLT requires %ebx be live. */
2593 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2594 return false;
2595
2596 if (decl)
2597 func = decl;
2598 else
2599 {
2600 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2601 if (POINTER_TYPE_P (func))
2602 func = TREE_TYPE (func);
2603 }
2604
2605  /* Check that the return value locations are the same.  For example,
2606     if we are returning floats on the 80387 register stack, we cannot
2607 make a sibcall from a function that doesn't return a float to a
2608 function that does or, conversely, from a function that does return
2609 a float to a function that doesn't; the necessary stack adjustment
2610 would not be executed. This is also the place we notice
2611 differences in the return value ABI. Note that it is ok for one
2612 of the functions to have void return type as long as the return
2613 value of the other is passed in a register. */
2614 a = ix86_function_value (TREE_TYPE (exp), func, false);
2615 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2616 cfun->decl, false);
2617 if (STACK_REG_P (a) || STACK_REG_P (b))
2618 {
2619 if (!rtx_equal_p (a, b))
2620 return false;
2621 }
2622 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2623 ;
2624 else if (!rtx_equal_p (a, b))
2625 return false;
2626
2627 /* If this call is indirect, we'll need to be able to use a call-clobbered
2628 register for the address of the target function. Make sure that all
2629 such registers are not used for passing parameters. */
2630 if (!decl && !TARGET_64BIT)
2631 {
2632 tree type;
2633
2634 /* We're looking at the CALL_EXPR, we need the type of the function. */
2635 type = TREE_OPERAND (exp, 0); /* pointer expression */
2636 type = TREE_TYPE (type); /* pointer type */
2637 type = TREE_TYPE (type); /* function type */
2638
2639 if (ix86_function_regparm (type, NULL) >= 3)
2640 {
2641 /* ??? Need to count the actual number of registers to be used,
2642 not the possible number of registers. Fix later. */
2643 return false;
2644 }
2645 }
2646
2647#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2648 /* Dllimport'd functions are also called indirectly. */
2649 if (decl && DECL_DLLIMPORT_P (decl)
2650 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2651 return false;
2652#endif
2653
2654  /* If we force-aligned the stack, then sibcalling would unalign the
2655 stack, which may break the called function. */
2656 if (cfun->machine->force_align_arg_pointer)
2657 return false;
2658
2659 /* Otherwise okay. That also includes certain types of indirect calls. */
2660 return true;
2661}
2662
2663/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2664 calling convention attributes;
2665 arguments as in struct attribute_spec.handler. */
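/* For example:
       void __attribute__((fastcall)) f (int a, int b);
   passes A and B in %ecx and %edx; combining fastcall with cdecl, stdcall
   or regparm is diagnosed below.  */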
2666
2667static tree
2668ix86_handle_cconv_attribute (tree *node, tree name,
2669 tree args,
2670 int flags ATTRIBUTE_UNUSED,
2671 bool *no_add_attrs)
2672{
2673 if (TREE_CODE (*node) != FUNCTION_TYPE
2674 && TREE_CODE (*node) != METHOD_TYPE
2675 && TREE_CODE (*node) != FIELD_DECL
2676 && TREE_CODE (*node) != TYPE_DECL)
2677 {
2678 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2679 IDENTIFIER_POINTER (name));
2680 *no_add_attrs = true;
2681 return NULL_TREE;
2682 }
2683
2684 /* Can combine regparm with all attributes but fastcall. */
2685 if (is_attribute_p ("regparm", name))
2686 {
2687 tree cst;
2688
2689 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2690 {
2691 error ("fastcall and regparm attributes are not compatible");
2692 }
2693
2694 cst = TREE_VALUE (args);
2695 if (TREE_CODE (cst) != INTEGER_CST)
2696 {
2697 warning (OPT_Wattributes,
2698 "%qs attribute requires an integer constant argument",
2699 IDENTIFIER_POINTER (name));
2700 *no_add_attrs = true;
2701 }
2702 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2703 {
2704 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2705 IDENTIFIER_POINTER (name), REGPARM_MAX);
2706 *no_add_attrs = true;
2707 }
2708
2709 if (!TARGET_64BIT
2710 && lookup_attribute (ix86_force_align_arg_pointer_string,
2711 TYPE_ATTRIBUTES (*node))
2712 && compare_tree_int (cst, REGPARM_MAX-1))
2713 {
2714 error ("%s functions limited to %d register parameters",
2715 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2716 }
2717
2718 return NULL_TREE;
2719 }
2720
2721 if (TARGET_64BIT)
2722 {
2723 warning (OPT_Wattributes, "%qs attribute ignored",
2724 IDENTIFIER_POINTER (name));
2725 *no_add_attrs = true;
2726 return NULL_TREE;
2727 }
2728
2729  /* Fastcall combines only with sseregparm; cdecl, stdcall and regparm are rejected below.  */
2730 if (is_attribute_p ("fastcall", name))
2731 {
2732 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2733 {
2734 error ("fastcall and cdecl attributes are not compatible");
2735 }
2736 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2737 {
2738 error ("fastcall and stdcall attributes are not compatible");
2739 }
2740 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2741 {
2742 error ("fastcall and regparm attributes are not compatible");
2743 }
2744 }
2745
2746  /* Stdcall combines with regparm and sseregparm; cdecl and fastcall
2747     are rejected below.  */
2748 else if (is_attribute_p ("stdcall", name))
2749 {
2750 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2751 {
2752 error ("stdcall and cdecl attributes are not compatible");
2753 }
2754 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2755 {
2756 error ("stdcall and fastcall attributes are not compatible");
2757 }
2758 }
2759
2760 /* Can combine cdecl with regparm and sseregparm. */
2761 else if (is_attribute_p ("cdecl", name))
2762 {
2763 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2764 {
2765 error ("stdcall and cdecl attributes are not compatible");
2766 }
2767 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2768 {
2769 error ("fastcall and cdecl attributes are not compatible");
2770 }
2771 }
2772
2773 /* Can combine sseregparm with all attributes. */
2774
2775 return NULL_TREE;
2776}
2777
2778/* Return 0 if the attributes for two types are incompatible, 1 if they
2779 are compatible, and 2 if they are nearly compatible (which causes a
2780 warning to be generated). */
2781
2782static int
2783ix86_comp_type_attributes (tree type1, tree type2)
2784{
2785 /* Check for mismatch of non-default calling convention. */
2786 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2787
2788 if (TREE_CODE (type1) != FUNCTION_TYPE)
2789 return 1;
2790
2791 /* Check for mismatched fastcall/regparm types. */
2792 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2793 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2794 || (ix86_function_regparm (type1, NULL)
2795 != ix86_function_regparm (type2, NULL)))
2796 return 0;
2797
2798 /* Check for mismatched sseregparm types. */
2799 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2800 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2801 return 0;
2802
2803 /* Check for mismatched return types (cdecl vs stdcall). */
2804 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2805 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2806 return 0;
2807
2808 return 1;
2809}
2810
2811/* Return the regparm value for a function with the indicated TYPE and DECL.
2812 DECL may be NULL when calling function indirectly
2813 or considering a libcall. */
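/* For example, in 32-bit code
       int __attribute__((regparm(3))) f (int a, int b, int c);
   receives A, B and C in %eax, %edx and %ecx instead of on the stack.  */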
2814
2815static int
2816ix86_function_regparm (tree type, tree decl)
2817{
2818 tree attr;
2819 int regparm = ix86_regparm;
2820 bool user_convention = false;
2821
2822 if (!TARGET_64BIT)
2823 {
2824 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2825 if (attr)
2826 {
2827 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2828 user_convention = true;
2829 }
2830
2831 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2832 {
2833 regparm = 2;
2834 user_convention = true;
2835 }
2836
2837 /* Use register calling convention for local functions when possible. */
2838 if (!TARGET_64BIT && !user_convention && decl
2839 && flag_unit_at_a_time && !profile_flag)
2840 {
2841 struct cgraph_local_info *i = cgraph_local_info (decl);
2842 if (i && i->local)
2843 {
2844 int local_regparm, globals = 0, regno;
2845
2846 /* Make sure no regparm register is taken by a global register
2847 variable. */
2848 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2849 if (global_regs[local_regparm])
2850 break;
2851 /* We can't use regparm(3) for nested functions as these use
2852	         the static chain pointer in the third argument.  */
2853 if (local_regparm == 3
2854 && decl_function_context (decl)
2855 && !DECL_NO_STATIC_CHAIN (decl))
2856 local_regparm = 2;
2857	      /* If the function realigns its stack pointer, the
2858 prologue will clobber %ecx. If we've already
2859 generated code for the callee, the callee
2860 DECL_STRUCT_FUNCTION is gone, so we fall back to
2861 scanning the attributes for the self-realigning
2862 property. */
2863 if ((DECL_STRUCT_FUNCTION (decl)
2864 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2865 || (!DECL_STRUCT_FUNCTION (decl)
2866 && lookup_attribute (ix86_force_align_arg_pointer_string,
2867 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2868 local_regparm = 2;
2869	      /* Each global register variable increases register pressure,
2870	         so the more global reg vars there are, the smaller the benefit
2871	         of the regparm optimization, unless the user requested it explicitly.  */
2872 for (regno = 0; regno < 6; regno++)
2873 if (global_regs[regno])
2874 globals++;
2875 local_regparm
2876 = globals < local_regparm ? local_regparm - globals : 0;
2877
2878 if (local_regparm > regparm)
2879 regparm = local_regparm;
2880 }
2881 }
2882 }
2883 return regparm;
2884}
2885
2886/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2887 DFmode (2) arguments in SSE registers for a function with the
2888 indicated TYPE and DECL. DECL may be NULL when calling function
2889 indirectly or considering a libcall. Otherwise return 0. */
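/* For example, with the sseregparm attribute (or -msseregparm) a float or
   double argument of such a function is passed in an SSE register such as
   %xmm0 rather than on the stack, provided SSE (and SSE2 for double) is
   enabled; otherwise an error is issued below.  */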
2890
2891static int
2892ix86_function_sseregparm (tree type, tree decl)
2893{
2894 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2895 by the sseregparm attribute. */
2896 if (TARGET_SSEREGPARM
2897 || (type
2898 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2899 {
2900 if (!TARGET_SSE)
2901 {
2902 if (decl)
2903 error ("Calling %qD with attribute sseregparm without "
2904 "SSE/SSE2 enabled", decl);
2905 else
2906 error ("Calling %qT with attribute sseregparm without "
2907 "SSE/SSE2 enabled", type);
2908 return 0;
2909 }
2910
2911 return 2;
2912 }
2913
2914 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2915 (and DFmode for SSE2) arguments in SSE registers,
2916 even for 32-bit targets. */
2917 if (!TARGET_64BIT && decl
2918 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2919 {
2920 struct cgraph_local_info *i = cgraph_local_info (decl);
2921 if (i && i->local)
2922 return TARGET_SSE2 ? 2 : 1;
2923 }
2924
2925 return 0;
2926}
2927
2928/* Return true if EAX is live at the start of the function. Used by
2929 ix86_expand_prologue to determine if we need special help before
2930 calling allocate_stack_worker. */
2931
2932static bool
2933ix86_eax_live_at_start_p (void)
2934{
2935 /* Cheat. Don't bother working forward from ix86_function_regparm
2936 to the function type to whether an actual argument is located in
2937 eax. Instead just look at cfg info, which is still close enough
2938 to correct at this point. This gives false positives for broken
2939 functions that might use uninitialized data that happens to be
2940 allocated in eax, but who cares? */
2941 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2942}
2943
2944/* Value is the number of bytes of arguments automatically
2945 popped when returning from a subroutine call.
2946 FUNDECL is the declaration node of the function (as a tree),
2947 FUNTYPE is the data type of the function (as a tree),
2948 or for a library call it is an identifier node for the subroutine name.
2949 SIZE is the number of bytes of arguments passed on the stack.
2950
2951 On the 80386, the RTD insn may be used to pop them if the number
2952 of args is fixed, but if the number is variable then the caller
2953 must pop them all. RTD can't be used for library calls now
2954 because the library is compiled with the Unix compiler.
2955 Use of RTD is a selectable option, since it is incompatible with
2956 standard Unix calling sequences. If the option is not selected,
2957 the caller must always pop the args.
2958
2959 The attribute stdcall is equivalent to RTD on a per module basis. */
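/* For example, a stdcall (or -mrtd) function with a fixed argument list of
   two ints returns with "ret $8", popping its 8 bytes of stack arguments,
   whereas a cdecl or variadic function leaves the popping to the caller.  */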
2960
2961int
2962ix86_return_pops_args (tree fundecl, tree funtype, int size)
2963{
2964 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2965
2966 /* Cdecl functions override -mrtd, and never pop the stack. */
2967 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2968
2969 /* Stdcall and fastcall functions will pop the stack if not
2970 variable args. */
2971 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2972 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2973 rtd = 1;
2974
2975 if (rtd
2976 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2977 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2978 == void_type_node)))
2979 return size;
2980 }
2981
2982 /* Lose any fake structure return argument if it is passed on the stack. */
2983 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2984 && !TARGET_64BIT
2985 && !KEEP_AGGREGATE_RETURN_POINTER)
2986 {
2987 int nregs = ix86_function_regparm (funtype, fundecl);
2988
2989 if (!nregs)
2990 return GET_MODE_SIZE (Pmode);
2991 }
2992
2993 return 0;
2994}
2995
2996/* Argument support functions. */
2997
2998/* Return true when register may be used to pass function parameters. */
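/* In 64-bit mode these are the integer argument registers %rdi, %rsi, %rdx,
   %rcx, %r8 and %r9 (x86_64_int_parameter_registers), %rax as the hidden
   argument to varargs functions, and %xmm0-%xmm7 when SSE is enabled.  */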
2999bool
3000ix86_function_arg_regno_p (int regno)
3001{
3002 int i;
3003 if (!TARGET_64BIT)
3004 {
3005 if (TARGET_MACHO)
3006 return (regno < REGPARM_MAX
3007 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3008 else
3009 return (regno < REGPARM_MAX
3010 || (TARGET_MMX && MMX_REGNO_P (regno)
3011 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3012 || (TARGET_SSE && SSE_REGNO_P (regno)
3013 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3014 }
3015
3016 if (TARGET_MACHO)
3017 {
3018 if (SSE_REGNO_P (regno) && TARGET_SSE)
3019 return true;
3020 }
3021 else
3022 {
3023 if (TARGET_SSE && SSE_REGNO_P (regno)
3024 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3025 return true;
3026 }
3027 /* RAX is used as hidden argument to va_arg functions. */
3028 if (!regno)
3029 return true;
3030 for (i = 0; i < REGPARM_MAX; i++)
3031 if (regno == x86_64_int_parameter_registers[i])
3032 return true;
3033 return false;
3034}
3035
3036/* Return true if we do not know how to pass TYPE solely in registers.  */
3037
3038static bool
3039ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3040{
3041 if (must_pass_in_stack_var_size_or_pad (mode, type))
3042 return true;
3043
3044 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3045 The layout_type routine is crafty and tries to trick us into passing
3046 currently unsupported vector types on the stack by using TImode. */
3047 return (!TARGET_64BIT && mode == TImode
3048 && type && TREE_CODE (type) != VECTOR_TYPE);
3049}
3050
3051/* Initialize a variable CUM of type CUMULATIVE_ARGS
3052 for a call to a function whose data type is FNTYPE.
3053 For a library call, FNTYPE is 0. */
3054
3055void
3056init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3057 tree fntype, /* tree ptr for function decl */
3058 rtx libname, /* SYMBOL_REF of library name or 0 */
3059 tree fndecl)
3060{
3061 static CUMULATIVE_ARGS zero_cum;
3062 tree param, next_param;
3063
3064 if (TARGET_DEBUG_ARG)
3065 {
3066 fprintf (stderr, "\ninit_cumulative_args (");
3067 if (fntype)
3068 fprintf (stderr, "fntype code = %s, ret code = %s",
3069 tree_code_name[(int) TREE_CODE (fntype)],
3070 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3071 else
3072 fprintf (stderr, "no fntype");
3073
3074 if (libname)
3075 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3076 }
3077
3078 *cum = zero_cum;
3079
3080 /* Set up the number of registers to use for passing arguments. */
3081 cum->nregs = ix86_regparm;
3082 if (TARGET_SSE)
3083 cum->sse_nregs = SSE_REGPARM_MAX;
3084 if (TARGET_MMX)
3085 cum->mmx_nregs = MMX_REGPARM_MAX;
3086 cum->warn_sse = true;
3087 cum->warn_mmx = true;
3088 cum->maybe_vaarg = false;
3089
3090 /* Use ecx and edx registers if function has fastcall attribute,
3091 else look for regparm information. */
3092 if (fntype && !TARGET_64BIT)
3093 {
3094 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3095 {
3096 cum->nregs = 2;
3097 cum->fastcall = 1;
3098 }
3099 else
3100 cum->nregs = ix86_function_regparm (fntype, fndecl);
3101 }
3102
3103 /* Set up the number of SSE registers used for passing SFmode
3104 and DFmode arguments. Warn for mismatching ABI. */
3105 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3106
3107 /* Determine if this function has variable arguments. This is
3108     indicated by the last argument being 'void_type_node' if there
3109 are no variable arguments. If there are variable arguments, then
3110 we won't pass anything in registers in 32-bit mode. */
3111
3112 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3113 {
3114 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3115 param != 0; param = next_param)
3116 {
3117 next_param = TREE_CHAIN (param);
3118 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3119 {
3120 if (!TARGET_64BIT)
3121 {
3122 cum->nregs = 0;
3123 cum->sse_nregs = 0;
3124 cum->mmx_nregs = 0;
3125 cum->warn_sse = 0;
3126 cum->warn_mmx = 0;
3127 cum->fastcall = 0;
3128 cum->float_in_sse = 0;
3129 }
3130 cum->maybe_vaarg = true;
3131 }
3132 }
3133 }
3134 if ((!fntype && !libname)
3135 || (fntype && !TYPE_ARG_TYPES (fntype)))
3136 cum->maybe_vaarg = true;
3137
3138 if (TARGET_DEBUG_ARG)
3139 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3140
3141 return;
3142}
3143
3144/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3145 But in the case of vector types, it is some vector mode.
3146
3147 When we have only some of our vector isa extensions enabled, then there
3148 are some modes for which vector_mode_supported_p is false. For these
3149 modes, the generic vector support in gcc will choose some non-vector mode
3150 in order to implement the type. By computing the natural mode, we'll
3151 select the proper ABI location for the operand and not depend on whatever
3152 the middle-end decides to do with these vector types. */
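/* For example, a vector type of four floats is reported here as V4SFmode
   even if SSE is disabled and the middle end would otherwise lower the type
   to a non-vector (e.g. BLKmode) representation.  */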
3153
3154static enum machine_mode
3155type_natural_mode (tree type)
3156{
3157 enum machine_mode mode = TYPE_MODE (type);
3158
3159 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3160 {
3161 HOST_WIDE_INT size = int_size_in_bytes (type);
3162 if ((size == 8 || size == 16)
3163 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3164 && TYPE_VECTOR_SUBPARTS (type) > 1)
3165 {
3166 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3167
3168 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3169 mode = MIN_MODE_VECTOR_FLOAT;
3170 else
3171 mode = MIN_MODE_VECTOR_INT;
3172
3173 /* Get the mode which has this inner mode and number of units. */
3174 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3175 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3176 && GET_MODE_INNER (mode) == innermode)
3177 return mode;
3178
3179 gcc_unreachable ();
3180 }
3181 }
3182
3183 return mode;
3184}
3185
3186/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3187 this may not agree with the mode that the type system has chosen for the
3188 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3189 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3190
3191static rtx
3192gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3193 unsigned int regno)
3194{
3195 rtx tmp;
3196
3197 if (orig_mode != BLKmode)
3198 tmp = gen_rtx_REG (orig_mode, regno);
3199 else
3200 {
3201 tmp = gen_rtx_REG (mode, regno);
3202 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3203 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3204 }
3205
3206 return tmp;
3207}
3208
3209/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3210 of this code is to classify each 8bytes of incoming argument by the register
3211 class and assign registers accordingly. */
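/* For example, a 16 byte struct { double d; int i; } is split into two
   eightbytes: the first is classified as an SSE class (the double) and the
   second as an integer class (the int), so the struct is passed in one SSE
   register and one general-purpose register.  */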
3212
3213/* Return the union class of CLASS1 and CLASS2.
3214 See the x86-64 PS ABI for details. */
3215
3216static enum x86_64_reg_class
3217merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3218{
3219 /* Rule #1: If both classes are equal, this is the resulting class. */
3220 if (class1 == class2)
3221 return class1;
3222
3223 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3224 the other class. */
3225 if (class1 == X86_64_NO_CLASS)
3226 return class2;
3227 if (class2 == X86_64_NO_CLASS)
3228 return class1;
3229
3230 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3231 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3232 return X86_64_MEMORY_CLASS;
3233
3234 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3235 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3236 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3237 return X86_64_INTEGERSI_CLASS;
3238 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3239 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3240 return X86_64_INTEGER_CLASS;
3241
3242 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3243 MEMORY is used. */
3244 if (class1 == X86_64_X87_CLASS
3245 || class1 == X86_64_X87UP_CLASS
3246 || class1 == X86_64_COMPLEX_X87_CLASS
3247 || class2 == X86_64_X87_CLASS
3248 || class2 == X86_64_X87UP_CLASS
3249 || class2 == X86_64_COMPLEX_X87_CLASS)
3250 return X86_64_MEMORY_CLASS;
3251
3252 /* Rule #6: Otherwise class SSE is used. */
3253 return X86_64_SSE_CLASS;
3254}
3255
3256/* Classify the argument of type TYPE and mode MODE.
3257 CLASSES will be filled by the register class used to pass each word
3258 of the operand. The number of words is returned. In case the parameter
3259 should be passed in memory, 0 is returned. As a special case for zero
3260 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3261
3262   BIT_OFFSET is used internally for handling records and specifies the
3263   offset in bits modulo 256 to avoid overflow cases.
3264
3265 See the x86-64 PS ABI for details.
3266*/
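/* For example, an aggregate larger than 16 bytes, such as
   struct { char c[24]; }, makes this function return 0 and is therefore
   passed and returned in memory.  */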
3267
3268static int
3269classify_argument (enum machine_mode mode, tree type,
3270 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3271{
3272 HOST_WIDE_INT bytes =
3273 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3274 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3275
3276 /* Variable sized entities are always passed/returned in memory. */
3277 if (bytes < 0)
3278 return 0;
3279
3280 if (mode != VOIDmode
3281 && targetm.calls.must_pass_in_stack (mode, type))
3282 return 0;
3283
3284 if (type && AGGREGATE_TYPE_P (type))
3285 {
3286 int i;
3287 tree field;
3288 enum x86_64_reg_class subclasses[MAX_CLASSES];
3289
3290 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3291 if (bytes > 16)
3292 return 0;
3293
3294 for (i = 0; i < words; i++)
3295 classes[i] = X86_64_NO_CLASS;
3296
3297      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
3298         signal the memory class, so handle this as a special case.  */
3299 if (!words)
3300 {
3301 classes[0] = X86_64_NO_CLASS;
3302 return 1;
3303 }
3304
3305 /* Classify each field of record and merge classes. */
3306 switch (TREE_CODE (type))
3307 {
3308 case RECORD_TYPE:
3309	/* For classes, first merge in the fields of the base classes.  */
3310 if (TYPE_BINFO (type))
3311 {
3312 tree binfo, base_binfo;
3313 int basenum;
3314
3315 for (binfo = TYPE_BINFO (type), basenum = 0;
3316 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
3317 {
3318 int num;
3319 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
3320 tree type = BINFO_TYPE (base_binfo);
3321
3322 num = classify_argument (TYPE_MODE (type),
3323 type, subclasses,
3324 (offset + bit_offset) % 256);
3325 if (!num)
3326 return 0;
3327 for (i = 0; i < num; i++)
3328 {
3329 int pos = (offset + (bit_offset % 64)) / 8 / 8;
3330 classes[i + pos] =
3331 merge_classes (subclasses[i], classes[i + pos]);
3332 }
3333 }
3334 }
3335	/* And now merge the fields of the structure.  */
3336 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3337 {
3338 if (TREE_CODE (field) == FIELD_DECL)
3339 {
3340 int num;
3341
3342 if (TREE_TYPE (field) == error_mark_node)
3343 continue;
3344
3345 /* Bitfields are always classified as integer. Handle them
3346 early, since later code would consider them to be
3347 misaligned integers. */
3348 if (DECL_BIT_FIELD (field))
3349 {
3350 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3351 i < ((int_bit_position (field) + (bit_offset % 64))
3352 + tree_low_cst (DECL_SIZE (field), 0)
3353 + 63) / 8 / 8; i++)
3354 classes[i] =
3355 merge_classes (X86_64_INTEGER_CLASS,
3356 classes[i]);
3357 }
3358 else
3359 {
3360 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3361 TREE_TYPE (field), subclasses,
3362 (int_bit_position (field)
3363 + bit_offset) % 256);
3364 if (!num)
3365 return 0;
3366 for (i = 0; i < num; i++)
3367 {
3368 int pos =
3369 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3370 classes[i + pos] =
3371 merge_classes (subclasses[i], classes[i + pos]);
3372 }
3373 }
3374 }
3375 }
3376 break;
3377
3378 case ARRAY_TYPE:
3379 /* Arrays are handled as small records. */
3380 {
3381 int num;
3382 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3383 TREE_TYPE (type), subclasses, bit_offset);
3384 if (!num)
3385 return 0;
3386
3387 /* The partial classes are now full classes. */
3388 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3389 subclasses[0] = X86_64_SSE_CLASS;
3390 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3391 subclasses[0] = X86_64_INTEGER_CLASS;
3392
3393 for (i = 0; i < words; i++)
3394 classes[i] = subclasses[i % num];
3395
3396 break;
3397 }
3398 case UNION_TYPE:
3399 case QUAL_UNION_TYPE:
3400	/* Unions are similar to RECORD_TYPE but the offset is always 0.
3401 */
3402
3403 /* Unions are not derived. */
3404 gcc_assert (!TYPE_BINFO (type)
3405 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3406 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3407 {
3408 if (TREE_CODE (field) == FIELD_DECL)
3409 {
3410 int num;
3411
3412 if (TREE_TYPE (field) == error_mark_node)
3413 continue;
3414
3415 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3416 TREE_TYPE (field), subclasses,
3417 bit_offset);
3418 if (!num)
3419 return 0;
3420 for (i = 0; i < num; i++)
3421 classes[i] = merge_classes (subclasses[i], classes[i]);
3422 }
3423 }
3424 break;
3425
3426 default:
3427 gcc_unreachable ();
3428 }
3429
3430 /* Final merger cleanup. */
3431 for (i = 0; i < words; i++)
3432 {
3433 /* If one class is MEMORY, everything should be passed in
3434 memory. */
3435 if (classes[i] == X86_64_MEMORY_CLASS)
3436 return 0;
3437
3438 /* The X86_64_SSEUP_CLASS should always be preceded by
3439 X86_64_SSE_CLASS. */
3440 if (classes[i] == X86_64_SSEUP_CLASS
3441 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3442 classes[i] = X86_64_SSE_CLASS;
3443
3444 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3445 if (classes[i] == X86_64_X87UP_CLASS
3446 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3447 classes[i] = X86_64_SSE_CLASS;
3448 }
3449 return words;
3450 }
3451
3452 /* Compute the alignment needed. We align all types to natural boundaries,
3453 with the exception of XFmode, which is aligned to 64 bits. */
3454 if (mode != VOIDmode && mode != BLKmode)
3455 {
3456 int mode_alignment = GET_MODE_BITSIZE (mode);
3457
3458 if (mode == XFmode)
3459 mode_alignment = 128;
3460 else if (mode == XCmode)
3461 mode_alignment = 256;
3462 if (COMPLEX_MODE_P (mode))
3463 mode_alignment /= 2;
3464 /* Misaligned fields are always returned in memory. */
3465 if (bit_offset % mode_alignment)
3466 return 0;
3467 }
3468
3469 /* For V1xx modes, just use the base mode. */
3470 if (VECTOR_MODE_P (mode)
3471 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3472 mode = GET_MODE_INNER (mode);
3473
3474 /* Classification of atomic types. */
3475 switch (mode)
3476 {
3477 case SDmode:
3478 case DDmode:
3479 classes[0] = X86_64_SSE_CLASS;
3480 return 1;
3481 case TDmode:
3482 classes[0] = X86_64_SSE_CLASS;
3483 classes[1] = X86_64_SSEUP_CLASS;
3484 return 2;
3485 case DImode:
3486 case SImode:
3487 case HImode:
3488 case QImode:
3489 case CSImode:
3490 case CHImode:
3491 case CQImode:
3492 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3493 classes[0] = X86_64_INTEGERSI_CLASS;
3494 else
3495 classes[0] = X86_64_INTEGER_CLASS;
3496 return 1;
3497 case CDImode:
3498 case TImode:
3499 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3500 return 2;
3501 case CTImode:
3502 return 0;
3503 case SFmode:
3504 if (!(bit_offset % 64))
3505 classes[0] = X86_64_SSESF_CLASS;
3506 else
3507 classes[0] = X86_64_SSE_CLASS;
3508 return 1;
3509 case DFmode:
3510 classes[0] = X86_64_SSEDF_CLASS;
3511 return 1;
3512 case XFmode:
3513 classes[0] = X86_64_X87_CLASS;
3514 classes[1] = X86_64_X87UP_CLASS;
3515 return 2;
3516 case TFmode:
3517 classes[0] = X86_64_SSE_CLASS;
3518 classes[1] = X86_64_SSEUP_CLASS;
3519 return 2;
3520 case SCmode:
3521 classes[0] = X86_64_SSE_CLASS;
3522 return 1;
3523 case DCmode:
3524 classes[0] = X86_64_SSEDF_CLASS;
3525 classes[1] = X86_64_SSEDF_CLASS;
3526 return 2;
3527 case XCmode:
3528 classes[0] = X86_64_COMPLEX_X87_CLASS;
3529 return 1;
3530 case TCmode:
3531 /* This mode is larger than 16 bytes. */
3532 return 0;
3533 case V4SFmode:
3534 case V4SImode:
3535 case V16QImode:
3536 case V8HImode:
3537 case V2DFmode:
3538 case V2DImode:
3539 classes[0] = X86_64_SSE_CLASS;
3540 classes[1] = X86_64_SSEUP_CLASS;
3541 return 2;
3542 case V2SFmode:
3543 case V2SImode:
3544 case V4HImode:
3545 case V8QImode:
3546 classes[0] = X86_64_SSE_CLASS;
3547 return 1;
3548 case BLKmode:
3549 case VOIDmode:
3550 return 0;
3551 default:
3552 gcc_assert (VECTOR_MODE_P (mode));
3553
3554 if (bytes > 16)
3555 return 0;
3556
3557 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3558
3559 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3560 classes[0] = X86_64_INTEGERSI_CLASS;
3561 else
3562 classes[0] = X86_64_INTEGER_CLASS;
3563 classes[1] = X86_64_INTEGER_CLASS;
3564 return 1 + (bytes > 8);
3565 }
3566}
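/* Illustrative sketch of the classification above (not from the original
   source): a 16-byte aggregate such as

     struct s { double d; long l; };

   classifies as { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS }, so it can be
   passed in one SSE register and one integer register, while any aggregate
   larger than 16 bytes, or one with a misaligned field, yields 0 and is
   passed in memory.  */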
3567
3568/* Examine the argument and set the number of registers required in each
3569 class. Return 0 iff the parameter should be passed in memory. */
3570static int
3571examine_argument (enum machine_mode mode, tree type, int in_return,
3572 int *int_nregs, int *sse_nregs)
3573{
3574 enum x86_64_reg_class class[MAX_CLASSES];
3575 int n = classify_argument (mode, type, class, 0);
3576
3577 *int_nregs = 0;
3578 *sse_nregs = 0;
3579 if (!n)
3580 return 0;
3581 for (n--; n >= 0; n--)
3582 switch (class[n])
3583 {
3584 case X86_64_INTEGER_CLASS:
3585 case X86_64_INTEGERSI_CLASS:
3586 (*int_nregs)++;
3587 break;
3588 case X86_64_SSE_CLASS:
3589 case X86_64_SSESF_CLASS:
3590 case X86_64_SSEDF_CLASS:
3591 (*sse_nregs)++;
3592 break;
3593 case X86_64_NO_CLASS:
3594 case X86_64_SSEUP_CLASS:
3595 break;
3596 case X86_64_X87_CLASS:
3597 case X86_64_X87UP_CLASS:
3598 if (!in_return)
3599 return 0;
3600 break;
3601 case X86_64_COMPLEX_X87_CLASS:
3602 return in_return ? 2 : 0;
3603 case X86_64_MEMORY_CLASS:
3604 gcc_unreachable ();
3605 }
3606 return 1;
3607}
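/* Hedged example (not in the original source): for struct { double d; long l; }
   classified above, examine_argument sets *int_nregs = 1 and *sse_nregs = 1
   and returns nonzero; when classify_argument yields 0 (the memory case), it
   returns 0 and the parameter is passed on the stack.  */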
3608
3609/* Construct container for the argument used by GCC interface. See
3610 FUNCTION_ARG for the detailed description. */
3611
3612static rtx
3613construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3614 tree type, int in_return, int nintregs, int nsseregs,
3615 const int *intreg, int sse_regno)
3616{
3617 /* The following variables hold the static issued_error state. */
3618 static bool issued_sse_arg_error;
3619 static bool issued_sse_ret_error;
3620 static bool issued_x87_ret_error;
3621
3622 enum machine_mode tmpmode;
3623 int bytes =
3624 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3625 enum x86_64_reg_class class[MAX_CLASSES];
3626 int n;
3627 int i;
3628 int nexps = 0;
3629 int needed_sseregs, needed_intregs;
3630 rtx exp[MAX_CLASSES];
3631 rtx ret;
3632
3633 n = classify_argument (mode, type, class, 0);
3634 if (TARGET_DEBUG_ARG)
3635 {
3636 if (!n)
3637 fprintf (stderr, "Memory class\n");
3638 else
3639 {
3640 fprintf (stderr, "Classes:");
3641 for (i = 0; i < n; i++)
3642 {
3643 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3644 }
3645 fprintf (stderr, "\n");
3646 }
3647 }
3648 if (!n)
3649 return NULL;
3650 if (!examine_argument (mode, type, in_return, &needed_intregs,
3651 &needed_sseregs))
3652 return NULL;
3653 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3654 return NULL;
3655
3656 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3657 some less clueful developer tries to use floating-point anyway. */
3658 if (needed_sseregs && !TARGET_SSE)
3659 {
3660 if (in_return)
3661 {
3662 if (!issued_sse_ret_error)
3663 {
3664 error ("SSE register return with SSE disabled");
3665 issued_sse_ret_error = true;
3666 }
3667 }
3668 else if (!issued_sse_arg_error)
3669 {
3670 error ("SSE register argument with SSE disabled");
3671 issued_sse_arg_error = true;
3672 }
3673 return NULL;
3674 }
3675
3676 /* Likewise, error if the ABI requires us to return values in the
3677 x87 registers and the user specified -mno-80387. */
3678 if (!TARGET_80387 && in_return)
3679 for (i = 0; i < n; i++)
3680 if (class[i] == X86_64_X87_CLASS
3681 || class[i] == X86_64_X87UP_CLASS
3682 || class[i] == X86_64_COMPLEX_X87_CLASS)
3683 {
3684 if (!issued_x87_ret_error)
3685 {
3686 error ("x87 register return with x87 disabled");
3687 issued_x87_ret_error = true;
3688 }
3689 return NULL;
3690 }
3691
3692 /* First construct the simple cases. Avoid SCmode, since we want to use
3693 a single register to pass this type. */
3694 if (n == 1 && mode != SCmode)
3695 switch (class[0])
3696 {
3697 case X86_64_INTEGER_CLASS:
3698 case X86_64_INTEGERSI_CLASS:
3699 return gen_rtx_REG (mode, intreg[0]);
3700 case X86_64_SSE_CLASS:
3701 case X86_64_SSESF_CLASS:
3702 case X86_64_SSEDF_CLASS:
3703 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3704 case X86_64_X87_CLASS:
3705 case X86_64_COMPLEX_X87_CLASS:
3706 return gen_rtx_REG (mode, FIRST_STACK_REG);
3707 case X86_64_NO_CLASS:
3708 /* Zero sized array, struct or class. */
3709 return NULL;
3710 default:
3711 gcc_unreachable ();
3712 }
3713 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3714 && mode != BLKmode)
3715 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3716 if (n == 2
3717 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3718 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3719 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3720 && class[1] == X86_64_INTEGER_CLASS
3721 && (mode == CDImode || mode == TImode || mode == TFmode)
3722 && intreg[0] + 1 == intreg[1])
3723 return gen_rtx_REG (mode, intreg[0]);
3724
3725 /* Otherwise figure out the entries of the PARALLEL. */
3726 for (i = 0; i < n; i++)
3727 {
3728 switch (class[i])
3729 {
3730 case X86_64_NO_CLASS:
3731 break;
3732 case X86_64_INTEGER_CLASS:
3733 case X86_64_INTEGERSI_CLASS:
3734 /* Merge TImodes on aligned occasions here too. */
3735 if (i * 8 + 8 > bytes)
3736 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3737 else if (class[i] == X86_64_INTEGERSI_CLASS)
3738 tmpmode = SImode;
3739 else
3740 tmpmode = DImode;
3741 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
3742 if (tmpmode == BLKmode)
3743 tmpmode = DImode;
3744 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3745 gen_rtx_REG (tmpmode, *intreg),
3746 GEN_INT (i*8));
3747 intreg++;
3748 break;
3749 case X86_64_SSESF_CLASS:
3750 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3751 gen_rtx_REG (SFmode,
3752 SSE_REGNO (sse_regno)),
3753 GEN_INT (i*8));
3754 sse_regno++;
3755 break;
3756 case X86_64_SSEDF_CLASS:
3757 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3758 gen_rtx_REG (DFmode,
3759 SSE_REGNO (sse_regno)),
3760 GEN_INT (i*8));
3761 sse_regno++;
3762 break;
3763 case X86_64_SSE_CLASS:
3764 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3765 tmpmode = TImode;
3766 else
3767 tmpmode = DImode;
3768 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3769 gen_rtx_REG (tmpmode,
3770 SSE_REGNO (sse_regno)),
3771 GEN_INT (i*8));
3772 if (tmpmode == TImode)
3773 i++;
3774 sse_regno++;
3775 break;
3776 default:
3777 gcc_unreachable ();
3778 }
3779 }
3780
3781 /* Empty aligned struct, union or class. */
3782 if (nexps == 0)
3783 return NULL;
3784
3785 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3786 for (i = 0; i < nexps; i++)
3787 XVECEXP (ret, 0, i) = exp [i];
3788 return ret;
3789}
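/* Rough sketch of the result for the struct { double d; long l; } example,
   assuming it is the first argument (not from the original source):
   construct_container builds a PARALLEL pairing each register with its byte
   offset in the aggregate, conceptually

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   so the middle end knows which piece of the object lives in which register.  */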
3790
3791/* Update the data in CUM to advance over an argument
3792 of mode MODE and data type TYPE.
3793 (TYPE is null for libcalls where that information may not be available.) */
3794
3795void
3796function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3797 tree type, int named)
3798{
3799 int bytes =
3800 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3801 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3802
3803 if (type)
3804 mode = type_natural_mode (type);
3805
3806 if (TARGET_DEBUG_ARG)
3807 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3808 "mode=%s, named=%d)\n\n",
3809 words, cum->words, cum->nregs, cum->sse_nregs,
3810 GET_MODE_NAME (mode), named);
3811
3812 if (TARGET_64BIT)
3813 {
3814 int int_nregs, sse_nregs;
3815 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3816 cum->words += words;
3817 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3818 {
3819 cum->nregs -= int_nregs;
3820 cum->sse_nregs -= sse_nregs;
3821 cum->regno += int_nregs;
3822 cum->sse_regno += sse_nregs;
3823 }
3824 else
3825 cum->words += words;
3826 }
3827 else
3828 {
3829 switch (mode)
3830 {
3831 default:
3832 break;
3833
3834 case BLKmode:
3835 if (bytes < 0)
3836 break;
3837 /* FALLTHRU */
3838
3839 case DImode:
3840 case SImode:
3841 case HImode:
3842 case QImode:
3843 cum->words += words;
3844 cum->nregs -= words;
3845 cum->regno += words;
3846
3847 if (cum->nregs <= 0)
3848 {
3849 cum->nregs = 0;
3850 cum->regno = 0;
3851 }
3852 break;
3853
3854 case DFmode:
3855 if (cum->float_in_sse < 2)
3856 break;
3857 case SFmode:
3858 if (cum->float_in_sse < 1)
3859 break;
3860 /* FALLTHRU */
3861
3862 case TImode:
3863 case V16QImode:
3864 case V8HImode:
3865 case V4SImode:
3866 case V2DImode:
3867 case V4SFmode:
3868 case V2DFmode:
3869 if (!type || !AGGREGATE_TYPE_P (type))
3870 {
3871 cum->sse_words += words;
3872 cum->sse_nregs -= 1;
3873 cum->sse_regno += 1;
3874 if (cum->sse_nregs <= 0)
3875 {
3876 cum->sse_nregs = 0;
3877 cum->sse_regno = 0;
3878 }
3879 }
3880 break;
3881
3882 case V8QImode:
3883 case V4HImode:
3884 case V2SImode:
3885 case V2SFmode:
3886 if (!type || !AGGREGATE_TYPE_P (type))
3887 {
3888 cum->mmx_words += words;
3889 cum->mmx_nregs -= 1;
3890 cum->mmx_regno += 1;
3891 if (cum->mmx_nregs <= 0)
3892 {
3893 cum->mmx_nregs = 0;
3894 cum->mmx_regno = 0;
3895 }
3896 }
3897 break;
3898 }
3899 }
3900}
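/* Hedged illustration (not in the original source): in 64-bit mode, advancing
   over "int a, double b" decrements cum->nregs and advances cum->regno for the
   int, and decrements cum->sse_nregs and advances cum->sse_regno for the
   double; once examine_argument reports that an argument no longer fits in
   the remaining registers, only cum->words grows and the argument is passed
   on the stack.  */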
3901
3902/* Define where to put the arguments to a function.
3903 Value is zero to push the argument on the stack,
3904 or a hard register in which to store the argument.
3905
3906 MODE is the argument's machine mode.
3907 TYPE is the data type of the argument (as a tree).
3908 This is null for libcalls where that information may
3909 not be available.
3910 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3911 the preceding args and about the function being called.
3912 NAMED is nonzero if this argument is a named parameter
3913 (otherwise it is an extra parameter matching an ellipsis). */
3914
3915rtx
3916function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3917 tree type, int named)
3918{
3919 enum machine_mode mode = orig_mode;
3920 rtx ret = NULL_RTX;
3921 int bytes =
3922 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3923 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3924 static bool warnedsse, warnedmmx;
3925
3926 /* To simplify the code below, represent vector types with a vector mode
3927 even if MMX/SSE are not active. */
3928 if (type && TREE_CODE (type) == VECTOR_TYPE)
3929 mode = type_natural_mode (type);
3930
3931 /* Handle a hidden AL argument containing the number of registers for varargs
3932 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
3933 any AL settings. */
3934 if (mode == VOIDmode)
3935 {
3936 if (TARGET_64BIT)
3937 return GEN_INT (cum->maybe_vaarg
3938 ? (cum->sse_nregs < 0
3939 ? SSE_REGPARM_MAX
3940 : cum->sse_regno)
3941 : -1);
3942 else
3943 return constm1_rtx;
3944 }
3945 if (TARGET_64BIT)
3946 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3947 cum->sse_nregs,
3948 &x86_64_int_parameter_registers [cum->regno],
3949 cum->sse_regno);
3950 else
3951 switch (mode)
3952 {
3953 /* For now, pass fp/complex values on the stack. */
3954 default:
3955 break;
3956
3957 case BLKmode:
3958 if (bytes < 0)
3959 break;
3960 /* FALLTHRU */
3961 case DImode:
3962 case SImode:
3963 case HImode:
3964 case QImode:
3965 if (words <= cum->nregs)
3966 {
3967 int regno = cum->regno;
3968
3969 /* Fastcall allocates the first two DWORD (SImode) or
3970 smaller arguments to ECX and EDX. */
3971 if (cum->fastcall)
3972 {
3973 if (mode == BLKmode || mode == DImode)
3974 break;
3975
3976 /* ECX not EAX is the first allocated register. */
3977 if (regno == 0)
3978 regno = 2;
3979 }
3980 ret = gen_rtx_REG (mode, regno);
3981 }
3982 break;
3983 case DFmode:
3984 if (cum->float_in_sse < 2)
3985 break;
3986 case SFmode:
3987 if (cum->float_in_sse < 1)
3988 break;
3989 /* FALLTHRU */
3990 case TImode:
3991 case V16QImode:
3992 case V8HImode:
3993 case V4SImode:
3994 case V2DImode:
3995 case V4SFmode:
3996 case V2DFmode:
3997 if (!type || !AGGREGATE_TYPE_P (type))
3998 {
3999 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4000 {
4001 warnedsse = true;
4002 warning (0, "SSE vector argument without SSE enabled "
4003 "changes the ABI");
4004 }
4005 if (cum->sse_nregs)
4006 ret = gen_reg_or_parallel (mode, orig_mode,
4007 cum->sse_regno + FIRST_SSE_REG);
4008 }
4009 break;
4010 case V8QImode:
4011 case V4HImode:
4012 case V2SImode:
4013 case V2SFmode:
4014 if (!type || !AGGREGATE_TYPE_P (type))
4015 {
4016 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4017 {
4018 warnedmmx = true;
4019 warning (0, "MMX vector argument without MMX enabled "
4020 "changes the ABI");
4021 }
4022 if (cum->mmx_nregs)
4023 ret = gen_reg_or_parallel (mode, orig_mode,
4024 cum->mmx_regno + FIRST_MMX_REG);
4025 }
4026 break;
4027 }
4028
4029 if (TARGET_DEBUG_ARG)
4030 {
4031 fprintf (stderr,
4032 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4033 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4034
4035 if (ret)
4036 print_simple_rtl (stderr, ret);
4037 else
4038 fprintf (stderr, ", stack");
4039
4040 fprintf (stderr, " )\n");
4041 }
4042
4043 return ret;
4044}
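/* Hedged example of the 32-bit fastcall path above (not from the original
   source): for

     __attribute__((fastcall)) int f (int a, int b, int c);

   "a" is passed in %ecx, "b" in %edx, and "c" on the stack, matching the
   regno remapping (0 -> 2) done for the integer modes above.  */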
4045
4046/* A C expression that indicates when an argument must be passed by
4047 reference. If nonzero for an argument, a copy of that argument is
4048 made in memory and a pointer to the argument is passed instead of
4049 the argument itself. The pointer is passed in whatever way is
4050 appropriate for passing a pointer to that type. */
4051
4052static bool
4053ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4054 enum machine_mode mode ATTRIBUTE_UNUSED,
4055 tree type, bool named ATTRIBUTE_UNUSED)
4056{
4057 if (!TARGET_64BIT)
4058 return 0;
4059
4060 if (type && int_size_in_bytes (type) == -1)
4061 {
4062 if (TARGET_DEBUG_ARG)
4063 fprintf (stderr, "function_arg_pass_by_reference\n");
4064 return 1;
4065 }
4066
4067 return 0;
4068}
4069
4070/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
4071 passing ABI. Only called if TARGET_SSE. */
4072static bool
4073contains_128bit_aligned_vector_p (tree type)
4074{
4075 enum machine_mode mode = TYPE_MODE (type);
4076 if (SSE_REG_MODE_P (mode)
4077 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4078 return true;
4079 if (TYPE_ALIGN (type) < 128)
4080 return false;
4081
4082 if (AGGREGATE_TYPE_P (type))
4083 {
4084 /* Walk the aggregates recursively. */
4085 switch (TREE_CODE (type))
4086 {
4087 case RECORD_TYPE:
4088 case UNION_TYPE:
4089 case QUAL_UNION_TYPE:
4090 {
4091 tree field;
4092
4093 if (TYPE_BINFO (type))
4094 {
4095 tree binfo, base_binfo;
4096 int i;
4097
4098 for (binfo = TYPE_BINFO (type), i = 0;
4099 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
4100 if (contains_128bit_aligned_vector_p
4101 (BINFO_TYPE (base_binfo)))
4102 return true;
4103 }
4104 /* And now merge the fields of the structure. */
4105 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4106 {
4107 if (TREE_CODE (field) == FIELD_DECL
4108 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4109 return true;
4110 }
4111 break;
4112 }
4113
4114 case ARRAY_TYPE:
4115 /* Just for use if some language passes arrays by value. */
4116 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4117 return true;
4118 break;
4119
4120 default:
4121 gcc_unreachable ();
4122 }
4123 }
4124 return false;
4125}
4126
4127/* Gives the alignment boundary, in bits, of an argument with the
4128 specified mode and type. */
4129
4130int
4131ix86_function_arg_boundary (enum machine_mode mode, tree type)
4132{
4133 int align;
4134 if (type)
4135 align = TYPE_ALIGN (type);
4136 else
4137 align = GET_MODE_ALIGNMENT (mode);
4138 if (align < PARM_BOUNDARY)
4139 align = PARM_BOUNDARY;
4140 if (!TARGET_64BIT)
4141 {
4142 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4143 make an exception for SSE modes since these require 128bit
4144 alignment.
4145
4146 The handling here differs from field_alignment. ICC aligns MMX
4147 arguments to 4 byte boundaries, while structure fields are aligned
4148 to 8 byte boundaries. */
4149 if (!TARGET_SSE)
4150 align = PARM_BOUNDARY;
4151 else if (!type)
4152 {
4153 if (!SSE_REG_MODE_P (mode))
4154 align = PARM_BOUNDARY;
4155 }
4156 else
4157 {
4158 if (!contains_128bit_aligned_vector_p (type))
4159 align = PARM_BOUNDARY;
4160 }
4161 }
4162 if (align > 128)
4163 align = 128;
4164 return align;
4165}
4166
4167/* Return true if N is a possible register number of function value. */
4168bool
4169ix86_function_value_regno_p (int regno)
4170{
4171 if (TARGET_MACHO)
4172 {
4173 if (!TARGET_64BIT)
4174 {
4175 return ((regno) == 0
4176 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4177 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4178 }
4179 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4180 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4181 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4182 }
4183 else
4184 {
4185 if (regno == 0
4186 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4187 || (regno == FIRST_SSE_REG && TARGET_SSE))
4188 return true;
4189
4190 if (!TARGET_64BIT
4191 && (regno == FIRST_MMX_REG && TARGET_MMX))
4192 return true;
4193
4194 return false;
4195 }
4196}
4197
4198/* Define how to find the value returned by a function.
4199 VALTYPE is the data type of the value (as a tree).
4200 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4201 otherwise, FUNC is 0. */
4202rtx
4203ix86_function_value (tree valtype, tree fntype_or_decl,
4204 bool outgoing ATTRIBUTE_UNUSED)
4205{
4206 enum machine_mode natmode = type_natural_mode (valtype);
4207
4208 if (TARGET_64BIT)
4209 {
4210 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4211 1, REGPARM_MAX, SSE_REGPARM_MAX,
4212 x86_64_int_return_registers, 0);
4213 /* For zero sized structures, construct_container returns NULL, but we
4214 need to keep the rest of the compiler happy by returning a meaningful value. */
4215 if (!ret)
4216 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4217 return ret;
4218 }
4219 else
4220 {
4221 tree fn = NULL_TREE, fntype;
4222 if (fntype_or_decl
4223 && DECL_P (fntype_or_decl))
4224 fn = fntype_or_decl;
4225 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4226 return gen_rtx_REG (TYPE_MODE (valtype),
4227 ix86_value_regno (natmode, fn, fntype));
4228 }
4229}
4230
4231/* Return true iff type is returned in memory. */
4232int
4233ix86_return_in_memory (tree type)
4234{
4235 int needed_intregs, needed_sseregs, size;
4236 enum machine_mode mode = type_natural_mode (type);
4237
4238 if (TARGET_64BIT)
4239 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4240
4241 if (mode == BLKmode)
4242 return 1;
4243
4244 size = int_size_in_bytes (type);
4245
4246 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4247 return 0;
4248
4249 if (VECTOR_MODE_P (mode) || mode == TImode)
4250 {
4251 /* User-created vectors small enough to fit in EAX. */
4252 if (size < 8)
4253 return 0;
4254
4255 /* MMX/3dNow values are returned in MM0,
4256 except when it doesn't exist. */
4257 if (size == 8)
4258 return (TARGET_MMX ? 0 : 1);
4259
4260 /* SSE values are returned in XMM0, except when it doesn't exist. */
4261 if (size == 16)
4262 return (TARGET_SSE ? 0 : 1);
4263 }
4264
4265 if (mode == XFmode)
4266 return 0;
4267
4268 if (mode == TDmode)
4269 return 1;
4270
4271 if (size > 12)
4272 return 1;
4273 return 0;
4274}
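/* Hedged summary of the 32-bit rules above (not from the original source):
   an 8-byte vector is returned in %mm0 only when MMX is available, a 16-byte
   vector in %xmm0 only when SSE is available, XFmode long double is not
   returned in memory (it goes to %st(0)), and other aggregates larger than
   12 bytes are returned in memory.  */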
4275
4276/* When returning SSE vector types, we have a choice of either
4277 (1) being abi incompatible with a -march switch, or
4278 (2) generating an error.
4279 Given no good solution, I think the safest thing is one warning.
4280 The user won't be able to use -Werror, but....
4281
4282 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4283 called in response to actually generating a caller or callee that
4284 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4285 via aggregate_value_p for general type probing from tree-ssa. */
4286
4287static rtx
4288ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4289{
4290 static bool warnedsse, warnedmmx;
4291
4292 if (type)
4293 {
4294 /* Look at the return type of the function, not the function type. */
4295 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4296
4297 if (!TARGET_SSE && !warnedsse)
4298 {
4299 if (mode == TImode
4300 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4301 {
4302 warnedsse = true;
4303 warning (0, "SSE vector return without SSE enabled "
4304 "changes the ABI");
4305 }
4306 }
4307
4308 if (!TARGET_MMX && !warnedmmx)
4309 {
4310 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4311 {
4312 warnedmmx = true;
4313 warning (0, "MMX vector return without MMX enabled "
4314 "changes the ABI");
4315 }
4316 }
4317 }
4318
4319 return NULL;
4320}
4321
4322/* Define how to find the value returned by a library function
4323 assuming the value has mode MODE. */
4324rtx
4325ix86_libcall_value (enum machine_mode mode)
4326{
4327 if (TARGET_64BIT)
4328 {
4329 switch (mode)
4330 {
4331 case SFmode:
4332 case SCmode:
4333 case DFmode:
4334 case DCmode:
4335 case TFmode:
4336 case SDmode:
4337 case DDmode:
4338 case TDmode:
4339 return gen_rtx_REG (mode, FIRST_SSE_REG);
4340 case XFmode:
4341 case XCmode:
4342 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4343 case TCmode:
4344 return NULL;
4345 default:
4346 return gen_rtx_REG (mode, 0);
4347 }
4348 }
4349 else
4350 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4351}
4352
4353/* Given a mode, return the register to use for a return value. */
4354
4355static int
4356ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4357{
4358 gcc_assert (!TARGET_64BIT);
4359
4360 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4361 we normally prevent this case when mmx is not available. However
4362 some ABIs may require the result to be returned like DImode. */
4363 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4364 return TARGET_MMX ? FIRST_MMX_REG : 0;
4365
4366 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4367 we prevent this case when sse is not available. However some ABIs
4368 may require the result to be returned like integer TImode. */
4369 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4370 return TARGET_SSE ? FIRST_SSE_REG : 0;
4371
4372 /* Decimal floating point values can go in %eax, unlike other float modes. */
4373 if (DECIMAL_FLOAT_MODE_P (mode))
4374 return 0;
4375
4376 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4377 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4378 return 0;
4379
4380 /* Floating point return values in %st(0), except for local functions when
4381 SSE math is enabled or for functions with sseregparm attribute. */
4382 if ((func || fntype)
4383 && (mode == SFmode || mode == DFmode))
4384 {
4385 int sse_level = ix86_function_sseregparm (fntype, func);
4386 if ((sse_level >= 1 && mode == SFmode)
4387 || (sse_level == 2 && mode == DFmode))
4388 return FIRST_SSE_REG;
4389 }
4390
4391 return FIRST_FLOAT_REG;
4392}
4393
4394/* Create the va_list data type. */
4395
4396static tree
4397ix86_build_builtin_va_list (void)
4398{
4399 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4400
4401 /* For i386 we use a plain pointer to the argument area. */
4402 if (!TARGET_64BIT)
4403 return build_pointer_type (char_type_node);
4404
4405 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4406 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4407
4408 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4409 unsigned_type_node);
4410 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4411 unsigned_type_node);
4412 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4413 ptr_type_node);
4414 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4415 ptr_type_node);
4416
4417 va_list_gpr_counter_field = f_gpr;
4418 va_list_fpr_counter_field = f_fpr;
4419
4420 DECL_FIELD_CONTEXT (f_gpr) = record;
4421 DECL_FIELD_CONTEXT (f_fpr) = record;
4422 DECL_FIELD_CONTEXT (f_ovf) = record;
4423 DECL_FIELD_CONTEXT (f_sav) = record;
4424
4425 TREE_CHAIN (record) = type_decl;
4426 TYPE_NAME (record) = type_decl;
4427 TYPE_FIELDS (record) = f_gpr;
4428 TREE_CHAIN (f_gpr) = f_fpr;
4429 TREE_CHAIN (f_fpr) = f_ovf;
4430 TREE_CHAIN (f_ovf) = f_sav;
4431
4432 layout_type (record);
4433
4434 /* The correct type is an array type of one element. */
4435 return build_array_type (record, build_index_type (size_zero_node));
4436}
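/* For reference, the record built above corresponds to the familiar
   x86-64 va_list layout (sketch, not part of the original source):

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];
*/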
4437
4438/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4439
4440static void
4441ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4442 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4443 int no_rtl)
4444{
4445 CUMULATIVE_ARGS next_cum;
4446 rtx save_area = NULL_RTX, mem;
4447 rtx label;
4448 rtx label_ref;
4449 rtx tmp_reg;
4450 rtx nsse_reg;
4451 int set;
4452 tree fntype;
4453 int stdarg_p;
4454 int i;
4455
4456 if (!TARGET_64BIT)
4457 return;
4458
4459 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4460 return;
4461
4462 /* Indicate that we need to allocate space on the stack for the varargs save area. */
4463 ix86_save_varrargs_registers = 1;
4464
4465 cfun->stack_alignment_needed = 128;
4466
4467 fntype = TREE_TYPE (current_function_decl);
4468 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4469 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4470 != void_type_node));
4471
4472 /* For varargs, we do not want to skip the dummy va_dcl argument.
4473 For stdargs, we do want to skip the last named argument. */
4474 next_cum = *cum;
4475 if (stdarg_p)
4476 function_arg_advance (&next_cum, mode, type, 1);
4477
4478 if (!no_rtl)
4479 save_area = frame_pointer_rtx;
4480
4481 set = get_varargs_alias_set ();
4482
4483 for (i = next_cum.regno;
4484 i < ix86_regparm
4485 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4486 i++)
4487 {
4488 mem = gen_rtx_MEM (Pmode,
4489 plus_constant (save_area, i * UNITS_PER_WORD));
4490 MEM_NOTRAP_P (mem) = 1;
4491 set_mem_alias_set (mem, set);
4492 emit_move_insn (mem, gen_rtx_REG (Pmode,
4493 x86_64_int_parameter_registers[i]));
4494 }
4495
4496 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4497 {
4498 /* Now emit code to save SSE registers. The AX parameter contains the number
4499 of SSE parameter registers used to call this function. We use the
4500 sse_prologue_save insn template, which produces a computed jump across
4501 the SSE saves. We need some preparation work to get this working. */
4502
4503 label = gen_label_rtx ();
4504 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4505
4506 /* Compute the address to jump to:
4507 label - 4*eax + nnamed_sse_arguments*4 */
4508 tmp_reg = gen_reg_rtx (Pmode);
4509 nsse_reg = gen_reg_rtx (Pmode);
4510 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4511 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4512 gen_rtx_MULT (Pmode, nsse_reg,
4513 GEN_INT (4))));
4514 if (next_cum.sse_regno)
4515 emit_move_insn
4516 (nsse_reg,
4517 gen_rtx_CONST (DImode,
4518 gen_rtx_PLUS (DImode,
4519 label_ref,
4520 GEN_INT (next_cum.sse_regno * 4))));
4521 else
4522 emit_move_insn (nsse_reg, label_ref);
4523 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4524
4525 /* Compute the address of the memory block we save into. We always use a
4526 pointer pointing 127 bytes after the first byte to store - this is needed
4527 to keep the size of each save instruction limited to 4 bytes. */
4528 tmp_reg = gen_reg_rtx (Pmode);
4529 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4530 plus_constant (save_area,
4531 8 * REGPARM_MAX + 127)));
4532 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4533 MEM_NOTRAP_P (mem) = 1;
4534 set_mem_alias_set (mem, set);
4535 set_mem_align (mem, BITS_PER_WORD);
4536
4537 /* And finally do the dirty job! */
4538 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4539 GEN_INT (next_cum.sse_regno), label));
4540 }
4541
4542}
4543
4544/* Implement va_start. */
4545
4546void
4547ix86_va_start (tree valist, rtx nextarg)
4548{
4549 HOST_WIDE_INT words, n_gpr, n_fpr;
4550 tree f_gpr, f_fpr, f_ovf, f_sav;
4551 tree gpr, fpr, ovf, sav, t;
4552 tree type;
4553
4554 /* Only 64bit target needs something special. */
4555 if (!TARGET_64BIT)
4556 {
4557 std_expand_builtin_va_start (valist, nextarg);
4558 return;
4559 }
4560
4561 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4562 f_fpr = TREE_CHAIN (f_gpr);
4563 f_ovf = TREE_CHAIN (f_fpr);
4564 f_sav = TREE_CHAIN (f_ovf);
4565
4566 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4567 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4568 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4569 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4570 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4571
4572 /* Count number of gp and fp argument registers used. */
4573 words = current_function_args_info.words;
4574 n_gpr = current_function_args_info.regno;
4575 n_fpr = current_function_args_info.sse_regno;
4576
4577 if (TARGET_DEBUG_ARG)
4578 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4579 (int) words, (int) n_gpr, (int) n_fpr);
4580
4581 if (cfun->va_list_gpr_size)
4582 {
4583 type = TREE_TYPE (gpr);
4584 t = build2 (MODIFY_EXPR, type, gpr,
4585 build_int_cst (type, n_gpr * 8));
4586 TREE_SIDE_EFFECTS (t) = 1;
4587 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4588 }
4589
4590 if (cfun->va_list_fpr_size)
4591 {
4592 type = TREE_TYPE (fpr);
4593 t = build2 (MODIFY_EXPR, type, fpr,
4594 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4595 TREE_SIDE_EFFECTS (t) = 1;
4596 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4597 }
4598
4599 /* Find the overflow area. */
4600 type = TREE_TYPE (ovf);
4601 t = make_tree (type, virtual_incoming_args_rtx);
4602 if (words != 0)
4603 t = build2 (PLUS_EXPR, type, t,
4604 build_int_cst (type, words * UNITS_PER_WORD));
4605 t = build2 (MODIFY_EXPR, type, ovf, t);
4606 TREE_SIDE_EFFECTS (t) = 1;
4607 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4608
4609 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4610 {
4611 /* Find the register save area.
4612 The function prologue saves it right above the stack frame. */
4613 type = TREE_TYPE (sav);
4614 t = make_tree (type, frame_pointer_rtx);
4615 t = build2 (MODIFY_EXPR, type, sav, t);
4616 TREE_SIDE_EFFECTS (t) = 1;
4617 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4618 }
4619}
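/* Hedged example (not from the original source), assuming REGPARM_MAX is 6:
   for "int f (int a, double b, ...)" the named arguments consume one GP and
   one SSE register, so va_start sets gp_offset = 8, fp_offset = 6*8 + 16 = 64,
   points overflow_arg_area at the first stack-passed argument and
   reg_save_area at the block saved by the prologue.  */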
4620
4621/* Implement va_arg. */
4622
4623tree
4624ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4625{
4626 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4627 tree f_gpr, f_fpr, f_ovf, f_sav;
4628 tree gpr, fpr, ovf, sav, t;
4629 int size, rsize;
4630 tree lab_false, lab_over = NULL_TREE;
4631 tree addr, t2;
4632 rtx container;
4633 int indirect_p = 0;
4634 tree ptrtype;
4635 enum machine_mode nat_mode;
4636
4637 /* Only 64bit target needs something special. */
4638 if (!TARGET_64BIT)
4639 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4640
4641 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4642 f_fpr = TREE_CHAIN (f_gpr);
4643 f_ovf = TREE_CHAIN (f_fpr);
4644 f_sav = TREE_CHAIN (f_ovf);
4645
4646 valist = build_va_arg_indirect_ref (valist);
4647 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4648 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4649 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4650 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4651
4652 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4653 if (indirect_p)
4654 type = build_pointer_type (type);
4655 size = int_size_in_bytes (type);
4656 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4657
4658 nat_mode = type_natural_mode (type);
4659 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4660 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4661
4662 /* Pull the value out of the saved registers. */
4663
4664 addr = create_tmp_var (ptr_type_node, "addr");
4665 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4666
4667 if (container)
4668 {
4669 int needed_intregs, needed_sseregs;
4670 bool need_temp;
4671 tree int_addr, sse_addr;
4672
4673 lab_false = create_artificial_label ();
4674 lab_over = create_artificial_label ();
4675
4676 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4677
4678 need_temp = (!REG_P (container)
4679 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4680 || TYPE_ALIGN (type) > 128));
4681
4682 /* In case we are passing a structure, verify that it is a consecutive block
4683 in the register save area. If not, we need to do moves. */
4684 if (!need_temp && !REG_P (container))
4685 {
4686 /* Verify that all registers are strictly consecutive */
4687 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4688 {
4689 int i;
4690
4691 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4692 {
4693 rtx slot = XVECEXP (container, 0, i);
4694 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4695 || INTVAL (XEXP (slot, 1)) != i * 16)
4696 need_temp = 1;
4697 }
4698 }
4699 else
4700 {
4701 int i;
4702
4703 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4704 {
4705 rtx slot = XVECEXP (container, 0, i);
4706 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4707 || INTVAL (XEXP (slot, 1)) != i * 8)
4708 need_temp = 1;
4709 }
4710 }
4711 }
4712 if (!need_temp)
4713 {
4714 int_addr = addr;
4715 sse_addr = addr;
4716 }
4717 else
4718 {
4719 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4720 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4721 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4722 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4723 }
4724
4725 /* First ensure that we fit completely in registers. */
4726 if (needed_intregs)
4727 {
4728 t = build_int_cst (TREE_TYPE (gpr),
4729 (REGPARM_MAX - needed_intregs + 1) * 8);
4730 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4731 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4732 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4733 gimplify_and_add (t, pre_p);
4734 }
4735 if (needed_sseregs)
4736 {
4737 t = build_int_cst (TREE_TYPE (fpr),
4738 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4739 + REGPARM_MAX * 8);
4740 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4741 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4742 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4743 gimplify_and_add (t, pre_p);
4744 }
4745
4746 /* Compute index to start of area used for integer regs. */
4747 if (needed_intregs)
4748 {
4749 /* int_addr = gpr + sav; */
4750 t = fold_convert (ptr_type_node, gpr);
4751 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4752 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4753 gimplify_and_add (t, pre_p);
4754 }
4755 if (needed_sseregs)
4756 {
4757 /* sse_addr = fpr + sav; */
4758 t = fold_convert (ptr_type_node, fpr);
4759 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4760 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4761 gimplify_and_add (t, pre_p);
4762 }
4763 if (need_temp)
4764 {
4765 int i;
4766 tree temp = create_tmp_var (type, "va_arg_tmp");
4767
4768 /* addr = &temp; */
4769 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4770 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4771 gimplify_and_add (t, pre_p);
4772
4773 for (i = 0; i < XVECLEN (container, 0); i++)
4774 {
4775 rtx slot = XVECEXP (container, 0, i);
4776 rtx reg = XEXP (slot, 0);
4777 enum machine_mode mode = GET_MODE (reg);
4778 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4779 tree addr_type = build_pointer_type (piece_type);
4780 tree src_addr, src;
4781 int src_offset;
4782 tree dest_addr, dest;
4783
4784 if (SSE_REGNO_P (REGNO (reg)))
4785 {
4786 src_addr = sse_addr;
4787 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4788 }
4789 else
4790 {
4791 src_addr = int_addr;
4792 src_offset = REGNO (reg) * 8;
4793 }
4794 src_addr = fold_convert (addr_type, src_addr);
4795 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4796 size_int (src_offset)));
4797 src = build_va_arg_indirect_ref (src_addr);
4798
4799 dest_addr = fold_convert (addr_type, addr);
4800 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4801 size_int (INTVAL (XEXP (slot, 1)))));
4802 dest = build_va_arg_indirect_ref (dest_addr);
4803
4804 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4805 gimplify_and_add (t, pre_p);
4806 }
4807 }
4808
4809 if (needed_intregs)
4810 {
4811 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4812 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4813 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4814 gimplify_and_add (t, pre_p);
4815 }
4816 if (needed_sseregs)
4817 {
4818 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4819 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4820 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4821 gimplify_and_add (t, pre_p);
4822 }
4823
4824 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4825 gimplify_and_add (t, pre_p);
4826
4827 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4828 append_to_statement_list (t, pre_p);
4829 }
4830
4831 /* ... otherwise out of the overflow area. */
4832
4833 /* Care for on-stack alignment if needed. */
4834 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4835 || integer_zerop (TYPE_SIZE (type)))
4836 t = ovf;
4837 else
4838 {
4839 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4840 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4841 build_int_cst (TREE_TYPE (ovf), align - 1));
4842 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4843 build_int_cst (TREE_TYPE (t), -align));
4844 }
4845 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4846
4847 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4848 gimplify_and_add (t2, pre_p);
4849
4850 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4851 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4852 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4853 gimplify_and_add (t, pre_p);
4854
4855 if (container)
4856 {
4857 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4858 append_to_statement_list (t, pre_p);
4859 }
4860
4861 ptrtype = build_pointer_type (type);
4862 addr = fold_convert (ptrtype, addr);
4863
4864 if (indirect_p)
4865 addr = build_va_arg_indirect_ref (addr);
4866 return build_va_arg_indirect_ref (addr);
4867}
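/* Rough sketch of what the gimplification above amounts to for a single
   integer argument (illustrative pseudo code, not from the original source):

     if (ap->gp_offset < 6 * 8)
       {
         addr = ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }
     else
       {
         addr = ap->overflow_arg_area;
         ap->overflow_arg_area += 8;
       }

   SSE arguments use fp_offset and 16-byte slots instead, and types that need
   a temporary are first copied out of the save area piecewise.  */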
4868
4869/* Return nonzero if OPNUM's MEM should be matched
4870 in movabs* patterns. */
4871
4872int
4873ix86_check_movabs (rtx insn, int opnum)
4874{
4875 rtx set, mem;
4876
4877 set = PATTERN (insn);
4878 if (GET_CODE (set) == PARALLEL)
4879 set = XVECEXP (set, 0, 0);
4880 gcc_assert (GET_CODE (set) == SET);
4881 mem = XEXP (set, opnum);
4882 while (GET_CODE (mem) == SUBREG)
4883 mem = SUBREG_REG (mem);
4884 gcc_assert (GET_CODE (mem) == MEM);
4885 return (volatile_ok || !MEM_VOLATILE_P (mem));
4886}
4887
4888/* Initialize the table of extra 80387 mathematical constants. */
4889
4890static void
4891init_ext_80387_constants (void)
4892{
4893 static const char * cst[5] =
4894 {
4895 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4896 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4897 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4898 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4899 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4900 };
4901 int i;
4902
4903 for (i = 0; i < 5; i++)
4904 {
4905 real_from_string (&ext_80387_constants_table[i], cst[i]);
4906 /* Ensure each constant is rounded to XFmode precision. */
4907 real_convert (&ext_80387_constants_table[i],
4908 XFmode, &ext_80387_constants_table[i]);
4909 }
4910
4911 ext_80387_constants_init = 1;
4912}
4913
4914/* Return true if the constant is something that can be loaded with
4915 a special instruction. */
4916
4917int
4918standard_80387_constant_p (rtx x)
4919{
4920 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4921 return -1;
4922
4923 if (x == CONST0_RTX (GET_MODE (x)))
4924 return 1;
4925 if (x == CONST1_RTX (GET_MODE (x)))
4926 return 2;
4927
4928 /* For XFmode constants, try to find a special 80387 instruction when
4929 optimizing for size or on those CPUs that benefit from them. */
4930 if (GET_MODE (x) == XFmode
4931 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4932 {
4933 REAL_VALUE_TYPE r;
4934 int i;
4935
4936 if (! ext_80387_constants_init)
4937 init_ext_80387_constants ();
4938
4939 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4940 for (i = 0; i < 5; i++)
4941 if (real_identical (&r, &ext_80387_constants_table[i]))
4942 return i + 3;
4943 }
4944
4945 return 0;
4946}
4947
4948/* Return the opcode of the special instruction to be used to load
4949 the constant X. */
4950
4951const char *
4952standard_80387_constant_opcode (rtx x)
4953{
4954 switch (standard_80387_constant_p (x))
4955 {
4956 case 1:
4957 return "fldz";
4958 case 2:
4959 return "fld1";
4960 case 3:
4961 return "fldlg2";
4962 case 4:
4963 return "fldln2";
4964 case 5:
4965 return "fldl2e";
4966 case 6:
4967 return "fldl2t";
4968 case 7:
4969 return "fldpi";
4970 default:
4971 gcc_unreachable ();
4972 }
4973}
4974
4975/* Return the CONST_DOUBLE representing the 80387 constant that is
4976 loaded by the specified special instruction. The argument IDX
4977 matches the return value from standard_80387_constant_p. */
4978
4979rtx
4980standard_80387_constant_rtx (int idx)
4981{
4982 int i;
4983
4984 if (! ext_80387_constants_init)
4985 init_ext_80387_constants ();
4986
4987 switch (idx)
4988 {
4989 case 3:
4990 case 4:
4991 case 5:
4992 case 6:
4993 case 7:
4994 i = idx - 3;
4995 break;
4996
4997 default:
4998 gcc_unreachable ();
4999 }
5000
5001 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5002 XFmode);
5003}
5004
5005/* Return 1 if MODE is a valid mode for SSE. */
5006static int
5007standard_sse_mode_p (enum machine_mode mode)
5008{
5009 switch (mode)
5010 {
5011 case V16QImode:
5012 case V8HImode:
5013 case V4SImode:
5014 case V2DImode:
5015 case V4SFmode:
5016 case V2DFmode:
5017 return 1;
5018
5019 default:
5020 return 0;
5021 }
5022}
5023
5024/* Return 1 if X is an FP constant we can load into an SSE register
5025 without using memory. */
5026int
5027standard_sse_constant_p (rtx x)
5028{
5029 enum machine_mode mode = GET_MODE (x);
5030
5031 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5032 return 1;
5033 if (vector_all_ones_operand (x, mode)
5034 && standard_sse_mode_p (mode))
5035 return TARGET_SSE2 ? 2 : -1;
5036
5037 return 0;
5038}
5039
5040/* Return the opcode of the special instruction to be used to load
5041 the constant X. */
5042
5043const char *
5044standard_sse_constant_opcode (rtx insn, rtx x)
5045{
5046 switch (standard_sse_constant_p (x))
5047 {
5048 case 1:
5049 if (get_attr_mode (insn) == MODE_V4SF)
5050 return "xorps\t%0, %0";
5051 else if (get_attr_mode (insn) == MODE_V2DF)
5052 return "xorpd\t%0, %0";
5053 else
5054 return "pxor\t%0, %0";
5055 case 2:
5056 return "pcmpeqd\t%0, %0";
5057 }
5058 gcc_unreachable ();
5059}
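/* Hedged usage note (not from the original source): an all-zero vector
   constant is loaded by XORing the destination with itself ("xorps", "xorpd"
   or "pxor" depending on the insn mode), and an all-ones vector constant by
   "pcmpeqd" of the destination with itself when SSE2 is available, avoiding
   a constant-pool load in both cases.  */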
5060
5061/* Return 1 if OP contains a symbol reference. */
5062
5063int
5064symbolic_reference_mentioned_p (rtx op)
5065{
5066 const char *fmt;
5067 int i;
5068
5069 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5070 return 1;
5071
5072 fmt = GET_RTX_FORMAT (GET_CODE (op));
5073 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5074 {
5075 if (fmt[i] == 'E')
5076 {
5077 int j;
5078
5079 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5080 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5081 return 1;
5082 }
5083
5084 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5085 return 1;
5086 }
5087
5088 return 0;
5089}
5090
5091/* Return 1 if it is appropriate to emit `ret' instructions in the
5092 body of a function. Do this only if the epilogue is simple, needing a
5093 couple of insns. Prior to reloading, we can't tell how many registers
5094 must be saved, so return 0 then. Return 0 if there is no frame
5095 marker to de-allocate. */
5096
5097int
5098ix86_can_use_return_insn_p (void)
5099{
5100 struct ix86_frame frame;
5101
5102 if (! reload_completed || frame_pointer_needed)
5103 return 0;
5104
5105 /* Don't allow more than 32k bytes of arguments to be popped, since
5106 that's all we can do with one instruction. */
5107 if (current_function_pops_args
5108 && current_function_args_size >= 32768)
5109 return 0;
5110
5111 ix86_compute_frame_layout (&frame);
5112 return frame.to_allocate == 0 && frame.nregs == 0;
5113}
5114
5115/* Value should be nonzero if functions must have frame pointers.
5116 Zero means the frame pointer need not be set up (and parms may
5117 be accessed via the stack pointer) in functions that seem suitable. */
5118
5119int
5120ix86_frame_pointer_required (void)
5121{
5122 /* If we accessed previous frames, then the generated code expects
5123 to be able to access the saved ebp value in our frame. */
5124 if (cfun->machine->accesses_prev_frame)
5125 return 1;
5126
5127 /* Several x86 OSes need a frame pointer for other reasons,
5128 usually pertaining to setjmp. */
5129 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5130 return 1;
5131
5132 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5133 the frame pointer by default. Turn it back on now if we've not
5134 got a leaf function. */
5135 if (TARGET_OMIT_LEAF_FRAME_POINTER
5136 && (!current_function_is_leaf
5137 || ix86_current_function_calls_tls_descriptor))
5138 return 1;
5139
5140 if (current_function_profile)
5141 return 1;
5142
5143 return 0;
5144}
5145
5146/* Record that the current function accesses previous call frames. */
5147
5148void
5149ix86_setup_frame_addresses (void)
5150{
5151 cfun->machine->accesses_prev_frame = 1;
5152}
5153
5154#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5155# define USE_HIDDEN_LINKONCE 1
5156#else
5157# define USE_HIDDEN_LINKONCE 0
5158#endif
5159
5160static int pic_labels_used;
5161
5162/* Fills in the label name that should be used for a pc thunk for
5163 the given register. */
5164
5165static void
5166get_pc_thunk_name (char name[32], unsigned int regno)
5167{
5168 gcc_assert (!TARGET_64BIT);
5169
5170 if (USE_HIDDEN_LINKONCE)
5171 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5172 else
5173 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5174}
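/* Example (not from the original source): with USE_HIDDEN_LINKONCE the thunk
   for %ebx is named "__i686.get_pc_thunk.bx"; otherwise an internal label is
   generated from the "LPR" prefix and the register number.  */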
5175
5176
5177/* This function generates code for -fpic that loads %ebx with
5178 the return address of the caller and then returns. */
5179
5180void
5181ix86_file_end (void)
5182{
5183 rtx xops[2];
5184 int regno;
5185
5186 for (regno = 0; regno < 8; ++regno)
5187 {
5188 char name[32];
5189
5190 if (! ((pic_labels_used >> regno) & 1))
5191 continue;
5192
5193 get_pc_thunk_name (name, regno);
5194
5195#if TARGET_MACHO
5196 if (TARGET_MACHO)
5197 {
5198 switch_to_section (darwin_sections[text_coal_section]);
5199 fputs ("\t.weak_definition\t", asm_out_file);
5200 assemble_name (asm_out_file, name);
5201 fputs ("\n\t.private_extern\t", asm_out_file);
5202 assemble_name (asm_out_file, name);
5203 fputs ("\n", asm_out_file);
5204 ASM_OUTPUT_LABEL (asm_out_file, name);
5205 }
5206 else
5207#endif
5208 if (USE_HIDDEN_LINKONCE)
5209 {
5210 tree decl;
5211
5212 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5213 error_mark_node);
5214 TREE_PUBLIC (decl) = 1;
5215 TREE_STATIC (decl) = 1;
5216 DECL_ONE_ONLY (decl) = 1;
5217
5218 (*targetm.asm_out.unique_section) (decl, 0);
5219 switch_to_section (get_named_section (decl, NULL, 0));
5220
5221 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5222 fputs ("\t.hidden\t", asm_out_file);
5223 assemble_name (asm_out_file, name);
5224 fputc ('\n', asm_out_file);
5225 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5226 }
5227 else
5228 {
5229 switch_to_section (text_section);
5230 ASM_OUTPUT_LABEL (asm_out_file, name);
5231 }
5232
5233 xops[0] = gen_rtx_REG (SImode, regno);
5234 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5235 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5236 output_asm_insn ("ret", xops);
5237 }
5238
5239 if (NEED_INDICATE_EXEC_STACK)
5240 file_end_indicate_exec_stack ();
5241}
5242
5243/* Emit code for the SET_GOT patterns. */
5244
5245const char *
5246output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5247{
5248 rtx xops[3];
5249
5250 xops[0] = dest;
5251 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5252
5253 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5254 {
5255 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5256
5257 if (!flag_pic)
5258 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5259 else
5260 output_asm_insn ("call\t%a2", xops);
5261
5262#if TARGET_MACHO
5263 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5264 is what will be referenced by the Mach-O PIC subsystem. */
5265 if (!label)
5266 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5267#endif
5268
5269 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5270 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5271
5272 if (flag_pic)
5273 output_asm_insn ("pop{l}\t%0", xops);
5274 }
5275 else
5276 {
5277 char name[32];
5278 get_pc_thunk_name (name, REGNO (dest));
5279 pic_labels_used |= 1 << REGNO (dest);
5280
5281 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5282 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5283 output_asm_insn ("call\t%X2", xops);
5284 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5285 is what will be referenced by the Mach-O PIC subsystem. */
5286#if TARGET_MACHO
5287 if (!label)
5288 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5289 else
5290 targetm.asm_out.internal_label (asm_out_file, "L",
5291 CODE_LABEL_NUMBER (label));
5292#endif
5293 }
5294
5295 if (TARGET_MACHO)
5296 return "";
5297
5298 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5299 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5300 else
5301 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5302
5303 return "";
5304}
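/* Hedged example of the emitted sequence (not from the original source):
   with deep branch prediction and -fpic, loading the GOT pointer into %ebx
   looks like

     call  __i686.get_pc_thunk.bx
     addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   while the non-thunk variant uses a call/pop pair on a local label followed
   by the same addition relative to that label.  */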
5305
5306/* Generate a "push" pattern for input ARG. */
5307
5308static rtx
5309gen_push (rtx arg)
5310{
5311 return gen_rtx_SET (VOIDmode,
5312 gen_rtx_MEM (Pmode,
5313 gen_rtx_PRE_DEC (Pmode,
5314 stack_pointer_rtx)),
5315 arg);
5316}
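/* Illustrative RTL (not from the original source): for %eax in 32-bit mode
   the generated pattern is

     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax))
*/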
5317
5318/* Return >= 0 if there is an unused call-clobbered register available
5319 for the entire function. */
5320
5321static unsigned int
5322ix86_select_alt_pic_regnum (void)
5323{
5324 if (current_function_is_leaf && !current_function_profile
5325 && !ix86_current_function_calls_tls_descriptor)
5326 {
5327 int i;
5328 for (i = 2; i >= 0; --i)
5329 if (!regs_ever_live[i])
5330 return i;
5331 }
5332
5333 return INVALID_REGNUM;
5334}
5335
5336/* Return 1 if we need to save REGNO. */
5337static int
5338ix86_save_reg (unsigned int regno, int maybe_eh_return)
5339{
5340 if (pic_offset_table_rtx
5341 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5342 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5343 || current_function_profile
5344 || current_function_calls_eh_return
5345 || current_function_uses_const_pool))
5346 {
5347 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5348 return 0;
5349 return 1;
5350 }
5351
5352 if (current_function_calls_eh_return && maybe_eh_return)
5353 {
5354 unsigned i;
5355 for (i = 0; ; i++)
5356 {
5357 unsigned test = EH_RETURN_DATA_REGNO (i);
5358 if (test == INVALID_REGNUM)
5359 break;
5360 if (test == regno)
5361 return 1;
5362 }
5363 }
5364
5365 if (cfun->machine->force_align_arg_pointer
5366 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5367 return 1;
5368
5369 return (regs_ever_live[regno]
5370 && !call_used_regs[regno]
5371 && !fixed_regs[regno]
5372 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5373}
5374
5375/* Return number of registers to be saved on the stack. */
5376
5377static int
5378ix86_nsaved_regs (void)
5379{
5380 int nregs = 0;
5381 int regno;
5382
5383 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5384 if (ix86_save_reg (regno, true))
5385 nregs++;
5386 return nregs;
5387}
5388
5389/* Return the offset between two registers, one to be eliminated, and the other
5390 its replacement, at the start of a routine. */
5391
5392HOST_WIDE_INT
5393ix86_initial_elimination_offset (int from, int to)
5394{
5395 struct ix86_frame frame;
5396 ix86_compute_frame_layout (&frame);
5397
5398 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5399 return frame.hard_frame_pointer_offset;
5400 else if (from == FRAME_POINTER_REGNUM
5401 && to == HARD_FRAME_POINTER_REGNUM)
5402 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5403 else
5404 {
5405 gcc_assert (to == STACK_POINTER_REGNUM);
5406
5407 if (from == ARG_POINTER_REGNUM)
5408 return frame.stack_pointer_offset;
5409
5410 gcc_assert (from == FRAME_POINTER_REGNUM);
5411 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5412 }
5413}
5414
5415/* Fill the ix86_frame structure describing the frame of the current function.  */
5416
5417static void
5418ix86_compute_frame_layout (struct ix86_frame *frame)
5419{
5420 HOST_WIDE_INT total_size;
5421 unsigned int stack_alignment_needed;
5422 HOST_WIDE_INT offset;
5423 unsigned int preferred_alignment;
5424 HOST_WIDE_INT size = get_frame_size ();
5425
5426 frame->nregs = ix86_nsaved_regs ();
5427 total_size = size;
5428
5429 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5430 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5431
5432  /* During reload iterations the number of registers saved can change.
5433     Recompute the value as needed.  Do not recompute when the number of
5434     registers did not change, as reload makes multiple calls to this
5435     function and does not expect the decision to change within a single iteration.  */
5436 if (!optimize_size
5437 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5438 {
5439 int count = frame->nregs;
5440
5441 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5442 /* The fast prologue uses move instead of push to save registers. This
5443 is significantly longer, but also executes faster as modern hardware
5444 can execute the moves in parallel, but can't do that for push/pop.
5445
5446         Be careful about choosing which prologue to emit:  when the function
5447         takes many instructions to execute we may as well use the slow version,
5448         and likewise when the function is known to be outside a hot spot (which
5449         is known only with profile feedback).  Weight the size of the function
5450         by the number of registers to save, as it is cheap to use one or two
5451         push instructions but very slow to use many of them.  */
5452 if (count)
5453 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5454 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5455 || (flag_branch_probabilities
5456 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5457 cfun->machine->use_fast_prologue_epilogue = false;
5458 else
5459 cfun->machine->use_fast_prologue_epilogue
5460 = !expensive_function_p (count);
5461 }
5462 if (TARGET_PROLOGUE_USING_MOVE
5463 && cfun->machine->use_fast_prologue_epilogue)
5464 frame->save_regs_using_mov = true;
5465 else
5466 frame->save_regs_using_mov = false;
5467
5468
5469 /* Skip return address and saved base pointer. */
5470 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5471
5472 frame->hard_frame_pointer_offset = offset;
5473
5474  /* Do some sanity checking of stack_alignment_needed and
5475     preferred_alignment, since the i386 port is the only one using these
5476     features, which may break easily.  */
5477
5478 gcc_assert (!size || stack_alignment_needed);
5479 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5480 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5481 gcc_assert (stack_alignment_needed
5482 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5483
5484 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5485 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5486
5487 /* Register save area */
5488 offset += frame->nregs * UNITS_PER_WORD;
5489
5490 /* Va-arg area */
5491 if (ix86_save_varrargs_registers)
5492 {
5493 offset += X86_64_VARARGS_SIZE;
5494 frame->va_arg_size = X86_64_VARARGS_SIZE;
5495 }
5496 else
5497 frame->va_arg_size = 0;
5498
5499 /* Align start of frame for local function. */
5500 frame->padding1 = ((offset + stack_alignment_needed - 1)
5501 & -stack_alignment_needed) - offset;
5502
5503 offset += frame->padding1;
5504
5505 /* Frame pointer points here. */
5506 frame->frame_pointer_offset = offset;
5507
5508 offset += size;
5509
5510  /* Add the outgoing arguments area.  It can be skipped if we eliminated
5511     all the function calls as dead code.
5512     Skipping is however impossible when the function calls alloca: the
5513     alloca expander assumes that the last current_function_outgoing_args_size
5514     bytes of the stack frame are unused.  */
5515 if (ACCUMULATE_OUTGOING_ARGS
5516 && (!current_function_is_leaf || current_function_calls_alloca
5517 || ix86_current_function_calls_tls_descriptor))
5518 {
5519 offset += current_function_outgoing_args_size;
5520 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5521 }
5522 else
5523 frame->outgoing_arguments_size = 0;
5524
5525 /* Align stack boundary. Only needed if we're calling another function
5526 or using alloca. */
5527 if (!current_function_is_leaf || current_function_calls_alloca
5528 || ix86_current_function_calls_tls_descriptor)
5529 frame->padding2 = ((offset + preferred_alignment - 1)
5530 & -preferred_alignment) - offset;
5531 else
5532 frame->padding2 = 0;
5533
5534 offset += frame->padding2;
5535
5536 /* We've reached end of stack frame. */
5537 frame->stack_pointer_offset = offset;
5538
5539 /* Size prologue needs to allocate. */
5540 frame->to_allocate =
5541 (size + frame->padding1 + frame->padding2
5542 + frame->outgoing_arguments_size + frame->va_arg_size);
5543
5544 if ((!frame->to_allocate && frame->nregs <= 1)
5545 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5546 frame->save_regs_using_mov = false;
5547
5548 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5549 && current_function_is_leaf
5550 && !ix86_current_function_calls_tls_descriptor)
5551 {
5552 frame->red_zone_size = frame->to_allocate;
5553 if (frame->save_regs_using_mov)
5554 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5555 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5556 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5557 }
5558 else
5559 frame->red_zone_size = 0;
5560 frame->to_allocate -= frame->red_zone_size;
5561 frame->stack_pointer_offset -= frame->red_zone_size;
5562#if 0
5563 fprintf (stderr, "nregs: %i\n", frame->nregs);
5564 fprintf (stderr, "size: %i\n", size);
5565 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5566 fprintf (stderr, "padding1: %i\n", frame->padding1);
5567 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5568 fprintf (stderr, "padding2: %i\n", frame->padding2);
5569 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5570 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5571 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5572 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5573 frame->hard_frame_pointer_offset);
5574 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5575#endif
5576}
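
/* A rough sketch of the frame laid out above, growing downwards from the
   incoming stack pointer:

	[ return address        ]
	[ saved frame pointer   ]  <- hard_frame_pointer_offset
	[ register save area    ]
	[ va-arg save area      ]
	[ padding1              ]  <- frame_pointer_offset
	[ local variables       ]
	[ outgoing arguments    ]
	[ padding2              ]  <- stack_pointer_offset

   When the red zone is usable, the final to_allocate bytes (and the register
   saves done with moves) are instead placed below the stack pointer, so the
   prologue does not have to allocate them.  */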
5577
5578/* Emit code to save registers in the prologue. */
5579
5580static void
5581ix86_emit_save_regs (void)
5582{
5583 unsigned int regno;
5584 rtx insn;
5585
5586 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5587 if (ix86_save_reg (regno, true))
5588 {
5589 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5590 RTX_FRAME_RELATED_P (insn) = 1;
5591 }
5592}
5593
5594/* Emit code to save registers using MOV insns.  The first register
5595   is saved at POINTER + OFFSET.  */
5596static void
5597ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5598{
5599 unsigned int regno;
5600 rtx insn;
5601
5602 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5603 if (ix86_save_reg (regno, true))
5604 {
5605 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5606 Pmode, offset),
5607 gen_rtx_REG (Pmode, regno));
5608 RTX_FRAME_RELATED_P (insn) = 1;
5609 offset += UNITS_PER_WORD;
5610 }
5611}
5612
5613/* Expand prologue or epilogue stack adjustment.
5614   The pattern exists to put a dependency on all ebp-based memory accesses.
5615   STYLE should be negative if the instructions should be marked as frame
5616   related, zero if the %r11 register is live and cannot be freely used,
5617   and positive otherwise.  */
5618
5619static void
5620pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5621{
5622 rtx insn;
5623
5624 if (! TARGET_64BIT)
5625 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5626 else if (x86_64_immediate_operand (offset, DImode))
5627 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5628 else
5629 {
5630 rtx r11;
5631 /* r11 is used by indirect sibcall return as well, set before the
5632 epilogue and used after the epilogue. ATM indirect sibcall
5633 shouldn't be used together with huge frame sizes in one
5634 function because of the frame_size check in sibcall.c. */
5635 gcc_assert (style);
5636 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5637 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5638 if (style < 0)
5639 RTX_FRAME_RELATED_P (insn) = 1;
5640 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5641 offset));
5642 }
5643 if (style < 0)
5644 RTX_FRAME_RELATED_P (insn) = 1;
5645}
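
/* The rex64 adjust patterns used above only accept sign-extended 32-bit
   immediates (what x86_64_immediate_operand checks for); larger adjustments
   are therefore routed through %r11 as a scratch register first.  */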
5646
5647/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5648
5649static rtx
5650ix86_internal_arg_pointer (void)
5651{
5652 bool has_force_align_arg_pointer =
5653 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5654 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5655 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5656 && DECL_NAME (current_function_decl)
5657 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5658 && DECL_FILE_SCOPE_P (current_function_decl))
5659 || ix86_force_align_arg_pointer
5660 || has_force_align_arg_pointer)
5661 {
5662 /* Nested functions can't realign the stack due to a register
5663 conflict. */
5664 if (DECL_CONTEXT (current_function_decl)
5665 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5666 {
5667 if (ix86_force_align_arg_pointer)
5668 warning (0, "-mstackrealign ignored for nested functions");
5669 if (has_force_align_arg_pointer)
5670 error ("%s not supported for nested functions",
5671 ix86_force_align_arg_pointer_string);
5672 return virtual_incoming_args_rtx;
5673 }
5674 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5675 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5676 }
5677 else
5678 return virtual_incoming_args_rtx;
5679}
5680
5681/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5682 This is called from dwarf2out.c to emit call frame instructions
5683 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5684static void
5685ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5686{
5687 rtx unspec = SET_SRC (pattern);
5688 gcc_assert (GET_CODE (unspec) == UNSPEC);
5689
5690 switch (index)
5691 {
5692 case UNSPEC_REG_SAVE:
5693 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5694 SET_DEST (pattern));
5695 break;
5696 case UNSPEC_DEF_CFA:
5697 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5698 INTVAL (XVECEXP (unspec, 0, 0)));
5699 break;
5700 default:
5701 gcc_unreachable ();
5702 }
5703}
5704
5705/* Expand the prologue into a bunch of separate insns. */
5706
5707void
5708ix86_expand_prologue (void)
5709{
5710 rtx insn;
5711 bool pic_reg_used;
5712 struct ix86_frame frame;
5713 HOST_WIDE_INT allocate;
5714
5715 ix86_compute_frame_layout (&frame);
5716
5717 if (cfun->machine->force_align_arg_pointer)
5718 {
5719 rtx x, y;
5720
5721 /* Grab the argument pointer. */
5722 x = plus_constant (stack_pointer_rtx, 4);
5723 y = cfun->machine->force_align_arg_pointer;
5724 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5725 RTX_FRAME_RELATED_P (insn) = 1;
5726
5727      /* The unwind info consists of two parts: install the fafp as the cfa,
5728         and record the fafp as the "save register" of the stack pointer.
5729         The latter is there so that the unwinder can see where it should
5730         restore the stack pointer across the and insn below.  */
5731 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5732 x = gen_rtx_SET (VOIDmode, y, x);
5733 RTX_FRAME_RELATED_P (x) = 1;
5734 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5735 UNSPEC_REG_SAVE);
5736 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5737 RTX_FRAME_RELATED_P (y) = 1;
5738 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5739 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5740 REG_NOTES (insn) = x;
5741
5742 /* Align the stack. */
5743 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5744 GEN_INT (-16)));
5745
5746 /* And here we cheat like madmen with the unwind info. We force the
5747 cfa register back to sp+4, which is exactly what it was at the
5748 start of the function. Re-pushing the return address results in
5749 the return at the same spot relative to the cfa, and thus is
5750 correct wrt the unwind info. */
5751 x = cfun->machine->force_align_arg_pointer;
5752 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5753 insn = emit_insn (gen_push (x));
5754 RTX_FRAME_RELATED_P (insn) = 1;
5755
5756 x = GEN_INT (4);
5757 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5758 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5759 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5760 REG_NOTES (insn) = x;
5761 }
5762
5763 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5764 slower on all targets. Also sdb doesn't like it. */
5765
5766 if (frame_pointer_needed)
5767 {
5768 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5769 RTX_FRAME_RELATED_P (insn) = 1;
5770
5771 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5772 RTX_FRAME_RELATED_P (insn) = 1;
5773 }
5774
5775 allocate = frame.to_allocate;
5776
5777 if (!frame.save_regs_using_mov)
5778 ix86_emit_save_regs ();
5779 else
5780 allocate += frame.nregs * UNITS_PER_WORD;
5781
5782  /* When using the red zone we may start saving registers before allocating
5783     the stack frame, saving one cycle of the prologue.  */
5784 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5785 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5786 : stack_pointer_rtx,
5787 -frame.nregs * UNITS_PER_WORD);
5788
5789 if (allocate == 0)
5790 ;
5791 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5792 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5793 GEN_INT (-allocate), -1);
5794 else
5795 {
5796 /* Only valid for Win32. */
5797 rtx eax = gen_rtx_REG (SImode, 0);
5798 bool eax_live = ix86_eax_live_at_start_p ();
5799 rtx t;
5800
5801 gcc_assert (!TARGET_64BIT);
5802
5803 if (eax_live)
5804 {
5805 emit_insn (gen_push (eax));
5806 allocate -= 4;
5807 }
5808
5809 emit_move_insn (eax, GEN_INT (allocate));
5810
5811 insn = emit_insn (gen_allocate_stack_worker (eax));
5812 RTX_FRAME_RELATED_P (insn) = 1;
5813 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5814 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5815 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5816 t, REG_NOTES (insn));
5817
5818 if (eax_live)
5819 {
5820 if (frame_pointer_needed)
5821 t = plus_constant (hard_frame_pointer_rtx,
5822 allocate
5823 - frame.to_allocate
5824 - frame.nregs * UNITS_PER_WORD);
5825 else
5826 t = plus_constant (stack_pointer_rtx, allocate);
5827 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5828 }
5829 }
5830
5831 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5832 {
5833 if (!frame_pointer_needed || !frame.to_allocate)
5834 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5835 else
5836 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5837 -frame.nregs * UNITS_PER_WORD);
5838 }
5839
5840 pic_reg_used = false;
5841 if (pic_offset_table_rtx
5842 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5843 || current_function_profile))
5844 {
5845 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5846
5847 if (alt_pic_reg_used != INVALID_REGNUM)
5848 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5849
5850 pic_reg_used = true;
5851 }
5852
5853 if (pic_reg_used)
5854 {
5855 if (TARGET_64BIT)
5856 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5857 else
5858 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5859
5860 /* Even with accurate pre-reload life analysis, we can wind up
5861 deleting all references to the pic register after reload.
5862 Consider if cross-jumping unifies two sides of a branch
5863 controlled by a comparison vs the only read from a global.
5864 In which case, allow the set_got to be deleted, though we're
5865 too late to do anything about the ebx save in the prologue. */
5866 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5867 }
5868
5869  /* Prevent function calls from being scheduled before the call to mcount.
5870     In the pic_reg_used case, make sure that the got load isn't deleted.  */
5871 if (current_function_profile)
5872 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5873}
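
/* For a typical 32-bit function with a frame pointer, the expansion above
   boils down to something like

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx		(one push per saved register)
	subl	$SIZE, %esp

   or, in the save_regs_using_mov variant, a single larger subl followed by
   movl stores of the saved registers.  When TARGET_STACK_PROBE requires it
   for large frames, the subl is replaced by moving the size into %eax and
   calling the stack-probing worker.  */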
5874
5875/* Emit code to restore saved registers using MOV insns. First register
5876 is restored from POINTER + OFFSET. */
5877static void
5878ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5879 int maybe_eh_return)
5880{
5881 int regno;
5882 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5883
5884 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5885 if (ix86_save_reg (regno, maybe_eh_return))
5886 {
5887	/* Ensure that adjust_address won't be forced to produce a pointer
5888	   outside the range allowed by the x86-64 instruction set.  */
5889 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5890 {
5891 rtx r11;
5892
5893 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5894 emit_move_insn (r11, GEN_INT (offset));
5895 emit_insn (gen_adddi3 (r11, r11, pointer));
5896 base_address = gen_rtx_MEM (Pmode, r11);
5897 offset = 0;
5898 }
5899 emit_move_insn (gen_rtx_REG (Pmode, regno),
5900 adjust_address (base_address, Pmode, offset));
5901 offset += UNITS_PER_WORD;
5902 }
5903}
5904
5905/* Restore function stack, frame, and registers. */
5906
5907void
5908ix86_expand_epilogue (int style)
5909{
5910 int regno;
5911 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5912 struct ix86_frame frame;
5913 HOST_WIDE_INT offset;
5914
5915 ix86_compute_frame_layout (&frame);
5916
5917 /* Calculate start of saved registers relative to ebp. Special care
5918 must be taken for the normal return case of a function using
5919 eh_return: the eax and edx registers are marked as saved, but not
5920 restored along this path. */
5921 offset = frame.nregs;
5922 if (current_function_calls_eh_return && style != 2)
5923 offset -= 2;
5924 offset *= -UNITS_PER_WORD;
5925
5926  /* If we're only restoring one register and sp is not valid, then
5927     use a move instruction to restore the register, since it's
5928     less work than reloading sp and popping the register.
5929
5930     The default code results in a stack adjustment using an add/lea
5931     instruction, while this code results in a LEAVE instruction (or its
5932     discrete equivalent), so it is profitable in some other cases as well,
5933     especially when there are no registers to restore.  We also use this
5934     code when TARGET_USE_LEAVE is set and there is exactly one register to
5935     pop.  This heuristic may need some tuning in the future.  */
5936 if ((!sp_valid && frame.nregs <= 1)
5937 || (TARGET_EPILOGUE_USING_MOVE
5938 && cfun->machine->use_fast_prologue_epilogue
5939 && (frame.nregs > 1 || frame.to_allocate))
5940 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5941 || (frame_pointer_needed && TARGET_USE_LEAVE
5942 && cfun->machine->use_fast_prologue_epilogue
5943 && frame.nregs == 1)
5944 || current_function_calls_eh_return)
5945 {
5946      /* Restore registers.  We can use ebp or esp to address the memory
5947         locations.  If both are available, default to ebp, since offsets
5948         are known to be small.  The only exception is esp pointing directly
5949         to the end of the block of saved registers, where we may simplify
5950         the addressing mode.  */
5951
5952 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5953 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5954 frame.to_allocate, style == 2);
5955 else
5956 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5957 offset, style == 2);
5958
5959 /* eh_return epilogues need %ecx added to the stack pointer. */
5960 if (style == 2)
5961 {
5962 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5963
5964 if (frame_pointer_needed)
5965 {
5966 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5967 tmp = plus_constant (tmp, UNITS_PER_WORD);
5968 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5969
5970 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5971 emit_move_insn (hard_frame_pointer_rtx, tmp);
5972
5973 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5974 const0_rtx, style);
5975 }
5976 else
5977 {
5978 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5979 tmp = plus_constant (tmp, (frame.to_allocate
5980 + frame.nregs * UNITS_PER_WORD));
5981 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5982 }
5983 }
5984 else if (!frame_pointer_needed)
5985 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5986 GEN_INT (frame.to_allocate
5987 + frame.nregs * UNITS_PER_WORD),
5988 style);
5989 /* If not an i386, mov & pop is faster than "leave". */
5990 else if (TARGET_USE_LEAVE || optimize_size
5991 || !cfun->machine->use_fast_prologue_epilogue)
5992 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5993 else
5994 {
5995 pro_epilogue_adjust_stack (stack_pointer_rtx,
5996 hard_frame_pointer_rtx,
5997 const0_rtx, style);
5998 if (TARGET_64BIT)
5999 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6000 else
6001 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6002 }
6003 }
6004 else
6005 {
6006 /* First step is to deallocate the stack frame so that we can
6007 pop the registers. */
6008 if (!sp_valid)
6009 {
6010 gcc_assert (frame_pointer_needed);
6011 pro_epilogue_adjust_stack (stack_pointer_rtx,
6012 hard_frame_pointer_rtx,
6013 GEN_INT (offset), style);
6014 }
6015 else if (frame.to_allocate)
6016 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6017 GEN_INT (frame.to_allocate), style);
6018
6019 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6020 if (ix86_save_reg (regno, false))
6021 {
6022 if (TARGET_64BIT)
6023 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6024 else
6025 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6026 }
6027 if (frame_pointer_needed)
6028 {
6029 /* Leave results in shorter dependency chains on CPUs that are
6030 able to grok it fast. */
6031 if (TARGET_USE_LEAVE)
6032 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6033 else if (TARGET_64BIT)
6034 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6035 else
6036 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6037 }
6038 }
6039
6040 if (cfun->machine->force_align_arg_pointer)
6041 {
6042 emit_insn (gen_addsi3 (stack_pointer_rtx,
6043 cfun->machine->force_align_arg_pointer,
6044 GEN_INT (-4)));
6045 }
6046
6047 /* Sibcall epilogues don't want a return instruction. */
6048 if (style == 0)
6049 return;
6050
6051 if (current_function_pops_args && current_function_args_size)
6052 {
6053 rtx popc = GEN_INT (current_function_pops_args);
6054
6055      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
6056	 return address, do an explicit add, and jump indirectly to the
6057	 caller.  */
6058
6059 if (current_function_pops_args >= 65536)
6060 {
6061 rtx ecx = gen_rtx_REG (SImode, 2);
6062
6063 /* There is no "pascal" calling convention in 64bit ABI. */
6064 gcc_assert (!TARGET_64BIT);
6065
6066 emit_insn (gen_popsi1 (ecx));
6067 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6068 emit_jump_insn (gen_return_indirect_internal (ecx));
6069 }
6070 else
6071 emit_jump_insn (gen_return_pop_internal (popc));
6072 }
6073 else
6074 emit_jump_insn (gen_return_internal ());
6075}
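
/* "leave" used above is the one-byte equivalent of "movl %ebp, %esp" followed
   by "popl %ebp"; it is emitted when TARGET_USE_LEAVE says the CPU handles it
   well, and the discrete mov/pop pair is used otherwise.  */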
6076
6077/* Reset from the function's potential modifications. */
6078
6079static void
6080ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6081 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6082{
6083 if (pic_offset_table_rtx)
6084 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6085#if TARGET_MACHO
6086 /* Mach-O doesn't support labels at the end of objects, so if
6087 it looks like we might want one, insert a NOP. */
6088 {
6089 rtx insn = get_last_insn ();
6090 while (insn
6091 && NOTE_P (insn)
6092 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6093 insn = PREV_INSN (insn);
6094 if (insn
6095 && (LABEL_P (insn)
6096 || (NOTE_P (insn)
6097 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6098 fputs ("\tnop\n", file);
6099 }
6100#endif
6101
6102}
6103
6104/* Extract the parts of an RTL expression that is a valid memory address
6105   for an instruction.  Return 0 if the structure of the address is
6106   grossly off.  Return -1 if the address contains ASHIFT, so it is not
6107   strictly valid, but is still used to compute the length of an lea insn.  */
6108
6109int
6110ix86_decompose_address (rtx addr, struct ix86_address *out)
6111{
6112 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6113 rtx base_reg, index_reg;
6114 HOST_WIDE_INT scale = 1;
6115 rtx scale_rtx = NULL_RTX;
6116 int retval = 1;
6117 enum ix86_address_seg seg = SEG_DEFAULT;
6118
6119 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
6120 base = addr;
6121 else if (GET_CODE (addr) == PLUS)
6122 {
6123 rtx addends[4], op;
6124 int n = 0, i;
6125
6126 op = addr;
6127 do
6128 {
6129 if (n >= 4)
6130 return 0;
6131 addends[n++] = XEXP (op, 1);
6132 op = XEXP (op, 0);
6133 }
6134 while (GET_CODE (op) == PLUS);
6135 if (n >= 4)
6136 return 0;
6137 addends[n] = op;
6138
6139 for (i = n; i >= 0; --i)
6140 {
6141 op = addends[i];
6142 switch (GET_CODE (op))
6143 {
6144 case MULT:
6145 if (index)
6146 return 0;
6147 index = XEXP (op, 0);
6148 scale_rtx = XEXP (op, 1);
6149 break;
6150
6151 case UNSPEC:
6152 if (XINT (op, 1) == UNSPEC_TP
6153 && TARGET_TLS_DIRECT_SEG_REFS
6154 && seg == SEG_DEFAULT)
6155 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6156 else
6157 return 0;
6158 break;
6159
6160 case REG:
6161 case SUBREG:
6162 if (!base)
6163 base = op;
6164 else if (!index)
6165 index = op;
6166 else
6167 return 0;
6168 break;
6169
6170 case CONST:
6171 case CONST_INT:
6172 case SYMBOL_REF:
6173 case LABEL_REF:
6174 if (disp)
6175 return 0;
6176 disp = op;
6177 break;
6178
6179 default:
6180 return 0;
6181 }
6182 }
6183 }
6184 else if (GET_CODE (addr) == MULT)
6185 {
6186 index = XEXP (addr, 0); /* index*scale */
6187 scale_rtx = XEXP (addr, 1);
6188 }
6189 else if (GET_CODE (addr) == ASHIFT)
6190 {
6191 rtx tmp;
6192
6193 /* We're called for lea too, which implements ashift on occasion. */
6194 index = XEXP (addr, 0);
6195 tmp = XEXP (addr, 1);
6196 if (GET_CODE (tmp) != CONST_INT)
6197 return 0;
6198 scale = INTVAL (tmp);
6199 if ((unsigned HOST_WIDE_INT) scale > 3)
6200 return 0;
6201 scale = 1 << scale;
6202 retval = -1;
6203 }
6204 else
6205 disp = addr; /* displacement */
6206
6207 /* Extract the integral value of scale. */
6208 if (scale_rtx)
6209 {
6210 if (GET_CODE (scale_rtx) != CONST_INT)
6211 return 0;
6212 scale = INTVAL (scale_rtx);
6213 }
6214
6215 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6216 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6217
6218  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
6219 if (base_reg && index_reg && scale == 1
6220 && (index_reg == arg_pointer_rtx
6221 || index_reg == frame_pointer_rtx
6222 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6223 {
6224 rtx tmp;
6225 tmp = base, base = index, index = tmp;
6226 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6227 }
6228
6229 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6230 if ((base_reg == hard_frame_pointer_rtx
6231 || base_reg == frame_pointer_rtx
6232 || base_reg == arg_pointer_rtx) && !disp)
6233 disp = const0_rtx;
6234
6235  /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
6236     Avoid this by transforming it to [%esi+0].  */
6237 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6238 && base_reg && !index_reg && !disp
6239 && REG_P (base_reg)
6240 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6241 disp = const0_rtx;
6242
6243 /* Special case: encode reg+reg instead of reg*2. */
6244 if (!base && index && scale && scale == 2)
6245 base = index, base_reg = index_reg, scale = 1;
6246
6247 /* Special case: scaling cannot be encoded without base or displacement. */
6248 if (!base && !disp && index && scale != 1)
6249 disp = const0_rtx;
6250
6251 out->base = base;
6252 out->index = index;
6253 out->disp = disp;
6254 out->scale = scale;
6255 out->seg = seg;
6256
6257 return retval;
6258}
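
/* A few worked examples of the decomposition above (SImode registers):

     (reg %ebx)				-> base = %ebx
     (plus (reg %ebx) (const_int 8))	-> base = %ebx, disp = 8
     (plus (mult (reg %eax) (const_int 4)) (reg %esi))
					-> index = %eax, scale = 4, base = %esi
     (ashift (reg %eax) (const_int 2))	-> index = %eax, scale = 4, retval = -1

   i.e. the base + index*scale + disp (+ segment) form that the hardware
   addressing modes encode.  */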
6259
6260/* Return the cost of the memory address X.
6261   For i386, it is better to use a complex address than let gcc copy
6262   the address into a reg and make a new pseudo.  But not if the address
6263   requires two regs - that would mean more pseudos with longer
6264   lifetimes.  */
6265static int
6266ix86_address_cost (rtx x)
6267{
6268 struct ix86_address parts;
6269 int cost = 1;
6270 int ok = ix86_decompose_address (x, &parts);
6271
6272 gcc_assert (ok);
6273
6274 if (parts.base && GET_CODE (parts.base) == SUBREG)
6275 parts.base = SUBREG_REG (parts.base);
6276 if (parts.index && GET_CODE (parts.index) == SUBREG)
6277 parts.index = SUBREG_REG (parts.index);
6278
6279 /* More complex memory references are better. */
6280 if (parts.disp && parts.disp != const0_rtx)
6281 cost--;
6282 if (parts.seg != SEG_DEFAULT)
6283 cost--;
6284
6285 /* Attempt to minimize number of registers in the address. */
6286 if ((parts.base
6287 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6288 || (parts.index
6289 && (!REG_P (parts.index)
6290 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6291 cost++;
6292
6293 if (parts.base
6294 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6295 && parts.index
6296 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6297 && parts.base != parts.index)
6298 cost++;
6299
6300  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6301     since its predecode logic can't detect the length of such instructions
6302     and decoding degenerates to the vector decoder.  Increase the cost of
6303     such addresses here.  The penalty is at least 2 cycles.  It may be
6304     worthwhile to split such addresses or even refuse them entirely.
6305
6306     The following addressing modes are affected:
6307      [base+scale*index]
6308      [scale*index+disp]
6309      [base+index]
6310
6311     The first and last case may be avoidable by explicitly coding the zero
6312     into the memory address, but I don't have an AMD-K6 machine handy to
6313     check this theory.  */
6314
6315 if (TARGET_K6
6316 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6317 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6318 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6319 cost += 10;
6320
6321 return cost;
6322}
6323
6324/* If X is a machine specific address (i.e. a symbol or label being
6325 referenced as a displacement from the GOT implemented using an
6326 UNSPEC), then return the base term. Otherwise return X. */
6327
6328rtx
6329ix86_find_base_term (rtx x)
6330{
6331 rtx term;
6332
6333 if (TARGET_64BIT)
6334 {
6335 if (GET_CODE (x) != CONST)
6336 return x;
6337 term = XEXP (x, 0);
6338 if (GET_CODE (term) == PLUS
6339 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6340 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6341 term = XEXP (term, 0);
6342 if (GET_CODE (term) != UNSPEC
6343 || XINT (term, 1) != UNSPEC_GOTPCREL)
6344 return x;
6345
6346 term = XVECEXP (term, 0, 0);
6347
6348 if (GET_CODE (term) != SYMBOL_REF
6349 && GET_CODE (term) != LABEL_REF)
6350 return x;
6351
6352 return term;
6353 }
6354
6355 term = ix86_delegitimize_address (x);
6356
6357 if (GET_CODE (term) != SYMBOL_REF
6358 && GET_CODE (term) != LABEL_REF)
6359 return x;
6360
6361 return term;
6362}
6363
6364/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6365   this is used to form addresses to local data when -fPIC is in
6366   use.  */
6367
6368static bool
6369darwin_local_data_pic (rtx disp)
6370{
6371 if (GET_CODE (disp) == MINUS)
6372 {
6373 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6374 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6375 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6376 {
6377 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6378 if (! strcmp (sym_name, "<pic base>"))
6379 return true;
6380 }
6381 }
6382
6383 return false;
6384}
6385
6386/* Determine if a given RTX is a valid constant. We already know this
6387 satisfies CONSTANT_P. */
6388
6389bool
6390legitimate_constant_p (rtx x)
6391{
6392 switch (GET_CODE (x))
6393 {
6394 case CONST:
6395 x = XEXP (x, 0);
6396
6397 if (GET_CODE (x) == PLUS)
6398 {
6399 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6400 return false;
6401 x = XEXP (x, 0);
6402 }
6403
6404 if (TARGET_MACHO && darwin_local_data_pic (x))
6405 return true;
6406
6407 /* Only some unspecs are valid as "constants". */
6408 if (GET_CODE (x) == UNSPEC)
6409 switch (XINT (x, 1))
6410 {
6411 case UNSPEC_GOTOFF:
6412 return TARGET_64BIT;
6413 case UNSPEC_TPOFF:
6414 case UNSPEC_NTPOFF:
6415 x = XVECEXP (x, 0, 0);
6416 return (GET_CODE (x) == SYMBOL_REF
6417 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6418 case UNSPEC_DTPOFF:
6419 x = XVECEXP (x, 0, 0);
6420 return (GET_CODE (x) == SYMBOL_REF
6421 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6422 default:
6423 return false;
6424 }
6425
6426 /* We must have drilled down to a symbol. */
6427 if (GET_CODE (x) == LABEL_REF)
6428 return true;
6429 if (GET_CODE (x) != SYMBOL_REF)
6430 return false;
6431 /* FALLTHRU */
6432
6433 case SYMBOL_REF:
6434 /* TLS symbols are never valid. */
6435 if (SYMBOL_REF_TLS_MODEL (x))
6436 return false;
6437 break;
6438
6439 case CONST_DOUBLE:
6440 if (GET_MODE (x) == TImode
6441 && x != CONST0_RTX (TImode)
6442 && !TARGET_64BIT)
6443 return false;
6444 break;
6445
6446 case CONST_VECTOR:
6447 if (x == CONST0_RTX (GET_MODE (x)))
6448 return true;
6449 return false;
6450
6451 default:
6452 break;
6453 }
6454
6455 /* Otherwise we handle everything else in the move patterns. */
6456 return true;
6457}
6458
6459/* Determine if it's legal to put X into the constant pool. This
6460 is not possible for the address of thread-local symbols, which
6461 is checked above. */
6462
6463static bool
6464ix86_cannot_force_const_mem (rtx x)
6465{
6466 /* We can always put integral constants and vectors in memory. */
6467 switch (GET_CODE (x))
6468 {
6469 case CONST_INT:
6470 case CONST_DOUBLE:
6471 case CONST_VECTOR:
6472 return false;
6473
6474 default:
6475 break;
6476 }
6477 return !legitimate_constant_p (x);
6478}
6479
6480/* Determine if a given RTX is a valid constant address. */
6481
6482bool
6483constant_address_p (rtx x)
6484{
6485 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6486}
6487
6488/* Nonzero if the constant value X is a legitimate general operand
6489 when generating PIC code. It is given that flag_pic is on and
6490 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6491
6492bool
6493legitimate_pic_operand_p (rtx x)
6494{
6495 rtx inner;
6496
6497 switch (GET_CODE (x))
6498 {
6499 case CONST:
6500 inner = XEXP (x, 0);
6501 if (GET_CODE (inner) == PLUS
6502 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6503 inner = XEXP (inner, 0);
6504
6505 /* Only some unspecs are valid as "constants". */
6506 if (GET_CODE (inner) == UNSPEC)
6507 switch (XINT (inner, 1))
6508 {
6509 case UNSPEC_GOTOFF:
6510 return TARGET_64BIT;
6511 case UNSPEC_TPOFF:
6512 x = XVECEXP (inner, 0, 0);
6513 return (GET_CODE (x) == SYMBOL_REF
6514 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6515 default:
6516 return false;
6517 }
6518 /* FALLTHRU */
6519
6520 case SYMBOL_REF:
6521 case LABEL_REF:
6522 return legitimate_pic_address_disp_p (x);
6523
6524 default:
6525 return true;
6526 }
6527}
6528
6529/* Determine if a given CONST RTX is a valid memory displacement
6530 in PIC mode. */
6531
6532int
6533legitimate_pic_address_disp_p (rtx disp)
6534{
6535 bool saw_plus;
6536
6537 /* In 64bit mode we can allow direct addresses of symbols and labels
6538 when they are not dynamic symbols. */
6539 if (TARGET_64BIT)
6540 {
6541 rtx op0 = disp, op1;
6542
6543 switch (GET_CODE (disp))
6544 {
6545 case LABEL_REF:
6546 return true;
6547
6548 case CONST:
6549 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6550 break;
6551 op0 = XEXP (XEXP (disp, 0), 0);
6552 op1 = XEXP (XEXP (disp, 0), 1);
6553 if (GET_CODE (op1) != CONST_INT
6554 || INTVAL (op1) >= 16*1024*1024
6555 || INTVAL (op1) < -16*1024*1024)
6556 break;
6557 if (GET_CODE (op0) == LABEL_REF)
6558 return true;
6559 if (GET_CODE (op0) != SYMBOL_REF)
6560 break;
6561 /* FALLTHRU */
6562
6563 case SYMBOL_REF:
6564 /* TLS references should always be enclosed in UNSPEC. */
6565 if (SYMBOL_REF_TLS_MODEL (op0))
6566 return false;
6567 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6568 return true;
6569 break;
6570
6571 default:
6572 break;
6573 }
6574 }
6575 if (GET_CODE (disp) != CONST)
6576 return 0;
6577 disp = XEXP (disp, 0);
6578
6579 if (TARGET_64BIT)
6580 {
6581      /* It is unsafe to allow PLUS expressions.  This limits the allowed
6582         distance of GOT tables.  We should not need these anyway.  */
6583 if (GET_CODE (disp) != UNSPEC
6584 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6585 && XINT (disp, 1) != UNSPEC_GOTOFF))
6586 return 0;
6587
6588 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6589 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6590 return 0;
6591 return 1;
6592 }
6593
6594 saw_plus = false;
6595 if (GET_CODE (disp) == PLUS)
6596 {
6597 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6598 return 0;
6599 disp = XEXP (disp, 0);
6600 saw_plus = true;
6601 }
6602
6603 if (TARGET_MACHO && darwin_local_data_pic (disp))
6604 return 1;
6605
6606 if (GET_CODE (disp) != UNSPEC)
6607 return 0;
6608
6609 switch (XINT (disp, 1))
6610 {
6611 case UNSPEC_GOT:
6612 if (saw_plus)
6613 return false;
6614 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6615 case UNSPEC_GOTOFF:
6616      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6617	 While the ABI also specifies a 32bit relocation, we don't produce
6618	 it in the small PIC model at all.  */
6619 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6620 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6621 && !TARGET_64BIT)
6622 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6623 return false;
6624 case UNSPEC_GOTTPOFF:
6625 case UNSPEC_GOTNTPOFF:
6626 case UNSPEC_INDNTPOFF:
6627 if (saw_plus)
6628 return false;
6629 disp = XVECEXP (disp, 0, 0);
6630 return (GET_CODE (disp) == SYMBOL_REF
6631 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6632 case UNSPEC_NTPOFF:
6633 disp = XVECEXP (disp, 0, 0);
6634 return (GET_CODE (disp) == SYMBOL_REF
6635 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6636 case UNSPEC_DTPOFF:
6637 disp = XVECEXP (disp, 0, 0);
6638 return (GET_CODE (disp) == SYMBOL_REF
6639 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6640 }
6641
6642 return 0;
6643}
6644
6645/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6646 memory address for an instruction. The MODE argument is the machine mode
6647 for the MEM expression that wants to use this address.
6648
6649   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
6650 convert common non-canonical forms to canonical form so that they will
6651 be recognized. */
6652
6653int
6654legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6655{
6656 struct ix86_address parts;
6657 rtx base, index, disp;
6658 HOST_WIDE_INT scale;
6659 const char *reason = NULL;
6660 rtx reason_rtx = NULL_RTX;
6661
6662 if (TARGET_DEBUG_ADDR)
6663 {
6664 fprintf (stderr,
6665 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6666 GET_MODE_NAME (mode), strict);
6667 debug_rtx (addr);
6668 }
6669
6670 if (ix86_decompose_address (addr, &parts) <= 0)
6671 {
6672 reason = "decomposition failed";
6673 goto report_error;
6674 }
6675
6676 base = parts.base;
6677 index = parts.index;
6678 disp = parts.disp;
6679 scale = parts.scale;
6680
6681 /* Validate base register.
6682
6683 Don't allow SUBREG's that span more than a word here. It can lead to spill
6684 failures when the base is one word out of a two word structure, which is
6685 represented internally as a DImode int. */
6686
6687 if (base)
6688 {
6689 rtx reg;
6690 reason_rtx = base;
6691
6692 if (REG_P (base))
6693 reg = base;
6694 else if (GET_CODE (base) == SUBREG
6695 && REG_P (SUBREG_REG (base))
6696 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6697 <= UNITS_PER_WORD)
6698 reg = SUBREG_REG (base);
6699 else
6700 {
6701 reason = "base is not a register";
6702 goto report_error;
6703 }
6704
6705 if (GET_MODE (base) != Pmode)
6706 {
6707 reason = "base is not in Pmode";
6708 goto report_error;
6709 }
6710
6711 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6712 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6713 {
6714 reason = "base is not valid";
6715 goto report_error;
6716 }
6717 }
6718
6719 /* Validate index register.
6720
6721 Don't allow SUBREG's that span more than a word here -- same as above. */
6722
6723 if (index)
6724 {
6725 rtx reg;
6726 reason_rtx = index;
6727
6728 if (REG_P (index))
6729 reg = index;
6730 else if (GET_CODE (index) == SUBREG
6731 && REG_P (SUBREG_REG (index))
6732 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6733 <= UNITS_PER_WORD)
6734 reg = SUBREG_REG (index);
6735 else
6736 {
6737 reason = "index is not a register";
6738 goto report_error;
6739 }
6740
6741 if (GET_MODE (index) != Pmode)
6742 {
6743 reason = "index is not in Pmode";
6744 goto report_error;
6745 }
6746
6747 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6748 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6749 {
6750 reason = "index is not valid";
6751 goto report_error;
6752 }
6753 }
6754
6755 /* Validate scale factor. */
6756 if (scale != 1)
6757 {
6758 reason_rtx = GEN_INT (scale);
6759 if (!index)
6760 {
6761 reason = "scale without index";
6762 goto report_error;
6763 }
6764
6765 if (scale != 2 && scale != 4 && scale != 8)
6766 {
6767 reason = "scale is not a valid multiplier";
6768 goto report_error;
6769 }
6770 }
6771
6772 /* Validate displacement. */
6773 if (disp)
6774 {
6775 reason_rtx = disp;
6776
6777 if (GET_CODE (disp) == CONST
6778 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6779 switch (XINT (XEXP (disp, 0), 1))
6780 {
6781	  /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
6782	     when used.  While the ABI also specifies 32bit relocations, we
6783	     don't produce them at all and use IP relative addressing instead.  */
6784 case UNSPEC_GOT:
6785 case UNSPEC_GOTOFF:
6786 gcc_assert (flag_pic);
6787 if (!TARGET_64BIT)
6788 goto is_legitimate_pic;
6789 reason = "64bit address unspec";
6790 goto report_error;
6791
6792 case UNSPEC_GOTPCREL:
6793 gcc_assert (flag_pic);
6794 goto is_legitimate_pic;
6795
6796 case UNSPEC_GOTTPOFF:
6797 case UNSPEC_GOTNTPOFF:
6798 case UNSPEC_INDNTPOFF:
6799 case UNSPEC_NTPOFF:
6800 case UNSPEC_DTPOFF:
6801 break;
6802
6803 default:
6804 reason = "invalid address unspec";
6805 goto report_error;
6806 }
6807
6808 else if (SYMBOLIC_CONST (disp)
6809 && (flag_pic
6810 || (TARGET_MACHO
6811#if TARGET_MACHO
6812 && MACHOPIC_INDIRECT
6813 && !machopic_operand_p (disp)
6814#endif
6815 )))
6816 {
6817
6818 is_legitimate_pic:
6819 if (TARGET_64BIT && (index || base))
6820 {
6821 /* foo@dtpoff(%rX) is ok. */
6822 if (GET_CODE (disp) != CONST
6823 || GET_CODE (XEXP (disp, 0)) != PLUS
6824 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6825 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6826 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6827 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6828 {
6829 reason = "non-constant pic memory reference";
6830 goto report_error;
6831 }
6832 }
6833 else if (! legitimate_pic_address_disp_p (disp))
6834 {
6835 reason = "displacement is an invalid pic construct";
6836 goto report_error;
6837 }
6838
6839 /* This code used to verify that a symbolic pic displacement
6840 includes the pic_offset_table_rtx register.
6841
6842	     While this is a good idea, unfortunately these constructs may
6843	     be created by the "adds using lea" optimization for incorrect
6844	     code like:
6845
6846	     int a;
6847	     int foo(int i)
6848	       {
6849	         return *(&a+i);
6850	       }
6851
6852	     This code is nonsensical, but results in addressing the
6853	     GOT table with pic_offset_table_rtx as the base.  We can't
6854	     easily refuse it, since it gets matched by the
6855	     "addsi3" pattern, which later gets split into an lea when
6856	     the output register differs from the input.  While this
6857	     could be handled by a separate addsi pattern for this case
6858	     that never results in an lea, disabling this test seems to
6859	     be the easier and correct fix for the crash.  */
6860 }
6861 else if (GET_CODE (disp) != LABEL_REF
6862 && GET_CODE (disp) != CONST_INT
6863 && (GET_CODE (disp) != CONST
6864 || !legitimate_constant_p (disp))
6865 && (GET_CODE (disp) != SYMBOL_REF
6866 || !legitimate_constant_p (disp)))
6867 {
6868 reason = "displacement is not constant";
6869 goto report_error;
6870 }
6871 else if (TARGET_64BIT
6872 && !x86_64_immediate_operand (disp, VOIDmode))
6873 {
6874 reason = "displacement is out of range";
6875 goto report_error;
6876 }
6877 }
6878
6879 /* Everything looks valid. */
6880 if (TARGET_DEBUG_ADDR)
6881 fprintf (stderr, "Success.\n");
6882 return TRUE;
6883
6884 report_error:
6885 if (TARGET_DEBUG_ADDR)
6886 {
6887 fprintf (stderr, "Error: %s\n", reason);
6888 debug_rtx (reason_rtx);
6889 }
6890 return FALSE;
6891}
6892
6893/* Return a unique alias set for the GOT. */
6894
6895static HOST_WIDE_INT
6896ix86_GOT_alias_set (void)
6897{
6898 static HOST_WIDE_INT set = -1;
6899 if (set == -1)
6900 set = new_alias_set ();
6901 return set;
6902}
6903
6904/* Return a legitimate reference for ORIG (an address) using the
6905 register REG. If REG is 0, a new pseudo is generated.
6906
6907 There are two types of references that must be handled:
6908
6909 1. Global data references must load the address from the GOT, via
6910 the PIC reg. An insn is emitted to do this load, and the reg is
6911 returned.
6912
6913 2. Static data references, constant pool addresses, and code labels
6914 compute the address as an offset from the GOT, whose base is in
6915 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6916 differentiate them from global data objects. The returned
6917 address is the PIC reg + an unspec constant.
6918
6919 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6920 reg also appears in the address. */
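
/* Roughly, case 1 above ends up as a load such as "movl foo@GOT(%ebx), %reg"
   (or "movq foo@GOTPCREL(%rip), %reg" in 64-bit mode), while case 2 ends up
   as the address "%ebx + foo@GOTOFF", usable directly in an addressing mode
   or materialized with an lea.  */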
6921
6922static rtx
6923legitimize_pic_address (rtx orig, rtx reg)
6924{
6925 rtx addr = orig;
6926 rtx new = orig;
6927 rtx base;
6928
6929#if TARGET_MACHO
6930 if (TARGET_MACHO && !TARGET_64BIT)
6931 {
6932 if (reg == 0)
6933 reg = gen_reg_rtx (Pmode);
6934 /* Use the generic Mach-O PIC machinery. */
6935 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6936 }
6937#endif
6938
6939 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6940 new = addr;
6941 else if (TARGET_64BIT
6942 && ix86_cmodel != CM_SMALL_PIC
6943 && local_symbolic_operand (addr, Pmode))
6944 {
6945 rtx tmpreg;
6946 /* This symbol may be referenced via a displacement from the PIC
6947 base address (@GOTOFF). */
6948
6949 if (reload_in_progress)
6950 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6951 if (GET_CODE (addr) == CONST)
6952 addr = XEXP (addr, 0);
6953 if (GET_CODE (addr) == PLUS)
6954 {
6955 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6956 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6957 }
6958 else
6959 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6960 new = gen_rtx_CONST (Pmode, new);
6961 if (!reg)
6962 tmpreg = gen_reg_rtx (Pmode);
6963 else
6964 tmpreg = reg;
6965 emit_move_insn (tmpreg, new);
6966
6967 if (reg != 0)
6968 {
6969 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6970 tmpreg, 1, OPTAB_DIRECT);
6971 new = reg;
6972 }
6973 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6974 }
6975 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6976 {
6977 /* This symbol may be referenced via a displacement from the PIC
6978 base address (@GOTOFF). */
6979
6980 if (reload_in_progress)
6981 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6982 if (GET_CODE (addr) == CONST)
6983 addr = XEXP (addr, 0);
6984 if (GET_CODE (addr) == PLUS)
6985 {
6986 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6987 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6988 }
6989 else
6990 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6991 new = gen_rtx_CONST (Pmode, new);
6992 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6993
6994 if (reg != 0)
6995 {
6996 emit_move_insn (reg, new);
6997 new = reg;
6998 }
6999 }
7000 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7001 {
7002 if (TARGET_64BIT)
7003 {
7004 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7005 new = gen_rtx_CONST (Pmode, new);
7006 new = gen_const_mem (Pmode, new);
7007 set_mem_alias_set (new, ix86_GOT_alias_set ());
7008
7009 if (reg == 0)
7010 reg = gen_reg_rtx (Pmode);
7011	  /* Use gen_movsi directly; otherwise the address is loaded
7012	     into a register for CSE.  We don't want to CSE these addresses;
7013	     instead we CSE addresses loaded from the GOT table, so skip this.  */
7014 emit_insn (gen_movsi (reg, new));
7015 new = reg;
7016 }
7017 else
7018 {
7019 /* This symbol must be referenced via a load from the
7020 Global Offset Table (@GOT). */
7021
7022 if (reload_in_progress)
7023 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7024 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7025 new = gen_rtx_CONST (Pmode, new);
7026 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7027 new = gen_const_mem (Pmode, new);
7028 set_mem_alias_set (new, ix86_GOT_alias_set ());
7029
7030 if (reg == 0)
7031 reg = gen_reg_rtx (Pmode);
7032 emit_move_insn (reg, new);
7033 new = reg;
7034 }
7035 }
7036 else
7037 {
7038 if (GET_CODE (addr) == CONST_INT
7039 && !x86_64_immediate_operand (addr, VOIDmode))
7040 {
7041 if (reg)
7042 {
7043 emit_move_insn (reg, addr);
7044 new = reg;
7045 }
7046 else
7047 new = force_reg (Pmode, addr);
7048 }
7049 else if (GET_CODE (addr) == CONST)
7050 {
7051 addr = XEXP (addr, 0);
7052
7053 /* We must match stuff we generate before. Assume the only
7054 unspecs that can get here are ours. Not that we could do
7055 anything with them anyway.... */
7056 if (GET_CODE (addr) == UNSPEC
7057 || (GET_CODE (addr) == PLUS
7058 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7059 return orig;
7060 gcc_assert (GET_CODE (addr) == PLUS);
7061 }
7062 if (GET_CODE (addr) == PLUS)
7063 {
7064 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7065
7066 /* Check first to see if this is a constant offset from a @GOTOFF
7067 symbol reference. */
7068 if (local_symbolic_operand (op0, Pmode)
7069 && GET_CODE (op1) == CONST_INT)
7070 {
7071 if (!TARGET_64BIT)
7072 {
7073 if (reload_in_progress)
7074 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7075 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7076 UNSPEC_GOTOFF);
7077 new = gen_rtx_PLUS (Pmode, new, op1);
7078 new = gen_rtx_CONST (Pmode, new);
7079 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7080
7081 if (reg != 0)
7082 {
7083 emit_move_insn (reg, new);
7084 new = reg;
7085 }
7086 }
7087 else
7088 {
7089 if (INTVAL (op1) < -16*1024*1024
7090 || INTVAL (op1) >= 16*1024*1024)
7091 {
7092 if (!x86_64_immediate_operand (op1, Pmode))
7093 op1 = force_reg (Pmode, op1);
7094 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7095 }
7096 }
7097 }
7098 else
7099 {
7100 base = legitimize_pic_address (XEXP (addr, 0), reg);
7101 new = legitimize_pic_address (XEXP (addr, 1),
7102 base == reg ? NULL_RTX : reg);
7103
7104 if (GET_CODE (new) == CONST_INT)
7105 new = plus_constant (base, INTVAL (new));
7106 else
7107 {
7108 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7109 {
7110 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7111 new = XEXP (new, 1);
7112 }
7113 new = gen_rtx_PLUS (Pmode, base, new);
7114 }
7115 }
7116 }
7117 }
7118 return new;
7119}
7120
7121/* Load the thread pointer. If TO_REG is true, force it into a register. */
7122
7123static rtx
7124get_thread_pointer (int to_reg)
7125{
7126 rtx tp, reg, insn;
7127
7128 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7129 if (!to_reg)
7130 return tp;
7131
7132 reg = gen_reg_rtx (Pmode);
7133 insn = gen_rtx_SET (VOIDmode, reg, tp);
7134 insn = emit_insn (insn);
7135
7136 return reg;
7137}
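
/* When TARGET_TLS_DIRECT_SEG_REFS is enabled, the UNSPEC_TP built here is
   later recognized by ix86_decompose_address and becomes a %gs- (32-bit) or
   %fs- (64-bit) segment-relative access; otherwise the callers below force
   it into a register first.  */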
7138
7139/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7140 false if we expect this to be used for a memory address and true if
7141 we expect to load the address into a register. */
7142
7143static rtx
7144legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7145{
7146 rtx dest, base, off, pic, tp;
7147 int type;
7148
7149 switch (model)
7150 {
7151 case TLS_MODEL_GLOBAL_DYNAMIC:
7152 dest = gen_reg_rtx (Pmode);
7153 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7154
7155 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7156 {
7157 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7158
7159 start_sequence ();
7160 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7161 insns = get_insns ();
7162 end_sequence ();
7163
7164 emit_libcall_block (insns, dest, rax, x);
7165 }
7166 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7167 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7168 else
7169 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7170
7171 if (TARGET_GNU2_TLS)
7172 {
7173 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7174
7175 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7176 }
7177 break;
7178
7179 case TLS_MODEL_LOCAL_DYNAMIC:
7180 base = gen_reg_rtx (Pmode);
7181 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7182
7183 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7184 {
7185 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7186
7187 start_sequence ();
7188 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7189 insns = get_insns ();
7190 end_sequence ();
7191
7192 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7193 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7194 emit_libcall_block (insns, base, rax, note);
7195 }
7196 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7197 emit_insn (gen_tls_local_dynamic_base_64 (base));
7198 else
7199 emit_insn (gen_tls_local_dynamic_base_32 (base));
7200
7201 if (TARGET_GNU2_TLS)
7202 {
7203 rtx x = ix86_tls_module_base ();
7204
7205 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7206 gen_rtx_MINUS (Pmode, x, tp));
7207 }
7208
7209 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7210 off = gen_rtx_CONST (Pmode, off);
7211
7212 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7213
7214 if (TARGET_GNU2_TLS)
7215 {
7216 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7217
7218 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7219 }
7220
7221 break;
7222
7223 case TLS_MODEL_INITIAL_EXEC:
7224 if (TARGET_64BIT)
7225 {
7226 pic = NULL;
7227 type = UNSPEC_GOTNTPOFF;
7228 }
7229 else if (flag_pic)
7230 {
7231 if (reload_in_progress)
7232 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7233 pic = pic_offset_table_rtx;
7234 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7235 }
7236 else if (!TARGET_ANY_GNU_TLS)
7237 {
7238 pic = gen_reg_rtx (Pmode);
7239 emit_insn (gen_set_got (pic));
7240 type = UNSPEC_GOTTPOFF;
7241 }
7242 else
7243 {
7244 pic = NULL;
7245 type = UNSPEC_INDNTPOFF;
7246 }
7247
7248 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7249 off = gen_rtx_CONST (Pmode, off);
7250 if (pic)
7251 off = gen_rtx_PLUS (Pmode, pic, off);
7252 off = gen_const_mem (Pmode, off);
7253 set_mem_alias_set (off, ix86_GOT_alias_set ());
7254
7255 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7256 {
7257 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7258 off = force_reg (Pmode, off);
7259 return gen_rtx_PLUS (Pmode, base, off);
7260 }
7261 else
7262 {
7263 base = get_thread_pointer (true);
7264 dest = gen_reg_rtx (Pmode);
7265 emit_insn (gen_subsi3 (dest, base, off));
7266 }
7267 break;
7268
7269 case TLS_MODEL_LOCAL_EXEC:
7270 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7271 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7272 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7273 off = gen_rtx_CONST (Pmode, off);
7274
7275 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7276 {
7277 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7278 return gen_rtx_PLUS (Pmode, base, off);
7279 }
7280 else
7281 {
7282 base = get_thread_pointer (true);
7283 dest = gen_reg_rtx (Pmode);
7284 emit_insn (gen_subsi3 (dest, base, off));
7285 }
7286 break;
7287
7288 default:
7289 gcc_unreachable ();
7290 }
7291
7292 return dest;
7293}
7294
7295/* Try machine-dependent ways of modifying an illegitimate address
7296 to be legitimate. If we find one, return the new, valid address.
7297 This macro is used in only one place: `memory_address' in explow.c.
7298
7299 OLDX is the address as it was before break_out_memory_refs was called.
7300 In some cases it is useful to look at this to decide what needs to be done.
7301
7302 MODE and WIN are passed so that this macro can use
7303 GO_IF_LEGITIMATE_ADDRESS.
7304
7305 It is always safe for this macro to do nothing. It exists to recognize
7306 opportunities to optimize the output.
7307
7308 For the 80386, we handle X+REG by loading X into a register R and
7309 using R+REG. R will go in a general reg and indexing will be used.
7310 However, if REG is a broken-out memory address or multiplication,
7311 nothing needs to be done because REG can certainly go in a general reg.
7312
7313 When -fpic is used, special handling is needed for symbolic references.
7314 See comments by legitimize_pic_address in i386.c for details. */
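/* For example, an address such as
       (plus (reg A) (ashift (reg B) (const_int 2)))
   is first canonicalized below into
       (plus (mult (reg B) (const_int 4)) (reg A))
   and, if the result is still not a legitimate address, the offending
   sub-expressions are forced into registers.  */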
7315
7316rtx
7317legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7318{
7319 int changed = 0;
7320 unsigned log;
7321
7322 if (TARGET_DEBUG_ADDR)
7323 {
7324 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7325 GET_MODE_NAME (mode));
7326 debug_rtx (x);
7327 }
7328
7329 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7330 if (log)
7331 return legitimize_tls_address (x, log, false);
7332 if (GET_CODE (x) == CONST
7333 && GET_CODE (XEXP (x, 0)) == PLUS
7334 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7335 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7336 {
7337 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7338 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7339 }
7340
7341 if (flag_pic && SYMBOLIC_CONST (x))
7342 return legitimize_pic_address (x, 0);
7343
7344	  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
7345 if (GET_CODE (x) == ASHIFT
7346 && GET_CODE (XEXP (x, 1)) == CONST_INT
7347 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7348 {
7349 changed = 1;
7350 log = INTVAL (XEXP (x, 1));
7351 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7352 GEN_INT (1 << log));
7353 }
7354
7355 if (GET_CODE (x) == PLUS)
7356 {
7357 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7358
7359 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7360 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7361 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7362 {
7363 changed = 1;
7364 log = INTVAL (XEXP (XEXP (x, 0), 1));
7365 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7366 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7367 GEN_INT (1 << log));
7368 }
7369
7370 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7371 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7372 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7373 {
7374 changed = 1;
7375 log = INTVAL (XEXP (XEXP (x, 1), 1));
7376 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7377 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7378 GEN_INT (1 << log));
7379 }
7380
7381 /* Put multiply first if it isn't already. */
7382 if (GET_CODE (XEXP (x, 1)) == MULT)
7383 {
7384 rtx tmp = XEXP (x, 0);
7385 XEXP (x, 0) = XEXP (x, 1);
7386 XEXP (x, 1) = tmp;
7387 changed = 1;
7388 }
7389
7390 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7391 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7392 created by virtual register instantiation, register elimination, and
7393 similar optimizations. */
7394 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7395 {
7396 changed = 1;
7397 x = gen_rtx_PLUS (Pmode,
7398 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7399 XEXP (XEXP (x, 1), 0)),
7400 XEXP (XEXP (x, 1), 1));
7401 }
7402
7403 /* Canonicalize
7404 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7405 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7406 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7407 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7408 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7409 && CONSTANT_P (XEXP (x, 1)))
7410 {
7411 rtx constant;
7412 rtx other = NULL_RTX;
7413
7414 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7415 {
7416 constant = XEXP (x, 1);
7417 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7418 }
7419 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7420 {
7421 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7422 other = XEXP (x, 1);
7423 }
7424 else
7425 constant = 0;
7426
7427 if (constant)
7428 {
7429 changed = 1;
7430 x = gen_rtx_PLUS (Pmode,
7431 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7432 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7433 plus_constant (other, INTVAL (constant)));
7434 }
7435 }
7436
7437 if (changed && legitimate_address_p (mode, x, FALSE))
7438 return x;
7439
7440 if (GET_CODE (XEXP (x, 0)) == MULT)
7441 {
7442 changed = 1;
7443 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7444 }
7445
7446 if (GET_CODE (XEXP (x, 1)) == MULT)
7447 {
7448 changed = 1;
7449 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7450 }
7451
7452 if (changed
7453 && GET_CODE (XEXP (x, 1)) == REG
7454 && GET_CODE (XEXP (x, 0)) == REG)
7455 return x;
7456
7457 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7458 {
7459 changed = 1;
7460 x = legitimize_pic_address (x, 0);
7461 }
7462
7463 if (changed && legitimate_address_p (mode, x, FALSE))
7464 return x;
7465
7466 if (GET_CODE (XEXP (x, 0)) == REG)
7467 {
7468 rtx temp = gen_reg_rtx (Pmode);
7469 rtx val = force_operand (XEXP (x, 1), temp);
7470 if (val != temp)
7471 emit_move_insn (temp, val);
7472
7473 XEXP (x, 1) = temp;
7474 return x;
7475 }
7476
7477 else if (GET_CODE (XEXP (x, 1)) == REG)
7478 {
7479 rtx temp = gen_reg_rtx (Pmode);
7480 rtx val = force_operand (XEXP (x, 0), temp);
7481 if (val != temp)
7482 emit_move_insn (temp, val);
7483
7484 XEXP (x, 0) = temp;
7485 return x;
7486 }
7487 }
7488
7489 return x;
7490}
7491
7492/* Print an integer constant expression in assembler syntax. Addition
7493 and subtraction are the only arithmetic that may appear in these
7494 expressions. FILE is the stdio stream to write to, X is the rtx, and
7495 CODE is the operand print code from the output string. */
7496
7497static void
7498output_pic_addr_const (FILE *file, rtx x, int code)
7499{
7500 char buf[256];
7501
7502 switch (GET_CODE (x))
7503 {
7504 case PC:
7505 gcc_assert (flag_pic);
7506 putc ('.', file);
7507 break;
7508
7509 case SYMBOL_REF:
7510 if (! TARGET_MACHO || TARGET_64BIT)
7511 output_addr_const (file, x);
7512 else
7513 {
7514 const char *name = XSTR (x, 0);
7515
7516 /* Mark the decl as referenced so that cgraph will output the function. */
7517 if (SYMBOL_REF_DECL (x))
7518 mark_decl_referenced (SYMBOL_REF_DECL (x));
7519
7520#if TARGET_MACHO
7521 if (MACHOPIC_INDIRECT
7522 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7523 name = machopic_indirection_name (x, /*stub_p=*/true);
7524#endif
7525 assemble_name (file, name);
7526 }
7527 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7528 fputs ("@PLT", file);
7529 break;
7530
7531 case LABEL_REF:
7532 x = XEXP (x, 0);
7533 /* FALLTHRU */
7534 case CODE_LABEL:
7535 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7536 assemble_name (asm_out_file, buf);
7537 break;
7538
7539 case CONST_INT:
7540 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7541 break;
7542
7543 case CONST:
7544 /* This used to output parentheses around the expression,
7545 but that does not work on the 386 (either ATT or BSD assembler). */
7546 output_pic_addr_const (file, XEXP (x, 0), code);
7547 break;
7548
7549 case CONST_DOUBLE:
7550 if (GET_MODE (x) == VOIDmode)
7551 {
7552 /* We can use %d if the number is <32 bits and positive. */
7553 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7554 fprintf (file, "0x%lx%08lx",
7555 (unsigned long) CONST_DOUBLE_HIGH (x),
7556 (unsigned long) CONST_DOUBLE_LOW (x));
7557 else
7558 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7559 }
7560 else
7561 /* We can't handle floating point constants;
7562 PRINT_OPERAND must handle them. */
7563 output_operand_lossage ("floating constant misused");
7564 break;
7565
7566 case PLUS:
7567 /* Some assemblers need integer constants to appear first. */
7568 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7569 {
7570 output_pic_addr_const (file, XEXP (x, 0), code);
7571 putc ('+', file);
7572 output_pic_addr_const (file, XEXP (x, 1), code);
7573 }
7574 else
7575 {
7576 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7577 output_pic_addr_const (file, XEXP (x, 1), code);
7578 putc ('+', file);
7579 output_pic_addr_const (file, XEXP (x, 0), code);
7580 }
7581 break;
7582
7583 case MINUS:
7584 if (!TARGET_MACHO)
7585 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7586 output_pic_addr_const (file, XEXP (x, 0), code);
7587 putc ('-', file);
7588 output_pic_addr_const (file, XEXP (x, 1), code);
7589 if (!TARGET_MACHO)
7590 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7591 break;
7592
7593 case UNSPEC:
7594 gcc_assert (XVECLEN (x, 0) == 1);
7595 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7596 switch (XINT (x, 1))
7597 {
7598 case UNSPEC_GOT:
7599 fputs ("@GOT", file);
7600 break;
7601 case UNSPEC_GOTOFF:
7602 fputs ("@GOTOFF", file);
7603 break;
7604 case UNSPEC_GOTPCREL:
7605 fputs ("@GOTPCREL(%rip)", file);
7606 break;
7607 case UNSPEC_GOTTPOFF:
7608 /* FIXME: This might be @TPOFF in Sun ld too. */
7609 fputs ("@GOTTPOFF", file);
7610 break;
7611 case UNSPEC_TPOFF:
7612 fputs ("@TPOFF", file);
7613 break;
7614 case UNSPEC_NTPOFF:
7615 if (TARGET_64BIT)
7616 fputs ("@TPOFF", file);
7617 else
7618 fputs ("@NTPOFF", file);
7619 break;
7620 case UNSPEC_DTPOFF:
7621 fputs ("@DTPOFF", file);
7622 break;
7623 case UNSPEC_GOTNTPOFF:
7624 if (TARGET_64BIT)
7625 fputs ("@GOTTPOFF(%rip)", file);
7626 else
7627 fputs ("@GOTNTPOFF", file);
7628 break;
7629 case UNSPEC_INDNTPOFF:
7630 fputs ("@INDNTPOFF", file);
7631 break;
7632 default:
7633 output_operand_lossage ("invalid UNSPEC as operand");
7634 break;
7635 }
7636 break;
7637
7638 default:
7639 output_operand_lossage ("invalid expression as operand");
7640 }
7641}
7642
7643/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7644 We need to emit DTP-relative relocations. */
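/* For a 4-byte entry this emits roughly ".long foo@DTPOFF" (the exact
   directive comes from ASM_LONG); an 8-byte entry gets a trailing ", 0"
   for the upper half.  */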
7645
7646static void
7647i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7648{
7649 fputs (ASM_LONG, file);
7650 output_addr_const (file, x);
7651 fputs ("@DTPOFF", file);
7652 switch (size)
7653 {
7654 case 4:
7655 break;
7656 case 8:
7657 fputs (", 0", file);
7658 break;
7659 default:
7660 gcc_unreachable ();
7661 }
7662}
7663
7664/* In the name of slightly smaller debug output, and to cater to
7665 general assembler lossage, recognize PIC+GOTOFF and turn it back
7666 into a direct symbol reference.
7667
7668 On Darwin, this is necessary to avoid a crash, because Darwin
7669 has a different PIC label for each routine but the DWARF debugging
7670 information is not associated with any particular routine, so it's
7671 necessary to remove references to the PIC label from RTL stored by
7672 the DWARF output code. */
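/* As an illustration, a 32-bit PIC reference of the form
       (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is turned back into plain (symbol_ref "foo"), with any register and
   constant addends re-applied to the result.  */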
7673
7674static rtx
7675ix86_delegitimize_address (rtx orig_x)
7676{
7677 rtx x = orig_x;
7678 /* reg_addend is NULL or a multiple of some register. */
7679 rtx reg_addend = NULL_RTX;
7680 /* const_addend is NULL or a const_int. */
7681 rtx const_addend = NULL_RTX;
7682 /* This is the result, or NULL. */
7683 rtx result = NULL_RTX;
7684
7685 if (GET_CODE (x) == MEM)
7686 x = XEXP (x, 0);
7687
7688 if (TARGET_64BIT)
7689 {
7690 if (GET_CODE (x) != CONST
7691 || GET_CODE (XEXP (x, 0)) != UNSPEC
7692 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7693 || GET_CODE (orig_x) != MEM)
7694 return orig_x;
7695 return XVECEXP (XEXP (x, 0), 0, 0);
7696 }
7697
7698 if (GET_CODE (x) != PLUS
7699 || GET_CODE (XEXP (x, 1)) != CONST)
7700 return orig_x;
7701
7702 if (GET_CODE (XEXP (x, 0)) == REG
7703 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7704 /* %ebx + GOT/GOTOFF */
7705 ;
7706 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7707 {
7708 /* %ebx + %reg * scale + GOT/GOTOFF */
7709 reg_addend = XEXP (x, 0);
7710 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7711 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7712 reg_addend = XEXP (reg_addend, 1);
7713 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7714 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7715 reg_addend = XEXP (reg_addend, 0);
7716 else
7717 return orig_x;
7718 if (GET_CODE (reg_addend) != REG
7719 && GET_CODE (reg_addend) != MULT
7720 && GET_CODE (reg_addend) != ASHIFT)
7721 return orig_x;
7722 }
7723 else
7724 return orig_x;
7725
7726 x = XEXP (XEXP (x, 1), 0);
7727 if (GET_CODE (x) == PLUS
7728 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7729 {
7730 const_addend = XEXP (x, 1);
7731 x = XEXP (x, 0);
7732 }
7733
7734 if (GET_CODE (x) == UNSPEC
7735 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7736 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7737 result = XVECEXP (x, 0, 0);
7738
7739 if (TARGET_MACHO && darwin_local_data_pic (x)
7740 && GET_CODE (orig_x) != MEM)
7741 result = XEXP (x, 0);
7742
7743 if (! result)
7744 return orig_x;
7745
7746 if (const_addend)
7747 result = gen_rtx_PLUS (Pmode, result, const_addend);
7748 if (reg_addend)
7749 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7750 return result;
7751}
7752
7753static void
7754put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7755 int fp, FILE *file)
7756{
7757 const char *suffix;
7758
7759 if (mode == CCFPmode || mode == CCFPUmode)
7760 {
7761 enum rtx_code second_code, bypass_code;
7762 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7763 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7764 code = ix86_fp_compare_code_to_integer (code);
7765 mode = CCmode;
7766 }
7767 if (reverse)
7768 code = reverse_condition (code);
7769
7770 switch (code)
7771 {
7772 case EQ:
7773 suffix = "e";
7774 break;
7775 case NE:
7776 suffix = "ne";
7777 break;
7778 case GT:
7779 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7780 suffix = "g";
7781 break;
7782 case GTU:
7783 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7784 Those same assemblers have the same but opposite lossage on cmov. */
7785 gcc_assert (mode == CCmode);
7786 suffix = fp ? "nbe" : "a";
7787 break;
7788 case LT:
7789 switch (mode)
7790 {
7791 case CCNOmode:
7792 case CCGOCmode:
7793 suffix = "s";
7794 break;
7795
7796 case CCmode:
7797 case CCGCmode:
7798 suffix = "l";
7799 break;
7800
7801 default:
7802 gcc_unreachable ();
7803 }
7804 break;
7805 case LTU:
7806 gcc_assert (mode == CCmode);
7807 suffix = "b";
7808 break;
7809 case GE:
7810 switch (mode)
7811 {
7812 case CCNOmode:
7813 case CCGOCmode:
7814 suffix = "ns";
7815 break;
7816
7817 case CCmode:
7818 case CCGCmode:
7819 suffix = "ge";
7820 break;
7821
7822 default:
7823 gcc_unreachable ();
7824 }
7825 break;
7826 case GEU:
7827 /* ??? As above. */
7828 gcc_assert (mode == CCmode);
7829 suffix = fp ? "nb" : "ae";
7830 break;
7831 case LE:
7832 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7833 suffix = "le";
7834 break;
7835 case LEU:
7836 gcc_assert (mode == CCmode);
7837 suffix = "be";
7838 break;
7839 case UNORDERED:
7840 suffix = fp ? "u" : "p";
7841 break;
7842 case ORDERED:
7843 suffix = fp ? "nu" : "np";
7844 break;
7845 default:
7846 gcc_unreachable ();
7847 }
7848 fputs (suffix, file);
7849}
7850
7851/* Print the name of register X to FILE based on its machine mode and number.
7852 If CODE is 'w', pretend the mode is HImode.
7853 If CODE is 'b', pretend the mode is QImode.
7854 If CODE is 'k', pretend the mode is SImode.
7855 If CODE is 'q', pretend the mode is DImode.
7856 If CODE is 'h', pretend the reg is the 'high' byte register.
7857   If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack reg.  */
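/* For instance, in AT&T syntax hard register 0 (%ax) prints as "%al"
   for 'b', "%ax" for 'w', "%eax" for 'k' and "%rax" for 'q', while the
   first AMD64 extended integer register prints as "%r8b", "%r8w",
   "%r8d" or "%r8" respectively.  */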
7858
7859void
7860print_reg (rtx x, int code, FILE *file)
7861{
7862 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7863 && REGNO (x) != FRAME_POINTER_REGNUM
7864 && REGNO (x) != FLAGS_REG
7865 && REGNO (x) != FPSR_REG);
7866
7867 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7868 putc ('%', file);
7869
7870 if (code == 'w' || MMX_REG_P (x))
7871 code = 2;
7872 else if (code == 'b')
7873 code = 1;
7874 else if (code == 'k')
7875 code = 4;
7876 else if (code == 'q')
7877 code = 8;
7878 else if (code == 'y')
7879 code = 3;
7880 else if (code == 'h')
7881 code = 0;
7882 else
7883 code = GET_MODE_SIZE (GET_MODE (x));
7884
7885  /* Irritatingly, AMD extended registers use a different naming convention
7886     from the normal registers.  */
7887 if (REX_INT_REG_P (x))
7888 {
7889 gcc_assert (TARGET_64BIT);
7890 switch (code)
7891 {
7892 case 0:
7893 error ("extended registers have no high halves");
7894 break;
7895 case 1:
7896 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7897 break;
7898 case 2:
7899 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7900 break;
7901 case 4:
7902 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7903 break;
7904 case 8:
7905 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7906 break;
7907 default:
7908 error ("unsupported operand size for extended register");
7909 break;
7910 }
7911 return;
7912 }
7913 switch (code)
7914 {
7915 case 3:
7916 if (STACK_TOP_P (x))
7917 {
7918 fputs ("st(0)", file);
7919 break;
7920 }
7921 /* FALLTHRU */
7922 case 8:
7923 case 4:
7924 case 12:
7925 if (! ANY_FP_REG_P (x))
7926 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7927 /* FALLTHRU */
7928 case 16:
7929 case 2:
7930 normal:
7931 fputs (hi_reg_name[REGNO (x)], file);
7932 break;
7933 case 1:
7934 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7935 goto normal;
7936 fputs (qi_reg_name[REGNO (x)], file);
7937 break;
7938 case 0:
7939 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7940 goto normal;
7941 fputs (qi_high_reg_name[REGNO (x)], file);
7942 break;
7943 default:
7944 gcc_unreachable ();
7945 }
7946}
7947
7948/* Locate some local-dynamic symbol still in use by this function
7949 so that we can print its name in some tls_local_dynamic_base
7950 pattern. */
7951
7952static const char *
7953get_some_local_dynamic_name (void)
7954{
7955 rtx insn;
7956
7957 if (cfun->machine->some_ld_name)
7958 return cfun->machine->some_ld_name;
7959
7960 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7961 if (INSN_P (insn)
7962 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7963 return cfun->machine->some_ld_name;
7964
7965 gcc_unreachable ();
7966}
7967
7968static int
7969get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7970{
7971 rtx x = *px;
7972
7973 if (GET_CODE (x) == SYMBOL_REF
7974 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7975 {
7976 cfun->machine->some_ld_name = XSTR (x, 0);
7977 return 1;
7978 }
7979
7980 return 0;
7981}
7982
7983/* Meaning of CODE:
7984 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7985 C -- print opcode suffix for set/cmov insn.
7986 c -- like C, but print reversed condition
7987 F,f -- likewise, but for floating-point.
7988 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7989 otherwise nothing
7990 R -- print the prefix for register names.
7991 z -- print the opcode suffix for the size of the current operand.
7992 * -- print a star (in certain assembler syntax)
7993 A -- print an absolute memory reference.
7994 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7995   s -- print a shift double count, followed by the assembler's argument
7996        delimiter.
7997 b -- print the QImode name of the register for the indicated operand.
7998 %b0 would print %al if operands[0] is reg 0.
7999 w -- likewise, print the HImode name of the register.
8000 k -- likewise, print the SImode name of the register.
8001 q -- likewise, print the DImode name of the register.
8002 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8003 y -- print "st(0)" instead of "st" as a register.
8004 D -- print condition for SSE cmp instruction.
8005 P -- if PIC, print an @PLT suffix.
8006 X -- don't print any sort of PIC '@' suffix for a symbol.
8007 & -- print some in-use local-dynamic symbol name.
8008 H -- print a memory address offset by 8; used for sse high-parts
8009 */
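/* As an illustration, an output template such as
       "cmov%C1\t{%2, %0|%0, %2}"
   uses %C1 to print the condition suffix ("e", "ne", "g", ...) for the
   comparison in operand 1, while the "{att|intel}" braces pick the
   operand order for the current assembler dialect.  */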
8010
8011void
8012print_operand (FILE *file, rtx x, int code)
8013{
8014 if (code)
8015 {
8016 switch (code)
8017 {
8018 case '*':
8019 if (ASSEMBLER_DIALECT == ASM_ATT)
8020 putc ('*', file);
8021 return;
8022
8023 case '&':
8024 assemble_name (file, get_some_local_dynamic_name ());
8025 return;
8026
8027 case 'A':
8028 switch (ASSEMBLER_DIALECT)
8029 {
8030 case ASM_ATT:
8031 putc ('*', file);
8032 break;
8033
8034 case ASM_INTEL:
8035	    /* Intel syntax.  For absolute addresses, registers should not
8036	       be surrounded by brackets.  */
8037 if (GET_CODE (x) != REG)
8038 {
8039 putc ('[', file);
8040 PRINT_OPERAND (file, x, 0);
8041 putc (']', file);
8042 return;
8043 }
8044 break;
8045
8046 default:
8047 gcc_unreachable ();
8048 }
8049
8050 PRINT_OPERAND (file, x, 0);
8051 return;
8052
8053
8054 case 'L':
8055 if (ASSEMBLER_DIALECT == ASM_ATT)
8056 putc ('l', file);
8057 return;
8058
8059 case 'W':
8060 if (ASSEMBLER_DIALECT == ASM_ATT)
8061 putc ('w', file);
8062 return;
8063
8064 case 'B':
8065 if (ASSEMBLER_DIALECT == ASM_ATT)
8066 putc ('b', file);
8067 return;
8068
8069 case 'Q':
8070 if (ASSEMBLER_DIALECT == ASM_ATT)
8071 putc ('l', file);
8072 return;
8073
8074 case 'S':
8075 if (ASSEMBLER_DIALECT == ASM_ATT)
8076 putc ('s', file);
8077 return;
8078
8079 case 'T':
8080 if (ASSEMBLER_DIALECT == ASM_ATT)
8081 putc ('t', file);
8082 return;
8083
8084 case 'z':
8085 /* 387 opcodes don't get size suffixes if the operands are
8086 registers. */
8087 if (STACK_REG_P (x))
8088 return;
8089
8090 /* Likewise if using Intel opcodes. */
8091 if (ASSEMBLER_DIALECT == ASM_INTEL)
8092 return;
8093
8094	  /* Derive the opcode suffix from the size of the operand.  */
8095 switch (GET_MODE_SIZE (GET_MODE (x)))
8096 {
8097 case 2:
8098#ifdef HAVE_GAS_FILDS_FISTS
8099 putc ('s', file);
8100#endif
8101 return;
8102
8103 case 4:
8104 if (GET_MODE (x) == SFmode)
8105 {
8106 putc ('s', file);
8107 return;
8108 }
8109 else
8110 putc ('l', file);
8111 return;
8112
8113 case 12:
8114 case 16:
8115 putc ('t', file);
8116 return;
8117
8118 case 8:
8119 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8120 {
8121#ifdef GAS_MNEMONICS
8122 putc ('q', file);
8123#else
8124 putc ('l', file);
8125 putc ('l', file);
8126#endif
8127 }
8128 else
8129 putc ('l', file);
8130 return;
8131
8132 default:
8133 gcc_unreachable ();
8134 }
8135
8136 case 'b':
8137 case 'w':
8138 case 'k':
8139 case 'q':
8140 case 'h':
8141 case 'y':
8142 case 'X':
8143 case 'P':
8144 break;
8145
8146 case 's':
8147 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
8148 {
8149 PRINT_OPERAND (file, x, 0);
8150 putc (',', file);
8151 }
8152 return;
8153
8154 case 'D':
8155	  /* A little bit of brain damage here: the SSE compare instructions
8156	     use completely different names for the comparisons than the
8157	     fp conditional moves do.  */
8158 switch (GET_CODE (x))
8159 {
8160 case EQ:
8161 case UNEQ:
8162 fputs ("eq", file);
8163 break;
8164 case LT:
8165 case UNLT:
8166 fputs ("lt", file);
8167 break;
8168 case LE:
8169 case UNLE:
8170 fputs ("le", file);
8171 break;
8172 case UNORDERED:
8173 fputs ("unord", file);
8174 break;
8175 case NE:
8176 case LTGT:
8177 fputs ("neq", file);
8178 break;
8179 case UNGE:
8180 case GE:
8181 fputs ("nlt", file);
8182 break;
8183 case UNGT:
8184 case GT:
8185 fputs ("nle", file);
8186 break;
8187 case ORDERED:
8188 fputs ("ord", file);
8189 break;
8190 default:
8191 gcc_unreachable ();
8192 }
8193 return;
8194 case 'O':
8195#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8196 if (ASSEMBLER_DIALECT == ASM_ATT)
8197 {
8198 switch (GET_MODE (x))
8199 {
8200 case HImode: putc ('w', file); break;
8201 case SImode:
8202 case SFmode: putc ('l', file); break;
8203 case DImode:
8204 case DFmode: putc ('q', file); break;
8205 default: gcc_unreachable ();
8206 }
8207 putc ('.', file);
8208 }
8209#endif
8210 return;
8211 case 'C':
8212 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8213 return;
8214 case 'F':
8215#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8216 if (ASSEMBLER_DIALECT == ASM_ATT)
8217 putc ('.', file);
8218#endif
8219 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8220 return;
8221
8222 /* Like above, but reverse condition */
8223 case 'c':
8224	  /* Check that the argument to %c is really a condition code and
8225	     not a constant, since only a condition code can be reversed.  */
8226 if (!COMPARISON_P (x))
8227 {
8228 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8229 return;
8230 }
8231 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8232 return;
8233 case 'f':
8234#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8235 if (ASSEMBLER_DIALECT == ASM_ATT)
8236 putc ('.', file);
8237#endif
8238 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8239 return;
8240
8241 case 'H':
8242 /* It doesn't actually matter what mode we use here, as we're
8243 only going to use this for printing. */
8244 x = adjust_address_nv (x, DImode, 8);
8245 break;
8246
8247 case '+':
8248 {
8249 rtx x;
8250
8251 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8252 return;
8253
8254 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8255 if (x)
8256 {
8257 int pred_val = INTVAL (XEXP (x, 0));
8258
8259 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8260 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8261 {
8262 int taken = pred_val > REG_BR_PROB_BASE / 2;
8263 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8264
8265 /* Emit hints only in the case default branch prediction
8266 heuristics would fail. */
8267 if (taken != cputaken)
8268 {
8269 /* We use 3e (DS) prefix for taken branches and
8270 2e (CS) prefix for not taken branches. */
8271 if (taken)
8272 fputs ("ds ; ", file);
8273 else
8274 fputs ("cs ; ", file);
8275 }
8276 }
8277 }
8278 return;
8279 }
8280 default:
8281 output_operand_lossage ("invalid operand code '%c'", code);
8282 }
8283 }
8284
8285 if (GET_CODE (x) == REG)
8286 print_reg (x, code, file);
8287
8288 else if (GET_CODE (x) == MEM)
8289 {
8290 /* No `byte ptr' prefix for call instructions. */
8291 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8292 {
8293 const char * size;
8294 switch (GET_MODE_SIZE (GET_MODE (x)))
8295 {
8296 case 1: size = "BYTE"; break;
8297 case 2: size = "WORD"; break;
8298 case 4: size = "DWORD"; break;
8299 case 8: size = "QWORD"; break;
8300 case 12: size = "XWORD"; break;
8301 case 16: size = "XMMWORD"; break;
8302 default:
8303 gcc_unreachable ();
8304 }
8305
8306 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8307 if (code == 'b')
8308 size = "BYTE";
8309 else if (code == 'w')
8310 size = "WORD";
8311 else if (code == 'k')
8312 size = "DWORD";
8313
8314 fputs (size, file);
8315 fputs (" PTR ", file);
8316 }
8317
8318 x = XEXP (x, 0);
8319 /* Avoid (%rip) for call operands. */
8320 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8321 && GET_CODE (x) != CONST_INT)
8322 output_addr_const (file, x);
8323 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8324 output_operand_lossage ("invalid constraints for operand");
8325 else
8326 output_address (x);
8327 }
8328
8329 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8330 {
8331 REAL_VALUE_TYPE r;
8332 long l;
8333
8334 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8335 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8336
8337 if (ASSEMBLER_DIALECT == ASM_ATT)
8338 putc ('$', file);
8339 fprintf (file, "0x%08lx", l);
8340 }
8341
8342 /* These float cases don't actually occur as immediate operands. */
8343 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8344 {
8345 char dstr[30];
8346
8347 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8348 fprintf (file, "%s", dstr);
8349 }
8350
8351 else if (GET_CODE (x) == CONST_DOUBLE
8352 && GET_MODE (x) == XFmode)
8353 {
8354 char dstr[30];
8355
8356 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8357 fprintf (file, "%s", dstr);
8358 }
8359
8360 else
8361 {
8362 /* We have patterns that allow zero sets of memory, for instance.
8363 In 64-bit mode, we should probably support all 8-byte vectors,
8364 since we can in fact encode that into an immediate. */
8365 if (GET_CODE (x) == CONST_VECTOR)
8366 {
8367 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8368 x = const0_rtx;
8369 }
8370
8371 if (code != 'P')
8372 {
8373 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8374 {
8375 if (ASSEMBLER_DIALECT == ASM_ATT)
8376 putc ('$', file);
8377 }
8378 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8379 || GET_CODE (x) == LABEL_REF)
8380 {
8381 if (ASSEMBLER_DIALECT == ASM_ATT)
8382 putc ('$', file);
8383 else
8384 fputs ("OFFSET FLAT:", file);
8385 }
8386 }
8387 if (GET_CODE (x) == CONST_INT)
8388 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8389 else if (flag_pic)
8390 output_pic_addr_const (file, x, code);
8391 else
8392 output_addr_const (file, x);
8393 }
8394}
8395
8396/* Print a memory operand whose address is ADDR. */
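/* The same address prints as, e.g., "disp(%base,%index,scale)" in AT&T
   syntax and as "[base+index*scale+disp]" in Intel syntax; an fs:/gs:
   segment override, when present, is emitted first.  */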
8397
8398void
8399print_operand_address (FILE *file, rtx addr)
8400{
8401 struct ix86_address parts;
8402 rtx base, index, disp;
8403 int scale;
8404 int ok = ix86_decompose_address (addr, &parts);
8405
8406 gcc_assert (ok);
8407
8408 base = parts.base;
8409 index = parts.index;
8410 disp = parts.disp;
8411 scale = parts.scale;
8412
8413 switch (parts.seg)
8414 {
8415 case SEG_DEFAULT:
8416 break;
8417 case SEG_FS:
8418 case SEG_GS:
8419 if (USER_LABEL_PREFIX[0] == 0)
8420 putc ('%', file);
8421 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8422 break;
8423 default:
8424 gcc_unreachable ();
8425 }
8426
8427 if (!base && !index)
8428 {
8429      /* A displacement-only address requires special attention.  */
8430
8431 if (GET_CODE (disp) == CONST_INT)
8432 {
8433 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8434 {
8435 if (USER_LABEL_PREFIX[0] == 0)
8436 putc ('%', file);
8437 fputs ("ds:", file);
8438 }
8439 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8440 }
8441 else if (flag_pic)
8442 output_pic_addr_const (file, disp, 0);
8443 else
8444 output_addr_const (file, disp);
8445
8446      /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode.  */
8447 if (TARGET_64BIT)
8448 {
8449 if (GET_CODE (disp) == CONST
8450 && GET_CODE (XEXP (disp, 0)) == PLUS
8451 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8452 disp = XEXP (XEXP (disp, 0), 0);
8453 if (GET_CODE (disp) == LABEL_REF
8454 || (GET_CODE (disp) == SYMBOL_REF
8455 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8456 fputs ("(%rip)", file);
8457 }
8458 }
8459 else
8460 {
8461 if (ASSEMBLER_DIALECT == ASM_ATT)
8462 {
8463 if (disp)
8464 {
8465 if (flag_pic)
8466 output_pic_addr_const (file, disp, 0);
8467 else if (GET_CODE (disp) == LABEL_REF)
8468 output_asm_label (disp);
8469 else
8470 output_addr_const (file, disp);
8471 }
8472
8473 putc ('(', file);
8474 if (base)
8475 print_reg (base, 0, file);
8476 if (index)
8477 {
8478 putc (',', file);
8479 print_reg (index, 0, file);
8480 if (scale != 1)
8481 fprintf (file, ",%d", scale);
8482 }
8483 putc (')', file);
8484 }
8485 else
8486 {
8487 rtx offset = NULL_RTX;
8488
8489 if (disp)
8490 {
8491 /* Pull out the offset of a symbol; print any symbol itself. */
8492 if (GET_CODE (disp) == CONST
8493 && GET_CODE (XEXP (disp, 0)) == PLUS
8494 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8495 {
8496 offset = XEXP (XEXP (disp, 0), 1);
8497 disp = gen_rtx_CONST (VOIDmode,
8498 XEXP (XEXP (disp, 0), 0));
8499 }
8500
8501 if (flag_pic)
8502 output_pic_addr_const (file, disp, 0);
8503 else if (GET_CODE (disp) == LABEL_REF)
8504 output_asm_label (disp);
8505 else if (GET_CODE (disp) == CONST_INT)
8506 offset = disp;
8507 else
8508 output_addr_const (file, disp);
8509 }
8510
8511 putc ('[', file);
8512 if (base)
8513 {
8514 print_reg (base, 0, file);
8515 if (offset)
8516 {
8517 if (INTVAL (offset) >= 0)
8518 putc ('+', file);
8519 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8520 }
8521 }
8522 else if (offset)
8523 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8524 else
8525 putc ('0', file);
8526
8527 if (index)
8528 {
8529 putc ('+', file);
8530 print_reg (index, 0, file);
8531 if (scale != 1)
8532 fprintf (file, "*%d", scale);
8533 }
8534 putc (']', file);
8535 }
8536 }
8537}
8538
8539bool
8540output_addr_const_extra (FILE *file, rtx x)
8541{
8542 rtx op;
8543
8544 if (GET_CODE (x) != UNSPEC)
8545 return false;
8546
8547 op = XVECEXP (x, 0, 0);
8548 switch (XINT (x, 1))
8549 {
8550 case UNSPEC_GOTTPOFF:
8551 output_addr_const (file, op);
8552 /* FIXME: This might be @TPOFF in Sun ld. */
8553 fputs ("@GOTTPOFF", file);
8554 break;
8555 case UNSPEC_TPOFF:
8556 output_addr_const (file, op);
8557 fputs ("@TPOFF", file);
8558 break;
8559 case UNSPEC_NTPOFF:
8560 output_addr_const (file, op);
8561 if (TARGET_64BIT)
8562 fputs ("@TPOFF", file);
8563 else
8564 fputs ("@NTPOFF", file);
8565 break;
8566 case UNSPEC_DTPOFF:
8567 output_addr_const (file, op);
8568 fputs ("@DTPOFF", file);
8569 break;
8570 case UNSPEC_GOTNTPOFF:
8571 output_addr_const (file, op);
8572 if (TARGET_64BIT)
8573 fputs ("@GOTTPOFF(%rip)", file);
8574 else
8575 fputs ("@GOTNTPOFF", file);
8576 break;
8577 case UNSPEC_INDNTPOFF:
8578 output_addr_const (file, op);
8579 fputs ("@INDNTPOFF", file);
8580 break;
8581
8582 default:
8583 return false;
8584 }
8585
8586 return true;
8587}
8588
8589/* Split one or more DImode RTL references into pairs of SImode
8590 references. The RTL can be REG, offsettable MEM, integer constant, or
8591 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8592 split and "num" is its length. lo_half and hi_half are output arrays
8593 that parallel "operands". */
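/* For example, a DImode pseudo register is split into its SImode low and
   high subregs, while an offsettable DImode MEM is split into two SImode
   MEMs at byte offsets 0 and 4.  */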
8594
8595void
8596split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8597{
8598 while (num--)
8599 {
8600 rtx op = operands[num];
8601
8602      /* simplify_subreg refuses to split volatile memory addresses,
8603         but we still have to handle them.  */
8604 if (GET_CODE (op) == MEM)
8605 {
8606 lo_half[num] = adjust_address (op, SImode, 0);
8607 hi_half[num] = adjust_address (op, SImode, 4);
8608 }
8609 else
8610 {
8611 lo_half[num] = simplify_gen_subreg (SImode, op,
8612 GET_MODE (op) == VOIDmode
8613 ? DImode : GET_MODE (op), 0);
8614 hi_half[num] = simplify_gen_subreg (SImode, op,
8615 GET_MODE (op) == VOIDmode
8616 ? DImode : GET_MODE (op), 4);
8617 }
8618 }
8619}
8620/* Split one or more TImode RTL references into pairs of DImode
8621 references. The RTL can be REG, offsettable MEM, integer constant, or
8622   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
8623 split and "num" is its length. lo_half and hi_half are output arrays
8624 that parallel "operands". */
8625
8626void
8627split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8628{
8629 while (num--)
8630 {
8631 rtx op = operands[num];
8632
8633      /* simplify_subreg refuses to split volatile memory addresses, but we
8634         still have to handle them.  */
8635 if (GET_CODE (op) == MEM)
8636 {
8637 lo_half[num] = adjust_address (op, DImode, 0);
8638 hi_half[num] = adjust_address (op, DImode, 8);
8639 }
8640 else
8641 {
8642 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8643 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8644 }
8645 }
8646}
8647
8648/* Output code to perform a 387 binary operation in INSN, one of PLUS,
8649 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8650 is the expression of the binary operation. The output may either be
8651 emitted here, or returned to the caller, like all output_* functions.
8652
8653 There is no guarantee that the operands are the same mode, as they
8654 might be within FLOAT or FLOAT_EXTEND expressions. */
8655
8656#ifndef SYSV386_COMPAT
8657/* Set to 1 for compatibility with brain-damaged assemblers. No-one
8658 wants to fix the assemblers because that causes incompatibility
8659 with gcc. No-one wants to fix gcc because that causes
8660 incompatibility with assemblers... You can use the option of
8661 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8662#define SYSV386_COMPAT 1
8663#endif
8664
8665const char *
8666output_387_binary_op (rtx insn, rtx *operands)
8667{
8668 static char buf[30];
8669 const char *p;
8670 const char *ssep;
8671 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8672
8673#ifdef ENABLE_CHECKING
8674 /* Even if we do not want to check the inputs, this documents input
8675 constraints. Which helps in understanding the following code. */
8676 if (STACK_REG_P (operands[0])
8677 && ((REG_P (operands[1])
8678 && REGNO (operands[0]) == REGNO (operands[1])
8679 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8680 || (REG_P (operands[2])
8681 && REGNO (operands[0]) == REGNO (operands[2])
8682 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8683 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8684 ; /* ok */
8685 else
8686 gcc_assert (is_sse);
8687#endif
8688
8689 switch (GET_CODE (operands[3]))
8690 {
8691 case PLUS:
8692 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8693 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8694 p = "fiadd";
8695 else
8696 p = "fadd";
8697 ssep = "add";
8698 break;
8699
8700 case MINUS:
8701 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8702 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8703 p = "fisub";
8704 else
8705 p = "fsub";
8706 ssep = "sub";
8707 break;
8708
8709 case MULT:
8710 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8711 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8712 p = "fimul";
8713 else
8714 p = "fmul";
8715 ssep = "mul";
8716 break;
8717
8718 case DIV:
8719 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8720 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8721 p = "fidiv";
8722 else
8723 p = "fdiv";
8724 ssep = "div";
8725 break;
8726
8727 default:
8728 gcc_unreachable ();
8729 }
8730
8731 if (is_sse)
8732 {
8733 strcpy (buf, ssep);
8734 if (GET_MODE (operands[0]) == SFmode)
8735 strcat (buf, "ss\t{%2, %0|%0, %2}");
8736 else
8737 strcat (buf, "sd\t{%2, %0|%0, %2}");
8738 return buf;
8739 }
8740 strcpy (buf, p);
8741
8742 switch (GET_CODE (operands[3]))
8743 {
8744 case MULT:
8745 case PLUS:
8746 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8747 {
8748 rtx temp = operands[2];
8749 operands[2] = operands[1];
8750 operands[1] = temp;
8751 }
8752
8753      /* We now know operands[0] == operands[1].  */
8754
8755 if (GET_CODE (operands[2]) == MEM)
8756 {
8757 p = "%z2\t%2";
8758 break;
8759 }
8760
8761 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8762 {
8763 if (STACK_TOP_P (operands[0]))
8764 /* How is it that we are storing to a dead operand[2]?
8765 Well, presumably operands[1] is dead too. We can't
8766 store the result to st(0) as st(0) gets popped on this
8767 instruction. Instead store to operands[2] (which I
8768 think has to be st(1)). st(1) will be popped later.
8769 gcc <= 2.8.1 didn't have this check and generated
8770 assembly code that the Unixware assembler rejected. */
8771 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8772 else
8773 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8774 break;
8775 }
8776
8777 if (STACK_TOP_P (operands[0]))
8778 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8779 else
8780 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8781 break;
8782
8783 case MINUS:
8784 case DIV:
8785 if (GET_CODE (operands[1]) == MEM)
8786 {
8787 p = "r%z1\t%1";
8788 break;
8789 }
8790
8791 if (GET_CODE (operands[2]) == MEM)
8792 {
8793 p = "%z2\t%2";
8794 break;
8795 }
8796
8797 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8798 {
8799#if SYSV386_COMPAT
8800 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8801 derived assemblers, confusingly reverse the direction of
8802 the operation for fsub{r} and fdiv{r} when the
8803 destination register is not st(0). The Intel assembler
8804 doesn't have this brain damage. Read !SYSV386_COMPAT to
8805 figure out what the hardware really does. */
8806 if (STACK_TOP_P (operands[0]))
8807 p = "{p\t%0, %2|rp\t%2, %0}";
8808 else
8809 p = "{rp\t%2, %0|p\t%0, %2}";
8810#else
8811 if (STACK_TOP_P (operands[0]))
8812 /* As above for fmul/fadd, we can't store to st(0). */
8813 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8814 else
8815 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8816#endif
8817 break;
8818 }
8819
8820 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8821 {
8822#if SYSV386_COMPAT
8823 if (STACK_TOP_P (operands[0]))
8824 p = "{rp\t%0, %1|p\t%1, %0}";
8825 else
8826 p = "{p\t%1, %0|rp\t%0, %1}";
8827#else
8828 if (STACK_TOP_P (operands[0]))
8829 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8830 else
8831 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8832#endif
8833 break;
8834 }
8835
8836 if (STACK_TOP_P (operands[0]))
8837 {
8838 if (STACK_TOP_P (operands[1]))
8839 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8840 else
8841 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8842 break;
8843 }
8844 else if (STACK_TOP_P (operands[1]))
8845 {
8846#if SYSV386_COMPAT
8847 p = "{\t%1, %0|r\t%0, %1}";
8848#else
8849 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8850#endif
8851 }
8852 else
8853 {
8854#if SYSV386_COMPAT
8855 p = "{r\t%2, %0|\t%0, %2}";
8856#else
8857 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8858#endif
8859 }
8860 break;
8861
8862 default:
8863 gcc_unreachable ();
8864 }
8865
8866 strcat (buf, p);
8867 return buf;
8868}
8869
8870/* Return needed mode for entity in optimize_mode_switching pass. */
8871
8872int
8873ix86_mode_needed (int entity, rtx insn)
8874{
8875 enum attr_i387_cw mode;
8876
8877  /* The mode UNINITIALIZED is used to store the control word after a
8878     function call or ASM pattern.  The mode ANY specifies that the function
8879     has no requirements on the control word and makes no changes to the
8880     bits we are interested in.  */
8881
8882 if (CALL_P (insn)
8883 || (NONJUMP_INSN_P (insn)
8884 && (asm_noperands (PATTERN (insn)) >= 0
8885 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8886 return I387_CW_UNINITIALIZED;
8887
8888 if (recog_memoized (insn) < 0)
8889 return I387_CW_ANY;
8890
8891 mode = get_attr_i387_cw (insn);
8892
8893 switch (entity)
8894 {
8895 case I387_TRUNC:
8896 if (mode == I387_CW_TRUNC)
8897 return mode;
8898 break;
8899
8900 case I387_FLOOR:
8901 if (mode == I387_CW_FLOOR)
8902 return mode;
8903 break;
8904
8905 case I387_CEIL:
8906 if (mode == I387_CW_CEIL)
8907 return mode;
8908 break;
8909
8910 case I387_MASK_PM:
8911 if (mode == I387_CW_MASK_PM)
8912 return mode;
8913 break;
8914
8915 default:
8916 gcc_unreachable ();
8917 }
8918
8919 return I387_CW_ANY;
8920}
8921
8922/* Output code to initialize the control word copies used by the trunc?f?i
8923   and rounding patterns.  The current control word is saved, modified
8924   according to MODE, and stored to the stack slot for that rounding mode.  */
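/* Recall the i387 control word layout: bits 11:10 select the rounding
   mode (00 = to nearest, 01 = down, 10 = up, 11 = toward zero) and bit 5
   masks the precision exception, hence the 0x0c00, 0x0400, 0x0800 and
   0x0020 constants used below.  */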
8925
8926void
8927emit_i387_cw_initialization (int mode)
8928{
8929 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8930 rtx new_mode;
8931
8932 int slot;
8933
8934 rtx reg = gen_reg_rtx (HImode);
8935
8936 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8937 emit_move_insn (reg, stored_mode);
8938
8939 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8940 {
8941 switch (mode)
8942 {
8943 case I387_CW_TRUNC:
8944 /* round toward zero (truncate) */
8945 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8946 slot = SLOT_CW_TRUNC;
8947 break;
8948
8949 case I387_CW_FLOOR:
8950 /* round down toward -oo */
8951 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8952 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8953 slot = SLOT_CW_FLOOR;
8954 break;
8955
8956 case I387_CW_CEIL:
8957 /* round up toward +oo */
8958 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8959 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8960 slot = SLOT_CW_CEIL;
8961 break;
8962
8963 case I387_CW_MASK_PM:
8964 /* mask precision exception for nearbyint() */
8965 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8966 slot = SLOT_CW_MASK_PM;
8967 break;
8968
8969 default:
8970 gcc_unreachable ();
8971 }
8972 }
8973 else
8974 {
8975 switch (mode)
8976 {
8977 case I387_CW_TRUNC:
8978 /* round toward zero (truncate) */
8979 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8980 slot = SLOT_CW_TRUNC;
8981 break;
8982
8983 case I387_CW_FLOOR:
8984 /* round down toward -oo */
8985 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8986 slot = SLOT_CW_FLOOR;
8987 break;
8988
8989 case I387_CW_CEIL:
8990 /* round up toward +oo */
8991 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8992 slot = SLOT_CW_CEIL;
8993 break;
8994
8995 case I387_CW_MASK_PM:
8996 /* mask precision exception for nearbyint() */
8997 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8998 slot = SLOT_CW_MASK_PM;
8999 break;
9000
9001 default:
9002 gcc_unreachable ();
9003 }
9004 }
9005
9006 gcc_assert (slot < MAX_386_STACK_LOCALS);
9007
9008 new_mode = assign_386_stack_local (HImode, slot);
9009 emit_move_insn (new_mode, reg);
9010}
9011
9012/* Output code for INSN to convert a float to a signed int. OPERANDS
9013 are the insn operands. The output may be [HSD]Imode and the input
9014 operand may be [SDX]Fmode. */
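/* A typical non-fisttp sequence is roughly "fldcw %3" (switch to the
   truncating control word), "fistpl %0" (store and pop), "fldcw %2"
   (restore the original control word); the exact suffixes and the
   optional leading "fld %y1" depend on the operand modes and on whether
   the stack top dies.  */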
9015
9016const char *
9017output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9018{
9019 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9020 int dimode_p = GET_MODE (operands[0]) == DImode;
9021 int round_mode = get_attr_i387_cw (insn);
9022
9023 /* Jump through a hoop or two for DImode, since the hardware has no
9024 non-popping instruction. We used to do this a different way, but
9025 that was somewhat fragile and broke with post-reload splitters. */
9026 if ((dimode_p || fisttp) && !stack_top_dies)
9027 output_asm_insn ("fld\t%y1", operands);
9028
9029 gcc_assert (STACK_TOP_P (operands[1]));
9030 gcc_assert (GET_CODE (operands[0]) == MEM);
9031
9032 if (fisttp)
9033 output_asm_insn ("fisttp%z0\t%0", operands);
9034 else
9035 {
9036 if (round_mode != I387_CW_ANY)
9037 output_asm_insn ("fldcw\t%3", operands);
9038 if (stack_top_dies || dimode_p)
9039 output_asm_insn ("fistp%z0\t%0", operands);
9040 else
9041 output_asm_insn ("fist%z0\t%0", operands);
9042 if (round_mode != I387_CW_ANY)
9043 output_asm_insn ("fldcw\t%2", operands);
9044 }
9045
9046 return "";
9047}
9048
9049/* Output code for x87 ffreep insn. The OPNO argument, which may only
9050 have the values zero or one, indicates the ffreep insn's operand
9051 from the OPERANDS array. */
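/* When the assembler does not understand the mnemonic, the raw machine
   encoding is emitted instead: "ffreep %st(i)" is the byte pair
   0xdf, 0xc0+i, written below as a little-endian .word.  */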
9052
9053static const char *
9054output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9055{
9056 if (TARGET_USE_FFREEP)
9057#if HAVE_AS_IX86_FFREEP
9058 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9059#else
9060 switch (REGNO (operands[opno]))
9061 {
9062 case FIRST_STACK_REG + 0: return ".word\t0xc0df";
9063 case FIRST_STACK_REG + 1: return ".word\t0xc1df";
9064 case FIRST_STACK_REG + 2: return ".word\t0xc2df";
9065 case FIRST_STACK_REG + 3: return ".word\t0xc3df";
9066 case FIRST_STACK_REG + 4: return ".word\t0xc4df";
9067 case FIRST_STACK_REG + 5: return ".word\t0xc5df";
9068 case FIRST_STACK_REG + 6: return ".word\t0xc6df";
9069 case FIRST_STACK_REG + 7: return ".word\t0xc7df";
9070 }
9071#endif
9072
9073 return opno ? "fstp\t%y1" : "fstp\t%y0";
9074}
9075
9076
9077/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9078 should be used. UNORDERED_P is true when fucom should be used. */
9079
9080const char *
9081output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9082{
9083 int stack_top_dies;
9084 rtx cmp_op0, cmp_op1;
9085 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9086
9087 if (eflags_p)
9088 {
9089 cmp_op0 = operands[0];
9090 cmp_op1 = operands[1];
9091 }
9092 else
9093 {
9094 cmp_op0 = operands[1];
9095 cmp_op1 = operands[2];
9096 }
9097
9098 if (is_sse)
9099 {
9100 if (GET_MODE (operands[0]) == SFmode)
9101 if (unordered_p)
9102 return "ucomiss\t{%1, %0|%0, %1}";
9103 else
9104 return "comiss\t{%1, %0|%0, %1}";
9105 else
9106 if (unordered_p)
9107 return "ucomisd\t{%1, %0|%0, %1}";
9108 else
9109 return "comisd\t{%1, %0|%0, %1}";
9110 }
9111
9112 gcc_assert (STACK_TOP_P (cmp_op0));
9113
9114 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9115
9116 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9117 {
9118 if (stack_top_dies)
9119 {
9120 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9121 return output_387_ffreep (operands, 1);
9122 }
9123 else
9124 return "ftst\n\tfnstsw\t%0";
9125 }
9126
9127 if (STACK_REG_P (cmp_op1)
9128 && stack_top_dies
9129 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9130 && REGNO (cmp_op1) != FIRST_STACK_REG)
9131 {
9132      /* If the top of the 387 stack dies, and the other operand
9133	 is also a stack register that dies, then this must be a
9134	 `fcompp' float compare.  */
9135
9136 if (eflags_p)
9137 {
9138 /* There is no double popping fcomi variant. Fortunately,
9139 eflags is immune from the fstp's cc clobbering. */
9140 if (unordered_p)
9141 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9142 else
9143 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9144 return output_387_ffreep (operands, 0);
9145 }
9146 else
9147 {
9148 if (unordered_p)
9149 return "fucompp\n\tfnstsw\t%0";
9150 else
9151 return "fcompp\n\tfnstsw\t%0";
9152 }
9153 }
9154 else
9155 {
9156 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9157
9158 static const char * const alt[16] =
9159 {
9160 "fcom%z2\t%y2\n\tfnstsw\t%0",
9161 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9162 "fucom%z2\t%y2\n\tfnstsw\t%0",
9163 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9164
9165 "ficom%z2\t%y2\n\tfnstsw\t%0",
9166 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9167 NULL,
9168 NULL,
9169
9170 "fcomi\t{%y1, %0|%0, %y1}",
9171 "fcomip\t{%y1, %0|%0, %y1}",
9172 "fucomi\t{%y1, %0|%0, %y1}",
9173 "fucomip\t{%y1, %0|%0, %y1}",
9174
9175 NULL,
9176 NULL,
9177 NULL,
9178 NULL
9179 };
9180
9181 int mask;
9182 const char *ret;
9183
9184 mask = eflags_p << 3;
9185 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9186 mask |= unordered_p << 1;
9187 mask |= stack_top_dies;
9188
9189 gcc_assert (mask < 16);
9190 ret = alt[mask];
9191 gcc_assert (ret);
9192
9193 return ret;
9194 }
9195}
9196
9197void
9198ix86_output_addr_vec_elt (FILE *file, int value)
9199{
9200 const char *directive = ASM_LONG;
9201
9202#ifdef ASM_QUAD
9203 if (TARGET_64BIT)
9204 directive = ASM_QUAD;
9205#else
9206 gcc_assert (!TARGET_64BIT);
9207#endif
9208
9209 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9210}
9211
9212void
9213ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9214{
9215 if (TARGET_64BIT)
9216 fprintf (file, "%s%s%d-%s%d\n",
9217 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9218 else if (HAVE_AS_GOTOFF_IN_DATA)
9219 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9220#if TARGET_MACHO
9221 else if (TARGET_MACHO)
9222 {
9223 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9224 machopic_output_function_base_name (file);
9225 fprintf(file, "\n");
9226 }
9227#endif
9228 else
9229 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9230 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9231}
9232
9233/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9234 for the target. */
9235
9236void
9237ix86_expand_clear (rtx dest)
9238{
9239 rtx tmp;
9240
9241 /* We play register width games, which are only valid after reload. */
9242 gcc_assert (reload_completed);
9243
9244 /* Avoid HImode and its attendant prefix byte. */
9245 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9246 dest = gen_rtx_REG (SImode, REGNO (dest));
9247
9248 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9249
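  /* The xor form clobbers the flags register (hard register 17, i.e.
     FLAGS_REG), which "mov $0" does not; the PARALLEL built below
     records that clobber.  */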
9250 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9251 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9252 {
9253 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9254 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9255 }
9256
9257 emit_insn (tmp);
9258}
9259
9260/* X is an unchanging MEM. If it is a constant pool reference, return
9261 the constant pool rtx, else NULL. */
9262
9263rtx
9264maybe_get_pool_constant (rtx x)
9265{
9266 x = ix86_delegitimize_address (XEXP (x, 0));
9267
9268 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9269 return get_pool_constant (x);
9270
9271 return NULL_RTX;
9272}
9273
9274void
9275ix86_expand_move (enum machine_mode mode, rtx operands[])
9276{
9277 int strict = (reload_in_progress || reload_completed);
9278 rtx op0, op1;
9279 enum tls_model model;
9280
9281 op0 = operands[0];
9282 op1 = operands[1];
9283
9284 if (GET_CODE (op1) == SYMBOL_REF)
9285 {
9286 model = SYMBOL_REF_TLS_MODEL (op1);
9287 if (model)
9288 {
9289 op1 = legitimize_tls_address (op1, model, true);
9290 op1 = force_operand (op1, op0);
9291 if (op1 == op0)
9292 return;
9293 }
9294 }
9295 else if (GET_CODE (op1) == CONST
9296 && GET_CODE (XEXP (op1, 0)) == PLUS
9297 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9298 {
9299 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9300 if (model)
9301 {
9302 rtx addend = XEXP (XEXP (op1, 0), 1);
9303 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9304 op1 = force_operand (op1, NULL);
9305 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9306 op0, 1, OPTAB_DIRECT);
9307 if (op1 == op0)
9308 return;
9309 }
9310 }
9311
9312 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9313 {
9314 if (TARGET_MACHO && !TARGET_64BIT)
9315 {
9316#if TARGET_MACHO
9317 if (MACHOPIC_PURE)
9318 {
9319 rtx temp = ((reload_in_progress
9320 || ((op0 && GET_CODE (op0) == REG)
9321 && mode == Pmode))
9322 ? op0 : gen_reg_rtx (Pmode));
9323 op1 = machopic_indirect_data_reference (op1, temp);
9324 op1 = machopic_legitimize_pic_address (op1, mode,
9325 temp == op1 ? 0 : temp);
9326 }
9327 else if (MACHOPIC_INDIRECT)
9328 op1 = machopic_indirect_data_reference (op1, 0);
9329 if (op0 == op1)
9330 return;
9331#endif
9332 }
9333 else
9334 {
9335 if (GET_CODE (op0) == MEM)
9336 op1 = force_reg (Pmode, op1);
9337 else
9338 op1 = legitimize_address (op1, op1, Pmode);
9339 }
9340 }
9341 else
9342 {
9343 if (GET_CODE (op0) == MEM
9344 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9345 || !push_operand (op0, mode))
9346 && GET_CODE (op1) == MEM)
9347 op1 = force_reg (mode, op1);
9348
9349 if (push_operand (op0, mode)
9350 && ! general_no_elim_operand (op1, mode))
9351 op1 = copy_to_mode_reg (mode, op1);
9352
9353      /* Force large constants in 64-bit compilation into a register
9354	 to get them CSEed.  */
9355 if (TARGET_64BIT && mode == DImode
9356 && immediate_operand (op1, mode)
9357 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9358 && !register_operand (op0, mode)
9359 && optimize && !reload_completed && !reload_in_progress)
9360 op1 = copy_to_mode_reg (mode, op1);
9361
9362 if (FLOAT_MODE_P (mode))
9363 {
9364	  /* If we are loading a floating point constant into a register,
9365	     force the value to memory now, since we'll get better code
9366	     out of the back end.  */
9367
9368 if (strict)
9369 ;
9370 else if (GET_CODE (op1) == CONST_DOUBLE)
9371 {
9372 op1 = validize_mem (force_const_mem (mode, op1));
9373 if (!register_operand (op0, mode))
9374 {
9375 rtx temp = gen_reg_rtx (mode);
9376 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9377 emit_move_insn (op0, temp);
9378 return;
9379 }
9380 }
9381 }
9382 }
9383
9384 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9385}
9386
9387void
9388ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9389{
9390 rtx op0 = operands[0], op1 = operands[1];
9391
9392 /* Force constants other than zero into memory. We do not know how
9393 the instructions used to build constants modify the upper 64 bits
9394     of the register; once we have that information we may be able
9395 to handle some of them more efficiently. */
9396 if ((reload_in_progress | reload_completed) == 0
9397 && register_operand (op0, mode)
9398 && CONSTANT_P (op1)
9399 && standard_sse_constant_p (op1) <= 0)
9400 op1 = validize_mem (force_const_mem (mode, op1));
9401
9402 /* Make operand1 a register if it isn't already. */
9403 if (!no_new_pseudos
9404 && !register_operand (op0, mode)
9405 && !register_operand (op1, mode))
9406 {
9407 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9408 return;
9409 }
9410
9411 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9412}
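
/* Illustrative note: as a result of the force_const_mem above, a move of a
   vector constant that standard_sse_constant_p does not recognize as
   directly constructible ends up as a load from the constant pool
   (roughly "movaps .LCn, %xmm0") rather than being synthesized with
   arithmetic insns.  */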
9413
9414/* Implement the movmisalign patterns for SSE. Non-SSE modes go
9415 straight to ix86_expand_vector_move. */
9416
9417void
9418ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9419{
9420 rtx op0, op1, m;
9421
9422 op0 = operands[0];
9423 op1 = operands[1];
9424
9425 if (MEM_P (op1))
9426 {
9427 /* If we're optimizing for size, movups is the smallest. */
9428 if (optimize_size)
9429 {
9430 op0 = gen_lowpart (V4SFmode, op0);
9431 op1 = gen_lowpart (V4SFmode, op1);
9432 emit_insn (gen_sse_movups (op0, op1));
9433 return;
9434 }
9435
9436 /* ??? If we have typed data, then it would appear that using
9437 movdqu is the only way to get unaligned data loaded with
9438 integer type. */
9439 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9440 {
9441 op0 = gen_lowpart (V16QImode, op0);
9442 op1 = gen_lowpart (V16QImode, op1);
9443 emit_insn (gen_sse2_movdqu (op0, op1));
9444 return;
9445 }
9446
9447 if (TARGET_SSE2 && mode == V2DFmode)
9448 {
9449 rtx zero;
9450
9451 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9452 {
9453 op0 = gen_lowpart (V2DFmode, op0);
9454 op1 = gen_lowpart (V2DFmode, op1);
9455 emit_insn (gen_sse2_movupd (op0, op1));
9456 return;
9457 }
9458
9459 /* When SSE registers are split into halves, we can avoid
9460 writing to the top half twice. */
9461 if (TARGET_SSE_SPLIT_REGS)
9462 {
9463 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9464 zero = op0;
9465 }
9466 else
9467 {
9468 /* ??? Not sure about the best option for the Intel chips.
9469 The following would seem to satisfy; the register is
9470 entirely cleared, breaking the dependency chain. We
9471 then store to the upper half, with a dependency depth
9472 of one. A rumor has it that Intel recommends two movsd
9473 followed by an unpacklpd, but this is unconfirmed. And
9474 given that the dependency depth of the unpacklpd would
9475 still be one, I'm not sure why this would be better. */
9476 zero = CONST0_RTX (V2DFmode);
9477 }
9478
9479 m = adjust_address (op1, DFmode, 0);
9480 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9481 m = adjust_address (op1, DFmode, 8);
9482 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9483 }
9484 else
9485 {
9486 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9487 {
9488 op0 = gen_lowpart (V4SFmode, op0);
9489 op1 = gen_lowpart (V4SFmode, op1);
9490 emit_insn (gen_sse_movups (op0, op1));
9491 return;
9492 }
9493
9494 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9495 emit_move_insn (op0, CONST0_RTX (mode));
9496 else
9497 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9498
9499 if (mode != V4SFmode)
9500 op0 = gen_lowpart (V4SFmode, op0);
9501 m = adjust_address (op1, V2SFmode, 0);
9502 emit_insn (gen_sse_loadlps (op0, op0, m));
9503 m = adjust_address (op1, V2SFmode, 8);
9504 emit_insn (gen_sse_loadhps (op0, op0, m));
9505 }
9506 }
9507 else if (MEM_P (op0))
9508 {
9509 /* If we're optimizing for size, movups is the smallest. */
9510 if (optimize_size)
9511 {
9512 op0 = gen_lowpart (V4SFmode, op0);
9513 op1 = gen_lowpart (V4SFmode, op1);
9514 emit_insn (gen_sse_movups (op0, op1));
9515 return;
9516 }
9517
9518 /* ??? Similar to above, only less clear because of quote
9519 typeless stores unquote. */
9520 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9521 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9522 {
9523 op0 = gen_lowpart (V16QImode, op0);
9524 op1 = gen_lowpart (V16QImode, op1);
9525 emit_insn (gen_sse2_movdqu (op0, op1));
9526 return;
9527 }
9528
9529 if (TARGET_SSE2 && mode == V2DFmode)
9530 {
9531 m = adjust_address (op0, DFmode, 0);
9532 emit_insn (gen_sse2_storelpd (m, op1));
9533 m = adjust_address (op0, DFmode, 8);
9534 emit_insn (gen_sse2_storehpd (m, op1));
9535 }
9536 else
9537 {
9538 if (mode != V4SFmode)
9539 op1 = gen_lowpart (V4SFmode, op1);
9540 m = adjust_address (op0, V2SFmode, 0);
9541 emit_insn (gen_sse_storelps (m, op1));
9542 m = adjust_address (op0, V2SFmode, 8);
9543 emit_insn (gen_sse_storehps (m, op1));
9544 }
9545 }
9546 else
9547 gcc_unreachable ();
9548}
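
/* Illustrative summary of the unaligned V2DF load strategies chosen above
   (mnemonics are approximate):

     optimize_size:                       movups  mem, %xmm0
     TARGET_SSE_UNALIGNED_MOVE_OPTIMAL:   movupd  mem, %xmm0
     otherwise:                           load the two halves separately,
                                          e.g. movsd mem, %xmm0 followed by
                                          movhpd mem+8, %xmm0

   Splitting the load avoids the penalty some implementations impose on a
   full unaligned 128-bit access.  */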
9549
9550/* Expand a push in MODE. This is some mode for which we do not support
9551 proper push instructions, at least from the registers that we expect
9552 the value to live in. */
9553
9554void
9555ix86_expand_push (enum machine_mode mode, rtx x)
9556{
9557 rtx tmp;
9558
9559 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9560 GEN_INT (-GET_MODE_SIZE (mode)),
9561 stack_pointer_rtx, 1, OPTAB_DIRECT);
9562 if (tmp != stack_pointer_rtx)
9563 emit_move_insn (stack_pointer_rtx, tmp);
9564
9565 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9566 emit_move_insn (tmp, x);
9567}
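
/* For example, on ia32 a push of a DFmode value X expands to roughly

     subl  $8, %esp
     <store X to (%esp)>

   i.e. an explicit stack pointer adjustment followed by an ordinary move,
   since there is no suitable push instruction for such modes.  */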
9568
9569/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9570 destination to use for the operation. If different from the true
9571 destination in operands[0], a copy operation will be required. */
9572
9573rtx
9574ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9575 rtx operands[])
9576{
9577 int matching_memory;
9578 rtx src1, src2, dst;
9579
9580 dst = operands[0];
9581 src1 = operands[1];
9582 src2 = operands[2];
9583
9584 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9585 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9586 && (rtx_equal_p (dst, src2)
9587 || immediate_operand (src1, mode)))
9588 {
9589 rtx temp = src1;
9590 src1 = src2;
9591 src2 = temp;
9592 }
9593
9594 /* If the destination is memory, and we do not have matching source
9595 operands, do things in registers. */
9596 matching_memory = 0;
9597 if (GET_CODE (dst) == MEM)
9598 {
9599 if (rtx_equal_p (dst, src1))
9600 matching_memory = 1;
9601 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9602 && rtx_equal_p (dst, src2))
9603 matching_memory = 2;
9604 else
9605 dst = gen_reg_rtx (mode);
9606 }
9607
9608 /* Both source operands cannot be in memory. */
9609 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9610 {
9611 if (matching_memory != 2)
9612 src2 = force_reg (mode, src2);
9613 else
9614 src1 = force_reg (mode, src1);
9615 }
9616
9617  /* If the operation is not commutative, source 1 cannot be a constant
9618     or non-matching memory.  */
9619 if ((CONSTANT_P (src1)
9620 || (!matching_memory && GET_CODE (src1) == MEM))
9621 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9622 src1 = force_reg (mode, src1);
9623
9624 src1 = operands[1] = src1;
9625 src2 = operands[2] = src2;
9626 return dst;
9627}
9628
9629/* Similarly, but assume that the destination has already been
9630 set up properly. */
9631
9632void
9633ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9634 enum machine_mode mode, rtx operands[])
9635{
9636 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9637 gcc_assert (dst == operands[0]);
9638}
9639
9640/* Attempt to expand a binary operator.  Make the expansion closer to the
9641   actual machine than just general_operand, which will allow 3 separate
9642   memory references (one output, two input) in a single insn.  */
9643
9644void
9645ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9646 rtx operands[])
9647{
9648 rtx src1, src2, dst, op, clob;
9649
9650 dst = ix86_fixup_binary_operands (code, mode, operands);
9651 src1 = operands[1];
9652 src2 = operands[2];
9653
9654 /* Emit the instruction. */
9655
9656 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9657 if (reload_in_progress)
9658 {
9659 /* Reload doesn't know about the flags register, and doesn't know that
9660 it doesn't want to clobber it. We can only do this with PLUS. */
9661 gcc_assert (code == PLUS);
9662 emit_insn (op);
9663 }
9664 else
9665 {
9666 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9667 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9668 }
9669
9670 /* Fix up the destination if needed. */
9671 if (dst != operands[0])
9672 emit_move_insn (operands[0], dst);
9673}
9674
9675/* Return TRUE or FALSE depending on whether the binary operator meets the
9676 appropriate constraints. */
9677
9678int
9679ix86_binary_operator_ok (enum rtx_code code,
9680 enum machine_mode mode ATTRIBUTE_UNUSED,
9681 rtx operands[3])
9682{
9683 /* Both source operands cannot be in memory. */
9684 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9685 return 0;
9686  /* If the operation is not commutative, source 1 cannot be a constant.  */
9687 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9688 return 0;
9689 /* If the destination is memory, we must have a matching source operand. */
9690 if (GET_CODE (operands[0]) == MEM
9691 && ! (rtx_equal_p (operands[0], operands[1])
9692 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9693 && rtx_equal_p (operands[0], operands[2]))))
9694 return 0;
9695  /* If the operation is not commutative and source 1 is memory, we must
9696     have a matching destination.  */
9697 if (GET_CODE (operands[1]) == MEM
9698 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9699 && ! rtx_equal_p (operands[0], operands[1]))
9700 return 0;
9701 return 1;
9702}
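
/* Illustrative examples of the checks above (MINUS is not commutative,
   PLUS is):

     reg0 = reg0 - mem       OK
     mem0 = mem0 - reg       OK   (destination matches source 1)
     mem0 = reg  + mem0      OK   (commutative, destination matches source 2)
     reg0 = mem  - mem       rejected (two memory sources)
     mem0 = mem1 - reg       rejected (non-matching memory destination)
     reg0 = const - reg1     rejected (constant as source 1 of a
                                       non-commutative operation)  */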
9703
9704/* Attempt to expand a unary operator.  Make the expansion closer to the
9705   actual machine than just general_operand, which will allow 2 separate
9706   memory references (one output, one input) in a single insn.  */
9707
9708void
9709ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9710 rtx operands[])
9711{
9712 int matching_memory;
9713 rtx src, dst, op, clob;
9714
9715 dst = operands[0];
9716 src = operands[1];
9717
9718 /* If the destination is memory, and we do not have matching source
9719 operands, do things in registers. */
9720 matching_memory = 0;
9721 if (MEM_P (dst))
9722 {
9723 if (rtx_equal_p (dst, src))
9724 matching_memory = 1;
9725 else
9726 dst = gen_reg_rtx (mode);
9727 }
9728
9729 /* When source operand is memory, destination must match. */
9730 if (MEM_P (src) && !matching_memory)
9731 src = force_reg (mode, src);
9732
9733 /* Emit the instruction. */
9734
9735 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9736 if (reload_in_progress || code == NOT)
9737 {
9738 /* Reload doesn't know about the flags register, and doesn't know that
9739 it doesn't want to clobber it. */
9740 gcc_assert (code == NOT);
9741 emit_insn (op);
9742 }
9743 else
9744 {
9745 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9746 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9747 }
9748
9749 /* Fix up the destination if needed. */
9750 if (dst != operands[0])
9751 emit_move_insn (operands[0], dst);
9752}
9753
9754/* Return TRUE or FALSE depending on whether the unary operator meets the
9755 appropriate constraints. */
9756
9757int
9758ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9759 enum machine_mode mode ATTRIBUTE_UNUSED,
9760 rtx operands[2] ATTRIBUTE_UNUSED)
9761{
9762 /* If one of operands is memory, source and destination must match. */
9763 if ((GET_CODE (operands[0]) == MEM
9764 || GET_CODE (operands[1]) == MEM)
9765 && ! rtx_equal_p (operands[0], operands[1]))
9766 return FALSE;
9767 return TRUE;
9768}
9769
9770/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9771 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9772 true, then replicate the mask for all elements of the vector register.
9773 If INVERT is true, then create a mask excluding the sign bit. */
9774
9775rtx
9776ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9777{
9778 enum machine_mode vec_mode;
9779 HOST_WIDE_INT hi, lo;
9780 int shift = 63;
9781 rtvec v;
9782 rtx mask;
9783
9784 /* Find the sign bit, sign extended to 2*HWI. */
9785 if (mode == SFmode)
9786 lo = 0x80000000, hi = lo < 0;
9787 else if (HOST_BITS_PER_WIDE_INT >= 64)
9788 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9789 else
9790 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9791
9792 if (invert)
9793 lo = ~lo, hi = ~hi;
9794
9795 /* Force this value into the low part of a fp vector constant. */
9796 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9797 mask = gen_lowpart (mode, mask);
9798
9799 if (mode == SFmode)
9800 {
9801 if (vect)
9802 v = gen_rtvec (4, mask, mask, mask, mask);
9803 else
9804 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9805 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9806 vec_mode = V4SFmode;
9807 }
9808 else
9809 {
9810 if (vect)
9811 v = gen_rtvec (2, mask, mask);
9812 else
9813 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9814 vec_mode = V2DFmode;
9815 }
9816
9817 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9818}
9819
9820/* Generate code for floating point ABS or NEG. */
9821
9822void
9823ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9824 rtx operands[])
9825{
9826 rtx mask, set, use, clob, dst, src;
9827 bool matching_memory;
9828 bool use_sse = false;
9829 bool vector_mode = VECTOR_MODE_P (mode);
9830 enum machine_mode elt_mode = mode;
9831
9832 if (vector_mode)
9833 {
9834 elt_mode = GET_MODE_INNER (mode);
9835 use_sse = true;
9836 }
9837 else if (TARGET_SSE_MATH)
9838 use_sse = SSE_FLOAT_MODE_P (mode);
9839
9840 /* NEG and ABS performed with SSE use bitwise mask operations.
9841 Create the appropriate mask now. */
9842 if (use_sse)
9843 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9844 else
9845 mask = NULL_RTX;
9846
9847 dst = operands[0];
9848 src = operands[1];
9849
9850 /* If the destination is memory, and we don't have matching source
9851 operands or we're using the x87, do things in registers. */
9852 matching_memory = false;
9853 if (MEM_P (dst))
9854 {
9855 if (use_sse && rtx_equal_p (dst, src))
9856 matching_memory = true;
9857 else
9858 dst = gen_reg_rtx (mode);
9859 }
9860 if (MEM_P (src) && !matching_memory)
9861 src = force_reg (mode, src);
9862
9863 if (vector_mode)
9864 {
9865 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9866 set = gen_rtx_SET (VOIDmode, dst, set);
9867 emit_insn (set);
9868 }
9869 else
9870 {
9871 set = gen_rtx_fmt_e (code, mode, src);
9872 set = gen_rtx_SET (VOIDmode, dst, set);
9873 if (mask)
9874 {
9875 use = gen_rtx_USE (VOIDmode, mask);
9876 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9877 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9878 gen_rtvec (3, set, use, clob)));
9879 }
9880 else
9881 emit_insn (set);
9882 }
9883
9884 if (dst != operands[0])
9885 emit_move_insn (operands[0], dst);
9886}
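
/* Concretely, for a scalar DFmode operand done with SSE math the masks
   built above give, for the low vector element (mnemonics approximate):

     NEG:  xorpd  {0x8000000000000000, 0}, dest    ; flip the sign bit
     ABS:  andpd  {0x7fffffffffffffff, 0}, dest    ; clear the sign bit

   For vector modes the mask element is replicated across all lanes.  */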
9887
9888/* Expand a copysign operation. Special case operand 0 being a constant. */
9889
9890void
9891ix86_expand_copysign (rtx operands[])
9892{
9893 enum machine_mode mode, vmode;
9894 rtx dest, op0, op1, mask, nmask;
9895
9896 dest = operands[0];
9897 op0 = operands[1];
9898 op1 = operands[2];
9899
9900 mode = GET_MODE (dest);
9901 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9902
9903 if (GET_CODE (op0) == CONST_DOUBLE)
9904 {
9905 rtvec v;
9906
9907 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9908 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9909
9910 if (op0 == CONST0_RTX (mode))
9911 op0 = CONST0_RTX (vmode);
9912 else
9913 {
9914 if (mode == SFmode)
9915 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9916 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9917 else
9918 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9919 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9920 }
9921
9922 mask = ix86_build_signbit_mask (mode, 0, 0);
9923
9924 if (mode == SFmode)
9925 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9926 else
9927 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9928 }
9929 else
9930 {
9931 nmask = ix86_build_signbit_mask (mode, 0, 1);
9932 mask = ix86_build_signbit_mask (mode, 0, 0);
9933
9934 if (mode == SFmode)
9935 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9936 else
9937 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9938 }
9939}
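
/* The expansion above relies on the usual bitwise identity

     copysign (x, y) = (x & ~SIGN_MASK) | (y & SIGN_MASK)

   where SIGN_MASK has only the sign bit set.  In the constant case the
   (x & ~SIGN_MASK) part is folded at expansion time by taking the absolute
   value of the constant; the variable case (split below) needs both the
   MASK and NMASK operands.  */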
9940
9941/* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9942 be a constant, and so has already been expanded into a vector constant. */
9943
9944void
9945ix86_split_copysign_const (rtx operands[])
9946{
9947 enum machine_mode mode, vmode;
9948 rtx dest, op0, op1, mask, x;
9949
9950 dest = operands[0];
9951 op0 = operands[1];
9952 op1 = operands[2];
9953 mask = operands[3];
9954
9955 mode = GET_MODE (dest);
9956 vmode = GET_MODE (mask);
9957
9958 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9959 x = gen_rtx_AND (vmode, dest, mask);
9960 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9961
9962 if (op0 != CONST0_RTX (vmode))
9963 {
9964 x = gen_rtx_IOR (vmode, dest, op0);
9965 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9966 }
9967}
9968
9969/* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9970 so we have to do two masks. */
9971
9972void
9973ix86_split_copysign_var (rtx operands[])
9974{
9975 enum machine_mode mode, vmode;
9976 rtx dest, scratch, op0, op1, mask, nmask, x;
9977
9978 dest = operands[0];
9979 scratch = operands[1];
9980 op0 = operands[2];
9981 op1 = operands[3];
9982 nmask = operands[4];
9983 mask = operands[5];
9984
9985 mode = GET_MODE (dest);
9986 vmode = GET_MODE (mask);
9987
9988 if (rtx_equal_p (op0, op1))
9989 {
9990 /* Shouldn't happen often (it's useless, obviously), but when it does
9991 we'd generate incorrect code if we continue below. */
9992 emit_move_insn (dest, op0);
9993 return;
9994 }
9995
9996 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9997 {
9998 gcc_assert (REGNO (op1) == REGNO (scratch));
9999
10000 x = gen_rtx_AND (vmode, scratch, mask);
10001 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10002
10003 dest = mask;
10004 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10005 x = gen_rtx_NOT (vmode, dest);
10006 x = gen_rtx_AND (vmode, x, op0);
10007 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10008 }
10009 else
10010 {
10011 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10012 {
10013 x = gen_rtx_AND (vmode, scratch, mask);
10014 }
10015 else /* alternative 2,4 */
10016 {
10017 gcc_assert (REGNO (mask) == REGNO (scratch));
10018 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10019 x = gen_rtx_AND (vmode, scratch, op1);
10020 }
10021 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10022
10023 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10024 {
10025 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10026 x = gen_rtx_AND (vmode, dest, nmask);
10027 }
10028 else /* alternative 3,4 */
10029 {
10030 gcc_assert (REGNO (nmask) == REGNO (dest));
10031 dest = nmask;
10032 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10033 x = gen_rtx_AND (vmode, dest, op0);
10034 }
10035 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10036 }
10037
10038 x = gen_rtx_IOR (vmode, dest, scratch);
10039 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10040}
10041
10042/* Return TRUE or FALSE depending on whether the first SET in INSN
10043 has source and destination with matching CC modes, and that the
10044 CC mode is at least as constrained as REQ_MODE. */
10045
10046int
10047ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10048{
10049 rtx set;
10050 enum machine_mode set_mode;
10051
10052 set = PATTERN (insn);
10053 if (GET_CODE (set) == PARALLEL)
10054 set = XVECEXP (set, 0, 0);
10055 gcc_assert (GET_CODE (set) == SET);
10056 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10057
10058 set_mode = GET_MODE (SET_DEST (set));
10059 switch (set_mode)
10060 {
10061 case CCNOmode:
10062 if (req_mode != CCNOmode
10063 && (req_mode != CCmode
10064 || XEXP (SET_SRC (set), 1) != const0_rtx))
10065 return 0;
10066 break;
10067 case CCmode:
10068 if (req_mode == CCGCmode)
10069 return 0;
10070 /* FALLTHRU */
10071 case CCGCmode:
10072 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10073 return 0;
10074 /* FALLTHRU */
10075 case CCGOCmode:
10076 if (req_mode == CCZmode)
10077 return 0;
10078 /* FALLTHRU */
10079 case CCZmode:
10080 break;
10081
10082 default:
10083 gcc_unreachable ();
10084 }
10085
10086 return (GET_MODE (SET_SRC (set)) == set_mode);
10087}
10088
10089/* Generate insn patterns to do an integer compare of OPERANDS. */
10090
10091static rtx
10092ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10093{
10094 enum machine_mode cmpmode;
10095 rtx tmp, flags;
10096
10097 cmpmode = SELECT_CC_MODE (code, op0, op1);
10098 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10099
10100 /* This is very simple, but making the interface the same as in the
10101 FP case makes the rest of the code easier. */
10102 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10103 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10104
10105 /* Return the test that should be put into the flags user, i.e.
10106 the bcc, scc, or cmov instruction. */
10107 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10108}
10109
10110/* Figure out whether to use ordered or unordered fp comparisons.
10111 Return the appropriate mode to use. */
10112
10113enum machine_mode
10114ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10115{
10116  /* ??? In order to make all comparisons reversible, we do all comparisons
10117     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
10118     all forms of trapping and nontrapping comparisons, we can make inequality
10119     comparisons trapping again, since that results in better code when using
10120     FCOM based compares.  */
10121 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10122}
10123
10124enum machine_mode
10125ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10126{
10127 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10128 return ix86_fp_compare_mode (code);
10129 switch (code)
10130 {
10131 /* Only zero flag is needed. */
10132 case EQ: /* ZF=0 */
10133 case NE: /* ZF!=0 */
10134 return CCZmode;
10135 /* Codes needing carry flag. */
10136 case GEU: /* CF=0 */
10137 case GTU: /* CF=0 & ZF=0 */
10138 case LTU: /* CF=1 */
10139 case LEU: /* CF=1 | ZF=1 */
10140 return CCmode;
10141 /* Codes possibly doable only with sign flag when
10142 comparing against zero. */
10143 case GE: /* SF=OF or SF=0 */
10144 case LT: /* SF<>OF or SF=1 */
10145 if (op1 == const0_rtx)
10146 return CCGOCmode;
10147 else
10148	/* For other cases the carry flag is not required.  */
10149 return CCGCmode;
10150      /* Codes doable only with the sign flag when comparing
10151         against zero, but we lack a jump instruction for them,
10152         so we need to use relational tests against overflow,
10153         which thus needs to be zero.  */
10154 case GT: /* ZF=0 & SF=OF */
10155 case LE: /* ZF=1 | SF<>OF */
10156 if (op1 == const0_rtx)
10157 return CCNOmode;
10158 else
10159 return CCGCmode;
10160      /* The strcmp pattern does (use flags), and combine may ask us for the
10161	 proper mode.  */
10162 case USE:
10163 return CCmode;
10164 default:
10165 gcc_unreachable ();
10166 }
10167}
10168
10169/* Return the fixed registers used for condition codes. */
10170
10171static bool
10172ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10173{
10174 *p1 = FLAGS_REG;
10175 *p2 = FPSR_REG;
10176 return true;
10177}
10178
10179/* If two condition code modes are compatible, return a condition code
10180 mode which is compatible with both. Otherwise, return
10181 VOIDmode. */
10182
10183static enum machine_mode
10184ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10185{
10186 if (m1 == m2)
10187 return m1;
10188
10189 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10190 return VOIDmode;
10191
10192 if ((m1 == CCGCmode && m2 == CCGOCmode)
10193 || (m1 == CCGOCmode && m2 == CCGCmode))
10194 return CCGCmode;
10195
10196 switch (m1)
10197 {
10198 default:
10199 gcc_unreachable ();
10200
10201 case CCmode:
10202 case CCGCmode:
10203 case CCGOCmode:
10204 case CCNOmode:
10205 case CCZmode:
10206 switch (m2)
10207 {
10208 default:
10209 return VOIDmode;
10210
10211 case CCmode:
10212 case CCGCmode:
10213 case CCGOCmode:
10214 case CCNOmode:
10215 case CCZmode:
10216 return CCmode;
10217 }
10218
10219 case CCFPmode:
10220 case CCFPUmode:
10221 /* These are only compatible with themselves, which we already
10222 checked above. */
10223 return VOIDmode;
10224 }
10225}
10226
10227/* Return true if we should use an FCOMI instruction for this fp comparison. */
10228
10229int
10230ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10231{
10232 enum rtx_code swapped_code = swap_condition (code);
10233 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10234 || (ix86_fp_comparison_cost (swapped_code)
10235 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10236}
10237
10238/* Swap, force into registers, or otherwise massage the two operands
10239 to a fp comparison. The operands are updated in place; the new
10240 comparison code is returned. */
10241
10242static enum rtx_code
10243ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10244{
10245 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10246 rtx op0 = *pop0, op1 = *pop1;
10247 enum machine_mode op_mode = GET_MODE (op0);
10248 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10249
10250 /* All of the unordered compare instructions only work on registers.
10251 The same is true of the fcomi compare instructions. The XFmode
10252 compare instructions require registers except when comparing
10253 against zero or when converting operand 1 from fixed point to
10254 floating point. */
10255
10256 if (!is_sse
10257 && (fpcmp_mode == CCFPUmode
10258 || (op_mode == XFmode
10259 && ! (standard_80387_constant_p (op0) == 1
10260 || standard_80387_constant_p (op1) == 1)
10261 && GET_CODE (op1) != FLOAT)
10262 || ix86_use_fcomi_compare (code)))
10263 {
10264 op0 = force_reg (op_mode, op0);
10265 op1 = force_reg (op_mode, op1);
10266 }
10267 else
10268 {
10269 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10270 things around if they appear profitable, otherwise force op0
10271 into a register. */
10272
10273 if (standard_80387_constant_p (op0) == 0
10274 || (GET_CODE (op0) == MEM
10275 && ! (standard_80387_constant_p (op1) == 0
10276 || GET_CODE (op1) == MEM)))
10277 {
10278 rtx tmp;
10279 tmp = op0, op0 = op1, op1 = tmp;
10280 code = swap_condition (code);
10281 }
10282
10283 if (GET_CODE (op0) != REG)
10284 op0 = force_reg (op_mode, op0);
10285
10286 if (CONSTANT_P (op1))
10287 {
10288 int tmp = standard_80387_constant_p (op1);
10289 if (tmp == 0)
10290 op1 = validize_mem (force_const_mem (op_mode, op1));
10291 else if (tmp == 1)
10292 {
10293 if (TARGET_CMOVE)
10294 op1 = force_reg (op_mode, op1);
10295 }
10296 else
10297 op1 = force_reg (op_mode, op1);
10298 }
10299 }
10300
10301 /* Try to rearrange the comparison to make it cheaper. */
10302 if (ix86_fp_comparison_cost (code)
10303 > ix86_fp_comparison_cost (swap_condition (code))
10304 && (GET_CODE (op1) == REG || !no_new_pseudos))
10305 {
10306 rtx tmp;
10307 tmp = op0, op0 = op1, op1 = tmp;
10308 code = swap_condition (code);
10309 if (GET_CODE (op0) != REG)
10310 op0 = force_reg (op_mode, op0);
10311 }
10312
10313 *pop0 = op0;
10314 *pop1 = op1;
10315 return code;
10316}
10317
10318/* Convert comparison codes we use to represent FP comparison to integer
10319 code that will result in proper branch. Return UNKNOWN if no such code
10320 is available. */
10321
10322enum rtx_code
10323ix86_fp_compare_code_to_integer (enum rtx_code code)
10324{
10325 switch (code)
10326 {
10327 case GT:
10328 return GTU;
10329 case GE:
10330 return GEU;
10331 case ORDERED:
10332 case UNORDERED:
10333 return code;
10334 break;
10335 case UNEQ:
10336 return EQ;
10337 break;
10338 case UNLT:
10339 return LTU;
10340 break;
10341 case UNLE:
10342 return LEU;
10343 break;
10344 case LTGT:
10345 return NE;
10346 break;
10347 default:
10348 return UNKNOWN;
10349 }
10350}
10351
10352/* Split comparison code CODE into comparisons we can do using branch
10353/* Split comparison code CODE into comparisons we can do using branch
10354   instructions.  BYPASS_CODE is the comparison code for the branch that will
10355   branch around FIRST_CODE and SECOND_CODE.  If one of the branches
10356   is not required, its value is set to UNKNOWN.
10357
10358void
10359ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10360 enum rtx_code *first_code,
10361 enum rtx_code *second_code)
10362{
10363 *first_code = code;
10364 *bypass_code = UNKNOWN;
10365 *second_code = UNKNOWN;
10366
10367 /* The fcomi comparison sets flags as follows:
10368
10369 cmp ZF PF CF
10370 > 0 0 0
10371 < 0 0 1
10372 = 1 0 0
10373 un 1 1 1 */
10374
10375 switch (code)
10376 {
10377 case GT: /* GTU - CF=0 & ZF=0 */
10378 case GE: /* GEU - CF=0 */
10379 case ORDERED: /* PF=0 */
10380 case UNORDERED: /* PF=1 */
10381 case UNEQ: /* EQ - ZF=1 */
10382 case UNLT: /* LTU - CF=1 */
10383 case UNLE: /* LEU - CF=1 | ZF=1 */
10384 case LTGT: /* EQ - ZF=0 */
10385 break;
10386 case LT: /* LTU - CF=1 - fails on unordered */
10387 *first_code = UNLT;
10388 *bypass_code = UNORDERED;
10389 break;
10390 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10391 *first_code = UNLE;
10392 *bypass_code = UNORDERED;
10393 break;
10394 case EQ: /* EQ - ZF=1 - fails on unordered */
10395 *first_code = UNEQ;
10396 *bypass_code = UNORDERED;
10397 break;
10398 case NE: /* NE - ZF=0 - fails on unordered */
10399 *first_code = LTGT;
10400 *second_code = UNORDERED;
10401 break;
10402 case UNGE: /* GEU - CF=0 - fails on unordered */
10403 *first_code = GE;
10404 *second_code = UNORDERED;
10405 break;
10406 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10407 *first_code = GT;
10408 *second_code = UNORDERED;
10409 break;
10410 default:
10411 gcc_unreachable ();
10412 }
10413 if (!TARGET_IEEE_FP)
10414 {
10415 *second_code = UNKNOWN;
10416 *bypass_code = UNKNOWN;
10417 }
10418}
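
/* Example of the splitting above under TARGET_IEEE_FP: for CODE == EQ we
   get FIRST_CODE = UNEQ and BYPASS_CODE = UNORDERED, i.e. roughly

       jp   .Lskip          ; unordered operands -> EQ is false
       je   .Ltarget        ; ZF set -> equal
     .Lskip:

   whereas NE uses a SECOND branch instead: branch on LTGT and also branch
   on UNORDERED, since NE is true for unordered operands.  */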
10419
10420/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10421   All following functions use the number of instructions as the cost metric.
10422   In the future this should be tweaked to compute bytes for optimize_size and
10423   take into account the performance of various instructions on various CPUs.  */
10424static int
10425ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10426{
10427 if (!TARGET_IEEE_FP)
10428 return 4;
10429 /* The cost of code output by ix86_expand_fp_compare. */
10430 switch (code)
10431 {
10432 case UNLE:
10433 case UNLT:
10434 case LTGT:
10435 case GT:
10436 case GE:
10437 case UNORDERED:
10438 case ORDERED:
10439 case UNEQ:
10440 return 4;
10441 break;
10442 case LT:
10443 case NE:
10444 case EQ:
10445 case UNGE:
10446 return 5;
10447 break;
10448 case LE:
10449 case UNGT:
10450 return 6;
10451 break;
10452 default:
10453 gcc_unreachable ();
10454 }
10455}
10456
10457/* Return cost of comparison done using fcomi operation.
10458 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10459static int
10460ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10461{
10462 enum rtx_code bypass_code, first_code, second_code;
10463 /* Return arbitrarily high cost when instruction is not supported - this
10464 prevents gcc from using it. */
10465 if (!TARGET_CMOVE)
10466 return 1024;
10467 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10468 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10469}
10470
10471/* Return cost of comparison done using sahf operation.
10472 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10473static int
10474ix86_fp_comparison_sahf_cost (enum rtx_code code)
10475{
10476 enum rtx_code bypass_code, first_code, second_code;
10477  /* Return arbitrarily high cost when instruction is not preferred - this
10478     prevents gcc from using it.  */
10479 if (!TARGET_USE_SAHF && !optimize_size)
10480 return 1024;
10481 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10482 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10483}
10484
10485/* Compute cost of the comparison done using any method.
10486 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10487static int
10488ix86_fp_comparison_cost (enum rtx_code code)
10489{
10490 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10491 int min;
10492
10493 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10494 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10495
10496 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10497 if (min > sahf_cost)
10498 min = sahf_cost;
10499 if (min > fcomi_cost)
10500 min = fcomi_cost;
10501 return min;
10502}
10503
10504/* Generate insn patterns to do a floating point compare of OPERANDS. */
10505
10506static rtx
10507ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10508 rtx *second_test, rtx *bypass_test)
10509{
10510 enum machine_mode fpcmp_mode, intcmp_mode;
10511 rtx tmp, tmp2;
10512 int cost = ix86_fp_comparison_cost (code);
10513 enum rtx_code bypass_code, first_code, second_code;
10514
10515 fpcmp_mode = ix86_fp_compare_mode (code);
10516 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10517
10518 if (second_test)
10519 *second_test = NULL_RTX;
10520 if (bypass_test)
10521 *bypass_test = NULL_RTX;
10522
10523 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10524
10525 /* Do fcomi/sahf based test when profitable. */
10526 if ((bypass_code == UNKNOWN || bypass_test)
10527 && (second_code == UNKNOWN || second_test)
10528 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10529 {
10530 if (TARGET_CMOVE)
10531 {
10532 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10533 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10534 tmp);
10535 emit_insn (tmp);
10536 }
10537 else
10538 {
10539 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10540 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10541 if (!scratch)
10542 scratch = gen_reg_rtx (HImode);
10543 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10544 emit_insn (gen_x86_sahf_1 (scratch));
10545 }
10546
10547 /* The FP codes work out to act like unsigned. */
10548 intcmp_mode = fpcmp_mode;
10549 code = first_code;
10550 if (bypass_code != UNKNOWN)
10551 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10552 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10553 const0_rtx);
10554 if (second_code != UNKNOWN)
10555 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10556 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10557 const0_rtx);
10558 }
10559 else
10560 {
10561 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10562 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10563 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10564 if (!scratch)
10565 scratch = gen_reg_rtx (HImode);
10566 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10567
10568 /* In the unordered case, we have to check C2 for NaN's, which
10569 doesn't happen to work out to anything nice combination-wise.
10570 So do some bit twiddling on the value we've got in AH to come
10571 up with an appropriate set of condition codes. */
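      /* For reference: after "fnstsw %ax" the condition bits sit in AH as
	 C0 = 0x01, C2 = 0x04 and C3 = 0x40 (sahf would map them to CF, PF
	 and ZF).  The masks below combine them, e.g. 0x45 = C0|C2|C3 and
	 0x44 = C2|C3.  */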
10572
10573 intcmp_mode = CCNOmode;
10574 switch (code)
10575 {
10576 case GT:
10577 case UNGT:
10578 if (code == GT || !TARGET_IEEE_FP)
10579 {
10580 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10581 code = EQ;
10582 }
10583 else
10584 {
10585 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10586 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10587 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10588 intcmp_mode = CCmode;
10589 code = GEU;
10590 }
10591 break;
10592 case LT:
10593 case UNLT:
10594 if (code == LT && TARGET_IEEE_FP)
10595 {
10596 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10597 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10598 intcmp_mode = CCmode;
10599 code = EQ;
10600 }
10601 else
10602 {
10603 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10604 code = NE;
10605 }
10606 break;
10607 case GE:
10608 case UNGE:
10609 if (code == GE || !TARGET_IEEE_FP)
10610 {
10611 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10612 code = EQ;
10613 }
10614 else
10615 {
10616 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10617 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10618 GEN_INT (0x01)));
10619 code = NE;
10620 }
10621 break;
10622 case LE:
10623 case UNLE:
10624 if (code == LE && TARGET_IEEE_FP)
10625 {
10626 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10627 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10628 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10629 intcmp_mode = CCmode;
10630 code = LTU;
10631 }
10632 else
10633 {
10634 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10635 code = NE;
10636 }
10637 break;
10638 case EQ:
10639 case UNEQ:
10640 if (code == EQ && TARGET_IEEE_FP)
10641 {
10642 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10643 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10644 intcmp_mode = CCmode;
10645 code = EQ;
10646 }
10647 else
10648 {
10649 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10650 code = NE;
10651 break;
10652 }
10653 break;
10654 case NE:
10655 case LTGT:
10656 if (code == NE && TARGET_IEEE_FP)
10657 {
10658 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10659 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10660 GEN_INT (0x40)));
10661 code = NE;
10662 }
10663 else
10664 {
10665 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10666 code = EQ;
10667 }
10668 break;
10669
10670 case UNORDERED:
10671 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10672 code = NE;
10673 break;
10674 case ORDERED:
10675 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10676 code = EQ;
10677 break;
10678
10679 default:
10680 gcc_unreachable ();
10681 }
10682 }
10683
10684 /* Return the test that should be put into the flags user, i.e.
10685 the bcc, scc, or cmov instruction. */
10686 return gen_rtx_fmt_ee (code, VOIDmode,
10687 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10688 const0_rtx);
10689}
10690
10691rtx
10692ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10693{
10694 rtx op0, op1, ret;
10695 op0 = ix86_compare_op0;
10696 op1 = ix86_compare_op1;
10697
10698 if (second_test)
10699 *second_test = NULL_RTX;
10700 if (bypass_test)
10701 *bypass_test = NULL_RTX;
10702
10703 if (ix86_compare_emitted)
10704 {
10705 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10706 ix86_compare_emitted = NULL_RTX;
10707 }
10708 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10709 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10710 second_test, bypass_test);
10711 else
10712 ret = ix86_expand_int_compare (code, op0, op1);
10713
10714 return ret;
10715}
10716
10717/* Return true if the CODE will result in a nontrivial jump sequence.  */
10718bool
10719ix86_fp_jump_nontrivial_p (enum rtx_code code)
10720{
10721 enum rtx_code bypass_code, first_code, second_code;
10722 if (!TARGET_CMOVE)
10723 return true;
10724 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10725 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10726}
10727
10728void
10729ix86_expand_branch (enum rtx_code code, rtx label)
10730{
10731 rtx tmp;
10732
10733 /* If we have emitted a compare insn, go straight to simple.
10734 ix86_expand_compare won't emit anything if ix86_compare_emitted
10735 is non NULL. */
10736 if (ix86_compare_emitted)
10737 goto simple;
10738
10739 switch (GET_MODE (ix86_compare_op0))
10740 {
10741 case QImode:
10742 case HImode:
10743 case SImode:
10744 simple:
10745 tmp = ix86_expand_compare (code, NULL, NULL);
10746 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10747 gen_rtx_LABEL_REF (VOIDmode, label),
10748 pc_rtx);
10749 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10750 return;
10751
10752 case SFmode:
10753 case DFmode:
10754 case XFmode:
10755 {
10756 rtvec vec;
10757 int use_fcomi;
10758 enum rtx_code bypass_code, first_code, second_code;
10759
10760 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10761 &ix86_compare_op1);
10762
10763 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10764
10765	/* Check whether we will use the natural sequence with one jump.  If
10766	   so, we can expand the jump early.  Otherwise delay expansion by
10767	   creating a compound insn so as not to confuse the optimizers.  */
10768 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10769 && TARGET_CMOVE)
10770 {
10771 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10772 gen_rtx_LABEL_REF (VOIDmode, label),
10773 pc_rtx, NULL_RTX, NULL_RTX);
10774 }
10775 else
10776 {
10777 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10778 ix86_compare_op0, ix86_compare_op1);
10779 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10780 gen_rtx_LABEL_REF (VOIDmode, label),
10781 pc_rtx);
10782 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10783
10784 use_fcomi = ix86_use_fcomi_compare (code);
10785 vec = rtvec_alloc (3 + !use_fcomi);
10786 RTVEC_ELT (vec, 0) = tmp;
10787 RTVEC_ELT (vec, 1)
10788 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10789 RTVEC_ELT (vec, 2)
10790 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10791 if (! use_fcomi)
10792 RTVEC_ELT (vec, 3)
10793 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10794
10795 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10796 }
10797 return;
10798 }
10799
10800 case DImode:
10801 if (TARGET_64BIT)
10802 goto simple;
10803 case TImode:
10804 /* Expand DImode branch into multiple compare+branch. */
10805 {
10806 rtx lo[2], hi[2], label2;
10807 enum rtx_code code1, code2, code3;
10808 enum machine_mode submode;
10809
10810 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10811 {
10812 tmp = ix86_compare_op0;
10813 ix86_compare_op0 = ix86_compare_op1;
10814 ix86_compare_op1 = tmp;
10815 code = swap_condition (code);
10816 }
10817 if (GET_MODE (ix86_compare_op0) == DImode)
10818 {
10819 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10820 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10821 submode = SImode;
10822 }
10823 else
10824 {
10825 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10826 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10827 submode = DImode;
10828 }
10829
10830 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10831 avoid two branches. This costs one extra insn, so disable when
10832 optimizing for size. */
10833
10834 if ((code == EQ || code == NE)
10835 && (!optimize_size
10836 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10837 {
10838 rtx xor0, xor1;
10839
10840 xor1 = hi[0];
10841 if (hi[1] != const0_rtx)
10842 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10843 NULL_RTX, 0, OPTAB_WIDEN);
10844
10845 xor0 = lo[0];
10846 if (lo[1] != const0_rtx)
10847 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10848 NULL_RTX, 0, OPTAB_WIDEN);
10849
10850 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10851 NULL_RTX, 0, OPTAB_WIDEN);
10852
10853 ix86_compare_op0 = tmp;
10854 ix86_compare_op1 = const0_rtx;
10855 ix86_expand_branch (code, label);
10856 return;
10857 }
10858
10859	/* Otherwise, if we are doing a less-than or greater-than-or-equal
10860	   comparison, op1 is a constant and the low word is zero, then we can
10861	   just examine the high word.  */
10862
10863 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10864 switch (code)
10865 {
10866 case LT: case LTU: case GE: case GEU:
10867 ix86_compare_op0 = hi[0];
10868 ix86_compare_op1 = hi[1];
10869 ix86_expand_branch (code, label);
10870 return;
10871 default:
10872 break;
10873 }
10874
10875 /* Otherwise, we need two or three jumps. */
10876
10877 label2 = gen_label_rtx ();
10878
10879 code1 = code;
10880 code2 = swap_condition (code);
10881 code3 = unsigned_condition (code);
10882
10883 switch (code)
10884 {
10885 case LT: case GT: case LTU: case GTU:
10886 break;
10887
10888 case LE: code1 = LT; code2 = GT; break;
10889 case GE: code1 = GT; code2 = LT; break;
10890 case LEU: code1 = LTU; code2 = GTU; break;
10891 case GEU: code1 = GTU; code2 = LTU; break;
10892
10893 case EQ: code1 = UNKNOWN; code2 = NE; break;
10894 case NE: code2 = UNKNOWN; break;
10895
10896 default:
10897 gcc_unreachable ();
10898 }
10899
10900 /*
10901 * a < b =>
10902 * if (hi(a) < hi(b)) goto true;
10903 * if (hi(a) > hi(b)) goto false;
10904 * if (lo(a) < lo(b)) goto true;
10905 * false:
10906 */
10907
10908 ix86_compare_op0 = hi[0];
10909 ix86_compare_op1 = hi[1];
10910
10911 if (code1 != UNKNOWN)
10912 ix86_expand_branch (code1, label);
10913 if (code2 != UNKNOWN)
10914 ix86_expand_branch (code2, label2);
10915
10916 ix86_compare_op0 = lo[0];
10917 ix86_compare_op1 = lo[1];
10918 ix86_expand_branch (code3, label);
10919
10920 if (code2 != UNKNOWN)
10921 emit_label (label2);
10922 return;
10923 }
10924
10925 default:
10926 gcc_unreachable ();
10927 }
10928}
10929
10930/* Split branch based on floating point condition. */
10931void
10932ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10933 rtx target1, rtx target2, rtx tmp, rtx pushed)
10934{
10935 rtx second, bypass;
10936 rtx label = NULL_RTX;
10937 rtx condition;
10938 int bypass_probability = -1, second_probability = -1, probability = -1;
10939 rtx i;
10940
10941 if (target2 != pc_rtx)
10942 {
10943 rtx tmp = target2;
10944 code = reverse_condition_maybe_unordered (code);
10945 target2 = target1;
10946 target1 = tmp;
10947 }
10948
10949 condition = ix86_expand_fp_compare (code, op1, op2,
10950 tmp, &second, &bypass);
10951
10952 /* Remove pushed operand from stack. */
10953 if (pushed)
10954 ix86_free_from_memory (GET_MODE (pushed));
10955
10956 if (split_branch_probability >= 0)
10957 {
10958      /* Distribute the probabilities across the jumps.
10959	 Assume that BYPASS and SECOND always test
10960	 for UNORDERED.  */
10961 probability = split_branch_probability;
10962
10963      /* A value of 1 is low enough that the probability does not need
10964	 to be updated.  Later we may run some experiments and see
10965	 whether unordered values are more frequent in practice.  */
10966 if (bypass)
10967 bypass_probability = 1;
10968 if (second)
10969 second_probability = 1;
10970 }
10971 if (bypass != NULL_RTX)
10972 {
10973 label = gen_label_rtx ();
10974 i = emit_jump_insn (gen_rtx_SET
10975 (VOIDmode, pc_rtx,
10976 gen_rtx_IF_THEN_ELSE (VOIDmode,
10977 bypass,
10978 gen_rtx_LABEL_REF (VOIDmode,
10979 label),
10980 pc_rtx)));
10981 if (bypass_probability >= 0)
10982 REG_NOTES (i)
10983 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10984 GEN_INT (bypass_probability),
10985 REG_NOTES (i));
10986 }
10987 i = emit_jump_insn (gen_rtx_SET
10988 (VOIDmode, pc_rtx,
10989 gen_rtx_IF_THEN_ELSE (VOIDmode,
10990 condition, target1, target2)));
10991 if (probability >= 0)
10992 REG_NOTES (i)
10993 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10994 GEN_INT (probability),
10995 REG_NOTES (i));
10996 if (second != NULL_RTX)
10997 {
10998 i = emit_jump_insn (gen_rtx_SET
10999 (VOIDmode, pc_rtx,
11000 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11001 target2)));
11002 if (second_probability >= 0)
11003 REG_NOTES (i)
11004 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11005 GEN_INT (second_probability),
11006 REG_NOTES (i));
11007 }
11008 if (label != NULL_RTX)
11009 emit_label (label);
11010}
11011
11012int
11013ix86_expand_setcc (enum rtx_code code, rtx dest)
11014{
11015 rtx ret, tmp, tmpreg, equiv;
11016 rtx second_test, bypass_test;
11017
11018 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11019 return 0; /* FAIL */
11020
11021 gcc_assert (GET_MODE (dest) == QImode);
11022
11023 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11024 PUT_MODE (ret, QImode);
11025
11026 tmp = dest;
11027 tmpreg = dest;
11028
11029 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11030 if (bypass_test || second_test)
11031 {
11032 rtx test = second_test;
11033 int bypass = 0;
11034 rtx tmp2 = gen_reg_rtx (QImode);
11035 if (bypass_test)
11036 {
11037 gcc_assert (!second_test);
11038 test = bypass_test;
11039 bypass = 1;
11040 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11041 }
11042 PUT_MODE (test, QImode);
11043 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11044
11045 if (bypass)
11046 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11047 else
11048 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11049 }
11050
11051 /* Attach a REG_EQUAL note describing the comparison result. */
11052 if (ix86_compare_op0 && ix86_compare_op1)
11053 {
11054 equiv = simplify_gen_relational (code, QImode,
11055 GET_MODE (ix86_compare_op0),
11056 ix86_compare_op0, ix86_compare_op1);
11057 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11058 }
11059
11060 return 1; /* DONE */
11061}
11062
11063/* Expand a comparison setting or clearing the carry flag.  Return true when
11064   successful, and set *POP to the resulting comparison operation.  */
11065static bool
11066ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11067{
11068 enum machine_mode mode =
11069 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11070
11071  /* Do not handle DImode compares that go through a special path.  Also, we
11072     can't deal with FP compares yet.  This would be possible to add.  */
11073 if (mode == (TARGET_64BIT ? TImode : DImode))
11074 return false;
11075 if (FLOAT_MODE_P (mode))
11076 {
11077 rtx second_test = NULL, bypass_test = NULL;
11078 rtx compare_op, compare_seq;
11079
11080 /* Shortcut: following common codes never translate into carry flag compares. */
11081 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11082 || code == ORDERED || code == UNORDERED)
11083 return false;
11084
11085 /* These comparisons require zero flag; swap operands so they won't. */
11086 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11087 && !TARGET_IEEE_FP)
11088 {
11089 rtx tmp = op0;
11090 op0 = op1;
11091 op1 = tmp;
11092 code = swap_condition (code);
11093 }
11094
11095      /* Try to expand the comparison and verify that we end up with a carry-flag
11096	 based comparison.  This fails to be true only when we decide to expand the
11097	 comparison using arithmetic, which is not a common scenario.  */
11098 start_sequence ();
11099 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11100 &second_test, &bypass_test);
11101 compare_seq = get_insns ();
11102 end_sequence ();
11103
11104 if (second_test || bypass_test)
11105 return false;
11106 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11107 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11108 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11109 else
11110 code = GET_CODE (compare_op);
11111 if (code != LTU && code != GEU)
11112 return false;
11113 emit_insn (compare_seq);
11114 *pop = compare_op;
11115 return true;
11116 }
11117 if (!INTEGRAL_MODE_P (mode))
11118 return false;
11119 switch (code)
11120 {
11121 case LTU:
11122 case GEU:
11123 break;
11124
11125 /* Convert a==0 into (unsigned)a<1. */
11126 case EQ:
11127 case NE:
11128 if (op1 != const0_rtx)
11129 return false;
11130 op1 = const1_rtx;
11131 code = (code == EQ ? LTU : GEU);
11132 break;
11133
11134      /* Convert a>b into b<a or a>=b+1.  */
11135 case GTU:
11136 case LEU:
11137 if (GET_CODE (op1) == CONST_INT)
11138 {
11139 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11140	  /* Bail out on overflow.  We could still swap the operands, but that
11141	     would force loading the constant into a register.  */
11142 if (op1 == const0_rtx
11143 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11144 return false;
11145 code = (code == GTU ? GEU : LTU);
11146 }
11147 else
11148 {
11149 rtx tmp = op1;
11150 op1 = op0;
11151 op0 = tmp;
11152 code = (code == GTU ? LTU : GEU);
11153 }
11154 break;
11155
11156 /* Convert a>=0 into (unsigned)a<0x80000000. */
11157 case LT:
11158 case GE:
11159 if (mode == DImode || op1 != const0_rtx)
11160 return false;
11161 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11162 code = (code == LT ? GEU : LTU);
11163 break;
11164 case LE:
11165 case GT:
11166 if (mode == DImode || op1 != constm1_rtx)
11167 return false;
11168 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11169 code = (code == LE ? GEU : LTU);
11170 break;
11171
11172 default:
11173 return false;
11174 }
11175  /* Swapping operands may cause a constant to appear as the first operand.  */
11176 if (!nonimmediate_operand (op0, VOIDmode))
11177 {
11178 if (no_new_pseudos)
11179 return false;
11180 op0 = force_reg (mode, op0);
11181 }
11182 ix86_compare_op0 = op0;
11183 ix86_compare_op1 = op1;
11184 *pop = ix86_expand_compare (code, NULL, NULL);
11185 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11186 return true;
11187}
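
/* Example: for "r = (a == 0) ? ct : cf" the EQ above is rewritten as the
   unsigned compare (unsigned) a < 1, so the answer lands in the carry flag
   and ix86_expand_int_movcc below can use roughly

     cmpl  $1, a
     sbbl  r, r            ; r = (a == 0) ? -1 : 0

   followed by add/and/or/not to turn the -1/0 mask into ct/cf.  */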
11188
11189int
11190ix86_expand_int_movcc (rtx operands[])
11191{
11192 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11193 rtx compare_seq, compare_op;
11194 rtx second_test, bypass_test;
11195 enum machine_mode mode = GET_MODE (operands[0]);
11196  bool sign_bit_compare_p = false;
11197
11198 start_sequence ();
11199 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11200 compare_seq = get_insns ();
11201 end_sequence ();
11202
11203 compare_code = GET_CODE (compare_op);
11204
11205 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11206 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11207 sign_bit_compare_p = true;
11208
11209 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11210 HImode insns, we'd be swallowed in word prefix ops. */
11211
11212 if ((mode != HImode || TARGET_FAST_PREFIX)
11213 && (mode != (TARGET_64BIT ? TImode : DImode))
11214 && GET_CODE (operands[2]) == CONST_INT
11215 && GET_CODE (operands[3]) == CONST_INT)
11216 {
11217 rtx out = operands[0];
11218 HOST_WIDE_INT ct = INTVAL (operands[2]);
11219 HOST_WIDE_INT cf = INTVAL (operands[3]);
11220 HOST_WIDE_INT diff;
11221
11222 diff = ct - cf;
11223      /* Sign bit compares are better done using shifts than by using
11224	 sbb.  */
11225 if (sign_bit_compare_p
11226 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11227 ix86_compare_op1, &compare_op))
11228 {
11229 /* Detect overlap between destination and compare sources. */
11230 rtx tmp = out;
11231
11232 if (!sign_bit_compare_p)
11233 {
11234 bool fpcmp = false;
11235
11236 compare_code = GET_CODE (compare_op);
11237
11238 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11239 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11240 {
11241 fpcmp = true;
11242 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11243 }
11244
11245 /* To simplify rest of code, restrict to the GEU case. */
11246 if (compare_code == LTU)
11247 {
11248 HOST_WIDE_INT tmp = ct;
11249 ct = cf;
11250 cf = tmp;
11251 compare_code = reverse_condition (compare_code);
11252 code = reverse_condition (code);
11253 }
11254 else
11255 {
11256 if (fpcmp)
11257 PUT_CODE (compare_op,
11258 reverse_condition_maybe_unordered
11259 (GET_CODE (compare_op)));
11260 else
11261 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11262 }
11263 diff = ct - cf;
11264
11265 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11266 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11267 tmp = gen_reg_rtx (mode);
11268
11269 if (mode == DImode)
11270 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11271 else
11272 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11273 }
11274 else
11275 {
11276 if (code == GT || code == GE)
11277 code = reverse_condition (code);
11278 else
11279 {
11280 HOST_WIDE_INT tmp = ct;
11281 ct = cf;
11282 cf = tmp;
11283 diff = ct - cf;
11284 }
11285 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11286 ix86_compare_op1, VOIDmode, 0, -1);
11287 }
11288
11289 if (diff == 1)
11290 {
11291 /*
11292 * cmpl op0,op1
11293 * sbbl dest,dest
11294 * [addl dest, ct]
11295 *
11296 * Size 5 - 8.
11297 */
11298 if (ct)
11299 tmp = expand_simple_binop (mode, PLUS,
11300 tmp, GEN_INT (ct),
11301 copy_rtx (tmp), 1, OPTAB_DIRECT);
11302 }
11303 else if (cf == -1)
11304 {
11305 /*
11306 * cmpl op0,op1
11307 * sbbl dest,dest
11308 * orl $ct, dest
11309 *
11310 * Size 8.
11311 */
11312 tmp = expand_simple_binop (mode, IOR,
11313 tmp, GEN_INT (ct),
11314 copy_rtx (tmp), 1, OPTAB_DIRECT);
11315 }
11316 else if (diff == -1 && ct)
11317 {
11318 /*
11319 * cmpl op0,op1
11320 * sbbl dest,dest
11321 * notl dest
11322 * [addl dest, cf]
11323 *
11324 * Size 8 - 11.
11325 */
11326 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11327 if (cf)
11328 tmp = expand_simple_binop (mode, PLUS,
11329 copy_rtx (tmp), GEN_INT (cf),
11330 copy_rtx (tmp), 1, OPTAB_DIRECT);
11331 }
11332 else
11333 {
11334 /*
11335 * cmpl op0,op1
11336 * sbbl dest,dest
11337 * [notl dest]
11338 * andl cf - ct, dest
11339 * [addl dest, ct]
11340 *
11341 * Size 8 - 11.
11342 */
11343
11344 if (cf == 0)
11345 {
11346 cf = ct;
11347 ct = 0;
11348 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11349 }
11350
11351 tmp = expand_simple_binop (mode, AND,
11352 copy_rtx (tmp),
11353 gen_int_mode (cf - ct, mode),
11354 copy_rtx (tmp), 1, OPTAB_DIRECT);
11355 if (ct)
11356 tmp = expand_simple_binop (mode, PLUS,
11357 copy_rtx (tmp), GEN_INT (ct),
11358 copy_rtx (tmp), 1, OPTAB_DIRECT);
11359 }
11360
11361 if (!rtx_equal_p (tmp, out))
11362 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11363
11364 return 1; /* DONE */
11365 }
11366
11367 if (diff < 0)
11368 {
11369 HOST_WIDE_INT tmp;
11370 tmp = ct, ct = cf, cf = tmp;
11371 diff = -diff;
11372 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11373 {
11374	      /* We may be reversing an unordered compare to a normal compare,
11375	         which is not valid in general (we may convert a non-trapping
11376	         condition to a trapping one); however, on i386 we currently
11377	         emit all comparisons unordered.  */
11378 compare_code = reverse_condition_maybe_unordered (compare_code);
11379 code = reverse_condition_maybe_unordered (code);
11380 }
11381 else
11382 {
11383 compare_code = reverse_condition (compare_code);
11384 code = reverse_condition (code);
11385 }
11386 }
11387
11388 compare_code = UNKNOWN;
11389 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11390 && GET_CODE (ix86_compare_op1) == CONST_INT)
11391 {
11392 if (ix86_compare_op1 == const0_rtx
11393 && (code == LT || code == GE))
11394 compare_code = code;
11395 else if (ix86_compare_op1 == constm1_rtx)
11396 {
11397 if (code == LE)
11398 compare_code = LT;
11399 else if (code == GT)
11400 compare_code = GE;
11401 }
11402 }
11403
11404 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11405 if (compare_code != UNKNOWN
11406 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11407 && (cf == -1 || ct == -1))
11408 {
11409 /* If lea code below could be used, only optimize
11410 if it results in a 2 insn sequence. */
11411
11412 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11413 || diff == 3 || diff == 5 || diff == 9)
11414 || (compare_code == LT && ct == -1)
11415 || (compare_code == GE && cf == -1))
11416 {
11417 /*
11418 * notl op1 (if necessary)
11419 * sarl $31, op1
11420 * orl cf, op1
11421 */
11422 if (ct != -1)
11423 {
11424 cf = ct;
11425 ct = -1;
11426 code = reverse_condition (code);
11427 }
11428
11429 out = emit_store_flag (out, code, ix86_compare_op0,
11430 ix86_compare_op1, VOIDmode, 0, -1);
11431
11432 out = expand_simple_binop (mode, IOR,
11433 out, GEN_INT (cf),
11434 out, 1, OPTAB_DIRECT);
11435 if (out != operands[0])
11436 emit_move_insn (operands[0], out);
11437
11438 return 1; /* DONE */
11439 }
11440 }
11441
11442
11443 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11444 || diff == 3 || diff == 5 || diff == 9)
11445 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11446 && (mode != DImode
11447 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11448 {
11449 /*
11450 * xorl dest,dest
11451 * cmpl op1,op2
11452 * setcc dest
11453 * lea cf(dest*(ct-cf)),dest
11454 *
11455 * Size 14.
11456 *
11457 * This also catches the degenerate setcc-only case.
11458 */
11459
11460 rtx tmp;
11461 int nops;
11462
11463 out = emit_store_flag (out, code, ix86_compare_op0,
11464 ix86_compare_op1, VOIDmode, 0, 1);
11465
11466 nops = 0;
11467	  /* On x86_64 the lea instruction operates on Pmode, so we need
11468	     to get the arithmetic done in the proper mode to match.  */
11469 if (diff == 1)
11470 tmp = copy_rtx (out);
11471 else
11472 {
11473 rtx out1;
11474 out1 = copy_rtx (out);
11475 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11476 nops++;
11477 if (diff & 1)
11478 {
11479 tmp = gen_rtx_PLUS (mode, tmp, out1);
11480 nops++;
11481 }
11482 }
11483 if (cf != 0)
11484 {
11485 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11486 nops++;
11487 }
11488 if (!rtx_equal_p (tmp, out))
11489 {
11490 if (nops == 1)
11491 out = force_operand (tmp, copy_rtx (out));
11492 else
11493 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11494 }
11495 if (!rtx_equal_p (out, operands[0]))
11496 emit_move_insn (operands[0], copy_rtx (out));
11497
11498 return 1; /* DONE */
11499 }
11500
11501 /*
11502 * General case: Jumpful:
11503 * xorl dest,dest cmpl op1, op2
11504 * cmpl op1, op2 movl ct, dest
11505 * setcc dest jcc 1f
11506 * decl dest movl cf, dest
11507 * andl (cf-ct),dest 1:
11508 * addl ct,dest
11509 *
11510 * Size 20. Size 14.
11511 *
11512 * This is reasonably steep, but branch mispredict costs are
11513 * high on modern cpus, so consider failing only if optimizing
11514 * for space.
11515 */
11516
11517 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11518 && BRANCH_COST >= 2)
11519 {
11520 if (cf == 0)
11521 {
11522 cf = ct;
11523 ct = 0;
11524 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11525	    /* We may be reversing an unordered compare to a normal compare,
11526	       which is not valid in general (we may convert a non-trapping
11527	       condition to a trapping one); however, on i386 we currently
11528	       emit all comparisons unordered.  */
11529 code = reverse_condition_maybe_unordered (code);
11530 else
11531 {
11532 code = reverse_condition (code);
11533 if (compare_code != UNKNOWN)
11534 compare_code = reverse_condition (compare_code);
11535 }
11536 }
11537
11538 if (compare_code != UNKNOWN)
11539 {
11540 /* notl op1 (if needed)
11541 sarl $31, op1
11542 andl (cf-ct), op1
11543 addl ct, op1
11544
11545 For x < 0 (resp. x <= -1) there will be no notl,
11546 so if possible swap the constants to get rid of the
11547 complement.
11548 True/false will be -1/0 while code below (store flag
11549 followed by decrement) is 0/-1, so the constants need
11550 to be exchanged once more. */
11551
11552 if (compare_code == GE || !cf)
11553 {
11554 code = reverse_condition (code);
11555 compare_code = LT;
11556 }
11557 else
11558 {
11559 HOST_WIDE_INT tmp = cf;
11560 cf = ct;
11561 ct = tmp;
11562 }
11563
11564 out = emit_store_flag (out, code, ix86_compare_op0,
11565 ix86_compare_op1, VOIDmode, 0, -1);
11566 }
11567 else
11568 {
11569 out = emit_store_flag (out, code, ix86_compare_op0,
11570 ix86_compare_op1, VOIDmode, 0, 1);
11571
11572 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11573 copy_rtx (out), 1, OPTAB_DIRECT);
11574 }
11575
11576 out = expand_simple_binop (mode, AND, copy_rtx (out),
11577 gen_int_mode (cf - ct, mode),
11578 copy_rtx (out), 1, OPTAB_DIRECT);
11579 if (ct)
11580 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11581 copy_rtx (out), 1, OPTAB_DIRECT);
11582 if (!rtx_equal_p (out, operands[0]))
11583 emit_move_insn (operands[0], copy_rtx (out));
11584
11585 return 1; /* DONE */
11586 }
11587 }
11588
11589 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11590 {
11591 /* Try a few things more with specific constants and a variable. */
11592
11593 optab op;
11594 rtx var, orig_out, out, tmp;
11595
11596 if (BRANCH_COST <= 2)
11597 return 0; /* FAIL */
11598
11599 /* If one of the two operands is an interesting constant, load a
11600 constant with the above and mask it in with a logical operation. */
11601
11602 if (GET_CODE (operands[2]) == CONST_INT)
11603 {
11604 var = operands[3];
11605 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11606 operands[3] = constm1_rtx, op = and_optab;
11607 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11608 operands[3] = const0_rtx, op = ior_optab;
11609 else
11610 return 0; /* FAIL */
11611 }
11612 else if (GET_CODE (operands[3]) == CONST_INT)
11613 {
11614 var = operands[2];
11615 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11616 operands[2] = constm1_rtx, op = and_optab;
11617	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11618 operands[2] = const0_rtx, op = ior_optab;
11619 else
11620 return 0; /* FAIL */
11621 }
11622 else
11623 return 0; /* FAIL */
11624
11625 orig_out = operands[0];
11626 tmp = gen_reg_rtx (mode);
11627 operands[0] = tmp;
11628
11629 /* Recurse to get the constant loaded. */
11630 if (ix86_expand_int_movcc (operands) == 0)
11631 return 0; /* FAIL */
11632
11633 /* Mask in the interesting variable. */
11634 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11635 OPTAB_WIDEN);
11636 if (!rtx_equal_p (out, orig_out))
11637 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11638
11639 return 1; /* DONE */
11640 }
11641
11642 /*
11643 * For comparison with above,
11644 *
11645 * movl cf,dest
11646 * movl ct,tmp
11647 * cmpl op1,op2
11648 * cmovcc tmp,dest
11649 *
11650 * Size 15.
11651 */
11652
11653 if (! nonimmediate_operand (operands[2], mode))
11654 operands[2] = force_reg (mode, operands[2]);
11655 if (! nonimmediate_operand (operands[3], mode))
11656 operands[3] = force_reg (mode, operands[3]);
11657
11658 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11659 {
11660 rtx tmp = gen_reg_rtx (mode);
11661 emit_move_insn (tmp, operands[3]);
11662 operands[3] = tmp;
11663 }
11664 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11665 {
11666 rtx tmp = gen_reg_rtx (mode);
11667 emit_move_insn (tmp, operands[2]);
11668 operands[2] = tmp;
11669 }
11670
11671 if (! register_operand (operands[2], VOIDmode)
11672 && (mode == QImode
11673 || ! register_operand (operands[3], VOIDmode)))
11674 operands[2] = force_reg (mode, operands[2]);
11675
11676 if (mode == QImode
11677 && ! register_operand (operands[3], VOIDmode))
11678 operands[3] = force_reg (mode, operands[3]);
11679
11680 emit_insn (compare_seq);
11681 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11682 gen_rtx_IF_THEN_ELSE (mode,
11683 compare_op, operands[2],
11684 operands[3])));
11685 if (bypass_test)
11686 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11687 gen_rtx_IF_THEN_ELSE (mode,
11688 bypass_test,
11689 copy_rtx (operands[3]),
11690 copy_rtx (operands[0]))));
11691 if (second_test)
11692 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11693 gen_rtx_IF_THEN_ELSE (mode,
11694 second_test,
11695 copy_rtx (operands[2]),
11696 copy_rtx (operands[0]))));
11697
11698 return 1; /* DONE */
11699}
11700
11701/* Swap, force into registers, or otherwise massage the two operands
11702 to an sse comparison with a mask result. Thus we differ a bit from
11703 ix86_prepare_fp_compare_args which expects to produce a flags result.
11704
11705 The DEST operand exists to help determine whether to commute commutative
11706 operators. The POP0/POP1 operands are updated in place. The new
11707 comparison code is returned, or UNKNOWN if not implementable. */
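/* E.g. a GT comparison is turned into LT with the operands swapped, since
   the SSE compare predicates provide LT/LE/UNGT/UNGE (and the commutative
   EQ/NE/ORDERED/UNORDERED) but not GT/GE/UNLT/UNLE.  */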
11708
11709static enum rtx_code
11710ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11711 rtx *pop0, rtx *pop1)
11712{
11713 rtx tmp;
11714
11715 switch (code)
11716 {
11717 case LTGT:
11718 case UNEQ:
11719 /* We have no LTGT as an operator. We could implement it with
11720 NE & ORDERED, but this requires an extra temporary. It's
11721 not clear that it's worth it. */
11722 return UNKNOWN;
11723
11724 case LT:
11725 case LE:
11726 case UNGT:
11727 case UNGE:
11728 /* These are supported directly. */
11729 break;
11730
11731 case EQ:
11732 case NE:
11733 case UNORDERED:
11734 case ORDERED:
11735 /* For commutative operators, try to canonicalize the destination
11736 operand to be first in the comparison - this helps reload to
11737 avoid extra moves. */
11738 if (!dest || !rtx_equal_p (dest, *pop1))
11739 break;
11740 /* FALLTHRU */
11741
11742 case GE:
11743 case GT:
11744 case UNLE:
11745 case UNLT:
11746 /* These are not supported directly. Swap the comparison operands
11747 to transform into something that is supported. */
11748 tmp = *pop0;
11749 *pop0 = *pop1;
11750 *pop1 = tmp;
11751 code = swap_condition (code);
11752 break;
11753
11754 default:
11755 gcc_unreachable ();
11756 }
11757
11758 return code;
11759}
11760
11761/* Detect conditional moves that exactly match min/max operational
11762 semantics. Note that this is IEEE safe, as long as we don't
11763 interchange the operands.
11764
11765 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11766 and TRUE if the operation is successful and instructions are emitted. */
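/* For example, with CODE == LT, IF_TRUE == CMP_OP0 and IF_FALSE == CMP_OP1,
   the expression "a < b ? a : b" is recognized as a minimum; with the arms
   exchanged it is a maximum, and UNGE is handled by swapping the arms.  */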
11767
11768static bool
11769ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11770 rtx cmp_op1, rtx if_true, rtx if_false)
11771{
11772 enum machine_mode mode;
11773 bool is_min;
11774 rtx tmp;
11775
11776 if (code == LT)
11777 ;
11778 else if (code == UNGE)
11779 {
11780 tmp = if_true;
11781 if_true = if_false;
11782 if_false = tmp;
11783 }
11784 else
11785 return false;
11786
11787 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11788 is_min = true;
11789 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11790 is_min = false;
11791 else
11792 return false;
11793
11794 mode = GET_MODE (dest);
11795
11796 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11797 but MODE may be a vector mode and thus not appropriate. */
11798 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11799 {
11800 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11801 rtvec v;
11802
11803 if_true = force_reg (mode, if_true);
11804 v = gen_rtvec (2, if_true, if_false);
11805 tmp = gen_rtx_UNSPEC (mode, v, u);
11806 }
11807 else
11808 {
11809 code = is_min ? SMIN : SMAX;
11810 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11811 }
11812
11813 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11814 return true;
11815}
11816
11817/* Expand an sse vector comparison. Return the register with the result. */
11818
11819static rtx
11820ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11821 rtx op_true, rtx op_false)
11822{
11823 enum machine_mode mode = GET_MODE (dest);
11824 rtx x;
11825
11826 cmp_op0 = force_reg (mode, cmp_op0);
11827 if (!nonimmediate_operand (cmp_op1, mode))
11828 cmp_op1 = force_reg (mode, cmp_op1);
11829
11830 if (optimize
11831 || reg_overlap_mentioned_p (dest, op_true)
11832 || reg_overlap_mentioned_p (dest, op_false))
11833 dest = gen_reg_rtx (mode);
11834
11835 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11836 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11837
11838 return dest;
11839}
11840
11841/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11842 operations. This is used for both scalar and vector conditional moves. */
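/* The general case below computes the usual mask-blend form
     dest = (op_true & cmp) | (op_false & ~cmp);
   when one of the arms is the zero vector this collapses to a single
   AND or ANDN.  */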
11843
11844static void
11845ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11846{
11847 enum machine_mode mode = GET_MODE (dest);
11848 rtx t2, t3, x;
11849
11850 if (op_false == CONST0_RTX (mode))
11851 {
11852 op_true = force_reg (mode, op_true);
11853 x = gen_rtx_AND (mode, cmp, op_true);
11854 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11855 }
11856 else if (op_true == CONST0_RTX (mode))
11857 {
11858 op_false = force_reg (mode, op_false);
11859 x = gen_rtx_NOT (mode, cmp);
11860 x = gen_rtx_AND (mode, x, op_false);
11861 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11862 }
11863 else
11864 {
11865 op_true = force_reg (mode, op_true);
11866 op_false = force_reg (mode, op_false);
11867
11868 t2 = gen_reg_rtx (mode);
11869 if (optimize)
11870 t3 = gen_reg_rtx (mode);
11871 else
11872 t3 = dest;
11873
11874 x = gen_rtx_AND (mode, op_true, cmp);
11875 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11876
11877 x = gen_rtx_NOT (mode, cmp);
11878 x = gen_rtx_AND (mode, x, op_false);
11879 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11880
11881 x = gen_rtx_IOR (mode, t3, t2);
11882 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11883 }
11884}
11885
11886/* Expand a floating-point conditional move. Return true if successful. */
11887
11888int
11889ix86_expand_fp_movcc (rtx operands[])
11890{
11891 enum machine_mode mode = GET_MODE (operands[0]);
11892 enum rtx_code code = GET_CODE (operands[1]);
11893 rtx tmp, compare_op, second_test, bypass_test;
11894
11895 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11896 {
11897 enum machine_mode cmode;
11898
11899 /* Since we've no cmove for sse registers, don't force bad register
11900 allocation just to gain access to it. Deny movcc when the
11901 comparison mode doesn't match the move mode. */
11902 cmode = GET_MODE (ix86_compare_op0);
11903 if (cmode == VOIDmode)
11904 cmode = GET_MODE (ix86_compare_op1);
11905 if (cmode != mode)
11906 return 0;
11907
11908 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11909 &ix86_compare_op0,
11910 &ix86_compare_op1);
11911 if (code == UNKNOWN)
11912 return 0;
11913
11914 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11915 ix86_compare_op1, operands[2],
11916 operands[3]))
11917 return 1;
11918
11919 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11920 ix86_compare_op1, operands[2], operands[3]);
11921 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11922 return 1;
11923 }
11924
11925 /* The floating point conditional move instructions don't directly
11926 support conditions resulting from a signed integer comparison. */
11927
11928 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11929
11933 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11934 {
11935 gcc_assert (!second_test && !bypass_test);
11936 tmp = gen_reg_rtx (QImode);
11937 ix86_expand_setcc (code, tmp);
11938 code = NE;
11939 ix86_compare_op0 = tmp;
11940 ix86_compare_op1 = const0_rtx;
11941 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11942 }
11943 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11944 {
11945 tmp = gen_reg_rtx (mode);
11946 emit_move_insn (tmp, operands[3]);
11947 operands[3] = tmp;
11948 }
11949 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11950 {
11951 tmp = gen_reg_rtx (mode);
11952 emit_move_insn (tmp, operands[2]);
11953 operands[2] = tmp;
11954 }
11955
11956 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11957 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11958 operands[2], operands[3])));
11959 if (bypass_test)
11960 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11961 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11962 operands[3], operands[0])));
11963 if (second_test)
11964 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11965 gen_rtx_IF_THEN_ELSE (mode, second_test,
11966 operands[2], operands[0])));
11967
11968 return 1;
11969}
11970
11971/* Expand a floating-point vector conditional move; a vcond operation
11972 rather than a movcc operation. */
11973
11974bool
11975ix86_expand_fp_vcond (rtx operands[])
11976{
11977 enum rtx_code code = GET_CODE (operands[3]);
11978 rtx cmp;
11979
11980 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11981 &operands[4], &operands[5]);
11982 if (code == UNKNOWN)
11983 return false;
11984
11985 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11986 operands[5], operands[1], operands[2]))
11987 return true;
11988
11989 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11990 operands[1], operands[2]);
11991 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11992 return true;
11993}
11994
11995/* Expand a signed or unsigned integer vector conditional move.  */
11996
11997bool
11998ix86_expand_int_vcond (rtx operands[])
11999{
12000 enum machine_mode mode = GET_MODE (operands[0]);
12001 enum rtx_code code = GET_CODE (operands[3]);
12002 bool negate = false;
12003 rtx x, cop0, cop1;
12004
12005 cop0 = operands[4];
12006 cop1 = operands[5];
12007
12008 /* Canonicalize the comparison to EQ, GT, GTU. */
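  /* E.g. LE becomes GT with the two selection arms exchanged (NEGATE),
     while GE becomes GT with both the arms and the compare operands
     exchanged.  */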
12009 switch (code)
12010 {
12011 case EQ:
12012 case GT:
12013 case GTU:
12014 break;
12015
12016 case NE:
12017 case LE:
12018 case LEU:
12019 code = reverse_condition (code);
12020 negate = true;
12021 break;
12022
12023 case GE:
12024 case GEU:
12025 code = reverse_condition (code);
12026 negate = true;
12027 /* FALLTHRU */
12028
12029 case LT:
12030 case LTU:
12031 code = swap_condition (code);
12032 x = cop0, cop0 = cop1, cop1 = x;
12033 break;
12034
12035 default:
12036 gcc_unreachable ();
12037 }
12038
12039 /* Unsigned parallel compare is not supported by the hardware. Play some
12040 tricks to turn this into a signed comparison against 0. */
12041 if (code == GTU)
12042 {
12043 cop0 = force_reg (mode, cop0);
12044
12045 switch (mode)
12046 {
12047 case V4SImode:
12048 {
12049 rtx t1, t2, mask;
12050
12051 /* Perform a parallel modulo subtraction. */
12052 t1 = gen_reg_rtx (mode);
12053 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12054
12055 /* Extract the original sign bit of op0. */
12056 mask = GEN_INT (-0x80000000);
12057 mask = gen_rtx_CONST_VECTOR (mode,
12058 gen_rtvec (4, mask, mask, mask, mask));
12059 mask = force_reg (mode, mask);
12060 t2 = gen_reg_rtx (mode);
12061 emit_insn (gen_andv4si3 (t2, cop0, mask));
12062
12063 /* XOR it back into the result of the subtraction. This results
12064 in the sign bit set iff we saw unsigned underflow. */
12065 x = gen_reg_rtx (mode);
12066 emit_insn (gen_xorv4si3 (x, t1, t2));
12067
12068 code = GT;
12069 }
12070 break;
12071
12072 case V16QImode:
12073 case V8HImode:
12074 /* Perform a parallel unsigned saturating subtraction. */
12075 x = gen_reg_rtx (mode);
12076 emit_insn (gen_rtx_SET (VOIDmode, x,
12077 gen_rtx_US_MINUS (mode, cop0, cop1)));
12078
12079 code = EQ;
12080 negate = !negate;
12081 break;
12082
12083 default:
12084 gcc_unreachable ();
12085 }
12086
12087 cop0 = x;
12088 cop1 = CONST0_RTX (mode);
12089 }
12090
12091 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12092 operands[1+negate], operands[2-negate]);
12093
12094 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12095 operands[2-negate]);
12096 return true;
12097}
12098
12099/* Expand conditional increment or decrement using adc/sbb instructions.
12100   The default case using setcc followed by the conditional move can be
12101   done by generic code.  */
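/* For instance (illustrative AT&T syntax, register names are made up),
   an unsigned increment such as "dest = op2 + (a < b)" can come out as
	cmpl	%b, %a
	adcl	$0, %dest
   i.e. the compare sets the carry flag and the adc folds it into the
   addition; the decrement cases use sbb in the same way.  */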
12102int
12103ix86_expand_int_addcc (rtx operands[])
12104{
12105 enum rtx_code code = GET_CODE (operands[1]);
12106 rtx compare_op;
12107 rtx val = const0_rtx;
12108 bool fpcmp = false;
12109 enum machine_mode mode = GET_MODE (operands[0]);
12110
12111 if (operands[3] != const1_rtx
12112 && operands[3] != constm1_rtx)
12113 return 0;
12114 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12115 ix86_compare_op1, &compare_op))
12116 return 0;
12117 code = GET_CODE (compare_op);
12118
12119 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12120 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12121 {
12122 fpcmp = true;
12123 code = ix86_fp_compare_code_to_integer (code);
12124 }
12125
12126 if (code != LTU)
12127 {
12128 val = constm1_rtx;
12129 if (fpcmp)
12130 PUT_CODE (compare_op,
12131 reverse_condition_maybe_unordered
12132 (GET_CODE (compare_op)));
12133 else
12134 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12135 }
12136 PUT_MODE (compare_op, mode);
12137
12138 /* Construct either adc or sbb insn. */
12139 if ((code == LTU) == (operands[3] == constm1_rtx))
12140 {
12141 switch (GET_MODE (operands[0]))
12142 {
12143 case QImode:
12144 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12145 break;
12146 case HImode:
12147 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12148 break;
12149 case SImode:
12150 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12151 break;
12152 case DImode:
12153 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12154 break;
12155 default:
12156 gcc_unreachable ();
12157 }
12158 }
12159 else
12160 {
12161 switch (GET_MODE (operands[0]))
12162 {
12163 case QImode:
12164 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12165 break;
12166 case HImode:
12167 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12168 break;
12169 case SImode:
12170 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12171 break;
12172 case DImode:
12173 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12174 break;
12175 default:
12176 gcc_unreachable ();
12177 }
12178 }
12179 return 1; /* DONE */
12180}
12181
12182
12183/* Split OPERAND into word-sized parts stored in PARTS.  Similar to
12184   split_di, but works for floating point parameters and non-offsettable
12185   memories.  For pushes, it returns just stack offsets; the values will
12186   be saved in the right order.  At most three parts are generated.  */
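/* For example, on a 32-bit target a DImode or DFmode operand yields two
   SImode parts and an XFmode operand three, while on a 64-bit target an
   XFmode or TFmode operand yields a DImode part plus an SImode or DImode
   upper part.  */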
12187
12188static int
12189ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12190{
12191 int size;
12192
12193 if (!TARGET_64BIT)
12194 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12195 else
12196 size = (GET_MODE_SIZE (mode) + 4) / 8;
12197
12198 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
12199 gcc_assert (size >= 2 && size <= 3);
12200
12201 /* Optimize constant pool reference to immediates. This is used by fp
12202 moves, that force all constants to memory to allow combining. */
12203 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
12204 {
12205 rtx tmp = maybe_get_pool_constant (operand);
12206 if (tmp)
12207 operand = tmp;
12208 }
12209
12210 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
12211 {
12212      /* The only non-offsettable memories we handle are pushes.  */
12213 int ok = push_operand (operand, VOIDmode);
12214
12215 gcc_assert (ok);
12216
12217 operand = copy_rtx (operand);
12218 PUT_MODE (operand, Pmode);
12219 parts[0] = parts[1] = parts[2] = operand;
12220 return size;
12221 }
12222
12223 if (GET_CODE (operand) == CONST_VECTOR)
12224 {
12225 enum machine_mode imode = int_mode_for_mode (mode);
12226 /* Caution: if we looked through a constant pool memory above,
12227 the operand may actually have a different mode now. That's
12228 ok, since we want to pun this all the way back to an integer. */
12229 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12230 gcc_assert (operand != NULL);
12231 mode = imode;
12232 }
12233
12234 if (!TARGET_64BIT)
12235 {
12236 if (mode == DImode)
12237 split_di (&operand, 1, &parts[0], &parts[1]);
12238 else
12239 {
12240 if (REG_P (operand))
12241 {
12242 gcc_assert (reload_completed);
12243 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12244 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12245 if (size == 3)
12246 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12247 }
12248 else if (offsettable_memref_p (operand))
12249 {
12250 operand = adjust_address (operand, SImode, 0);
12251 parts[0] = operand;
12252 parts[1] = adjust_address (operand, SImode, 4);
12253 if (size == 3)
12254 parts[2] = adjust_address (operand, SImode, 8);
12255 }
12256 else if (GET_CODE (operand) == CONST_DOUBLE)
12257 {
12258 REAL_VALUE_TYPE r;
12259 long l[4];
12260
12261 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12262 switch (mode)
12263 {
12264 case XFmode:
12265 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12266 parts[2] = gen_int_mode (l[2], SImode);
12267 break;
12268 case DFmode:
12269 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12270 break;
12271 default:
12272 gcc_unreachable ();
12273 }
12274 parts[1] = gen_int_mode (l[1], SImode);
12275 parts[0] = gen_int_mode (l[0], SImode);
12276 }
12277 else
12278 gcc_unreachable ();
12279 }
12280 }
12281 else
12282 {
12283 if (mode == TImode)
12284 split_ti (&operand, 1, &parts[0], &parts[1]);
12285 if (mode == XFmode || mode == TFmode)
12286 {
12287 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12288 if (REG_P (operand))
12289 {
12290 gcc_assert (reload_completed);
12291 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12292 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12293 }
12294 else if (offsettable_memref_p (operand))
12295 {
12296 operand = adjust_address (operand, DImode, 0);
12297 parts[0] = operand;
12298 parts[1] = adjust_address (operand, upper_mode, 8);
12299 }
12300 else if (GET_CODE (operand) == CONST_DOUBLE)
12301 {
12302 REAL_VALUE_TYPE r;
12303 long l[4];
12304
12305 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12306 real_to_target (l, &r, mode);
12307
12308 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12309 if (HOST_BITS_PER_WIDE_INT >= 64)
12310 parts[0]
12311 = gen_int_mode
12312 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12313 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12314 DImode);
12315 else
12316 parts[0] = immed_double_const (l[0], l[1], DImode);
12317
12318 if (upper_mode == SImode)
12319 parts[1] = gen_int_mode (l[2], SImode);
12320 else if (HOST_BITS_PER_WIDE_INT >= 64)
12321 parts[1]
12322 = gen_int_mode
12323 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12324 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12325 DImode);
12326 else
12327 parts[1] = immed_double_const (l[2], l[3], DImode);
12328 }
12329 else
12330 gcc_unreachable ();
12331 }
12332 }
12333
12334 return size;
12335}
12336
12337/* Emit insns to perform a move or push of DI, DF, and XF values.
12338   All required insns are emitted directly.  Operands 2-4 are filled
12339   with the destination parts in the correct order; operands 5-7 with
12340   the corresponding source parts.  */
12341
12342void
12343ix86_split_long_move (rtx operands[])
12344{
12345 rtx part[2][3];
12346 int nparts;
12347 int push = 0;
12348 int collisions = 0;
12349 enum machine_mode mode = GET_MODE (operands[0]);
12350
12351  /* The DFmode expanders may ask us to move a double.
12352     For a 64-bit target this is a single move.  By hiding the fact
12353     here we simplify the i386.md splitters.  */
12354 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12355 {
12356 /* Optimize constant pool reference to immediates. This is used by
12357 fp moves, that force all constants to memory to allow combining. */
12358
12359 if (GET_CODE (operands[1]) == MEM
12360 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12361 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12362 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12363 if (push_operand (operands[0], VOIDmode))
12364 {
12365 operands[0] = copy_rtx (operands[0]);
12366 PUT_MODE (operands[0], Pmode);
12367 }
12368 else
12369 operands[0] = gen_lowpart (DImode, operands[0]);
12370 operands[1] = gen_lowpart (DImode, operands[1]);
12371 emit_move_insn (operands[0], operands[1]);
12372 return;
12373 }
12374
12375 /* The only non-offsettable memory we handle is push. */
12376 if (push_operand (operands[0], VOIDmode))
12377 push = 1;
12378 else
12379 gcc_assert (GET_CODE (operands[0]) != MEM
12380 || offsettable_memref_p (operands[0]));
12381
12382 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12383 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12384
12385 /* When emitting push, take care for source operands on the stack. */
12386 if (push && GET_CODE (operands[1]) == MEM
12387 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12388 {
12389 if (nparts == 3)
12390 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12391 XEXP (part[1][2], 0));
12392 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12393 XEXP (part[1][1], 0));
12394 }
12395
12396 /* We need to do copy in the right order in case an address register
12397 of the source overlaps the destination. */
12398 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12399 {
12400 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12401 collisions++;
12402 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12403 collisions++;
12404 if (nparts == 3
12405 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12406 collisions++;
12407
12408 /* Collision in the middle part can be handled by reordering. */
12409 if (collisions == 1 && nparts == 3
12410 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12411 {
12412 rtx tmp;
12413 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12414 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12415 }
12416
12417 /* If there are more collisions, we can't handle it by reordering.
12418 Do an lea to the last part and use only one colliding move. */
12419 else if (collisions > 1)
12420 {
12421 rtx base;
12422
12423 collisions = 1;
12424
12425 base = part[0][nparts - 1];
12426
12427 /* Handle the case when the last part isn't valid for lea.
12428 Happens in 64-bit mode storing the 12-byte XFmode. */
12429 if (GET_MODE (base) != Pmode)
12430 base = gen_rtx_REG (Pmode, REGNO (base));
12431
12432 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12433 part[1][0] = replace_equiv_address (part[1][0], base);
12434 part[1][1] = replace_equiv_address (part[1][1],
12435 plus_constant (base, UNITS_PER_WORD));
12436 if (nparts == 3)
12437 part[1][2] = replace_equiv_address (part[1][2],
12438 plus_constant (base, 8));
12439 }
12440 }
12441
12442 if (push)
12443 {
12444 if (!TARGET_64BIT)
12445 {
12446 if (nparts == 3)
12447 {
12448 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12449 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12450 emit_move_insn (part[0][2], part[1][2]);
12451 }
12452 }
12453 else
12454 {
12455	  /* In 64-bit mode we don't have a 32-bit push available.  If this is
12456	     a register, that is OK - we will just use the larger counterpart.
12457	     We also retype memory - this comes from an attempt to avoid a REX
12458	     prefix when moving the second half of a TFmode value.  */
12459 if (GET_MODE (part[1][1]) == SImode)
12460 {
12461 switch (GET_CODE (part[1][1]))
12462 {
12463 case MEM:
12464 part[1][1] = adjust_address (part[1][1], DImode, 0);
12465 break;
12466
12467 case REG:
12468 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12469 break;
12470
12471 default:
12472 gcc_unreachable ();
12473 }
12474
12475 if (GET_MODE (part[1][0]) == SImode)
12476 part[1][0] = part[1][1];
12477 }
12478 }
12479 emit_move_insn (part[0][1], part[1][1]);
12480 emit_move_insn (part[0][0], part[1][0]);
12481 return;
12482 }
12483
12484 /* Choose correct order to not overwrite the source before it is copied. */
12485 if ((REG_P (part[0][0])
12486 && REG_P (part[1][1])
12487 && (REGNO (part[0][0]) == REGNO (part[1][1])
12488 || (nparts == 3
12489 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12490 || (collisions > 0
12491 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12492 {
12493 if (nparts == 3)
12494 {
12495 operands[2] = part[0][2];
12496 operands[3] = part[0][1];
12497 operands[4] = part[0][0];
12498 operands[5] = part[1][2];
12499 operands[6] = part[1][1];
12500 operands[7] = part[1][0];
12501 }
12502 else
12503 {
12504 operands[2] = part[0][1];
12505 operands[3] = part[0][0];
12506 operands[5] = part[1][1];
12507 operands[6] = part[1][0];
12508 }
12509 }
12510 else
12511 {
12512 if (nparts == 3)
12513 {
12514 operands[2] = part[0][0];
12515 operands[3] = part[0][1];
12516 operands[4] = part[0][2];
12517 operands[5] = part[1][0];
12518 operands[6] = part[1][1];
12519 operands[7] = part[1][2];
12520 }
12521 else
12522 {
12523 operands[2] = part[0][0];
12524 operands[3] = part[0][1];
12525 operands[5] = part[1][0];
12526 operands[6] = part[1][1];
12527 }
12528 }
12529
12530 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12531 if (optimize_size)
12532 {
12533 if (GET_CODE (operands[5]) == CONST_INT
12534 && operands[5] != const0_rtx
12535 && REG_P (operands[2]))
12536 {
12537 if (GET_CODE (operands[6]) == CONST_INT
12538 && INTVAL (operands[6]) == INTVAL (operands[5]))
12539 operands[6] = operands[2];
12540
12541 if (nparts == 3
12542 && GET_CODE (operands[7]) == CONST_INT
12543 && INTVAL (operands[7]) == INTVAL (operands[5]))
12544 operands[7] = operands[2];
12545 }
12546
12547 if (nparts == 3
12548 && GET_CODE (operands[6]) == CONST_INT
12549 && operands[6] != const0_rtx
12550 && REG_P (operands[3])
12551 && GET_CODE (operands[7]) == CONST_INT
12552 && INTVAL (operands[7]) == INTVAL (operands[6]))
12553 operands[7] = operands[3];
12554 }
12555
12556 emit_move_insn (operands[2], operands[5]);
12557 emit_move_insn (operands[3], operands[6]);
12558 if (nparts == 3)
12559 emit_move_insn (operands[4], operands[7]);
12560
12561 return;
12562}
12563
12564/* Helper function of ix86_split_ashl used to generate an SImode/DImode
12565 left shift by a constant, either using a single shift or
12566 a sequence of add instructions. */
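/* E.g. a left shift by 1 becomes a single "add reg, reg"; small counts
   become a chain of adds when that is cheaper than a shift according to
   ix86_cost, and a plain shift instruction is used otherwise.  */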
12567
12568static void
12569ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12570{
12571 if (count == 1)
12572 {
12573 emit_insn ((mode == DImode
12574 ? gen_addsi3
12575 : gen_adddi3) (operand, operand, operand));
12576 }
12577 else if (!optimize_size
12578 && count * ix86_cost->add <= ix86_cost->shift_const)
12579 {
12580 int i;
12581 for (i=0; i<count; i++)
12582 {
12583 emit_insn ((mode == DImode
12584 ? gen_addsi3
12585 : gen_adddi3) (operand, operand, operand));
12586 }
12587 }
12588 else
12589 emit_insn ((mode == DImode
12590 ? gen_ashlsi3
12591 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12592}
12593
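/* Split a double-word left shift (DImode when !TARGET_64BIT, TImode
   otherwise) into operations on the two word-sized halves.  SCRATCH is
   an optional scratch register used, together with TARGET_CMOVE, to
   adjust the result when a variable shift count turns out to be a full
   word or more.  */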
12594void
12595ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12596{
12597 rtx low[2], high[2];
12598 int count;
12599 const int single_width = mode == DImode ? 32 : 64;
12600
12601 if (GET_CODE (operands[2]) == CONST_INT)
12602 {
12603 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12604 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12605
12606 if (count >= single_width)
12607 {
12608 emit_move_insn (high[0], low[1]);
12609 emit_move_insn (low[0], const0_rtx);
12610
12611 if (count > single_width)
12612 ix86_expand_ashl_const (high[0], count - single_width, mode);
12613 }
12614 else
12615 {
12616 if (!rtx_equal_p (operands[0], operands[1]))
12617 emit_move_insn (operands[0], operands[1]);
12618 emit_insn ((mode == DImode
12619 ? gen_x86_shld_1
12620 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12621 ix86_expand_ashl_const (low[0], count, mode);
12622 }
12623 return;
12624 }
12625
12626 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12627
12628 if (operands[1] == const1_rtx)
12629 {
12630      /* Assuming we've chosen QImode-capable registers, 1 << N
12631	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
12632 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12633 {
12634 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12635
12636 ix86_expand_clear (low[0]);
12637 ix86_expand_clear (high[0]);
12638 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12639
12640 d = gen_lowpart (QImode, low[0]);
12641 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12642 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12643 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12644
12645 d = gen_lowpart (QImode, high[0]);
12646 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12647 s = gen_rtx_NE (QImode, flags, const0_rtx);
12648 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12649 }
12650
12651 /* Otherwise, we can get the same results by manually performing
12652 a bit extract operation on bit 5/6, and then performing the two
12653 shifts. The two methods of getting 0/1 into low/high are exactly
12654 the same size. Avoiding the shift in the bit extract case helps
12655 pentium4 a bit; no one else seems to care much either way. */
12656 else
12657 {
12658 rtx x;
12659
12660 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12661 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12662 else
12663 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12664 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12665
12666 emit_insn ((mode == DImode
12667 ? gen_lshrsi3
12668 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12669 emit_insn ((mode == DImode
12670 ? gen_andsi3
12671 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12672 emit_move_insn (low[0], high[0]);
12673 emit_insn ((mode == DImode
12674 ? gen_xorsi3
12675 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12676 }
12677
12678 emit_insn ((mode == DImode
12679 ? gen_ashlsi3
12680 : gen_ashldi3) (low[0], low[0], operands[2]));
12681 emit_insn ((mode == DImode
12682 ? gen_ashlsi3
12683 : gen_ashldi3) (high[0], high[0], operands[2]));
12684 return;
12685 }
12686
12687 if (operands[1] == constm1_rtx)
12688 {
12689 /* For -1 << N, we can avoid the shld instruction, because we
12690 know that we're shifting 0...31/63 ones into a -1. */
12691 emit_move_insn (low[0], constm1_rtx);
12692 if (optimize_size)
12693 emit_move_insn (high[0], low[0]);
12694 else
12695 emit_move_insn (high[0], constm1_rtx);
12696 }
12697 else
12698 {
12699 if (!rtx_equal_p (operands[0], operands[1]))
12700 emit_move_insn (operands[0], operands[1]);
12701
12702 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12703 emit_insn ((mode == DImode
12704 ? gen_x86_shld_1
12705 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12706 }
12707
12708 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12709
12710 if (TARGET_CMOVE && scratch)
12711 {
12712 ix86_expand_clear (scratch);
12713 emit_insn ((mode == DImode
12714 ? gen_x86_shift_adj_1
12715 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12716 }
12717 else
12718 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12719}
12720
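/* Split a double-word arithmetic right shift into operations on the two
   word-sized halves; the upper half is filled with copies of the sign
   bit.  SCRATCH plays the same role as in ix86_split_ashl.  */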
12721void
12722ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12723{
12724 rtx low[2], high[2];
12725 int count;
12726 const int single_width = mode == DImode ? 32 : 64;
12727
12728 if (GET_CODE (operands[2]) == CONST_INT)
12729 {
12730 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12731 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12732
12733 if (count == single_width * 2 - 1)
12734 {
12735 emit_move_insn (high[0], high[1]);
12736 emit_insn ((mode == DImode
12737 ? gen_ashrsi3
12738 : gen_ashrdi3) (high[0], high[0],
12739 GEN_INT (single_width - 1)));
12740 emit_move_insn (low[0], high[0]);
12741
12742 }
12743 else if (count >= single_width)
12744 {
12745 emit_move_insn (low[0], high[1]);
12746 emit_move_insn (high[0], low[0]);
12747 emit_insn ((mode == DImode
12748 ? gen_ashrsi3
12749 : gen_ashrdi3) (high[0], high[0],
12750 GEN_INT (single_width - 1)));
12751 if (count > single_width)
12752 emit_insn ((mode == DImode
12753 ? gen_ashrsi3
12754 : gen_ashrdi3) (low[0], low[0],
12755 GEN_INT (count - single_width)));
12756 }
12757 else
12758 {
12759 if (!rtx_equal_p (operands[0], operands[1]))
12760 emit_move_insn (operands[0], operands[1]);
12761 emit_insn ((mode == DImode
12762 ? gen_x86_shrd_1
12763 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12764 emit_insn ((mode == DImode
12765 ? gen_ashrsi3
12766 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12767 }
12768 }
12769 else
12770 {
12771 if (!rtx_equal_p (operands[0], operands[1]))
12772 emit_move_insn (operands[0], operands[1]);
12773
12774 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12775
12776 emit_insn ((mode == DImode
12777 ? gen_x86_shrd_1
12778 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12779 emit_insn ((mode == DImode
12780 ? gen_ashrsi3
12781 : gen_ashrdi3) (high[0], high[0], operands[2]));
12782
12783 if (TARGET_CMOVE && scratch)
12784 {
12785 emit_move_insn (scratch, high[0]);
12786 emit_insn ((mode == DImode
12787 ? gen_ashrsi3
12788 : gen_ashrdi3) (scratch, scratch,
12789 GEN_INT (single_width - 1)));
12790 emit_insn ((mode == DImode
12791 ? gen_x86_shift_adj_1
12792 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12793 scratch));
12794 }
12795 else
12796 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12797 }
12798}
12799
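/* Split a double-word logical right shift into operations on the two
   word-sized halves; the upper half is zero-filled.  SCRATCH plays the
   same role as in ix86_split_ashl.  */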
12800void
12801ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12802{
12803 rtx low[2], high[2];
12804 int count;
12805 const int single_width = mode == DImode ? 32 : 64;
12806
12807 if (GET_CODE (operands[2]) == CONST_INT)
12808 {
12809 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12810 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12811
12812 if (count >= single_width)
12813 {
12814 emit_move_insn (low[0], high[1]);
12815 ix86_expand_clear (high[0]);
12816
12817 if (count > single_width)
12818 emit_insn ((mode == DImode
12819 ? gen_lshrsi3
12820 : gen_lshrdi3) (low[0], low[0],
12821 GEN_INT (count - single_width)));
12822 }
12823 else
12824 {
12825 if (!rtx_equal_p (operands[0], operands[1]))
12826 emit_move_insn (operands[0], operands[1]);
12827 emit_insn ((mode == DImode
12828 ? gen_x86_shrd_1
12829 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12830 emit_insn ((mode == DImode
12831 ? gen_lshrsi3
12832 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12833 }
12834 }
12835 else
12836 {
12837 if (!rtx_equal_p (operands[0], operands[1]))
12838 emit_move_insn (operands[0], operands[1]);
12839
12840 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12841
12842 emit_insn ((mode == DImode
12843 ? gen_x86_shrd_1
12844 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12845 emit_insn ((mode == DImode
12846 ? gen_lshrsi3
12847 : gen_lshrdi3) (high[0], high[0], operands[2]));
12848
12849 /* Heh. By reversing the arguments, we can reuse this pattern. */
12850 if (TARGET_CMOVE && scratch)
12851 {
12852 ix86_expand_clear (scratch);
12853 emit_insn ((mode == DImode
12854 ? gen_x86_shift_adj_1
12855 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12856 scratch));
12857 }
12858 else
12859 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12860 }
12861}
12862
12863/* Helper function for the string operations below.  Test whether the bits
12864   of VARIABLE selected by VALUE are zero; if so, jump to the returned label.  */
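/* A typical use in the string expanders below is:
     rtx label = ix86_expand_aligntest (destreg, 1);
     ... emit the one-byte fixup copy and adjust the counter ...
     emit_label (label);
     LABEL_NUSES (label) = 1;  */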
12865static rtx
12866ix86_expand_aligntest (rtx variable, int value)
12867{
12868 rtx label = gen_label_rtx ();
12869 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12870 if (GET_MODE (variable) == DImode)
12871 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12872 else
12873 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12874 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12875 1, label);
12876 return label;
12877}
12878
12879/* Decrease COUNTREG by VALUE.  */
12880static void
12881ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12882{
12883 if (GET_MODE (countreg) == DImode)
12884 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12885 else
12886 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12887}
12888
12889/* Zero-extend EXP, which may be SImode, into a Pmode register.  */
12890rtx
12891ix86_zero_extend_to_Pmode (rtx exp)
12892{
12893 rtx r;
12894 if (GET_MODE (exp) == VOIDmode)
12895 return force_reg (Pmode, exp);
12896 if (GET_MODE (exp) == Pmode)
12897 return copy_to_mode_reg (Pmode, exp);
12898 r = gen_reg_rtx (Pmode);
12899 emit_insn (gen_zero_extendsidi2 (r, exp));
12900 return r;
12901}
12902
12903/* Expand string move (memcpy) operation. Use i386 string operations when
12904 profitable. expand_clrmem contains similar code. */
12905int
12906ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12907{
12908 rtx srcreg, destreg, countreg, srcexp, destexp;
12909 enum machine_mode counter_mode;
12910 HOST_WIDE_INT align = 0;
12911 unsigned HOST_WIDE_INT count = 0;
12912
12913 if (GET_CODE (align_exp) == CONST_INT)
12914 align = INTVAL (align_exp);
12915
12916 /* Can't use any of this if the user has appropriated esi or edi. */
12917 if (global_regs[4] || global_regs[5])
12918 return 0;
12919
12920 /* This simple hack avoids all inlining code and simplifies code below. */
12921 if (!TARGET_ALIGN_STRINGOPS)
12922 align = 64;
12923
12924 if (GET_CODE (count_exp) == CONST_INT)
12925 {
12926 count = INTVAL (count_exp);
12927 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12928 return 0;
12929 }
12930
12931 /* Figure out proper mode for counter. For 32bits it is always SImode,
12932 for 64bits use SImode when possible, otherwise DImode.
12933 Set count to number of bytes copied when known at compile time. */
12934 if (!TARGET_64BIT
12935 || GET_MODE (count_exp) == SImode
12936 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12937 counter_mode = SImode;
12938 else
12939 counter_mode = DImode;
12940
12941 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12942
12943 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12944 if (destreg != XEXP (dst, 0))
12945 dst = replace_equiv_address_nv (dst, destreg);
12946 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12947 if (srcreg != XEXP (src, 0))
12948 src = replace_equiv_address_nv (src, srcreg);
12949
12950  /* When optimizing for size, emit a simple rep ; movsb instruction for
12951     counts not divisible by 4, except when a (movsl;)*(movsw;)?(movsb;)?
12952     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12953     The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12954     count / 4 + (count & 3) bytes; the other sequence is either 4 or 7
12955     bytes, but we don't know whether the upper 24 (resp. 56) bits of
12956     %ecx will be known to be zero or not.  The rep; movsb sequence
12957     causes higher register pressure though, so take that into account.  */
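  /* For instance, count == 7 gives movsl; movsw; movsb, i.e.
     7/4 + (7 & 3) = 4 bytes, whereas movl $7, %ecx; rep; movsb takes
     7 bytes.  */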
12958
12959 if ((!optimize || optimize_size)
12960 && (count == 0
12961 || ((count & 0x03)
12962 && (!optimize_size
12963 || count > 5 * 4
12964 || (count & 3) + count / 4 > 6))))
12965 {
12966 emit_insn (gen_cld ());
12967 countreg = ix86_zero_extend_to_Pmode (count_exp);
12968 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12969 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12970 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12971 destexp, srcexp));
12972 }
12973
12974 /* For constant aligned (or small unaligned) copies use rep movsl
12975 followed by code copying the rest. For PentiumPro ensure 8 byte
12976 alignment to allow rep movsl acceleration. */
12977
12978 else if (count != 0
12979 && (align >= 8
12980 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12981 || optimize_size || count < (unsigned int) 64))
12982 {
12983 unsigned HOST_WIDE_INT offset = 0;
12984 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12985 rtx srcmem, dstmem;
12986
12987 emit_insn (gen_cld ());
12988 if (count & ~(size - 1))
12989 {
12990 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12991 {
12992 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12993
12994 while (offset < (count & ~(size - 1)))
12995 {
12996 srcmem = adjust_automodify_address_nv (src, movs_mode,
12997 srcreg, offset);
12998 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12999 destreg, offset);
13000 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13001 offset += size;
13002 }
13003 }
13004 else
13005 {
13006 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
13007 & (TARGET_64BIT ? -1 : 0x3fffffff));
13008 countreg = copy_to_mode_reg (counter_mode, countreg);
13009 countreg = ix86_zero_extend_to_Pmode (countreg);
13010
13011 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13012 GEN_INT (size == 4 ? 2 : 3));
13013 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13014 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13015
13016 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13017 countreg, destexp, srcexp));
13018 offset = count & ~(size - 1);
13019 }
13020 }
13021 if (size == 8 && (count & 0x04))
13022 {
13023 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
13024 offset);
13025 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
13026 offset);
13027 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13028 offset += 4;
13029 }
13030 if (count & 0x02)
13031 {
13032 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
13033 offset);
13034 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
13035 offset);
13036 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13037 offset += 2;
13038 }
13039 if (count & 0x01)
13040 {
13041 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
13042 offset);
13043 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
13044 offset);
13045 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13046 }
13047 }
13048 /* The generic code based on the glibc implementation:
13049 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
13050 allowing accelerated copying there)
13051 - copy the data using rep movsl
13052 - copy the rest. */
13053 else
13054 {
13055 rtx countreg2;
13056 rtx label = NULL;
13057 rtx srcmem, dstmem;
13058 int desired_alignment = (TARGET_PENTIUMPRO
13059 && (count == 0 || count >= (unsigned int) 260)
13060 ? 8 : UNITS_PER_WORD);
13061 /* Get rid of MEM_OFFSETs, they won't be accurate. */
13062 dst = change_address (dst, BLKmode, destreg);
13063 src = change_address (src, BLKmode, srcreg);
13064
13065      /* In case we don't know anything about the alignment, default to
13066	 the library version, since it is usually equally fast and results in
13067	 shorter code.
13068
13069	 Also emit a call when we know that the count is large and call overhead
13070	 will not be important.  */
13071 if (!TARGET_INLINE_ALL_STRINGOPS
13072 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13073 return 0;
13074
13075 if (TARGET_SINGLE_STRINGOP)
13076 emit_insn (gen_cld ());
13077
13078 countreg2 = gen_reg_rtx (Pmode);
13079 countreg = copy_to_mode_reg (counter_mode, count_exp);
13080
13081 /* We don't use loops to align destination and to copy parts smaller
13082 than 4 bytes, because gcc is able to optimize such code better (in
13083 the case the destination or the count really is aligned, gcc is often
13084 able to predict the branches) and also it is friendlier to the
13085 hardware branch prediction.
13086
13087	 Using loops is beneficial for the generic case, because we can
13088 handle small counts using the loops. Many CPUs (such as Athlon)
13089 have large REP prefix setup costs.
13090
13091 This is quite costly. Maybe we can revisit this decision later or
13092 add some customizability to this code. */
13093
13094 if (count == 0 && align < desired_alignment)
13095 {
13096 label = gen_label_rtx ();
13097 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13098 LEU, 0, counter_mode, 1, label);
13099 }
13100 if (align <= 1)
13101 {
13102 rtx label = ix86_expand_aligntest (destreg, 1);
13103 srcmem = change_address (src, QImode, srcreg);
13104 dstmem = change_address (dst, QImode, destreg);
13105 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13106 ix86_adjust_counter (countreg, 1);
13107 emit_label (label);
13108 LABEL_NUSES (label) = 1;
13109 }
13110 if (align <= 2)
13111 {
13112 rtx label = ix86_expand_aligntest (destreg, 2);
13113 srcmem = change_address (src, HImode, srcreg);
13114 dstmem = change_address (dst, HImode, destreg);
13115 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13116 ix86_adjust_counter (countreg, 2);
13117 emit_label (label);
13118 LABEL_NUSES (label) = 1;
13119 }
13120 if (align <= 4 && desired_alignment > 4)
13121 {
13122 rtx label = ix86_expand_aligntest (destreg, 4);
13123 srcmem = change_address (src, SImode, srcreg);
13124 dstmem = change_address (dst, SImode, destreg);
13125 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13126 ix86_adjust_counter (countreg, 4);
13127 emit_label (label);
13128 LABEL_NUSES (label) = 1;
13129 }
13130
13131 if (label && desired_alignment > 4 && !TARGET_64BIT)
13132 {
13133 emit_label (label);
13134 LABEL_NUSES (label) = 1;
13135 label = NULL_RTX;
13136 }
13137 if (!TARGET_SINGLE_STRINGOP)
13138 emit_insn (gen_cld ());
13139 if (TARGET_64BIT)
13140 {
13141 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13142 GEN_INT (3)));
13143 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13144 }
13145 else
13146 {
13147 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13148 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13149 }
13150 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
13151 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13152 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
13153 countreg2, destexp, srcexp));
13154
13155 if (label)
13156 {
13157 emit_label (label);
13158 LABEL_NUSES (label) = 1;
13159 }
13160 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13161 {
13162 srcmem = change_address (src, SImode, srcreg);
13163 dstmem = change_address (dst, SImode, destreg);
13164 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13165 }
13166 if ((align <= 4 || count == 0) && TARGET_64BIT)
13167 {
13168 rtx label = ix86_expand_aligntest (countreg, 4);
13169 srcmem = change_address (src, SImode, srcreg);
13170 dstmem = change_address (dst, SImode, destreg);
13171 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13172 emit_label (label);
13173 LABEL_NUSES (label) = 1;
13174 }
13175 if (align > 2 && count != 0 && (count & 2))
13176 {
13177 srcmem = change_address (src, HImode, srcreg);
13178 dstmem = change_address (dst, HImode, destreg);
13179 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13180 }
13181 if (align <= 2 || count == 0)
13182 {
13183 rtx label = ix86_expand_aligntest (countreg, 2);
13184 srcmem = change_address (src, HImode, srcreg);
13185 dstmem = change_address (dst, HImode, destreg);
13186 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13187 emit_label (label);
13188 LABEL_NUSES (label) = 1;
13189 }
13190 if (align > 1 && count != 0 && (count & 1))
13191 {
13192 srcmem = change_address (src, QImode, srcreg);
13193 dstmem = change_address (dst, QImode, destreg);
13194 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13195 }
13196 if (align <= 1 || count == 0)
13197 {
13198 rtx label = ix86_expand_aligntest (countreg, 1);
13199 srcmem = change_address (src, QImode, srcreg);
13200 dstmem = change_address (dst, QImode, destreg);
13201 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
13202 emit_label (label);
13203 LABEL_NUSES (label) = 1;
13204 }
13205 }
13206
13207 return 1;
13208}
13209
13210/* Expand string clear operation (bzero).  Use i386 string operations when
13211   profitable.  ix86_expand_movmem contains similar code.  */
13212int
13213ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
13214{
13215 rtx destreg, zeroreg, countreg, destexp;
13216 enum machine_mode counter_mode;
13217 HOST_WIDE_INT align = 0;
13218 unsigned HOST_WIDE_INT count = 0;
13219
13220 if (GET_CODE (align_exp) == CONST_INT)
13221 align = INTVAL (align_exp);
13222
13223 /* Can't use any of this if the user has appropriated esi. */
13224 if (global_regs[4])
13225 return 0;
13226
13227 /* This simple hack avoids all inlining code and simplifies code below. */
13228 if (!TARGET_ALIGN_STRINGOPS)
13229 align = 32;
13230
13231 if (GET_CODE (count_exp) == CONST_INT)
13232 {
13233 count = INTVAL (count_exp);
13234 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
13235 return 0;
13236 }
13237  /* Figure out the proper mode for the counter.  For 32 bits it is always
13238     SImode; for 64 bits use SImode when possible, otherwise DImode.
13239     Set count to the number of bytes to clear when known at compile time.  */
13240 if (!TARGET_64BIT
13241 || GET_MODE (count_exp) == SImode
13242 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
13243 counter_mode = SImode;
13244 else
13245 counter_mode = DImode;
13246
13247 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13248 if (destreg != XEXP (dst, 0))
13249 dst = replace_equiv_address_nv (dst, destreg);
13250
13251
13252  /* When optimizing for size emit a simple rep ; stosb instruction for
13253     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
13254     sequence is 7 bytes long, so if optimizing for size and the count is
13255     small enough that some stosl, stosw and stosb instructions without
13256     rep are shorter, fall through into the next if.  */
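  /* For example, with -Os and a known count of 37, (37 & 3) + (37 >> 2)
     is 1 + 9 = 10 > 7, so ten separate stosl/stosb instructions would be
     larger than the 7-byte rep sequence and this branch is used.  With a
     count of 11 the sum is 3 + 2 = 5 <= 7, and we fall through to the
     unrolled stores below instead.  */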
13257
13258 if ((!optimize || optimize_size)
13259 && (count == 0
13260 || ((count & 0x03)
13261 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
13262 {
13263 emit_insn (gen_cld ());
13264
13265 countreg = ix86_zero_extend_to_Pmode (count_exp);
13266 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
13267 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
13268 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
13269 }
13270 else if (count != 0
13271 && (align >= 8
13272 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
13273 || optimize_size || count < (unsigned int) 64))
13274 {
13275 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
13276 unsigned HOST_WIDE_INT offset = 0;
13277
13278 emit_insn (gen_cld ());
13279
13280 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
13281 if (count & ~(size - 1))
13282 {
13283 unsigned HOST_WIDE_INT repcount;
13284 unsigned int max_nonrep;
13285
13286 repcount = count >> (size == 4 ? 2 : 3);
13287 if (!TARGET_64BIT)
13288 repcount &= 0x3fffffff;
13289
13290 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
13291 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
13292 bytes. In both cases the latter seems to be faster for small
13293 values of N. */
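	  /* For instance, clearing 20 bytes with stosl gives repcount == 5:
	     five one-byte stosl instructions (5 bytes) beat the 7-byte
	     movl $5, %ecx; rep; stosl form, so the unrolled stores win up
	     to repcount == 7.  With the two-byte stosq the break-even point
	     drops to 4.  The Pentium 4 / Nocona override below applies only
	     when not optimizing for size and is a speed tuning choice, not
	     a size argument.  */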
13294 max_nonrep = size == 4 ? 7 : 4;
13295 if (!optimize_size)
13296 switch (ix86_tune)
13297 {
13298 case PROCESSOR_PENTIUM4:
13299 case PROCESSOR_NOCONA:
13300 max_nonrep = 3;
13301 break;
13302 default:
13303 break;
13304 }
13305
13306 if (repcount <= max_nonrep)
13307 while (repcount-- > 0)
13308 {
13309 rtx mem = adjust_automodify_address_nv (dst,
13310 GET_MODE (zeroreg),
13311 destreg, offset);
13312 emit_insn (gen_strset (destreg, mem, zeroreg));
13313 offset += size;
13314 }
13315 else
13316 {
13317 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
13318 countreg = ix86_zero_extend_to_Pmode (countreg);
13319 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13320 GEN_INT (size == 4 ? 2 : 3));
13321 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13322 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
13323 destexp));
13324 offset = count & ~(size - 1);
13325 }
13326 }
13327 if (size == 8 && (count & 0x04))
13328 {
13329 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
13330 offset);
13331 emit_insn (gen_strset (destreg, mem,
13332 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13333 offset += 4;
13334 }
13335 if (count & 0x02)
13336 {
13337 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
13338 offset);
13339 emit_insn (gen_strset (destreg, mem,
13340 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13341 offset += 2;
13342 }
13343 if (count & 0x01)
13344 {
13345 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
13346 offset);
13347 emit_insn (gen_strset (destreg, mem,
13348 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13349 }
13350 }
13351 else
13352 {
13353 rtx countreg2;
13354 rtx label = NULL;
13355 /* Compute desired alignment of the string operation. */
13356 int desired_alignment = (TARGET_PENTIUMPRO
13357 && (count == 0 || count >= (unsigned int) 260)
13358 ? 8 : UNITS_PER_WORD);
13359
13360      /* In case we don't know anything about the alignment, default to
13361	 the library version, since it is usually equally fast and results in
13362	 shorter code.
13363
13364	 Also emit a call when we know that the count is large and call overhead
13365	 will not be important.  */
13366 if (!TARGET_INLINE_ALL_STRINGOPS
13367 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13368 return 0;
13369
13370 if (TARGET_SINGLE_STRINGOP)
13371 emit_insn (gen_cld ());
13372
13373 countreg2 = gen_reg_rtx (Pmode);
13374 countreg = copy_to_mode_reg (counter_mode, count_exp);
13375 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13376 /* Get rid of MEM_OFFSET, it won't be accurate. */
13377 dst = change_address (dst, BLKmode, destreg);
13378
13379 if (count == 0 && align < desired_alignment)
13380 {
13381 label = gen_label_rtx ();
13382 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13383 LEU, 0, counter_mode, 1, label);
13384 }
13385 if (align <= 1)
13386 {
13387 rtx label = ix86_expand_aligntest (destreg, 1);
13388 emit_insn (gen_strset (destreg, dst,
13389 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13390 ix86_adjust_counter (countreg, 1);
13391 emit_label (label);
13392 LABEL_NUSES (label) = 1;
13393 }
13394 if (align <= 2)
13395 {
13396 rtx label = ix86_expand_aligntest (destreg, 2);
13397 emit_insn (gen_strset (destreg, dst,
13398 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13399 ix86_adjust_counter (countreg, 2);
13400 emit_label (label);
13401 LABEL_NUSES (label) = 1;
13402 }
13403 if (align <= 4 && desired_alignment > 4)
13404 {
13405 rtx label = ix86_expand_aligntest (destreg, 4);
13406 emit_insn (gen_strset (destreg, dst,
13407 (TARGET_64BIT
13408 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13409 : zeroreg)));
13410 ix86_adjust_counter (countreg, 4);
13411 emit_label (label);
13412 LABEL_NUSES (label) = 1;
13413 }
13414
13415 if (label && desired_alignment > 4 && !TARGET_64BIT)
13416 {
13417 emit_label (label);
13418 LABEL_NUSES (label) = 1;
13419 label = NULL_RTX;
13420 }
13421
13422 if (!TARGET_SINGLE_STRINGOP)
13423 emit_insn (gen_cld ());
13424 if (TARGET_64BIT)
13425 {
13426 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13427 GEN_INT (3)));
13428 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13429 }
13430 else
13431 {
13432 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13433 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13434 }
13435 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13436 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13437
13438 if (label)
13439 {
13440 emit_label (label);
13441 LABEL_NUSES (label) = 1;
13442 }
13443
13444 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13445 emit_insn (gen_strset (destreg, dst,
13446 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13447 if (TARGET_64BIT && (align <= 4 || count == 0))
13448 {
13449 rtx label = ix86_expand_aligntest (countreg, 4);
13450 emit_insn (gen_strset (destreg, dst,
13451 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13452 emit_label (label);
13453 LABEL_NUSES (label) = 1;
13454 }
13455 if (align > 2 && count != 0 && (count & 2))
13456 emit_insn (gen_strset (destreg, dst,
13457 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13458 if (align <= 2 || count == 0)
13459 {
13460 rtx label = ix86_expand_aligntest (countreg, 2);
13461 emit_insn (gen_strset (destreg, dst,
13462 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13463 emit_label (label);
13464 LABEL_NUSES (label) = 1;
13465 }
13466 if (align > 1 && count != 0 && (count & 1))
13467 emit_insn (gen_strset (destreg, dst,
13468 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13469 if (align <= 1 || count == 0)
13470 {
13471 rtx label = ix86_expand_aligntest (countreg, 1);
13472 emit_insn (gen_strset (destreg, dst,
13473 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13474 emit_label (label);
13475 LABEL_NUSES (label) = 1;
13476 }
13477 }
13478 return 1;
13479}
13480
13481/* Expand strlen. */
13482int
13483ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13484{
13485 rtx addr, scratch1, scratch2, scratch3, scratch4;
13486
13487  /* The generic case of the strlen expander is long.  Avoid expanding it
13488     unless TARGET_INLINE_ALL_STRINGOPS.  */
13489
13490 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13491 && !TARGET_INLINE_ALL_STRINGOPS
13492 && !optimize_size
13493 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13494 return 0;
13495
13496 addr = force_reg (Pmode, XEXP (src, 0));
13497 scratch1 = gen_reg_rtx (Pmode);
13498
13499 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13500 && !optimize_size)
13501 {
13502 /* Well it seems that some optimizer does not combine a call like
13503 foo(strlen(bar), strlen(bar));
13504	 when the move and the subtraction are done here.  It does calculate
13505 the length just once when these instructions are done inside of
13506 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13507 often used and I use one fewer register for the lifetime of
13508 output_strlen_unroll() this is better. */
13509
13510 emit_move_insn (out, addr);
13511
13512 ix86_expand_strlensi_unroll_1 (out, src, align);
13513
13514 /* strlensi_unroll_1 returns the address of the zero at the end of
13515 the string, like memchr(), so compute the length by subtracting
13516 the start address. */
13517 if (TARGET_64BIT)
13518 emit_insn (gen_subdi3 (out, out, addr));
13519 else
13520 emit_insn (gen_subsi3 (out, out, addr));
13521 }
13522 else
13523 {
13524 rtx unspec;
13525 scratch2 = gen_reg_rtx (Pmode);
13526 scratch3 = gen_reg_rtx (Pmode);
13527 scratch4 = force_reg (Pmode, constm1_rtx);
13528
13529 emit_move_insn (scratch3, addr);
13530 eoschar = force_reg (QImode, eoschar);
13531
13532 emit_insn (gen_cld ());
13533 src = replace_equiv_address_nv (src, scratch3);
13534
13535 /* If .md starts supporting :P, this can be done in .md. */
13536 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13537 scratch4), UNSPEC_SCAS);
13538 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13539 if (TARGET_64BIT)
13540 {
13541 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13542 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13543 }
13544 else
13545 {
13546 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13547 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13548 }
13549 }
13550 return 1;
13551}
13552
13553/* Expand the appropriate insns for doing strlen if not just doing
13554 repnz; scasb
13555
13556 out = result, initialized with the start address
13557 align_rtx = alignment of the address.
13558   scratch = scratch register, initialized with the start address when
13559 not aligned, otherwise undefined
13560
13561 This is just the body. It needs the initializations mentioned above and
13562 some address computing at the end. These things are done in i386.md. */
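/* In outline, and assuming nothing is known about the alignment, the
   expansion below behaves roughly like:

	cmpb $0, (out); je done; inc out	; up to three times, to align
     aligned:
	movl (out), scratch
	addl $4, out
	; test whether SCRATCH contains a zero byte (see the formula below)
	je aligned
	; step OUT back to the zero byte inside the last word loaded

   The register names are just the pseudos allocated in this function, and
   the final fix-up also has a branchless TARGET_CMOVE variant.  */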
13563
13564static void
13565ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13566{
13567 int align;
13568 rtx tmp;
13569 rtx align_2_label = NULL_RTX;
13570 rtx align_3_label = NULL_RTX;
13571 rtx align_4_label = gen_label_rtx ();
13572 rtx end_0_label = gen_label_rtx ();
13573 rtx mem;
13574 rtx tmpreg = gen_reg_rtx (SImode);
13575 rtx scratch = gen_reg_rtx (SImode);
13576 rtx cmp;
13577
13578 align = 0;
13579 if (GET_CODE (align_rtx) == CONST_INT)
13580 align = INTVAL (align_rtx);
13581
13582 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13583
13584 /* Is there a known alignment and is it less than 4? */
13585 if (align < 4)
13586 {
13587 rtx scratch1 = gen_reg_rtx (Pmode);
13588 emit_move_insn (scratch1, out);
13589 /* Is there a known alignment and is it not 2? */
13590 if (align != 2)
13591 {
13592 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13593 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13594
13595 /* Leave just the 3 lower bits. */
13596 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13597 NULL_RTX, 0, OPTAB_WIDEN);
13598
13599 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13600 Pmode, 1, align_4_label);
13601 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13602 Pmode, 1, align_2_label);
13603 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13604 Pmode, 1, align_3_label);
13605 }
13606 else
13607 {
13608	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
13609	     check whether it is aligned to a 4-byte boundary.  */
13610
13611 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13612 NULL_RTX, 0, OPTAB_WIDEN);
13613
13614 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13615 Pmode, 1, align_4_label);
13616 }
13617
13618 mem = change_address (src, QImode, out);
13619
13620 /* Now compare the bytes. */
13621
13622      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
13623 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13624 QImode, 1, end_0_label);
13625
13626 /* Increment the address. */
13627 if (TARGET_64BIT)
13628 emit_insn (gen_adddi3 (out, out, const1_rtx));
13629 else
13630 emit_insn (gen_addsi3 (out, out, const1_rtx));
13631
13632 /* Not needed with an alignment of 2 */
13633 if (align != 2)
13634 {
13635 emit_label (align_2_label);
13636
13637 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13638 end_0_label);
13639
13640 if (TARGET_64BIT)
13641 emit_insn (gen_adddi3 (out, out, const1_rtx));
13642 else
13643 emit_insn (gen_addsi3 (out, out, const1_rtx));
13644
13645 emit_label (align_3_label);
13646 }
13647
13648 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13649 end_0_label);
13650
13651 if (TARGET_64BIT)
13652 emit_insn (gen_adddi3 (out, out, const1_rtx));
13653 else
13654 emit_insn (gen_addsi3 (out, out, const1_rtx));
13655 }
13656
13657  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
13658     align this loop; that only makes the program larger and does not
13659     make it any faster.  */
13660 emit_label (align_4_label);
13661
13662 mem = change_address (src, SImode, out);
13663 emit_move_insn (scratch, mem);
13664 if (TARGET_64BIT)
13665 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13666 else
13667 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13668
13669 /* This formula yields a nonzero result iff one of the bytes is zero.
13670     This saves three branches inside the loop and many cycles.  */
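  /* The value computed is ((x - 0x01010101) & ~x) & 0x80808080.  A zero
     byte of x becomes 0xff in x - 0x01010101 and is 0xff in ~x, so its
     0x80 bit survives both ANDs.  Borrows can set spurious flags above
     the first zero byte, but never below it, so the lowest set 0x80 flag
     always marks the first zero byte (the load is little-endian, so that
     is the first string byte).  E.g. for x == 0x11002233 the result is
     0x00800000.  This is why the code below only needs to ask whether
     the zero lies in the low or the high halfword.  */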
13671
13672 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13673 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13674 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13675 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13676 gen_int_mode (0x80808080, SImode)));
13677 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13678 align_4_label);
13679
13680 if (TARGET_CMOVE)
13681 {
13682 rtx reg = gen_reg_rtx (SImode);
13683 rtx reg2 = gen_reg_rtx (Pmode);
13684 emit_move_insn (reg, tmpreg);
13685 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13686
13687 /* If zero is not in the first two bytes, move two bytes forward. */
13688 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13689 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13690 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13691 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13692 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13693 reg,
13694 tmpreg)));
13695 /* Emit lea manually to avoid clobbering of flags. */
13696 emit_insn (gen_rtx_SET (SImode, reg2,
13697 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13698
13699 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13700 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13701 emit_insn (gen_rtx_SET (VOIDmode, out,
13702 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13703 reg2,
13704 out)));
13705
13706 }
13707 else
13708 {
13709 rtx end_2_label = gen_label_rtx ();
13710 /* Is zero in the first two bytes? */
13711
13712 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13713 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13714 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13715 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13716 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13717 pc_rtx);
13718 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13719 JUMP_LABEL (tmp) = end_2_label;
13720
13721 /* Not in the first two. Move two bytes forward. */
13722 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13723 if (TARGET_64BIT)
13724 emit_insn (gen_adddi3 (out, out, const2_rtx));
13725 else
13726 emit_insn (gen_addsi3 (out, out, const2_rtx));
13727
13728 emit_label (end_2_label);
13729
13730 }
13731
13732 /* Avoid branch in fixing the byte. */
13733 tmpreg = gen_lowpart (QImode, tmpreg);
13734 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13735 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13736 if (TARGET_64BIT)
13737 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13738 else
13739 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13740
13741 emit_label (end_0_label);
13742}
13743
13744void
13745ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13746 rtx callarg2 ATTRIBUTE_UNUSED,
13747 rtx pop, int sibcall)
13748{
13749 rtx use = NULL, call;
13750
13751 if (pop == const0_rtx)
13752 pop = NULL;
13753 gcc_assert (!TARGET_64BIT || !pop);
13754
13755 if (TARGET_MACHO && !TARGET_64BIT)
13756 {
13757#if TARGET_MACHO
13758 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13759 fnaddr = machopic_indirect_call_target (fnaddr);
13760#endif
13761 }
13762 else
13763 {
13764 /* Static functions and indirect calls don't need the pic register. */
13765 if (! TARGET_64BIT && flag_pic
13766 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13767 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13768 use_reg (&use, pic_offset_table_rtx);
13769 }
13770
13771 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13772 {
13773 rtx al = gen_rtx_REG (QImode, 0);
13774 emit_move_insn (al, callarg2);
13775 use_reg (&use, al);
13776 }
13777
13778 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13779 {
13780 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13781 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13782 }
13783 if (sibcall && TARGET_64BIT
13784 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13785 {
13786 rtx addr;
13787 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13788 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13789 emit_move_insn (fnaddr, addr);
13790 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13791 }
13792
13793 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13794 if (retval)
13795 call = gen_rtx_SET (VOIDmode, retval, call);
13796 if (pop)
13797 {
13798 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13799 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13800 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13801 }
13802
13803 call = emit_call_insn (call);
13804 if (use)
13805 CALL_INSN_FUNCTION_USAGE (call) = use;
13806}
13807
13808
13809/* Clear stack slot assignments remembered from previous functions.
13810 This is called from INIT_EXPANDERS once before RTL is emitted for each
13811 function. */
13812
13813static struct machine_function *
13814ix86_init_machine_status (void)
13815{
13816 struct machine_function *f;
13817
13818 f = ggc_alloc_cleared (sizeof (struct machine_function));
13819 f->use_fast_prologue_epilogue_nregs = -1;
13820 f->tls_descriptor_call_expanded_p = 0;
13821
13822 return f;
13823}
13824
13825/* Return a MEM corresponding to a stack slot with mode MODE.
13826 Allocate a new slot if necessary.
13827
13828 The RTL for a function can have several slots available: N is
13829 which slot to use. */
13830
13831rtx
13832assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13833{
13834 struct stack_local_entry *s;
13835
13836 gcc_assert (n < MAX_386_STACK_LOCALS);
13837
13838 /* Virtual slot is valid only before vregs are instantiated. */
13839 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
13840
13841 for (s = ix86_stack_locals; s; s = s->next)
13842 if (s->mode == mode && s->n == n)
13843 return s->rtl;
13844
13845 s = (struct stack_local_entry *)
13846 ggc_alloc (sizeof (struct stack_local_entry));
13847 s->n = n;
13848 s->mode = mode;
13849 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13850
13851 s->next = ix86_stack_locals;
13852 ix86_stack_locals = s;
13853 return s->rtl;
13854}
13855
13856/* Construct the SYMBOL_REF for the tls_get_addr function. */
13857
13858static GTY(()) rtx ix86_tls_symbol;
13859rtx
13860ix86_tls_get_addr (void)
13861{
13862
13863 if (!ix86_tls_symbol)
13864 {
13865 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13866 (TARGET_ANY_GNU_TLS
13867 && !TARGET_64BIT)
13868 ? "___tls_get_addr"
13869 : "__tls_get_addr");
13870 }
13871
13872 return ix86_tls_symbol;
13873}
13874
13875/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13876
13877static GTY(()) rtx ix86_tls_module_base_symbol;
13878rtx
13879ix86_tls_module_base (void)
13880{
13881
13882 if (!ix86_tls_module_base_symbol)
13883 {
13884 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13885 "_TLS_MODULE_BASE_");
13886 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13887 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13888 }
13889
13890 return ix86_tls_module_base_symbol;
13891}
13892
13893/* Calculate the length of the memory address in the instruction
13894 encoding. Does not include the one-byte modrm, opcode, or prefix. */
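/* For example (the counts exclude the modrm byte itself):

	(%eax)			-> 0	plain register indirect
	(%esp)			-> 1	needs a SIB byte
	8(%ebp)			-> 1	disp8
	foo			-> 4	disp32 with no base or index
	12(%ebx,%esi,4)		-> 2	SIB byte plus disp8

   These figures follow from the rules below and are meant only as an
   illustration of what the function returns.  */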
13895
13896int
13897memory_address_length (rtx addr)
13898{
13899 struct ix86_address parts;
13900 rtx base, index, disp;
13901 int len;
13902 int ok;
13903
13904 if (GET_CODE (addr) == PRE_DEC
13905 || GET_CODE (addr) == POST_INC
13906 || GET_CODE (addr) == PRE_MODIFY
13907 || GET_CODE (addr) == POST_MODIFY)
13908 return 0;
13909
13910 ok = ix86_decompose_address (addr, &parts);
13911 gcc_assert (ok);
13912
13913 if (parts.base && GET_CODE (parts.base) == SUBREG)
13914 parts.base = SUBREG_REG (parts.base);
13915 if (parts.index && GET_CODE (parts.index) == SUBREG)
13916 parts.index = SUBREG_REG (parts.index);
13917
13918 base = parts.base;
13919 index = parts.index;
13920 disp = parts.disp;
13921 len = 0;
13922
13923 /* Rule of thumb:
13924 - esp as the base always wants an index,
13925 - ebp as the base always wants a displacement. */
13926
13927 /* Register Indirect. */
13928 if (base && !index && !disp)
13929 {
13930 /* esp (for its index) and ebp (for its displacement) need
13931 the two-byte modrm form. */
13932 if (addr == stack_pointer_rtx
13933 || addr == arg_pointer_rtx
13934 || addr == frame_pointer_rtx
13935 || addr == hard_frame_pointer_rtx)
13936 len = 1;
13937 }
13938
13939 /* Direct Addressing. */
13940 else if (disp && !base && !index)
13941 len = 4;
13942
13943 else
13944 {
13945 /* Find the length of the displacement constant. */
13946 if (disp)
13947 {
13948 if (base && satisfies_constraint_K (disp))
13949 len = 1;
13950 else
13951 len = 4;
13952 }
13953 /* ebp always wants a displacement. */
13954 else if (base == hard_frame_pointer_rtx)
13955 len = 1;
13956
13957 /* An index requires the two-byte modrm form.... */
13958 if (index
13959 /* ...like esp, which always wants an index. */
13960 || base == stack_pointer_rtx
13961 || base == arg_pointer_rtx
13962 || base == frame_pointer_rtx)
13963 len += 1;
13964 }
13965
13966 return len;
13967}
13968
13969/* Compute the default value for the "length_immediate" attribute.  When
13970   SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
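/* For example, addl $100000, %eax carries a 4-byte (MODE_SI) immediate,
   while with SHORTFORM set addl $4, %eax can use the sign-extended imm8
   alternative and only one byte is counted.  DImode immediates also count
   as four bytes, since they are encoded sign-extended from 32 bits.  */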
13971int
13972ix86_attr_length_immediate_default (rtx insn, int shortform)
13973{
13974 int len = 0;
13975 int i;
13976 extract_insn_cached (insn);
13977 for (i = recog_data.n_operands - 1; i >= 0; --i)
13978 if (CONSTANT_P (recog_data.operand[i]))
13979 {
13980 gcc_assert (!len);
13981 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13982 len = 1;
13983 else
13984 {
13985 switch (get_attr_mode (insn))
13986 {
13987 case MODE_QI:
13988 len+=1;
13989 break;
13990 case MODE_HI:
13991 len+=2;
13992 break;
13993 case MODE_SI:
13994 len+=4;
13995 break;
13996		  /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
13997 case MODE_DI:
13998 len+=4;
13999 break;
14000 default:
14001 fatal_insn ("unknown insn mode", insn);
14002 }
14003 }
14004 }
14005 return len;
14006}
14007/* Compute default value for "length_address" attribute. */
14008int
14009ix86_attr_length_address_default (rtx insn)
14010{
14011 int i;
14012
14013 if (get_attr_type (insn) == TYPE_LEA)
14014 {
14015 rtx set = PATTERN (insn);
14016
14017 if (GET_CODE (set) == PARALLEL)
14018 set = XVECEXP (set, 0, 0);
14019
14020 gcc_assert (GET_CODE (set) == SET);
14021
14022 return memory_address_length (SET_SRC (set));
14023 }
14024
14025 extract_insn_cached (insn);
14026 for (i = recog_data.n_operands - 1; i >= 0; --i)
14027 if (GET_CODE (recog_data.operand[i]) == MEM)
14028 {
14029 return memory_address_length (XEXP (recog_data.operand[i], 0));
14031 }
14032 return 0;
14033}
14034
14035/* Return the maximum number of instructions a cpu can issue. */
14036
14037static int
14038ix86_issue_rate (void)
14039{
14040 switch (ix86_tune)
14041 {
14042 case PROCESSOR_PENTIUM:
14043 case PROCESSOR_K6:
14044 return 2;
14045
14046 case PROCESSOR_PENTIUMPRO:
14047 case PROCESSOR_PENTIUM4:
14048 case PROCESSOR_ATHLON:
14049 case PROCESSOR_K8:
14050 case PROCESSOR_AMDFAM10:
14051 case PROCESSOR_NOCONA:
14052 case PROCESSOR_GENERIC32:
14053 case PROCESSOR_GENERIC64:
14054 return 3;
14055
14056 case PROCESSOR_CORE2:
14057 return 4;
14058
14059 default:
14060 return 1;
14061 }
14062}
14063
14064/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
14065   set by DEP_INSN and nothing else that DEP_INSN sets.  */
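/* For example, for the pair

	cmpl %eax, %ebx
	sete %cl

   the sete reads only the flags the compare sets, so this returns nonzero
   and ix86_adjust_cost can treat the two as a pairable, essentially free
   combination on Pentium.  If INSN also used a general register written
   by DEP_INSN, the value dependency would dominate and the answer would
   be zero.  */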
14066
14067static int
14068ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14069{
14070 rtx set, set2;
14071
14072 /* Simplify the test for uninteresting insns. */
14073 if (insn_type != TYPE_SETCC
14074 && insn_type != TYPE_ICMOV
14075 && insn_type != TYPE_FCMOV
14076 && insn_type != TYPE_IBR)
14077 return 0;
14078
14079 if ((set = single_set (dep_insn)) != 0)
14080 {
14081 set = SET_DEST (set);
14082 set2 = NULL_RTX;
14083 }
14084 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
14085 && XVECLEN (PATTERN (dep_insn), 0) == 2
14086 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
14087 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
14088 {
14089 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
14090      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
14091 }
14092 else
14093 return 0;
14094
14095 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
14096 return 0;
14097
14098 /* This test is true if the dependent insn reads the flags but
14099 not any other potentially set register. */
14100 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
14101 return 0;
14102
14103 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
14104 return 0;
14105
14106 return 1;
14107}
14108
14109/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14110 address with operands set by DEP_INSN. */
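/* The classic example on Pentium is

	addl $4, %ebx
	movl (%ebx), %eax

   where the load's address uses a register written by the immediately
   preceding instruction; ix86_adjust_cost charges one extra cycle for
   such an address generation interlock.  */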
14111
14112static int
14113ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14114{
14115 rtx addr;
14116
14117 if (insn_type == TYPE_LEA
14118 && TARGET_PENTIUM)
14119 {
14120 addr = PATTERN (insn);
14121
14122 if (GET_CODE (addr) == PARALLEL)
14123 addr = XVECEXP (addr, 0, 0);
14124
14125 gcc_assert (GET_CODE (addr) == SET);
14126
14127 addr = SET_SRC (addr);
14128 }
14129 else
14130 {
14131 int i;
14132 extract_insn_cached (insn);
14133 for (i = recog_data.n_operands - 1; i >= 0; --i)
14134 if (GET_CODE (recog_data.operand[i]) == MEM)
14135 {
14136 addr = XEXP (recog_data.operand[i], 0);
14137 goto found;
14138 }
14139 return 0;
14140 found:;
14141 }
14142
14143 return modified_in_p (addr, dep_insn);
14144}
14145
14146static int
14147ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
14148{
14149 enum attr_type insn_type, dep_insn_type;
14150 enum attr_memory memory;
14151 rtx set, set2;
14152 int dep_insn_code_number;
14153
14154 /* Anti and output dependencies have zero cost on all CPUs. */
14155 if (REG_NOTE_KIND (link) != 0)
14156 return 0;
14157
14158 dep_insn_code_number = recog_memoized (dep_insn);
14159
14160 /* If we can't recognize the insns, we can't really do anything. */
14161 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
14162 return cost;
14163
14164 insn_type = get_attr_type (insn);
14165 dep_insn_type = get_attr_type (dep_insn);
14166
14167 switch (ix86_tune)
14168 {
14169 case PROCESSOR_PENTIUM:
14170 /* Address Generation Interlock adds a cycle of latency. */
14171 if (ix86_agi_dependent (insn, dep_insn, insn_type))
14172 cost += 1;
14173
14174 /* ??? Compares pair with jump/setcc. */
14175 if (ix86_flags_dependent (insn, dep_insn, insn_type))
14176 cost = 0;
14177
14178 /* Floating point stores require value to be ready one cycle earlier. */
14179 if (insn_type == TYPE_FMOV
14180 && get_attr_memory (insn) == MEMORY_STORE
14181 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14182 cost += 1;
14183 break;
14184
14185 case PROCESSOR_PENTIUMPRO:
14186 memory = get_attr_memory (insn);
14187
14188 /* INT->FP conversion is expensive. */
14189 if (get_attr_fp_int_src (dep_insn))
14190 cost += 5;
14191
14192 /* There is one cycle extra latency between an FP op and a store. */
14193 if (insn_type == TYPE_FMOV
14194 && (set = single_set (dep_insn)) != NULL_RTX
14195 && (set2 = single_set (insn)) != NULL_RTX
14196 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
14197 && GET_CODE (SET_DEST (set2)) == MEM)
14198 cost += 1;
14199
14200      /* Account for the ability of the reorder buffer to hide the latency
14201	 of a load by executing it in parallel with the previous instruction,
14202	 when the previous instruction is not needed to compute the address.  */
14203 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14204 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14205 {
14206	  /* Claim moves to take one cycle, as the core can issue one load
14207	     at a time and the next load can start a cycle later.  */
14208 if (dep_insn_type == TYPE_IMOV
14209 || dep_insn_type == TYPE_FMOV)
14210 cost = 1;
14211 else if (cost > 1)
14212 cost--;
14213 }
14214 break;
14215
14216 case PROCESSOR_K6:
14217 memory = get_attr_memory (insn);
14218
14219 /* The esp dependency is resolved before the instruction is really
14220 finished. */
14221 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14222 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14223 return 1;
14224
14225 /* INT->FP conversion is expensive. */
14226 if (get_attr_fp_int_src (dep_insn))
14227 cost += 5;
14228
14229      /* Account for the ability of the reorder buffer to hide the latency
14230	 of a load by executing it in parallel with the previous instruction,
14231	 when the previous instruction is not needed to compute the address.  */
14232 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14233 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14234 {
14235	  /* Claim moves to take one cycle, as the core can issue one load
14236	     at a time and the next load can start a cycle later.  */
14237 if (dep_insn_type == TYPE_IMOV
14238 || dep_insn_type == TYPE_FMOV)
14239 cost = 1;
14240 else if (cost > 2)
14241 cost -= 2;
14242 else
14243 cost = 1;
14244 }
14245 break;
14246
14247 case PROCESSOR_ATHLON:
14248 case PROCESSOR_K8:
14249 case PROCESSOR_AMDFAM10:
14250 case PROCESSOR_GENERIC32:
14251 case PROCESSOR_GENERIC64:
14252 memory = get_attr_memory (insn);
14253
14254      /* Account for the ability of the reorder buffer to hide the latency
14255	 of a load by executing it in parallel with the previous instruction,
14256	 when the previous instruction is not needed to compute the address.  */
14257 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14258 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14259 {
14260 enum attr_unit unit = get_attr_unit (insn);
14261 int loadcost = 3;
14262
14263	  /* Because of the difference between the lengths of the integer and
14264	     floating-point unit pipeline preparation stages, the memory
14265	     operands for floating point are cheaper.
14266
14267	     ??? For Athlon the difference is most probably 2.  */
14268 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14269 loadcost = 3;
14270 else
14271 loadcost = TARGET_ATHLON ? 2 : 0;
14272
14273 if (cost >= loadcost)
14274 cost -= loadcost;
14275 else
14276 cost = 0;
14277	}
      break;
14278
14279 default:
14280 break;
14281 }
14282
14283 return cost;
14284}
14285
14286/* How many alternative schedules to try. This should be as wide as the
14287 scheduling freedom in the DFA, but no wider. Making this value too
14288   large results in extra work for the scheduler.  */
14289
14290static int
14291ia32_multipass_dfa_lookahead (void)
14292{
14293 if (ix86_tune == PROCESSOR_PENTIUM)
14294 return 2;
14295
14296 if (ix86_tune == PROCESSOR_PENTIUMPRO
14297 || ix86_tune == PROCESSOR_K6)
14298 return 1;
14299
14300 else
14301 return 0;
14302}
14303
14304
14305/* Compute the alignment given to a constant that is being placed in memory.
14306 EXP is the constant and ALIGN is the alignment that the object would
14307 ordinarily have.
14308 The value of this function is used instead of that alignment to align
14309 the object. */
14310
14311int
14312ix86_constant_alignment (tree exp, int align)
14313{
14314 if (TREE_CODE (exp) == REAL_CST)
14315 {
14316 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14317 return 64;
14318 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14319 return 128;
14320 }
14321 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14322 && !TARGET_NO_ALIGN_LONG_STRINGS
14323 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14324 return BITS_PER_WORD;
14325
14326 return align;
14327}
14328
14329/* Compute the alignment for a static variable.
14330 TYPE is the data type, and ALIGN is the alignment that
14331 the object would ordinarily have. The value of this function is used
14332 instead of that alignment to align the object. */
14333
14334int
14335ix86_data_alignment (tree type, int align)
14336{
14337 int max_align = optimize_size ? BITS_PER_WORD : 256;
14338
14339 if (AGGREGATE_TYPE_P (type)
14340 && TYPE_SIZE (type)
14341 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14342 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14343 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14344 && align < max_align)
14345 align = max_align;
14346
14347 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14348     to a 16-byte boundary.  */
14349 if (TARGET_64BIT)
14350 {
14351 if (AGGREGATE_TYPE_P (type)
14352 && TYPE_SIZE (type)
14353 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14354 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14355 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14356 return 128;
14357 }
14358
14359 if (TREE_CODE (type) == ARRAY_TYPE)
14360 {
14361 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14362 return 64;
14363 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14364 return 128;
14365 }
14366 else if (TREE_CODE (type) == COMPLEX_TYPE)
14367 {
14368
14369 if (TYPE_MODE (type) == DCmode && align < 64)
14370 return 64;
14371 if (TYPE_MODE (type) == XCmode && align < 128)
14372 return 128;
14373 }
14374 else if ((TREE_CODE (type) == RECORD_TYPE
14375 || TREE_CODE (type) == UNION_TYPE
14376 || TREE_CODE (type) == QUAL_UNION_TYPE)
14377 && TYPE_FIELDS (type))
14378 {
14379 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14380 return 64;
14381 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14382 return 128;
14383 }
14384 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14385 || TREE_CODE (type) == INTEGER_TYPE)
14386 {
14387 if (TYPE_MODE (type) == DFmode && align < 64)
14388 return 64;
14389 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14390 return 128;
14391 }
14392
14393 return align;
14394}
14395
14396/* Compute the alignment for a local variable.
14397 TYPE is the data type, and ALIGN is the alignment that
14398 the object would ordinarily have. The value of this macro is used
14399 instead of that alignment to align the object. */
14400
14401int
14402ix86_local_alignment (tree type, int align)
14403{
14404 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14405     to a 16-byte boundary.  */
14406 if (TARGET_64BIT)
14407 {
14408 if (AGGREGATE_TYPE_P (type)
14409 && TYPE_SIZE (type)
14410 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14411 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14412 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14413 return 128;
14414 }
14415 if (TREE_CODE (type) == ARRAY_TYPE)
14416 {
14417 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14418 return 64;
14419 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14420 return 128;
14421 }
14422 else if (TREE_CODE (type) == COMPLEX_TYPE)
14423 {
14424 if (TYPE_MODE (type) == DCmode && align < 64)
14425 return 64;
14426 if (TYPE_MODE (type) == XCmode && align < 128)
14427 return 128;
14428 }
14429 else if ((TREE_CODE (type) == RECORD_TYPE
14430 || TREE_CODE (type) == UNION_TYPE
14431 || TREE_CODE (type) == QUAL_UNION_TYPE)
14432 && TYPE_FIELDS (type))
14433 {
14434 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14435 return 64;
14436 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14437 return 128;
14438 }
14439 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14440 || TREE_CODE (type) == INTEGER_TYPE)
14441 {
14442
14443 if (TYPE_MODE (type) == DFmode && align < 64)
14444 return 64;
14445 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14446 return 128;
14447 }
14448 return align;
14449}
14450
14451/* Emit RTL insns to initialize the variable parts of a trampoline.
14452 FNADDR is an RTX for the address of the function's pure code.
14453 CXT is an RTX for the static chain value for the function. */
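/* For the !TARGET_64BIT case the 10-byte trampoline built below is

	b9 <cxt32>	movl  $CXT, %ecx
	e9 <disp32>	jmp   FNADDR

   where the jump displacement is taken relative to the end of the
   trampoline (tramp + 10).  The 64-bit variant instead materializes
   FNADDR in %r11 and CXT in %r10 with mov-immediate forms and finishes
   with jmp *%r11.  */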
14454void
14455x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14456{
14457 if (!TARGET_64BIT)
14458 {
14459 /* Compute offset from the end of the jmp to the target function. */
14460 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14461 plus_constant (tramp, 10),
14462 NULL_RTX, 1, OPTAB_DIRECT);
14463 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14464 gen_int_mode (0xb9, QImode));
14465 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14466 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14467 gen_int_mode (0xe9, QImode));
14468 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14469 }
14470 else
14471 {
14472 int offset = 0;
14473      /* Try to load the address using the shorter movl instead of movabs.
14474         We may want to support movq for kernel mode, but the kernel does not
14475         use trampolines at the moment.  */
14476 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14477 {
14478 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14479 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14480 gen_int_mode (0xbb41, HImode));
14481 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14482 gen_lowpart (SImode, fnaddr));
14483 offset += 6;
14484 }
14485 else
14486 {
14487 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14488 gen_int_mode (0xbb49, HImode));
14489 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14490 fnaddr);
14491 offset += 10;
14492 }
14493 /* Load static chain using movabs to r10. */
14494 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14495 gen_int_mode (0xba49, HImode));
14496 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14497 cxt);
14498 offset += 10;
14499      /* Jump to r11.  */
14500 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14501 gen_int_mode (0xff49, HImode));
14502 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14503 gen_int_mode (0xe3, QImode));
14504 offset += 3;
14505 gcc_assert (offset <= TRAMPOLINE_SIZE);
14506 }
14507
14508#ifdef ENABLE_EXECUTE_STACK
14509 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14510 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14511#endif
14512}
14513
14514/* Codes for all the SSE/MMX builtins. */
14515enum ix86_builtins
14516{
14517 IX86_BUILTIN_ADDPS,
14518 IX86_BUILTIN_ADDSS,
14519 IX86_BUILTIN_DIVPS,
14520 IX86_BUILTIN_DIVSS,
14521 IX86_BUILTIN_MULPS,
14522 IX86_BUILTIN_MULSS,
14523 IX86_BUILTIN_SUBPS,
14524 IX86_BUILTIN_SUBSS,
14525
14526 IX86_BUILTIN_CMPEQPS,
14527 IX86_BUILTIN_CMPLTPS,
14528 IX86_BUILTIN_CMPLEPS,
14529 IX86_BUILTIN_CMPGTPS,
14530 IX86_BUILTIN_CMPGEPS,
14531 IX86_BUILTIN_CMPNEQPS,
14532 IX86_BUILTIN_CMPNLTPS,
14533 IX86_BUILTIN_CMPNLEPS,
14534 IX86_BUILTIN_CMPNGTPS,
14535 IX86_BUILTIN_CMPNGEPS,
14536 IX86_BUILTIN_CMPORDPS,
14537 IX86_BUILTIN_CMPUNORDPS,
14538 IX86_BUILTIN_CMPEQSS,
14539 IX86_BUILTIN_CMPLTSS,
14540 IX86_BUILTIN_CMPLESS,
14541 IX86_BUILTIN_CMPNEQSS,
14542 IX86_BUILTIN_CMPNLTSS,
14543 IX86_BUILTIN_CMPNLESS,
14544 IX86_BUILTIN_CMPNGTSS,
14545 IX86_BUILTIN_CMPNGESS,
14546 IX86_BUILTIN_CMPORDSS,
14547 IX86_BUILTIN_CMPUNORDSS,
14548
14549 IX86_BUILTIN_COMIEQSS,
14550 IX86_BUILTIN_COMILTSS,
14551 IX86_BUILTIN_COMILESS,
14552 IX86_BUILTIN_COMIGTSS,
14553 IX86_BUILTIN_COMIGESS,
14554 IX86_BUILTIN_COMINEQSS,
14555 IX86_BUILTIN_UCOMIEQSS,
14556 IX86_BUILTIN_UCOMILTSS,
14557 IX86_BUILTIN_UCOMILESS,
14558 IX86_BUILTIN_UCOMIGTSS,
14559 IX86_BUILTIN_UCOMIGESS,
14560 IX86_BUILTIN_UCOMINEQSS,
14561
14562 IX86_BUILTIN_CVTPI2PS,
14563 IX86_BUILTIN_CVTPS2PI,
14564 IX86_BUILTIN_CVTSI2SS,
14565 IX86_BUILTIN_CVTSI642SS,
14566 IX86_BUILTIN_CVTSS2SI,
14567 IX86_BUILTIN_CVTSS2SI64,
14568 IX86_BUILTIN_CVTTPS2PI,
14569 IX86_BUILTIN_CVTTSS2SI,
14570 IX86_BUILTIN_CVTTSS2SI64,
14571
14572 IX86_BUILTIN_MAXPS,
14573 IX86_BUILTIN_MAXSS,
14574 IX86_BUILTIN_MINPS,
14575 IX86_BUILTIN_MINSS,
14576
14577 IX86_BUILTIN_LOADUPS,
14578 IX86_BUILTIN_STOREUPS,
14579 IX86_BUILTIN_MOVSS,
14580
14581 IX86_BUILTIN_MOVHLPS,
14582 IX86_BUILTIN_MOVLHPS,
14583 IX86_BUILTIN_LOADHPS,
14584 IX86_BUILTIN_LOADLPS,
14585 IX86_BUILTIN_STOREHPS,
14586 IX86_BUILTIN_STORELPS,
14587
14588 IX86_BUILTIN_MASKMOVQ,
14589 IX86_BUILTIN_MOVMSKPS,
14590 IX86_BUILTIN_PMOVMSKB,
14591
14592 IX86_BUILTIN_MOVNTPS,
14593 IX86_BUILTIN_MOVNTQ,
14594
14595 IX86_BUILTIN_LOADDQU,
14596 IX86_BUILTIN_STOREDQU,
14597
14598 IX86_BUILTIN_PACKSSWB,
14599 IX86_BUILTIN_PACKSSDW,
14600 IX86_BUILTIN_PACKUSWB,
14601
14602 IX86_BUILTIN_PADDB,
14603 IX86_BUILTIN_PADDW,
14604 IX86_BUILTIN_PADDD,
14605 IX86_BUILTIN_PADDQ,
14606 IX86_BUILTIN_PADDSB,
14607 IX86_BUILTIN_PADDSW,
14608 IX86_BUILTIN_PADDUSB,
14609 IX86_BUILTIN_PADDUSW,
14610 IX86_BUILTIN_PSUBB,
14611 IX86_BUILTIN_PSUBW,
14612 IX86_BUILTIN_PSUBD,
14613 IX86_BUILTIN_PSUBQ,
14614 IX86_BUILTIN_PSUBSB,
14615 IX86_BUILTIN_PSUBSW,
14616 IX86_BUILTIN_PSUBUSB,
14617 IX86_BUILTIN_PSUBUSW,
14618
14619 IX86_BUILTIN_PAND,
14620 IX86_BUILTIN_PANDN,
14621 IX86_BUILTIN_POR,
14622 IX86_BUILTIN_PXOR,
14623
14624 IX86_BUILTIN_PAVGB,
14625 IX86_BUILTIN_PAVGW,
14626
14627 IX86_BUILTIN_PCMPEQB,
14628 IX86_BUILTIN_PCMPEQW,
14629 IX86_BUILTIN_PCMPEQD,
14630 IX86_BUILTIN_PCMPGTB,
14631 IX86_BUILTIN_PCMPGTW,
14632 IX86_BUILTIN_PCMPGTD,
14633
14634 IX86_BUILTIN_PMADDWD,
14635
14636 IX86_BUILTIN_PMAXSW,
14637 IX86_BUILTIN_PMAXUB,
14638 IX86_BUILTIN_PMINSW,
14639 IX86_BUILTIN_PMINUB,
14640
14641 IX86_BUILTIN_PMULHUW,
14642 IX86_BUILTIN_PMULHW,
14643 IX86_BUILTIN_PMULLW,
14644
14645 IX86_BUILTIN_PSADBW,
14646 IX86_BUILTIN_PSHUFW,
14647
14648 IX86_BUILTIN_PSLLW,
14649 IX86_BUILTIN_PSLLD,
14650 IX86_BUILTIN_PSLLQ,
14651 IX86_BUILTIN_PSRAW,
14652 IX86_BUILTIN_PSRAD,
14653 IX86_BUILTIN_PSRLW,
14654 IX86_BUILTIN_PSRLD,
14655 IX86_BUILTIN_PSRLQ,
14656 IX86_BUILTIN_PSLLWI,
14657 IX86_BUILTIN_PSLLDI,
14658 IX86_BUILTIN_PSLLQI,
14659 IX86_BUILTIN_PSRAWI,
14660 IX86_BUILTIN_PSRADI,
14661 IX86_BUILTIN_PSRLWI,
14662 IX86_BUILTIN_PSRLDI,
14663 IX86_BUILTIN_PSRLQI,
14664
14665 IX86_BUILTIN_PUNPCKHBW,
14666 IX86_BUILTIN_PUNPCKHWD,
14667 IX86_BUILTIN_PUNPCKHDQ,
14668 IX86_BUILTIN_PUNPCKLBW,
14669 IX86_BUILTIN_PUNPCKLWD,
14670 IX86_BUILTIN_PUNPCKLDQ,
14671
14672 IX86_BUILTIN_SHUFPS,
14673
14674 IX86_BUILTIN_RCPPS,
14675 IX86_BUILTIN_RCPSS,
14676 IX86_BUILTIN_RSQRTPS,
14677 IX86_BUILTIN_RSQRTSS,
14678 IX86_BUILTIN_SQRTPS,
14679 IX86_BUILTIN_SQRTSS,
14680
14681 IX86_BUILTIN_UNPCKHPS,
14682 IX86_BUILTIN_UNPCKLPS,
14683
14684 IX86_BUILTIN_ANDPS,
14685 IX86_BUILTIN_ANDNPS,
14686 IX86_BUILTIN_ORPS,
14687 IX86_BUILTIN_XORPS,
14688
14689 IX86_BUILTIN_EMMS,
14690 IX86_BUILTIN_LDMXCSR,
14691 IX86_BUILTIN_STMXCSR,
14692 IX86_BUILTIN_SFENCE,
14693
14694 /* 3DNow! Original */
14695 IX86_BUILTIN_FEMMS,
14696 IX86_BUILTIN_PAVGUSB,
14697 IX86_BUILTIN_PF2ID,
14698 IX86_BUILTIN_PFACC,
14699 IX86_BUILTIN_PFADD,
14700 IX86_BUILTIN_PFCMPEQ,
14701 IX86_BUILTIN_PFCMPGE,
14702 IX86_BUILTIN_PFCMPGT,
14703 IX86_BUILTIN_PFMAX,
14704 IX86_BUILTIN_PFMIN,
14705 IX86_BUILTIN_PFMUL,
14706 IX86_BUILTIN_PFRCP,
14707 IX86_BUILTIN_PFRCPIT1,
14708 IX86_BUILTIN_PFRCPIT2,
14709 IX86_BUILTIN_PFRSQIT1,
14710 IX86_BUILTIN_PFRSQRT,
14711 IX86_BUILTIN_PFSUB,
14712 IX86_BUILTIN_PFSUBR,
14713 IX86_BUILTIN_PI2FD,
14714 IX86_BUILTIN_PMULHRW,
14715
14716 /* 3DNow! Athlon Extensions */
14717 IX86_BUILTIN_PF2IW,
14718 IX86_BUILTIN_PFNACC,
14719 IX86_BUILTIN_PFPNACC,
14720 IX86_BUILTIN_PI2FW,
14721 IX86_BUILTIN_PSWAPDSI,
14722 IX86_BUILTIN_PSWAPDSF,
14723
14724 /* SSE2 */
14725 IX86_BUILTIN_ADDPD,
14726 IX86_BUILTIN_ADDSD,
14727 IX86_BUILTIN_DIVPD,
14728 IX86_BUILTIN_DIVSD,
14729 IX86_BUILTIN_MULPD,
14730 IX86_BUILTIN_MULSD,
14731 IX86_BUILTIN_SUBPD,
14732 IX86_BUILTIN_SUBSD,
14733
14734 IX86_BUILTIN_CMPEQPD,
14735 IX86_BUILTIN_CMPLTPD,
14736 IX86_BUILTIN_CMPLEPD,
14737 IX86_BUILTIN_CMPGTPD,
14738 IX86_BUILTIN_CMPGEPD,
14739 IX86_BUILTIN_CMPNEQPD,
14740 IX86_BUILTIN_CMPNLTPD,
14741 IX86_BUILTIN_CMPNLEPD,
14742 IX86_BUILTIN_CMPNGTPD,
14743 IX86_BUILTIN_CMPNGEPD,
14744 IX86_BUILTIN_CMPORDPD,
14745 IX86_BUILTIN_CMPUNORDPD,
14746 IX86_BUILTIN_CMPNEPD,
14747 IX86_BUILTIN_CMPEQSD,
14748 IX86_BUILTIN_CMPLTSD,
14749 IX86_BUILTIN_CMPLESD,
14750 IX86_BUILTIN_CMPNEQSD,
14751 IX86_BUILTIN_CMPNLTSD,
14752 IX86_BUILTIN_CMPNLESD,
14753 IX86_BUILTIN_CMPORDSD,
14754 IX86_BUILTIN_CMPUNORDSD,
14755 IX86_BUILTIN_CMPNESD,
14756
14757 IX86_BUILTIN_COMIEQSD,
14758 IX86_BUILTIN_COMILTSD,
14759 IX86_BUILTIN_COMILESD,
14760 IX86_BUILTIN_COMIGTSD,
14761 IX86_BUILTIN_COMIGESD,
14762 IX86_BUILTIN_COMINEQSD,
14763 IX86_BUILTIN_UCOMIEQSD,
14764 IX86_BUILTIN_UCOMILTSD,
14765 IX86_BUILTIN_UCOMILESD,
14766 IX86_BUILTIN_UCOMIGTSD,
14767 IX86_BUILTIN_UCOMIGESD,
14768 IX86_BUILTIN_UCOMINEQSD,
14769
14770 IX86_BUILTIN_MAXPD,
14771 IX86_BUILTIN_MAXSD,
14772 IX86_BUILTIN_MINPD,
14773 IX86_BUILTIN_MINSD,
14774
14775 IX86_BUILTIN_ANDPD,
14776 IX86_BUILTIN_ANDNPD,
14777 IX86_BUILTIN_ORPD,
14778 IX86_BUILTIN_XORPD,
14779
14780 IX86_BUILTIN_SQRTPD,
14781 IX86_BUILTIN_SQRTSD,
14782
14783 IX86_BUILTIN_UNPCKHPD,
14784 IX86_BUILTIN_UNPCKLPD,
14785
14786 IX86_BUILTIN_SHUFPD,
14787
14788 IX86_BUILTIN_LOADUPD,
14789 IX86_BUILTIN_STOREUPD,
14790 IX86_BUILTIN_MOVSD,
14791
14792 IX86_BUILTIN_LOADHPD,
14793 IX86_BUILTIN_LOADLPD,
14794
14795 IX86_BUILTIN_CVTDQ2PD,
14796 IX86_BUILTIN_CVTDQ2PS,
14797
14798 IX86_BUILTIN_CVTPD2DQ,
14799 IX86_BUILTIN_CVTPD2PI,
14800 IX86_BUILTIN_CVTPD2PS,
14801 IX86_BUILTIN_CVTTPD2DQ,
14802 IX86_BUILTIN_CVTTPD2PI,
14803
14804 IX86_BUILTIN_CVTPI2PD,
14805 IX86_BUILTIN_CVTSI2SD,
14806 IX86_BUILTIN_CVTSI642SD,
14807
14808 IX86_BUILTIN_CVTSD2SI,
14809 IX86_BUILTIN_CVTSD2SI64,
14810 IX86_BUILTIN_CVTSD2SS,
14811 IX86_BUILTIN_CVTSS2SD,
14812 IX86_BUILTIN_CVTTSD2SI,
14813 IX86_BUILTIN_CVTTSD2SI64,
14814
14815 IX86_BUILTIN_CVTPS2DQ,
14816 IX86_BUILTIN_CVTPS2PD,
14817 IX86_BUILTIN_CVTTPS2DQ,
14818
14819 IX86_BUILTIN_MOVNTI,
14820 IX86_BUILTIN_MOVNTPD,
14821 IX86_BUILTIN_MOVNTDQ,
14822
14823 /* SSE2 MMX */
14824 IX86_BUILTIN_MASKMOVDQU,
14825 IX86_BUILTIN_MOVMSKPD,
14826 IX86_BUILTIN_PMOVMSKB128,
14827
14828 IX86_BUILTIN_PACKSSWB128,
14829 IX86_BUILTIN_PACKSSDW128,
14830 IX86_BUILTIN_PACKUSWB128,
14831
14832 IX86_BUILTIN_PADDB128,
14833 IX86_BUILTIN_PADDW128,
14834 IX86_BUILTIN_PADDD128,
14835 IX86_BUILTIN_PADDQ128,
14836 IX86_BUILTIN_PADDSB128,
14837 IX86_BUILTIN_PADDSW128,
14838 IX86_BUILTIN_PADDUSB128,
14839 IX86_BUILTIN_PADDUSW128,
14840 IX86_BUILTIN_PSUBB128,
14841 IX86_BUILTIN_PSUBW128,
14842 IX86_BUILTIN_PSUBD128,
14843 IX86_BUILTIN_PSUBQ128,
14844 IX86_BUILTIN_PSUBSB128,
14845 IX86_BUILTIN_PSUBSW128,
14846 IX86_BUILTIN_PSUBUSB128,
14847 IX86_BUILTIN_PSUBUSW128,
14848
14849 IX86_BUILTIN_PAND128,
14850 IX86_BUILTIN_PANDN128,
14851 IX86_BUILTIN_POR128,
14852 IX86_BUILTIN_PXOR128,
14853
14854 IX86_BUILTIN_PAVGB128,
14855 IX86_BUILTIN_PAVGW128,
14856
14857 IX86_BUILTIN_PCMPEQB128,
14858 IX86_BUILTIN_PCMPEQW128,
14859 IX86_BUILTIN_PCMPEQD128,
14860 IX86_BUILTIN_PCMPGTB128,
14861 IX86_BUILTIN_PCMPGTW128,
14862 IX86_BUILTIN_PCMPGTD128,
14863
14864 IX86_BUILTIN_PMADDWD128,
14865
14866 IX86_BUILTIN_PMAXSW128,
14867 IX86_BUILTIN_PMAXUB128,
14868 IX86_BUILTIN_PMINSW128,
14869 IX86_BUILTIN_PMINUB128,
14870
14871 IX86_BUILTIN_PMULUDQ,
14872 IX86_BUILTIN_PMULUDQ128,
14873 IX86_BUILTIN_PMULHUW128,
14874 IX86_BUILTIN_PMULHW128,
14875 IX86_BUILTIN_PMULLW128,
14876
14877 IX86_BUILTIN_PSADBW128,
14878 IX86_BUILTIN_PSHUFHW,
14879 IX86_BUILTIN_PSHUFLW,
14880 IX86_BUILTIN_PSHUFD,
14881
14882 IX86_BUILTIN_PSLLW128,
14883 IX86_BUILTIN_PSLLD128,
14884 IX86_BUILTIN_PSLLQ128,
14885 IX86_BUILTIN_PSRAW128,
14886 IX86_BUILTIN_PSRAD128,
14887 IX86_BUILTIN_PSRLW128,
14888 IX86_BUILTIN_PSRLD128,
14889 IX86_BUILTIN_PSRLQ128,
14890 IX86_BUILTIN_PSLLDQI128,
14891 IX86_BUILTIN_PSLLWI128,
14892 IX86_BUILTIN_PSLLDI128,
14893 IX86_BUILTIN_PSLLQI128,
14894 IX86_BUILTIN_PSRAWI128,
14895 IX86_BUILTIN_PSRADI128,
14896 IX86_BUILTIN_PSRLDQI128,
14897 IX86_BUILTIN_PSRLWI128,
14898 IX86_BUILTIN_PSRLDI128,
14899 IX86_BUILTIN_PSRLQI128,
14900
14901 IX86_BUILTIN_PUNPCKHBW128,
14902 IX86_BUILTIN_PUNPCKHWD128,
14903 IX86_BUILTIN_PUNPCKHDQ128,
14904 IX86_BUILTIN_PUNPCKHQDQ128,
14905 IX86_BUILTIN_PUNPCKLBW128,
14906 IX86_BUILTIN_PUNPCKLWD128,
14907 IX86_BUILTIN_PUNPCKLDQ128,
14908 IX86_BUILTIN_PUNPCKLQDQ128,
14909
14910 IX86_BUILTIN_CLFLUSH,
14911 IX86_BUILTIN_MFENCE,
14912 IX86_BUILTIN_LFENCE,
14913
14914 /* Prescott New Instructions. */
14915 IX86_BUILTIN_ADDSUBPS,
14916 IX86_BUILTIN_HADDPS,
14917 IX86_BUILTIN_HSUBPS,
14918 IX86_BUILTIN_MOVSHDUP,
14919 IX86_BUILTIN_MOVSLDUP,
14920 IX86_BUILTIN_ADDSUBPD,
14921 IX86_BUILTIN_HADDPD,
14922 IX86_BUILTIN_HSUBPD,
14923 IX86_BUILTIN_LDDQU,
14924
14925 IX86_BUILTIN_MONITOR,
14926 IX86_BUILTIN_MWAIT,
14927
14928 /* SSSE3. */
14929 IX86_BUILTIN_PHADDW,
14930 IX86_BUILTIN_PHADDD,
14931 IX86_BUILTIN_PHADDSW,
14932 IX86_BUILTIN_PHSUBW,
14933 IX86_BUILTIN_PHSUBD,
14934 IX86_BUILTIN_PHSUBSW,
14935 IX86_BUILTIN_PMADDUBSW,
14936 IX86_BUILTIN_PMULHRSW,
14937 IX86_BUILTIN_PSHUFB,
14938 IX86_BUILTIN_PSIGNB,
14939 IX86_BUILTIN_PSIGNW,
14940 IX86_BUILTIN_PSIGND,
14941 IX86_BUILTIN_PALIGNR,
14942 IX86_BUILTIN_PABSB,
14943 IX86_BUILTIN_PABSW,
14944 IX86_BUILTIN_PABSD,
14945
14946 IX86_BUILTIN_PHADDW128,
14947 IX86_BUILTIN_PHADDD128,
14948 IX86_BUILTIN_PHADDSW128,
14949 IX86_BUILTIN_PHSUBW128,
14950 IX86_BUILTIN_PHSUBD128,
14951 IX86_BUILTIN_PHSUBSW128,
14952 IX86_BUILTIN_PMADDUBSW128,
14953 IX86_BUILTIN_PMULHRSW128,
14954 IX86_BUILTIN_PSHUFB128,
14955 IX86_BUILTIN_PSIGNB128,
14956 IX86_BUILTIN_PSIGNW128,
14957 IX86_BUILTIN_PSIGND128,
14958 IX86_BUILTIN_PALIGNR128,
14959 IX86_BUILTIN_PABSB128,
14960 IX86_BUILTIN_PABSW128,
14961 IX86_BUILTIN_PABSD128,
14962
14963 /* AMDFAM10 - SSE4A New Instructions. */
14964 IX86_BUILTIN_MOVNTSD,
14965 IX86_BUILTIN_MOVNTSS,
14966 IX86_BUILTIN_EXTRQI,
14967 IX86_BUILTIN_EXTRQ,
14968 IX86_BUILTIN_INSERTQI,
14969 IX86_BUILTIN_INSERTQ,
14970
14971 IX86_BUILTIN_VEC_INIT_V2SI,
14972 IX86_BUILTIN_VEC_INIT_V4HI,
14973 IX86_BUILTIN_VEC_INIT_V8QI,
14974 IX86_BUILTIN_VEC_EXT_V2DF,
14975 IX86_BUILTIN_VEC_EXT_V2DI,
14976 IX86_BUILTIN_VEC_EXT_V4SF,
14977 IX86_BUILTIN_VEC_EXT_V4SI,
14978 IX86_BUILTIN_VEC_EXT_V8HI,
14979 IX86_BUILTIN_VEC_EXT_V16QI,
14980 IX86_BUILTIN_VEC_EXT_V2SI,
14981 IX86_BUILTIN_VEC_EXT_V4HI,
14982 IX86_BUILTIN_VEC_SET_V8HI,
14983 IX86_BUILTIN_VEC_SET_V4HI,
14984
14985 IX86_BUILTIN_MAX
14986};
14987
14988#define def_builtin(MASK, NAME, TYPE, CODE) \
14989do { \
14990 if ((MASK) & target_flags \
14991 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14992 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14993 NULL, NULL_TREE); \
14994} while (0)
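/* A typical use, as found in the builtin initialization further below,
   looks roughly like

     def_builtin (MASK_SSE, "__builtin_ia32_sqrtps",
		  v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);

   i.e. the builtin is registered only when the corresponding ISA bit is
   set in target_flags (and, for MASK_64BIT builtins, only when compiling
   for 64-bit).  The type node name here is illustrative; see
   ix86_init_mmx_sse_builtins for the actual calls.  */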
14995
14996/* Bits for builtin_description.flag. */
14997
14998/* Set when we don't support the comparison natively, and should
14999 swap_comparison in order to support it. */
15000#define BUILTIN_DESC_SWAP_OPERANDS 1
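/* For instance, SSE has no native "greater than" packed compare, so
   __builtin_ia32_cmpgtps is described in bdesc_2arg below as LT together
   with BUILTIN_DESC_SWAP_OPERANDS: a > b is expanded as b < a with the
   operands exchanged.  */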
15001
15002struct builtin_description
15003{
15004 const unsigned int mask;
15005 const enum insn_code icode;
15006 const char *const name;
15007 const enum ix86_builtins code;
15008 const enum rtx_code comparison;
15009 const unsigned int flag;
15010};
15011
15012static const struct builtin_description bdesc_comi[] =
15013{
15014 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
15015 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
15016 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
15017 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
15018 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
15019 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
15020 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
15021 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
15022 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
15023 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
15024 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
15025 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
15026 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
15027 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
15028 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
15029 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
15030 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
15031 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
15032 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
15033 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
15034 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
15035 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
15036 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
15037 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
15038};
15039
15040static const struct builtin_description bdesc_2arg[] =
15041{
15042 /* SSE */
15043 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
15044 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
15045 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
15046 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
15047 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
15048 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
15049 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
15050 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
15051
15052 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
15053 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
15054 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
15055 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
15056 BUILTIN_DESC_SWAP_OPERANDS },
15057 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
15058 BUILTIN_DESC_SWAP_OPERANDS },
15059 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
15060 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
15061 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
15062 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
15063 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
15064 BUILTIN_DESC_SWAP_OPERANDS },
15065 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
15066 BUILTIN_DESC_SWAP_OPERANDS },
15067 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
15068 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
15069 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
15070 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
15071 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
15072 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
15073 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
15074 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
15075 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
15076 BUILTIN_DESC_SWAP_OPERANDS },
15077 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
15078 BUILTIN_DESC_SWAP_OPERANDS },
15079 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
15080
15081 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
15082 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
15083 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
15084 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
15085
15086 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
15087 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
15088 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
15089 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
15090
15091 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
15092 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
15093 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
15094 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
15095 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
15096
15097 /* MMX */
15098 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
15099 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
15100 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
15101 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
15102 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
15103 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
15104 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
15105 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
15106
15107 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
15108 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
15109 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
15110 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
15111 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
15112 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
15113 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
15114 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
15115
15116 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
15117 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
15118 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
15119
15120 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
15121 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
15122 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
15123 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
15124
15125 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
15126 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
15127
15128 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
15129 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
15130 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
15131 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
15132 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
15133 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
15134
15135 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
15136 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
15137 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
15138 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
15139
15140 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
15141 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
15142 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
15143 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
15144 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
15145 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
15146
15147 /* Special. */
15148 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
15149 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
15150 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
15151
15152 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
15153 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
15154 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
15155
15156 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
15157 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
15158 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
15159 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
15160 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
15161 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
15162
15163 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
15164 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
15165 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
15166 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
15167 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
15168 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
15169
15170 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
15171 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
15172 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
15173 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
15174
15175 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
15176 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
15177
15178 /* SSE2 */
15179 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
15180 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
15181 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
15182 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
15183 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
15184 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
15185 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
15186 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
15187
15188 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
15189 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
15190 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
15191 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
15192 BUILTIN_DESC_SWAP_OPERANDS },
15193 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
15194 BUILTIN_DESC_SWAP_OPERANDS },
15195 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
15196 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
15197 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
15198 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
15199 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
15200 BUILTIN_DESC_SWAP_OPERANDS },
15201 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
15202 BUILTIN_DESC_SWAP_OPERANDS },
15203 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
15204 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
15205 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
15206 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
15207 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
15208 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
15209 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
15210 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
15211 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
15212
15213 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
15214 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
15215 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
15216 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
15217
15218 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
15219 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
15220 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
15221 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
15222
15223 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15224 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15225 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15226
15227 /* SSE2 MMX */
15228 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15229 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15230 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15231 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15232 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15233 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15234 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15235 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15236
15237 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15238 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15239 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15240 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15241 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15242 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15243 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15244 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15245
15246 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15247 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15248
15249 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15250 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15251 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15252 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15253
15254 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15255 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15256
15257 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15258 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15259 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15260 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15261 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15262 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15263
15264 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15265 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15266 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15267 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15268
15269 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15270 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15271 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15272 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15273 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15274 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15275 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15276 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15277
15278 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15279 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15280 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15281
15282 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15283 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15284
15285 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15286 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15287
15288 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15289 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15290 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15291
15292 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15293 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15294 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15295
15296 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15297 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15298
15299 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15300
15301 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15302 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15303 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15304 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15305
15306 /* SSE3 MMX */
15307 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15308 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15309 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15310 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15311 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15312 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15313
15314 /* SSSE3 */
15315 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15316 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15317 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15318 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15319 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15320 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15321 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15322 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15323 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15324 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15325 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15326 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15327 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15328 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15329 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15330 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15331 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15332 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15333 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15334 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15335 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15336 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15337 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15338 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15339};
15340
15341static const struct builtin_description bdesc_1arg[] =
15342{
15343 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15344 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15345
15346 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15347 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15348 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15349
15350 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15351 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15352 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15353 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15354 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15355 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15356
15357 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15358 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15359
15360 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15361
15362 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15363 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15364
15365 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15366 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15367 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15368 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15369 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15370
15371 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15372
15373 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15374 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15375 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15376 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15377
15378 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15379 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15380 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15381
15382 /* SSE3 */
15383 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15384 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
15385
15386 /* SSSE3 */
15387 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15388 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15389 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15390 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15391 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15392 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15393};
15394
15395static void
15396ix86_init_builtins (void)
15397{
15398 if (TARGET_MMX)
15399 ix86_init_mmx_sse_builtins ();
15400}
15401
15402/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15403   is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
15404   builtins are defined.  */
15405static void
15406ix86_init_mmx_sse_builtins (void)
15407{
15408 const struct builtin_description * d;
15409 size_t i;
15410
15411 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15411 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
15412 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15413 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15414 tree V2DI_type_node
15415 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15416 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15417 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15418 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15419 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15420 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15420 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
15421 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15422
15423 tree pchar_type_node = build_pointer_type (char_type_node);
15424 tree pcchar_type_node = build_pointer_type (
15425 build_type_variant (char_type_node, 1, 0));
15426 tree pfloat_type_node = build_pointer_type (float_type_node);
15427 tree pcfloat_type_node = build_pointer_type (
15428 build_type_variant (float_type_node, 1, 0));
15429 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15430 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15431 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15432
15433 /* Comparisons. */
15434 tree int_ftype_v4sf_v4sf
15435 = build_function_type_list (integer_type_node,
15436 V4SF_type_node, V4SF_type_node, NULL_TREE);
15437 tree v4si_ftype_v4sf_v4sf
15438 = build_function_type_list (V4SI_type_node,
15439 V4SF_type_node, V4SF_type_node, NULL_TREE);
15440 /* MMX/SSE/integer conversions. */
15441 tree int_ftype_v4sf
15442 = build_function_type_list (integer_type_node,
15443 V4SF_type_node, NULL_TREE);
15444 tree int64_ftype_v4sf
15445 = build_function_type_list (long_long_integer_type_node,
15446 V4SF_type_node, NULL_TREE);
15447 tree int_ftype_v8qi
15448 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15449 tree v4sf_ftype_v4sf_int
15450 = build_function_type_list (V4SF_type_node,
15451 V4SF_type_node, integer_type_node, NULL_TREE);
15452 tree v4sf_ftype_v4sf_int64
15453 = build_function_type_list (V4SF_type_node,
15454 V4SF_type_node, long_long_integer_type_node,
15455 NULL_TREE);
15456 tree v4sf_ftype_v4sf_v2si
15457 = build_function_type_list (V4SF_type_node,
15458 V4SF_type_node, V2SI_type_node, NULL_TREE);
15459
15460 /* Miscellaneous. */
15461 tree v8qi_ftype_v4hi_v4hi
15462 = build_function_type_list (V8QI_type_node,
15463 V4HI_type_node, V4HI_type_node, NULL_TREE);
15464 tree v4hi_ftype_v2si_v2si
15465 = build_function_type_list (V4HI_type_node,
15466 V2SI_type_node, V2SI_type_node, NULL_TREE);
15467 tree v4sf_ftype_v4sf_v4sf_int
15468 = build_function_type_list (V4SF_type_node,
15469 V4SF_type_node, V4SF_type_node,
15470 integer_type_node, NULL_TREE);
15471 tree v2si_ftype_v4hi_v4hi
15472 = build_function_type_list (V2SI_type_node,
15473 V4HI_type_node, V4HI_type_node, NULL_TREE);
15474 tree v4hi_ftype_v4hi_int
15475 = build_function_type_list (V4HI_type_node,
15476 V4HI_type_node, integer_type_node, NULL_TREE);
15477 tree v4hi_ftype_v4hi_di
15478 = build_function_type_list (V4HI_type_node,
15479 V4HI_type_node, long_long_unsigned_type_node,
15480 NULL_TREE);
15481 tree v2si_ftype_v2si_di
15482 = build_function_type_list (V2SI_type_node,
15483 V2SI_type_node, long_long_unsigned_type_node,
15484 NULL_TREE);
15485 tree void_ftype_void
15486 = build_function_type (void_type_node, void_list_node);
15487 tree void_ftype_unsigned
15488 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15489 tree void_ftype_unsigned_unsigned
15490 = build_function_type_list (void_type_node, unsigned_type_node,
15491 unsigned_type_node, NULL_TREE);
15492 tree void_ftype_pcvoid_unsigned_unsigned
15493 = build_function_type_list (void_type_node, const_ptr_type_node,
15494 unsigned_type_node, unsigned_type_node,
15495 NULL_TREE);
15496 tree unsigned_ftype_void
15497 = build_function_type (unsigned_type_node, void_list_node);
15498 tree v2si_ftype_v4sf
15499 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15500 /* Loads/stores. */
15501 tree void_ftype_v8qi_v8qi_pchar
15502 = build_function_type_list (void_type_node,
15503 V8QI_type_node, V8QI_type_node,
15504 pchar_type_node, NULL_TREE);
15505 tree v4sf_ftype_pcfloat
15506 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15507 /* @@@ the type is bogus */
15508 tree v4sf_ftype_v4sf_pv2si
15509 = build_function_type_list (V4SF_type_node,
15510 V4SF_type_node, pv2si_type_node, NULL_TREE);
15511 tree void_ftype_pv2si_v4sf
15512 = build_function_type_list (void_type_node,
15513 pv2si_type_node, V4SF_type_node, NULL_TREE);
15514 tree void_ftype_pfloat_v4sf
15515 = build_function_type_list (void_type_node,
15516 pfloat_type_node, V4SF_type_node, NULL_TREE);
15517 tree void_ftype_pdi_di
15518 = build_function_type_list (void_type_node,
15519 pdi_type_node, long_long_unsigned_type_node,
15520 NULL_TREE);
15521 tree void_ftype_pv2di_v2di
15522 = build_function_type_list (void_type_node,
15523 pv2di_type_node, V2DI_type_node, NULL_TREE);
15524 /* Normal vector unops. */
15525 tree v4sf_ftype_v4sf
15526 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15527 tree v16qi_ftype_v16qi
15528 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15529 tree v8hi_ftype_v8hi
15530 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15531 tree v4si_ftype_v4si
15532 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15533 tree v8qi_ftype_v8qi
15534 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15535 tree v4hi_ftype_v4hi
15536 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15537
15538 /* Normal vector binops. */
15539 tree v4sf_ftype_v4sf_v4sf
15540 = build_function_type_list (V4SF_type_node,
15541 V4SF_type_node, V4SF_type_node, NULL_TREE);
15542 tree v8qi_ftype_v8qi_v8qi
15543 = build_function_type_list (V8QI_type_node,
15544 V8QI_type_node, V8QI_type_node, NULL_TREE);
15545 tree v4hi_ftype_v4hi_v4hi
15546 = build_function_type_list (V4HI_type_node,
15547 V4HI_type_node, V4HI_type_node, NULL_TREE);
15548 tree v2si_ftype_v2si_v2si
15549 = build_function_type_list (V2SI_type_node,
15550 V2SI_type_node, V2SI_type_node, NULL_TREE);
15551 tree di_ftype_di_di
15552 = build_function_type_list (long_long_unsigned_type_node,
15553 long_long_unsigned_type_node,
15554 long_long_unsigned_type_node, NULL_TREE);
15555
15556 tree di_ftype_di_di_int
15557 = build_function_type_list (long_long_unsigned_type_node,
15558 long_long_unsigned_type_node,
15559 long_long_unsigned_type_node,
15560 integer_type_node, NULL_TREE);
15561
15562 tree v2si_ftype_v2sf
15563 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15564 tree v2sf_ftype_v2si
15565 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15566 tree v2si_ftype_v2si
15567 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15568 tree v2sf_ftype_v2sf
15569 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15570 tree v2sf_ftype_v2sf_v2sf
15571 = build_function_type_list (V2SF_type_node,
15572 V2SF_type_node, V2SF_type_node, NULL_TREE);
15573 tree v2si_ftype_v2sf_v2sf
15574 = build_function_type_list (V2SI_type_node,
15575 V2SF_type_node, V2SF_type_node, NULL_TREE);
15576 tree pint_type_node = build_pointer_type (integer_type_node);
15577 tree pdouble_type_node = build_pointer_type (double_type_node);
15578 tree pcdouble_type_node = build_pointer_type (
15579 build_type_variant (double_type_node, 1, 0));
15580 tree int_ftype_v2df_v2df
15581 = build_function_type_list (integer_type_node,
15582 V2DF_type_node, V2DF_type_node, NULL_TREE);
15583
15584 tree void_ftype_pcvoid
15585 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15586 tree v4sf_ftype_v4si
15587 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15588 tree v4si_ftype_v4sf
15589 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15590 tree v2df_ftype_v4si
15591 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15592 tree v4si_ftype_v2df
15593 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15594 tree v2si_ftype_v2df
15595 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15596 tree v4sf_ftype_v2df
15597 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15598 tree v2df_ftype_v2si
15599 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15600 tree v2df_ftype_v4sf
15601 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15602 tree int_ftype_v2df
15603 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15604 tree int64_ftype_v2df
15605 = build_function_type_list (long_long_integer_type_node,
15606 V2DF_type_node, NULL_TREE);
15607 tree v2df_ftype_v2df_int
15608 = build_function_type_list (V2DF_type_node,
15609 V2DF_type_node, integer_type_node, NULL_TREE);
15610 tree v2df_ftype_v2df_int64
15611 = build_function_type_list (V2DF_type_node,
15612 V2DF_type_node, long_long_integer_type_node,
15613 NULL_TREE);
15614 tree v4sf_ftype_v4sf_v2df
15615 = build_function_type_list (V4SF_type_node,
15616 V4SF_type_node, V2DF_type_node, NULL_TREE);
15617 tree v2df_ftype_v2df_v4sf
15618 = build_function_type_list (V2DF_type_node,
15619 V2DF_type_node, V4SF_type_node, NULL_TREE);
15620 tree v2df_ftype_v2df_v2df_int
15621 = build_function_type_list (V2DF_type_node,
15622 V2DF_type_node, V2DF_type_node,
15623 integer_type_node,
15624 NULL_TREE);
15625 tree v2df_ftype_v2df_pcdouble
15626 = build_function_type_list (V2DF_type_node,
15627 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15628 tree void_ftype_pdouble_v2df
15629 = build_function_type_list (void_type_node,
15630 pdouble_type_node, V2DF_type_node, NULL_TREE);
15631 tree void_ftype_pint_int
15632 = build_function_type_list (void_type_node,
15633 pint_type_node, integer_type_node, NULL_TREE);
15634 tree void_ftype_v16qi_v16qi_pchar
15635 = build_function_type_list (void_type_node,
15636 V16QI_type_node, V16QI_type_node,
15637 pchar_type_node, NULL_TREE);
15638 tree v2df_ftype_pcdouble
15639 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15640 tree v2df_ftype_v2df_v2df
15641 = build_function_type_list (V2DF_type_node,
15642 V2DF_type_node, V2DF_type_node, NULL_TREE);
15643 tree v16qi_ftype_v16qi_v16qi
15644 = build_function_type_list (V16QI_type_node,
15645 V16QI_type_node, V16QI_type_node, NULL_TREE);
15646 tree v8hi_ftype_v8hi_v8hi
15647 = build_function_type_list (V8HI_type_node,
15648 V8HI_type_node, V8HI_type_node, NULL_TREE);
15649 tree v4si_ftype_v4si_v4si
15650 = build_function_type_list (V4SI_type_node,
15651 V4SI_type_node, V4SI_type_node, NULL_TREE);
15652 tree v2di_ftype_v2di_v2di
15653 = build_function_type_list (V2DI_type_node,
15654 V2DI_type_node, V2DI_type_node, NULL_TREE);
15655 tree v2di_ftype_v2df_v2df
15656 = build_function_type_list (V2DI_type_node,
15657 V2DF_type_node, V2DF_type_node, NULL_TREE);
15658 tree v2df_ftype_v2df
15659 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15660 tree v2di_ftype_v2di_int
15661 = build_function_type_list (V2DI_type_node,
15662 V2DI_type_node, integer_type_node, NULL_TREE);
15663 tree v2di_ftype_v2di_v2di_int
15664 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15665 V2DI_type_node, integer_type_node, NULL_TREE);
15666 tree v4si_ftype_v4si_int
15667 = build_function_type_list (V4SI_type_node,
15668 V4SI_type_node, integer_type_node, NULL_TREE);
15669 tree v8hi_ftype_v8hi_int
15670 = build_function_type_list (V8HI_type_node,
15671 V8HI_type_node, integer_type_node, NULL_TREE);
15672 tree v4si_ftype_v8hi_v8hi
15673 = build_function_type_list (V4SI_type_node,
15674 V8HI_type_node, V8HI_type_node, NULL_TREE);
15675 tree di_ftype_v8qi_v8qi
15676 = build_function_type_list (long_long_unsigned_type_node,
15677 V8QI_type_node, V8QI_type_node, NULL_TREE);
15678 tree di_ftype_v2si_v2si
15679 = build_function_type_list (long_long_unsigned_type_node,
15680 V2SI_type_node, V2SI_type_node, NULL_TREE);
15681 tree v2di_ftype_v16qi_v16qi
15682 = build_function_type_list (V2DI_type_node,
15683 V16QI_type_node, V16QI_type_node, NULL_TREE);
15684 tree v2di_ftype_v4si_v4si
15685 = build_function_type_list (V2DI_type_node,
15686 V4SI_type_node, V4SI_type_node, NULL_TREE);
15687 tree int_ftype_v16qi
15688 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15689 tree v16qi_ftype_pcchar
15690 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15691 tree void_ftype_pchar_v16qi
15692 = build_function_type_list (void_type_node,
15693 pchar_type_node, V16QI_type_node, NULL_TREE);
15694
15695 tree v2di_ftype_v2di_unsigned_unsigned
15696 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15697 unsigned_type_node, unsigned_type_node,
15698 NULL_TREE);
15699 tree v2di_ftype_v2di_v2di_unsigned_unsigned
15700 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
15701 unsigned_type_node, unsigned_type_node,
15702 NULL_TREE);
15703 tree v2di_ftype_v2di_v16qi
15704 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
15705 NULL_TREE);
15706
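  /* Orientation note (added commentary): each FOO_ftype_BAR tree built above
     encodes a builtin prototype via build_function_type_list, with the return
     type first and the argument types following; e.g. v4sf_ftype_v4sf_v4sf
     describes a function taking two V4SF vectors and returning a V4SF
     vector.  */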
15707 tree float80_type;
15708 tree float128_type;
15709 tree ftype;
15710
15711 /* The __float80 type. */
15712 if (TYPE_MODE (long_double_type_node) == XFmode)
15713 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15714 "__float80");
15715 else
15716 {
15717       /* Construct __float80 as a distinct 80-bit REAL_TYPE.  */
15718 float80_type = make_node (REAL_TYPE);
15719 TYPE_PRECISION (float80_type) = 80;
15720 layout_type (float80_type);
15721 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15722 }
15723
15724 if (TARGET_64BIT)
15725 {
15726 float128_type = make_node (REAL_TYPE);
15727 TYPE_PRECISION (float128_type) = 128;
15728 layout_type (float128_type);
15729 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15730 }
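  /* Usage sketch (illustrative, not part of the original source): once these
     names are registered with the front end, user code can declare
     "__float80 e;" on any i386 target and "__float128 q;" when compiling for
     64-bit, since the __float128 type is only created under TARGET_64BIT
     above.  __float80 is simply long double when long double already has
     XFmode; otherwise it is laid out as a separate 80-bit REAL_TYPE.  */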
15731
15732 /* Add all builtins that are more or less simple operations on two
15733 operands. */
15734 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15735 {
15736 /* Use one of the operands; the target can have a different mode for
15737 mask-generating compares. */
15738 enum machine_mode mode;
15739 tree type;
15740
15741 if (d->name == 0)
15742 continue;
15743 mode = insn_data[d->icode].operand[1].mode;
15744
15745 switch (mode)
15746 {
15747 case V16QImode:
15748 type = v16qi_ftype_v16qi_v16qi;
15749 break;
15750 case V8HImode:
15751 type = v8hi_ftype_v8hi_v8hi;
15752 break;
15753 case V4SImode:
15754 type = v4si_ftype_v4si_v4si;
15755 break;
15756 case V2DImode:
15757 type = v2di_ftype_v2di_v2di;
15758 break;
15759 case V2DFmode:
15760 type = v2df_ftype_v2df_v2df;
15761 break;
15762 case V4SFmode:
15763 type = v4sf_ftype_v4sf_v4sf;
15764 break;
15765 case V8QImode:
15766 type = v8qi_ftype_v8qi_v8qi;
15767 break;
15768 case V4HImode:
15769 type = v4hi_ftype_v4hi_v4hi;
15770 break;
15771 case V2SImode:
15772 type = v2si_ftype_v2si_v2si;
15773 break;
15774 case DImode:
15775 type = di_ftype_di_di;
15776 break;
15777
15778 default:
15779 gcc_unreachable ();
15780 }
15781
15782 /* Override for comparisons. */
15783 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15784 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15785 type = v4si_ftype_v4sf_v4sf;
15786
15787 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15788 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15789 type = v2di_ftype_v2df_v2df;
15790
15791 def_builtin (d->mask, d->name, type, d->code);
15792 }
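  /* Worked example of the loop above (editorial note): for the
     "__builtin_ia32_addps" entry the insn pattern is addv4sf3, whose
     operand 1 has V4SFmode, so the builtin is registered with the
     v4sf_ftype_v4sf_v4sf prototype; the mask-generating compare entries are
     then overridden to return an integer vector type (v4si or v2di).  */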
15793
15794 /* Add all builtins that are more or less simple operations on 1 operand. */
15795 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15796 {
15797 enum machine_mode mode;
15798 tree type;
15799
15800 if (d->name == 0)
15801 continue;
15802 mode = insn_data[d->icode].operand[1].mode;
15803
15804 switch (mode)
15805 {
15806 case V16QImode:
15807 type = v16qi_ftype_v16qi;
15808 break;
15809 case V8HImode:
15810 type = v8hi_ftype_v8hi;
15811 break;
15812 case V4SImode:
15813 type = v4si_ftype_v4si;
15814 break;
15815 case V2DFmode:
15816 type = v2df_ftype_v2df;
15817 break;
15818 case V4SFmode:
15819 type = v4sf_ftype_v4sf;
15820 break;
15821 case V8QImode:
15822 type = v8qi_ftype_v8qi;
15823 break;
15824 case V4HImode:
15825 type = v4hi_ftype_v4hi;
15826 break;
15827 case V2SImode:
15828 type = v2si_ftype_v2si;
15829 break;
15830
15831 default:
15832	  gcc_unreachable ();
15833 }
15834
15835 def_builtin (d->mask, d->name, type, d->code);
15836 }
15837
15838 /* Add the remaining MMX insns with somewhat more complicated types. */
15839 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15840 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15841 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15842 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15843
15844 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15845 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15846 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15847
15848 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15849 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15850
15851 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15852 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15853
15854 /* comi/ucomi insns. */
15855 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15856 if (d->mask == MASK_SSE2)
15857 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15858 else
15859 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15860
15861 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15862 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15863 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15864
15865 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15866 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15867 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15868 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15869 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15870 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15871 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15872 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15873 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15874 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15875 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15876
15877 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15878
15879 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15880 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15881
15882 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15883 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15884 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15885 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15886
15887 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15888 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15889 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15890 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15891
15892 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15893
15894 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15895
15896 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15897 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15898 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15899 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15900 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15901 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15902
15903 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15904
15905 /* Original 3DNow! */
15906 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15907 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15908 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15909 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15910 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15911 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15912 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15913 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15914 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15915 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15916 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15917 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15918 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15919 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15920 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15921 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15922 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15923 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15924 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15925 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15926
15927 /* 3DNow! extension as used in the Athlon CPU. */
15928 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15929 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15930 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15931 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15932 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15933 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15934
15935 /* SSE2 */
15936 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15937
15938 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15939 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15940
15941 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15942 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15943
15944 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15945 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15946 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15947 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15948 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15949
15950 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15951 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15952 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15953 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15954
15955 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15956 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15957
15958 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15959
15960 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15961 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15962
15963 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15964 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15965 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15966 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15967 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15968
15969 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15970
15971 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15972 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15973 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15974 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15975
15976 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15977 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15978 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15979
15980 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15981 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15982 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15983 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15984
15985 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15986 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15987 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15988
15989 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15990 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15991
15992 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15993 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15994
15995 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
15996 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
15997 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15998
15999 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
16000 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
16001 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
16002
16003 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
16004 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
16005
16006 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
16007 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
16008 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
16009 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
16010
16011 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
16012 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
16013 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
16014 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
16015
16016 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
16017 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
16018
16019 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
16020
16021 /* Prescott New Instructions. */
16022 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
16023 void_ftype_pcvoid_unsigned_unsigned,
16024 IX86_BUILTIN_MONITOR);
16025 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
16026 void_ftype_unsigned_unsigned,
16027 IX86_BUILTIN_MWAIT);
16028 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
16029 v4sf_ftype_v4sf,
16030 IX86_BUILTIN_MOVSHDUP);
16031 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
16032 v4sf_ftype_v4sf,
16033 IX86_BUILTIN_MOVSLDUP);
16034 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
16035 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
16036
16037 /* SSSE3. */
16038 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
16039 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
16040 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
16041 IX86_BUILTIN_PALIGNR);
16042
16043	  /* AMDFAM10 SSE4A new built-ins.  */
16044 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
16045 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
16046 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
16047 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
16048 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
16049 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
16050 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
16051 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
16052 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
16053 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
16054 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
16055 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
16056
16057 /* Access to the vec_init patterns. */
16058 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
16059 integer_type_node, NULL_TREE);
16060 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
16061 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
16062
16063 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
16064 short_integer_type_node,
16065 short_integer_type_node,
16066 short_integer_type_node, NULL_TREE);
16067 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
16068 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
16069
16070 ftype = build_function_type_list (V8QI_type_node, char_type_node,
16071 char_type_node, char_type_node,
16072 char_type_node, char_type_node,
16073 char_type_node, char_type_node,
16074 char_type_node, NULL_TREE);
16075 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
16076 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
16077
16078 /* Access to the vec_extract patterns. */
16079 ftype = build_function_type_list (double_type_node, V2DF_type_node,
16080 integer_type_node, NULL_TREE);
16081 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
16082 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
16083
16084 ftype = build_function_type_list (long_long_integer_type_node,
16085 V2DI_type_node, integer_type_node,
16086 NULL_TREE);
16087 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
16088 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
16089
16090 ftype = build_function_type_list (float_type_node, V4SF_type_node,
16091 integer_type_node, NULL_TREE);
16092 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
16093 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
16094
16095 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16096 integer_type_node, NULL_TREE);
16097 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
16098 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
16099
16100 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16101 integer_type_node, NULL_TREE);
16102 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
16103 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
16104
16105 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
16106 integer_type_node, NULL_TREE);
16107 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
16108 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
16109
16110 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
16111 integer_type_node, NULL_TREE);
16112 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
16113 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
16114
16115 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
16116 integer_type_node, NULL_TREE);
16117 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
16118
16119 /* Access to the vec_set patterns. */
16120 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16121 intHI_type_node,
16122 integer_type_node, NULL_TREE);
16123 def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
16124 ftype, IX86_BUILTIN_VEC_SET_V8HI);
16125
16126 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
16127 intHI_type_node,
16128 integer_type_node, NULL_TREE);
16129 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
16130 ftype, IX86_BUILTIN_VEC_SET_V4HI);
16131}
16132
16133/* Errors in the source file can cause expand_expr to return const0_rtx
16134 where we expect a vector. To avoid crashing, use one of the vector
16135 clear instructions. */
16136static rtx
16137safe_vector_operand (rtx x, enum machine_mode mode)
16138{
16139 if (x == const0_rtx)
16140 x = CONST0_RTX (mode);
16141 return x;
16142}
16143
16144/* Subroutine of ix86_expand_builtin to take care of binop insns. */
16145
16146static rtx
16147ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
16148{
16149 rtx pat, xops[3];
16150 tree arg0 = TREE_VALUE (arglist);
16151 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16152 rtx op0 = expand_normal (arg0);
16153 rtx op1 = expand_normal (arg1);
16154 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16155 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16156 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16157
16158 if (VECTOR_MODE_P (mode0))
16159 op0 = safe_vector_operand (op0, mode0);
16160 if (VECTOR_MODE_P (mode1))
16161 op1 = safe_vector_operand (op1, mode1);
16162
16163 if (optimize || !target
16164 || GET_MODE (target) != tmode
16165 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16166 target = gen_reg_rtx (tmode);
16167
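  /* If the insn wants a TImode operand but the argument is only SImode,
     load it into the low element of an XMM register and view that
     register as TImode.  */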
16168 if (GET_MODE (op1) == SImode && mode1 == TImode)
16169 {
16170 rtx x = gen_reg_rtx (V4SImode);
16171 emit_insn (gen_sse2_loadd (x, op1));
16172 op1 = gen_lowpart (TImode, x);
16173 }
16174
16175 /* The insn must want input operands in the same modes as the
16176 result. */
16177 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
16178 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
16179
16180 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16181 op0 = copy_to_mode_reg (mode0, op0);
16182 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16183 op1 = copy_to_mode_reg (mode1, op1);
16184
16185 /* ??? Using ix86_fixup_binary_operands is problematic when
16186 we've got mismatched modes. Fake it. */
16187
16188 xops[0] = target;
16189 xops[1] = op0;
16190 xops[2] = op1;
16191
16192 if (tmode == mode0 && tmode == mode1)
16193 {
16194 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
16195 op0 = xops[1];
16196 op1 = xops[2];
16197 }
16198 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
16199 {
16200 op0 = force_reg (mode0, op0);
16201 op1 = force_reg (mode1, op1);
16202 target = gen_reg_rtx (tmode);
16203 }
16204
16205 pat = GEN_FCN (icode) (target, op0, op1);
16206 if (! pat)
16207 return 0;
16208 emit_insn (pat);
16209 return target;
16210}
16211
16212/* Subroutine of ix86_expand_builtin to take care of stores. */
16213
16214static rtx
16215ix86_expand_store_builtin (enum insn_code icode, tree arglist)
16216{
16217 rtx pat;
16218 tree arg0 = TREE_VALUE (arglist);
16219 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16220 rtx op0 = expand_normal (arg0);
16221 rtx op1 = expand_normal (arg1);
16222 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
16223 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
16224
16225 if (VECTOR_MODE_P (mode1))
16226 op1 = safe_vector_operand (op1, mode1);
16227
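  /* arg0 is the destination address; turn it into a MEM in the mode the
     store insn expects.  */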
16228 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16229 op1 = copy_to_mode_reg (mode1, op1);
16230
16231 pat = GEN_FCN (icode) (op0, op1);
16232 if (pat)
16233 emit_insn (pat);
16234 return 0;
16235}
16236
16237/* Subroutine of ix86_expand_builtin to take care of unop insns. */
16238
16239static rtx
16240ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16241 rtx target, int do_load)
16242{
16243 rtx pat;
16244 tree arg0 = TREE_VALUE (arglist);
16245 rtx op0 = expand_normal (arg0);
16246 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16247 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16248
16249 if (optimize || !target
16250 || GET_MODE (target) != tmode
16251 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16252 target = gen_reg_rtx (tmode);
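  /* When DO_LOAD is set, arg0 is a pointer and the operand is loaded
     from memory.  */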
16253 if (do_load)
16254 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16255 else
16256 {
16257 if (VECTOR_MODE_P (mode0))
16258 op0 = safe_vector_operand (op0, mode0);
16259
16260 if ((optimize && !register_operand (op0, mode0))
16261 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16262 op0 = copy_to_mode_reg (mode0, op0);
16263 }
16264
16265 pat = GEN_FCN (icode) (target, op0);
16266 if (! pat)
16267 return 0;
16268 emit_insn (pat);
16269 return target;
16270}
16271
16272/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16273 sqrtss, rsqrtss, rcpss. */
16274
16275static rtx
16276ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16277{
16278 rtx pat;
16279 tree arg0 = TREE_VALUE (arglist);
16280 rtx op1, op0 = expand_normal (arg0);
16281 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16282 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16283
16284 if (optimize || !target
16285 || GET_MODE (target) != tmode
16286 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16287 target = gen_reg_rtx (tmode);
16288
16289 if (VECTOR_MODE_P (mode0))
16290 op0 = safe_vector_operand (op0, mode0);
16291
16292 if ((optimize && !register_operand (op0, mode0))
16293 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16294 op0 = copy_to_mode_reg (mode0, op0);
16295
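  /* These patterns take the source twice: the second operand supplies
     the elements of the result that the scalar operation leaves
     untouched.  */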
16296 op1 = op0;
16297 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16298 op1 = copy_to_mode_reg (mode0, op1);
16299
16300 pat = GEN_FCN (icode) (target, op0, op1);
16301 if (! pat)
16302 return 0;
16303 emit_insn (pat);
16304 return target;
16305}
16306
16307/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16308
16309static rtx
16310ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16311 rtx target)
16312{
16313 rtx pat;
16314 tree arg0 = TREE_VALUE (arglist);
16315 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16316 rtx op0 = expand_normal (arg0);
16317 rtx op1 = expand_normal (arg1);
16318 rtx op2;
16319 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16320 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16321 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16322 enum rtx_code comparison = d->comparison;
16323
16324 if (VECTOR_MODE_P (mode0))
16325 op0 = safe_vector_operand (op0, mode0);
16326 if (VECTOR_MODE_P (mode1))
16327 op1 = safe_vector_operand (op1, mode1);
16328
16329 /* Swap operands if we have a comparison that isn't available in
16330 hardware. */
16331 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16332 {
16333 rtx tmp = gen_reg_rtx (mode1);
16334 emit_move_insn (tmp, op1);
16335 op1 = op0;
16336 op0 = tmp;
16337 }
16338
16339 if (optimize || !target
16340 || GET_MODE (target) != tmode
16341 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16342 target = gen_reg_rtx (tmode);
16343
16344 if ((optimize && !register_operand (op0, mode0))
16345 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16346 op0 = copy_to_mode_reg (mode0, op0);
16347 if ((optimize && !register_operand (op1, mode1))
16348 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16349 op1 = copy_to_mode_reg (mode1, op1);
16350
16351 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16352 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16353 if (! pat)
16354 return 0;
16355 emit_insn (pat);
16356 return target;
16357}
16358
16359/* Subroutine of ix86_expand_builtin to take care of comi insns. */
16360
16361static rtx
16362ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16363 rtx target)
16364{
16365 rtx pat;
16366 tree arg0 = TREE_VALUE (arglist);
16367 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16368 rtx op0 = expand_normal (arg0);
16369 rtx op1 = expand_normal (arg1);
16370 rtx op2;
16371 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16372 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16373 enum rtx_code comparison = d->comparison;
16374
16375 if (VECTOR_MODE_P (mode0))
16376 op0 = safe_vector_operand (op0, mode0);
16377 if (VECTOR_MODE_P (mode1))
16378 op1 = safe_vector_operand (op1, mode1);
16379
16380 /* Swap operands if we have a comparison that isn't available in
16381 hardware. */
16382 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16383 {
16384 rtx tmp = op1;
16385 op1 = op0;
16386 op0 = tmp;
16387 }
16388
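  /* The comi insn only sets the flags.  Materialize the boolean result
     by zeroing an SImode pseudo and then setting its low byte from the
     flags comparison emitted below.  */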
16389 target = gen_reg_rtx (SImode);
16390 emit_move_insn (target, const0_rtx);
16391 target = gen_rtx_SUBREG (QImode, target, 0);
16392
16393 if ((optimize && !register_operand (op0, mode0))
16394 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16395 op0 = copy_to_mode_reg (mode0, op0);
16396 if ((optimize && !register_operand (op1, mode1))
16397 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16398 op1 = copy_to_mode_reg (mode1, op1);
16399
16400 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16401 pat = GEN_FCN (d->icode) (op0, op1);
16402 if (! pat)
16403 return 0;
16404 emit_insn (pat);
16405 emit_insn (gen_rtx_SET (VOIDmode,
16406 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16407 gen_rtx_fmt_ee (comparison, QImode,
16408 SET_DEST (pat),
16409 const0_rtx)));
16410
16411 return SUBREG_REG (target);
16412}
16413
16414/* Return the integer constant in ARG. Constrain it to be in the range
16415 of the subparts of VEC_TYPE; issue an error if not. */
16416
16417static int
16418get_element_number (tree vec_type, tree arg)
16419{
16420 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16421
16422 if (!host_integerp (arg, 1)
16423 || (elt = tree_low_cst (arg, 1), elt > max))
16424 {
16425 error ("selector must be an integer constant in the range 0..%wi", max);
16426 return 0;
16427 }
16428
16429 return elt;
16430}
16431
16432/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16433 ix86_expand_vector_init. We DO have language-level syntax for this, in
16434 the form of (type){ init-list }. Except that since we can't place emms
16435 instructions from inside the compiler, we can't allow the use of MMX
16436 registers unless the user explicitly asks for it. So we do *not* define
16437 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16438	   we have builtins invoked by mmintrin.h that give us license to emit
16439 these sorts of instructions. */
16440
16441static rtx
16442ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16443{
16444 enum machine_mode tmode = TYPE_MODE (type);
16445 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16446 int i, n_elt = GET_MODE_NUNITS (tmode);
16447 rtvec v = rtvec_alloc (n_elt);
16448
16449 gcc_assert (VECTOR_MODE_P (tmode));
16450
16451 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16452 {
16453 rtx x = expand_normal (TREE_VALUE (arglist));
16454 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16455 }
16456
16457 gcc_assert (arglist == NULL);
16458
16459 if (!target || !register_operand (target, tmode))
16460 target = gen_reg_rtx (tmode);
16461
16462 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16463 return target;
16464}
16465
16466/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16467 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16468 had a language-level syntax for referencing vector elements. */
16469
16470static rtx
16471ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16472{
16473 enum machine_mode tmode, mode0;
16474 tree arg0, arg1;
16475 int elt;
16476 rtx op0;
16477
16478 arg0 = TREE_VALUE (arglist);
16479 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16480
16481 op0 = expand_normal (arg0);
16482 elt = get_element_number (TREE_TYPE (arg0), arg1);
16483
16484 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16485 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16486 gcc_assert (VECTOR_MODE_P (mode0));
16487
16488 op0 = force_reg (mode0, op0);
16489
16490 if (optimize || !target || !register_operand (target, tmode))
16491 target = gen_reg_rtx (tmode);
16492
16493 ix86_expand_vector_extract (true, target, op0, elt);
16494
16495 return target;
16496}
16497
16498/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16499 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16500 a language-level syntax for referencing vector elements. */
16501
16502static rtx
16503ix86_expand_vec_set_builtin (tree arglist)
16504{
16505 enum machine_mode tmode, mode1;
16506 tree arg0, arg1, arg2;
16507 int elt;
16508 rtx op0, op1, target;
16509
16510 arg0 = TREE_VALUE (arglist);
16511 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16512 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16513
16514 tmode = TYPE_MODE (TREE_TYPE (arg0));
16515 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16516 gcc_assert (VECTOR_MODE_P (tmode));
16517
16518 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16519 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16520 elt = get_element_number (TREE_TYPE (arg0), arg2);
16521
16522 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16523 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16524
16525 op0 = force_reg (tmode, op0);
16526 op1 = force_reg (mode1, op1);
16527
16528 /* OP0 is the source of these builtin functions and shouldn't be
16529 modified. Create a copy, use it and return it as target. */
16530 target = gen_reg_rtx (tmode);
16531 emit_move_insn (target, op0);
16532 ix86_expand_vector_set (true, target, op1, elt);
16533
16534 return target;
16535}
16536
16537/* Expand an expression EXP that calls a built-in function,
16538 with result going to TARGET if that's convenient
16539 (and in mode MODE if that's convenient).
16540 SUBTARGET may be used as the target for computing one of EXP's operands.
16541 IGNORE is nonzero if the value is to be ignored. */
16542
16543static rtx
16544ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16545 enum machine_mode mode ATTRIBUTE_UNUSED,
16546 int ignore ATTRIBUTE_UNUSED)
16547{
16548 const struct builtin_description *d;
16549 size_t i;
16550 enum insn_code icode;
16551 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16552 tree arglist = TREE_OPERAND (exp, 1);
16553 tree arg0, arg1, arg2, arg3;
16554 rtx op0, op1, op2, op3, pat;
16555 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
16556 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16557
16558 switch (fcode)
16559 {
16560 case IX86_BUILTIN_EMMS:
16561 emit_insn (gen_mmx_emms ());
16562 return 0;
16563
16564 case IX86_BUILTIN_SFENCE:
16565 emit_insn (gen_sse_sfence ());
16566 return 0;
16567
16568 case IX86_BUILTIN_MASKMOVQ:
16569 case IX86_BUILTIN_MASKMOVDQU:
16570 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16571 ? CODE_FOR_mmx_maskmovq
16572 : CODE_FOR_sse2_maskmovdqu);
16573 /* Note the arg order is different from the operand order. */
16574 arg1 = TREE_VALUE (arglist);
16575 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16576 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16577 op0 = expand_normal (arg0);
16578 op1 = expand_normal (arg1);
16579 op2 = expand_normal (arg2);
16580 mode0 = insn_data[icode].operand[0].mode;
16581 mode1 = insn_data[icode].operand[1].mode;
16582 mode2 = insn_data[icode].operand[2].mode;
16583
16584 op0 = force_reg (Pmode, op0);
16585 op0 = gen_rtx_MEM (mode1, op0);
16586
16587 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16588 op0 = copy_to_mode_reg (mode0, op0);
16589 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16590 op1 = copy_to_mode_reg (mode1, op1);
16591 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16592 op2 = copy_to_mode_reg (mode2, op2);
16593 pat = GEN_FCN (icode) (op0, op1, op2);
16594 if (! pat)
16595 return 0;
16596 emit_insn (pat);
16597 return 0;
16598
16599 case IX86_BUILTIN_SQRTSS:
16600 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16601 case IX86_BUILTIN_RSQRTSS:
16602 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16603 case IX86_BUILTIN_RCPSS:
16604 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16605
16606 case IX86_BUILTIN_LOADUPS:
16607 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16608
16609 case IX86_BUILTIN_STOREUPS:
16610 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16611
16612 case IX86_BUILTIN_LOADHPS:
16613 case IX86_BUILTIN_LOADLPS:
16614 case IX86_BUILTIN_LOADHPD:
16615 case IX86_BUILTIN_LOADLPD:
16616 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16617 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16618 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16619 : CODE_FOR_sse2_loadlpd);
16620 arg0 = TREE_VALUE (arglist);
16621 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16622 op0 = expand_normal (arg0);
16623 op1 = expand_normal (arg1);
16624 tmode = insn_data[icode].operand[0].mode;
16625 mode0 = insn_data[icode].operand[1].mode;
16626 mode1 = insn_data[icode].operand[2].mode;
16627
16628 op0 = force_reg (mode0, op0);
16629 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16630 if (optimize || target == 0
16631 || GET_MODE (target) != tmode
16632 || !register_operand (target, tmode))
16633 target = gen_reg_rtx (tmode);
16634 pat = GEN_FCN (icode) (target, op0, op1);
16635 if (! pat)
16636 return 0;
16637 emit_insn (pat);
16638 return target;
16639
16640 case IX86_BUILTIN_STOREHPS:
16641 case IX86_BUILTIN_STORELPS:
16642 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16643 : CODE_FOR_sse_storelps);
16644 arg0 = TREE_VALUE (arglist);
16645 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16646 op0 = expand_normal (arg0);
16647 op1 = expand_normal (arg1);
16648 mode0 = insn_data[icode].operand[0].mode;
16649 mode1 = insn_data[icode].operand[1].mode;
16650
16651 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16652 op1 = force_reg (mode1, op1);
16653
16654 pat = GEN_FCN (icode) (op0, op1);
16655 if (! pat)
16656 return 0;
16657 emit_insn (pat);
16658 return const0_rtx;
16659
16660 case IX86_BUILTIN_MOVNTPS:
16661 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16662 case IX86_BUILTIN_MOVNTQ:
16663 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16664
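    /* ldmxcsr and stmxcsr only operate on memory, so bounce the value
       through a stack slot.  */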
16665 case IX86_BUILTIN_LDMXCSR:
16666 op0 = expand_normal (TREE_VALUE (arglist));
16667 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16668 emit_move_insn (target, op0);
16669 emit_insn (gen_sse_ldmxcsr (target));
16670 return 0;
16671
16672 case IX86_BUILTIN_STMXCSR:
16673 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16674 emit_insn (gen_sse_stmxcsr (target));
16675 return copy_to_mode_reg (SImode, target);
16676
16677 case IX86_BUILTIN_SHUFPS:
16678 case IX86_BUILTIN_SHUFPD:
16679 icode = (fcode == IX86_BUILTIN_SHUFPS
16680 ? CODE_FOR_sse_shufps
16681 : CODE_FOR_sse2_shufpd);
16682 arg0 = TREE_VALUE (arglist);
16683 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16684 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16685 op0 = expand_normal (arg0);
16686 op1 = expand_normal (arg1);
16687 op2 = expand_normal (arg2);
16688 tmode = insn_data[icode].operand[0].mode;
16689 mode0 = insn_data[icode].operand[1].mode;
16690 mode1 = insn_data[icode].operand[2].mode;
16691 mode2 = insn_data[icode].operand[3].mode;
16692
16693 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16694 op0 = copy_to_mode_reg (mode0, op0);
16695 if ((optimize && !register_operand (op1, mode1))
16696 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16697 op1 = copy_to_mode_reg (mode1, op1);
16698 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16699 {
16700 /* @@@ better error message */
16701 error ("mask must be an immediate");
16702 return gen_reg_rtx (tmode);
16703 }
16704 if (optimize || target == 0
16705 || GET_MODE (target) != tmode
16706 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16707 target = gen_reg_rtx (tmode);
16708 pat = GEN_FCN (icode) (target, op0, op1, op2);
16709 if (! pat)
16710 return 0;
16711 emit_insn (pat);
16712 return target;
16713
16714 case IX86_BUILTIN_PSHUFW:
16715 case IX86_BUILTIN_PSHUFD:
16716 case IX86_BUILTIN_PSHUFHW:
16717 case IX86_BUILTIN_PSHUFLW:
16718 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16719 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16720 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16721 : CODE_FOR_mmx_pshufw);
16722 arg0 = TREE_VALUE (arglist);
16723 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16724 op0 = expand_normal (arg0);
16725 op1 = expand_normal (arg1);
16726 tmode = insn_data[icode].operand[0].mode;
16727 mode1 = insn_data[icode].operand[1].mode;
16728 mode2 = insn_data[icode].operand[2].mode;
16729
16730 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16731 op0 = copy_to_mode_reg (mode1, op0);
16732 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16733 {
16734 /* @@@ better error message */
16735 error ("mask must be an immediate");
16736 return const0_rtx;
16737 }
16738 if (target == 0
16739 || GET_MODE (target) != tmode
16740 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16741 target = gen_reg_rtx (tmode);
16742 pat = GEN_FCN (icode) (target, op0, op1);
16743 if (! pat)
16744 return 0;
16745 emit_insn (pat);
16746 return target;
16747
16748 case IX86_BUILTIN_PSLLWI128:
16749 icode = CODE_FOR_ashlv8hi3;
16750 goto do_pshifti;
16751 case IX86_BUILTIN_PSLLDI128:
16752 icode = CODE_FOR_ashlv4si3;
16753 goto do_pshifti;
16754 case IX86_BUILTIN_PSLLQI128:
16755 icode = CODE_FOR_ashlv2di3;
16756 goto do_pshifti;
16757 case IX86_BUILTIN_PSRAWI128:
16758 icode = CODE_FOR_ashrv8hi3;
16759 goto do_pshifti;
16760 case IX86_BUILTIN_PSRADI128:
16761 icode = CODE_FOR_ashrv4si3;
16762 goto do_pshifti;
16763 case IX86_BUILTIN_PSRLWI128:
16764 icode = CODE_FOR_lshrv8hi3;
16765 goto do_pshifti;
16766 case IX86_BUILTIN_PSRLDI128:
16767 icode = CODE_FOR_lshrv4si3;
16768 goto do_pshifti;
16769 case IX86_BUILTIN_PSRLQI128:
16770 icode = CODE_FOR_lshrv2di3;
16771 goto do_pshifti;
16772 do_pshifti:
16773 arg0 = TREE_VALUE (arglist);
16774 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16775 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16776 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16777
16778 if (GET_CODE (op1) != CONST_INT)
16779 {
16780 error ("shift must be an immediate");
16781 return const0_rtx;
16782 }
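      /* The insn encodes the count in 8 bits; map out-of-range counts to
         255, which behaves like any other over-large shift count.  */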
16783 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
16784 op1 = GEN_INT (255);
16785
16786 tmode = insn_data[icode].operand[0].mode;
16787 mode1 = insn_data[icode].operand[1].mode;
16788 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16789 op0 = copy_to_reg (op0);
16790
16791 target = gen_reg_rtx (tmode);
16792 pat = GEN_FCN (icode) (target, op0, op1);
16793 if (!pat)
16794 return 0;
16795 emit_insn (pat);
16796 return target;
16797
16798 case IX86_BUILTIN_PSLLW128:
16799 icode = CODE_FOR_ashlv8hi3;
16800 goto do_pshift;
16801 case IX86_BUILTIN_PSLLD128:
16802 icode = CODE_FOR_ashlv4si3;
16803 goto do_pshift;
16804 case IX86_BUILTIN_PSLLQ128:
16805 icode = CODE_FOR_ashlv2di3;
16806 goto do_pshift;
16807 case IX86_BUILTIN_PSRAW128:
16808 icode = CODE_FOR_ashrv8hi3;
16809 goto do_pshift;
16810 case IX86_BUILTIN_PSRAD128:
16811 icode = CODE_FOR_ashrv4si3;
16812 goto do_pshift;
16813 case IX86_BUILTIN_PSRLW128:
16814 icode = CODE_FOR_lshrv8hi3;
16815 goto do_pshift;
16816 case IX86_BUILTIN_PSRLD128:
16817 icode = CODE_FOR_lshrv4si3;
16818 goto do_pshift;
16819 case IX86_BUILTIN_PSRLQ128:
16820 icode = CODE_FOR_lshrv2di3;
16821 goto do_pshift;
16822 do_pshift:
16823 arg0 = TREE_VALUE (arglist);
16824 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16825 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16826 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16827
16828 tmode = insn_data[icode].operand[0].mode;
16829 mode1 = insn_data[icode].operand[1].mode;
16830
16831 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16832 op0 = copy_to_reg (op0);
16833
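      /* These patterns take the variable shift count as a TImode operand;
         reinterpret the count vector accordingly.  */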
16834 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
16835 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
16836 op1 = copy_to_reg (op1);
16837
16838 target = gen_reg_rtx (tmode);
16839 pat = GEN_FCN (icode) (target, op0, op1);
16840 if (!pat)
16841 return 0;
16842 emit_insn (pat);
16843 return target;
16844
16845 case IX86_BUILTIN_PSLLDQI128:
16846 case IX86_BUILTIN_PSRLDQI128:
16847 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16848 : CODE_FOR_sse2_lshrti3);
16849 arg0 = TREE_VALUE (arglist);
16850 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16851 op0 = expand_normal (arg0);
16852 op1 = expand_normal (arg1);
16853 tmode = insn_data[icode].operand[0].mode;
16854 mode1 = insn_data[icode].operand[1].mode;
16855 mode2 = insn_data[icode].operand[2].mode;
16856
16857 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16858 {
16859 op0 = copy_to_reg (op0);
16860 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16861 }
16862 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16863 {
16864 error ("shift must be an immediate");
16865 return const0_rtx;
16866 }
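      /* These byte-shift patterns compute in TImode; build the result in
         the builtin's V2DImode and hand the insn a TImode view of it.  */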
16867 target = gen_reg_rtx (V2DImode);
16868 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
16869 op0, op1);
16870 if (! pat)
16871 return 0;
16872 emit_insn (pat);
16873 return target;
16874
16875 case IX86_BUILTIN_FEMMS:
16876 emit_insn (gen_mmx_femms ());
16877 return NULL_RTX;
16878
16879 case IX86_BUILTIN_PAVGUSB:
16880 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16881
16882 case IX86_BUILTIN_PF2ID:
16883 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16884
16885 case IX86_BUILTIN_PFACC:
16886 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16887
16888 case IX86_BUILTIN_PFADD:
16889 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16890
16891 case IX86_BUILTIN_PFCMPEQ:
16892 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16893
16894 case IX86_BUILTIN_PFCMPGE:
16895 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16896
16897 case IX86_BUILTIN_PFCMPGT:
16898 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16899
16900 case IX86_BUILTIN_PFMAX:
16901 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16902
16903 case IX86_BUILTIN_PFMIN:
16904 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16905
16906 case IX86_BUILTIN_PFMUL:
16907 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16908
16909 case IX86_BUILTIN_PFRCP:
16910 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16911
16912 case IX86_BUILTIN_PFRCPIT1:
16913 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16914
16915 case IX86_BUILTIN_PFRCPIT2:
16916 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16917
16918 case IX86_BUILTIN_PFRSQIT1:
16919 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16920
16921 case IX86_BUILTIN_PFRSQRT:
16922 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16923
16924 case IX86_BUILTIN_PFSUB:
16925 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16926
16927 case IX86_BUILTIN_PFSUBR:
16928 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16929
16930 case IX86_BUILTIN_PI2FD:
16931 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16932
16933 case IX86_BUILTIN_PMULHRW:
16934 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16935
16936 case IX86_BUILTIN_PF2IW:
16937 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16938
16939 case IX86_BUILTIN_PFNACC:
16940 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16941
16942 case IX86_BUILTIN_PFPNACC:
16943 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16944
16945 case IX86_BUILTIN_PI2FW:
16946 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16947
16948 case IX86_BUILTIN_PSWAPDSI:
16949 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16950
16951 case IX86_BUILTIN_PSWAPDSF:
16952 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16953
16954 case IX86_BUILTIN_SQRTSD:
16955 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16956 case IX86_BUILTIN_LOADUPD:
16957 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16958 case IX86_BUILTIN_STOREUPD:
16959 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16960
16961 case IX86_BUILTIN_MFENCE:
16962 emit_insn (gen_sse2_mfence ());
16963 return 0;
16964 case IX86_BUILTIN_LFENCE:
16965 emit_insn (gen_sse2_lfence ());
16966 return 0;
16967
16968 case IX86_BUILTIN_CLFLUSH:
16969 arg0 = TREE_VALUE (arglist);
16970 op0 = expand_normal (arg0);
16971 icode = CODE_FOR_sse2_clflush;
16972 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16973 op0 = copy_to_mode_reg (Pmode, op0);
16974
16975 emit_insn (gen_sse2_clflush (op0));
16976 return 0;
16977
16978 case IX86_BUILTIN_MOVNTPD:
16979 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16980 case IX86_BUILTIN_MOVNTDQ:
16981 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16982 case IX86_BUILTIN_MOVNTI:
16983 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16984
16985 case IX86_BUILTIN_LOADDQU:
16986 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16987 case IX86_BUILTIN_STOREDQU:
16988 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16989
16990 case IX86_BUILTIN_MONITOR:
16991 arg0 = TREE_VALUE (arglist);
16992 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16993 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16994 op0 = expand_normal (arg0);
16995 op1 = expand_normal (arg1);
16996 op2 = expand_normal (arg2);
16997 if (!REG_P (op0))
16998 op0 = copy_to_mode_reg (Pmode, op0);
16999 if (!REG_P (op1))
17000 op1 = copy_to_mode_reg (SImode, op1);
17001 if (!REG_P (op2))
17002 op2 = copy_to_mode_reg (SImode, op2);
17003 if (!TARGET_64BIT)
17004 emit_insn (gen_sse3_monitor (op0, op1, op2));
17005 else
17006 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
17007 return 0;
17008
17009 case IX86_BUILTIN_MWAIT:
17010 arg0 = TREE_VALUE (arglist);
17011 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17012 op0 = expand_normal (arg0);
17013 op1 = expand_normal (arg1);
17014 if (!REG_P (op0))
17015 op0 = copy_to_mode_reg (SImode, op0);
17016 if (!REG_P (op1))
17017 op1 = copy_to_mode_reg (SImode, op1);
17018 emit_insn (gen_sse3_mwait (op0, op1));
17019 return 0;
17020
17021 case IX86_BUILTIN_LDDQU:
17022 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
17023 target, 1);
17024
17025 case IX86_BUILTIN_PALIGNR:
17026 case IX86_BUILTIN_PALIGNR128:
17027 if (fcode == IX86_BUILTIN_PALIGNR)
17028 {
17029 icode = CODE_FOR_ssse3_palignrdi;
17030 mode = DImode;
17031 }
17032 else
17033 {
17034 icode = CODE_FOR_ssse3_palignrti;
17035 mode = V2DImode;
17036 }
17037 arg0 = TREE_VALUE (arglist);
17038 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17039 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17040 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
17041 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
17042 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
17043 tmode = insn_data[icode].operand[0].mode;
17044 mode1 = insn_data[icode].operand[1].mode;
17045 mode2 = insn_data[icode].operand[2].mode;
17046 mode3 = insn_data[icode].operand[3].mode;
17047
17048 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17049 {
17050 op0 = copy_to_reg (op0);
17051 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17052 }
17053 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17054 {
17055 op1 = copy_to_reg (op1);
17056 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
17057 }
17058 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17059 {
17060 error ("shift must be an immediate");
17061 return const0_rtx;
17062 }
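      /* palignr computes in DImode or TImode; build the result in the
         builtin's return mode and hand the insn a subreg view of it.  */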
17063 target = gen_reg_rtx (mode);
17064 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
17065 op0, op1, op2);
17066 if (! pat)
17067 return 0;
17068 emit_insn (pat);
17069 return target;
17070
17071 case IX86_BUILTIN_MOVNTSD:
17072 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
17073
17074 case IX86_BUILTIN_MOVNTSS:
17075 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
17076
17077 case IX86_BUILTIN_INSERTQ:
17078 case IX86_BUILTIN_EXTRQ:
17079 icode = (fcode == IX86_BUILTIN_EXTRQ
17080 ? CODE_FOR_sse4a_extrq
17081 : CODE_FOR_sse4a_insertq);
17082 arg0 = TREE_VALUE (arglist);
17083 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17084 op0 = expand_normal (arg0);
17085 op1 = expand_normal (arg1);
17086 tmode = insn_data[icode].operand[0].mode;
17087 mode1 = insn_data[icode].operand[1].mode;
17088 mode2 = insn_data[icode].operand[2].mode;
17089 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17090 op0 = copy_to_mode_reg (mode1, op0);
17091 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17092 op1 = copy_to_mode_reg (mode2, op1);
17093 if (optimize || target == 0
17094 || GET_MODE (target) != tmode
17095 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17096 target = gen_reg_rtx (tmode);
17097 pat = GEN_FCN (icode) (target, op0, op1);
17098 if (! pat)
17099 return NULL_RTX;
17100 emit_insn (pat);
17101 return target;
17102
17103 case IX86_BUILTIN_EXTRQI:
17104 icode = CODE_FOR_sse4a_extrqi;
17105 arg0 = TREE_VALUE (arglist);
17106 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17107 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17108 op0 = expand_normal (arg0);
17109 op1 = expand_normal (arg1);
17110 op2 = expand_normal (arg2);
17111 tmode = insn_data[icode].operand[0].mode;
17112 mode1 = insn_data[icode].operand[1].mode;
17113 mode2 = insn_data[icode].operand[2].mode;
17114 mode3 = insn_data[icode].operand[3].mode;
17115 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17116 op0 = copy_to_mode_reg (mode1, op0);
17117 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17118 {
17119 error ("index mask must be an immediate");
17120 return gen_reg_rtx (tmode);
17121 }
17122 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17123 {
17124 error ("length mask must be an immediate");
17125 return gen_reg_rtx (tmode);
17126 }
17127 if (optimize || target == 0
17128 || GET_MODE (target) != tmode
17129 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17130 target = gen_reg_rtx (tmode);
17131 pat = GEN_FCN (icode) (target, op0, op1, op2);
17132 if (! pat)
17133 return NULL_RTX;
17134 emit_insn (pat);
17135 return target;
17136
17137 case IX86_BUILTIN_INSERTQI:
17138 icode = CODE_FOR_sse4a_insertqi;
17139 arg0 = TREE_VALUE (arglist);
17140 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17141 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17142 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
17143 op0 = expand_normal (arg0);
17144 op1 = expand_normal (arg1);
17145 op2 = expand_normal (arg2);
17146 op3 = expand_normal (arg3);
17147 tmode = insn_data[icode].operand[0].mode;
17148 mode1 = insn_data[icode].operand[1].mode;
17149 mode2 = insn_data[icode].operand[2].mode;
17150 mode3 = insn_data[icode].operand[3].mode;
17151 mode4 = insn_data[icode].operand[4].mode;
17152
17153 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17154 op0 = copy_to_mode_reg (mode1, op0);
17155
17156 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17157 op1 = copy_to_mode_reg (mode2, op1);
17158
17159 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17160 {
17161 error ("index mask must be an immediate");
17162 return gen_reg_rtx (tmode);
17163 }
17164 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
17165 {
17166 error ("length mask must be an immediate");
17167 return gen_reg_rtx (tmode);
17168 }
17169 if (optimize || target == 0
17170 || GET_MODE (target) != tmode
17171 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17172 target = gen_reg_rtx (tmode);
17173 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
17174 if (! pat)
17175 return NULL_RTX;
17176 emit_insn (pat);
17177 return target;
17178
17179 case IX86_BUILTIN_VEC_INIT_V2SI:
17180 case IX86_BUILTIN_VEC_INIT_V4HI:
17181 case IX86_BUILTIN_VEC_INIT_V8QI:
17182 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
17183
17184 case IX86_BUILTIN_VEC_EXT_V2DF:
17185 case IX86_BUILTIN_VEC_EXT_V2DI:
17186 case IX86_BUILTIN_VEC_EXT_V4SF:
17187 case IX86_BUILTIN_VEC_EXT_V4SI:
17188 case IX86_BUILTIN_VEC_EXT_V8HI:
17189 case IX86_BUILTIN_VEC_EXT_V16QI:
17190 case IX86_BUILTIN_VEC_EXT_V2SI:
17191 case IX86_BUILTIN_VEC_EXT_V4HI:
17192 return ix86_expand_vec_ext_builtin (arglist, target);
17193
17194 case IX86_BUILTIN_VEC_SET_V8HI:
17195 case IX86_BUILTIN_VEC_SET_V4HI:
17196 return ix86_expand_vec_set_builtin (arglist);
17197
17198 default:
17199 break;
17200 }
17201
17202 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17203 if (d->code == fcode)
17204 {
17205 /* Compares are treated specially. */
17206 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17207 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
17208 || d->icode == CODE_FOR_sse2_maskcmpv2df3
17209 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17210 return ix86_expand_sse_compare (d, arglist, target);
17211
17212 return ix86_expand_binop_builtin (d->icode, arglist, target);
17213 }
17214
17215 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17216 if (d->code == fcode)
17217 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
17218
17219 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17220 if (d->code == fcode)
17221 return ix86_expand_sse_comi (d, arglist, target);
17222
17223 gcc_unreachable ();
17224}
17225
17226/* Store OPERAND to memory after reload is completed.  This means
17227 that we can't easily use assign_stack_local. */
17228rtx
17229ix86_force_to_memory (enum machine_mode mode, rtx operand)
17230{
17231 rtx result;
17232
17233 gcc_assert (reload_completed);
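  /* With a red zone we can simply store below the stack pointer without
     adjusting it.  */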
17234 if (TARGET_RED_ZONE)
17235 {
17236 result = gen_rtx_MEM (mode,
17237 gen_rtx_PLUS (Pmode,
17238 stack_pointer_rtx,
17239 GEN_INT (-RED_ZONE_SIZE)));
17240 emit_move_insn (result, operand);
17241 }
17242 else if (!TARGET_RED_ZONE && TARGET_64BIT)
17243 {
17244 switch (mode)
17245 {
17246 case HImode:
17247 case SImode:
17248 operand = gen_lowpart (DImode, operand);
17249 /* FALLTHRU */
17250 case DImode:
17251 emit_insn (
17252 gen_rtx_SET (VOIDmode,
17253 gen_rtx_MEM (DImode,
17254 gen_rtx_PRE_DEC (DImode,
17255 stack_pointer_rtx)),
17256 operand));
17257 break;
17258 default:
17259 gcc_unreachable ();
17260 }
17261 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17262 }
17263 else
17264 {
17265 switch (mode)
17266 {
17267 case DImode:
17268 {
17269 rtx operands[2];
17270 split_di (&operand, 1, operands, operands + 1);
17271 emit_insn (
17272 gen_rtx_SET (VOIDmode,
17273 gen_rtx_MEM (SImode,
17274 gen_rtx_PRE_DEC (Pmode,
17275 stack_pointer_rtx)),
17276 operands[1]));
17277 emit_insn (
17278 gen_rtx_SET (VOIDmode,
17279 gen_rtx_MEM (SImode,
17280 gen_rtx_PRE_DEC (Pmode,
17281 stack_pointer_rtx)),
17282 operands[0]));
17283 }
17284 break;
17285 case HImode:
17286 /* Store HImodes as SImodes. */
17287 operand = gen_lowpart (SImode, operand);
17288 /* FALLTHRU */
17289 case SImode:
17290 emit_insn (
17291 gen_rtx_SET (VOIDmode,
17292 gen_rtx_MEM (GET_MODE (operand),
17293 gen_rtx_PRE_DEC (SImode,
17294 stack_pointer_rtx)),
17295 operand));
17296 break;
17297 default:
17298 gcc_unreachable ();
17299 }
17300 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17301 }
17302 return result;
17303}
17304
17305/* Free the operand from memory. */
17306void
17307ix86_free_from_memory (enum machine_mode mode)
17308{
17309 if (!TARGET_RED_ZONE)
17310 {
17311 int size;
17312
17313 if (mode == DImode || TARGET_64BIT)
17314 size = 8;
17315 else
17316 size = 4;
17317 /* Use LEA to deallocate stack space. In peephole2 it will be converted
17318	         to a pop or add instruction if registers are available. */
17319 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17320 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17321 GEN_INT (size))));
17322 }
17323}
17324
17325/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
17326 QImode must go into class Q_REGS.
17327 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
17328 movdf to do mem-to-mem moves through integer regs. */
17329enum reg_class
17330ix86_preferred_reload_class (rtx x, enum reg_class class)
17331{
17332 enum machine_mode mode = GET_MODE (x);
17333
17334 /* We're only allowed to return a subclass of CLASS. Many of the
17335 following checks fail for NO_REGS, so eliminate that early. */
17336 if (class == NO_REGS)
17337 return NO_REGS;
17338
17339 /* All classes can load zeros. */
17340 if (x == CONST0_RTX (mode))
17341 return class;
17342
17343 /* Force constants into memory if we are loading a (nonzero) constant into
17344 an MMX or SSE register. This is because there are no MMX/SSE instructions
17345 to load from a constant. */
17346 if (CONSTANT_P (x)
17347 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
17348 return NO_REGS;
17349
17350 /* Prefer SSE regs only, if we can use them for math. */
17351 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
17352 return SSE_CLASS_P (class) ? class : NO_REGS;
17353
17354 /* Floating-point constants need more complex checks. */
17355 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
17356 {
17357 /* General regs can load everything. */
17358 if (reg_class_subset_p (class, GENERAL_REGS))
17359 return class;
17360
17361 /* Floats can load 0 and 1 plus some others. Note that we eliminated
17362 zero above. We only want to wind up preferring 80387 registers if
17363 we plan on doing computation with them. */
17364 if (TARGET_80387
17365 && standard_80387_constant_p (x))
17366 {
17367 /* Limit class to non-sse. */
17368 if (class == FLOAT_SSE_REGS)
17369 return FLOAT_REGS;
17370 if (class == FP_TOP_SSE_REGS)
17371 return FP_TOP_REG;
17372 if (class == FP_SECOND_SSE_REGS)
17373 return FP_SECOND_REG;
17374 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17375 return class;
17376 }
17377
17378 return NO_REGS;
17379 }
17380
17381 /* Generally when we see PLUS here, it's the function invariant
17382	     (plus soft-fp const_int), which can only be computed into general
17383 regs. */
17384 if (GET_CODE (x) == PLUS)
17385 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17386
17387 /* QImode constants are easy to load, but non-constant QImode data
17388 must go into Q_REGS. */
17389 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17390 {
17391 if (reg_class_subset_p (class, Q_REGS))
17392 return class;
17393 if (reg_class_subset_p (Q_REGS, class))
17394 return Q_REGS;
17395 return NO_REGS;
17396 }
17397
17398 return class;
17399}
17400
17401/* Discourage putting floating-point values in SSE registers unless
17402 SSE math is being used, and likewise for the 387 registers. */
17403enum reg_class
17404ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17405{
17406 enum machine_mode mode = GET_MODE (x);
17407
17408 /* Restrict the output reload class to the register bank that we are doing
17409 math on. If we would like not to return a subset of CLASS, reject this
17410 alternative: if reload cannot do this, it will still use its choice. */
17411 mode = GET_MODE (x);
17412 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17413 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17414
17415 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17416 {
17417 if (class == FP_TOP_SSE_REGS)
17418 return FP_TOP_REG;
17419 else if (class == FP_SECOND_SSE_REGS)
17420 return FP_SECOND_REG;
17421 else
17422 return FLOAT_CLASS_P (class) ? class : NO_REGS;
17423 }
17424
17425 return class;
17426}
17427
17428/* If we are copying between general and FP registers, we need a memory
17429 location. The same is true for SSE and MMX registers.
17430
17431	   The macro can't work reliably when one of the CLASSES is a class containing
17432	   registers from multiple units (SSE, MMX, integer).  We avoid this by never
17433	   combining those units in a single alternative in the machine description.
17434 Ensure that this constraint holds to avoid unexpected surprises.
17435
17436 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17437 enforce these sanity checks. */
17438
17439int
17440ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17441 enum machine_mode mode, int strict)
17442{
17443 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17444 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17445 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17446 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17447 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17448 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17449 {
17450 gcc_assert (!strict);
17451 return true;
17452 }
17453
17454 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17455 return true;
17456
17457 /* ??? This is a lie. We do have moves between mmx/general, and for
17458 mmx/sse2. But by saying we need secondary memory we discourage the
17459 register allocator from using the mmx registers unless needed. */
17460 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17461 return true;
17462
17463 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17464 {
17465 /* SSE1 doesn't have any direct moves from other classes. */
17466 if (!TARGET_SSE2)
17467 return true;
17468
17469 /* If the target says that inter-unit moves are more expensive
17470 than moving through memory, then don't generate them. */
17471 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17472 return true;
17473
17474 /* Between SSE and general, we have moves no larger than word size. */
17475 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17476 return true;
17477
17478 /* ??? For the cost of one register reformat penalty, we could use
17479 the same instructions to move SFmode and DFmode data, but the
17480 relevant move patterns don't support those alternatives. */
17481 if (mode == SFmode || mode == DFmode)
17482 return true;
17483 }
17484
17485 return false;
17486}
17487
17488/* Return true if the registers in CLASS cannot represent the change from
17489 modes FROM to TO. */
17490
17491bool
17492ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17493 enum reg_class class)
17494{
17495 if (from == to)
17496 return false;
17497
17498 /* x87 registers can't do subreg at all, as all values are reformatted
17499 to extended precision. */
17500 if (MAYBE_FLOAT_CLASS_P (class))
17501 return true;
17502
17503 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17504 {
17505 /* Vector registers do not support QI or HImode loads. If we don't
17506 disallow a change to these modes, reload will assume it's ok to
17507 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17508 the vec_dupv4hi pattern. */
17509 if (GET_MODE_SIZE (from) < 4)
17510 return true;
17511
17512 /* Vector registers do not support subreg with nonzero offsets, which
17513 are otherwise valid for integer registers. Since we can't see
17514 whether we have a nonzero offset from here, prohibit all
17515 nonparadoxical subregs changing size. */
17516 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17517 return true;
17518 }
17519
17520 return false;
17521}
17522
17523/* Return the cost of moving data from a register in class CLASS1 to
17524 one in class CLASS2.
17525
17526 It is not required that the cost always equal 2 when FROM is the same as TO;
17527 on some machines it is expensive to move between registers if they are not
17528 general registers. */
17529
17530int
17531ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17532 enum reg_class class2)
17533{
17534	  /* In case we require secondary memory, compute the cost of the store
17535	     followed by the load.  To avoid bad register allocation choices, we
17536	     need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
17537
17538 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17539 {
17540 int cost = 1;
17541
17542 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17543 MEMORY_MOVE_COST (mode, class1, 1));
17544 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17545 MEMORY_MOVE_COST (mode, class2, 1));
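      /* Illustration with made-up numbers: if the larger of the load/store
	 costs is 4 for CLASS1 and 6 for CLASS2, the base cost so far is
	 1 + 4 + 6 = 11, before the penalties added below.  */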
17546
17547       /* When copying from a general purpose register we may emit multiple
17548          stores followed by a single load, causing a memory size mismatch
17549          stall.  Count this as an arbitrarily high cost of 20.  */
17550 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17551 cost += 20;
17552
17553 /* In the case of FP/MMX moves, the registers actually overlap, and we
17554 have to switch modes in order to treat them differently. */
17555 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17556 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17557 cost += 20;
17558
17559 return cost;
17560 }
17561
17562 /* Moves between SSE/MMX and integer unit are expensive. */
17563 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17564 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17565 return ix86_cost->mmxsse_to_integer;
17566 if (MAYBE_FLOAT_CLASS_P (class1))
17567 return ix86_cost->fp_move;
17568 if (MAYBE_SSE_CLASS_P (class1))
17569 return ix86_cost->sse_move;
17570 if (MAYBE_MMX_CLASS_P (class1))
17571 return ix86_cost->mmx_move;
17572 return 2;
17573}
17574
17575/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17576
17577bool
17578ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17579{
17580   /* Only the flags registers can hold CCmode values, and they hold nothing else.  */
17581 if (CC_REGNO_P (regno))
17582 return GET_MODE_CLASS (mode) == MODE_CC;
17583 if (GET_MODE_CLASS (mode) == MODE_CC
17584 || GET_MODE_CLASS (mode) == MODE_RANDOM
17585 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17586 return 0;
17587 if (FP_REGNO_P (regno))
17588 return VALID_FP_MODE_P (mode);
17589 if (SSE_REGNO_P (regno))
17590 {
17591 /* We implement the move patterns for all vector modes into and
17592 out of SSE registers, even when no operation instructions
17593 are available. */
17594 return (VALID_SSE_REG_MODE (mode)
17595 || VALID_SSE2_REG_MODE (mode)
17596 || VALID_MMX_REG_MODE (mode)
17597 || VALID_MMX_REG_MODE_3DNOW (mode));
17598 }
17599 if (MMX_REGNO_P (regno))
17600 {
17601 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17602 so if the register is available at all, then we can move data of
17603 the given mode into or out of it. */
17604 return (VALID_MMX_REG_MODE (mode)
17605 || VALID_MMX_REG_MODE_3DNOW (mode));
17606 }
17607
17608 if (mode == QImode)
17609 {
17610       /* Take care with QImode values - they can be in non-QI regs,
17611 	 but then they cause partial register stalls.  */
17612 if (regno < 4 || TARGET_64BIT)
17613 return 1;
17614 if (!TARGET_PARTIAL_REG_STALL)
17615 return 1;
17616 return reload_in_progress || reload_completed;
17617 }
17618   /* We handle both integers and floats in the general purpose registers. */
17619 else if (VALID_INT_MODE_P (mode))
17620 return 1;
17621 else if (VALID_FP_MODE_P (mode))
17622 return 1;
17623 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17624 on to use that value in smaller contexts, this can easily force a
17625 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17626 supporting DImode, allow it. */
17627 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17628 return 1;
17629
17630 return 0;
17631}
17632
17633/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17634 tieable integer mode. */
17635
17636static bool
17637ix86_tieable_integer_mode_p (enum machine_mode mode)
17638{
17639 switch (mode)
17640 {
17641 case HImode:
17642 case SImode:
17643 return true;
17644
17645 case QImode:
17646 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17647
17648 case DImode:
17649 return TARGET_64BIT;
17650
17651 default:
17652 return false;
17653 }
17654}
17655
17656/* Return true if MODE1 is accessible in a register that can hold MODE2
17657 without copying. That is, all register classes that can hold MODE2
17658 can also hold MODE1. */
17659
17660bool
17661ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17662{
17663 if (mode1 == mode2)
17664 return true;
17665
17666 if (ix86_tieable_integer_mode_p (mode1)
17667 && ix86_tieable_integer_mode_p (mode2))
17668 return true;
17669
17670 /* MODE2 being XFmode implies fp stack or general regs, which means we
17671 can tie any smaller floating point modes to it. Note that we do not
17672 tie this with TFmode. */
17673 if (mode2 == XFmode)
17674 return mode1 == SFmode || mode1 == DFmode;
17675
17676 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17677 that we can tie it with SFmode. */
17678 if (mode2 == DFmode)
17679 return mode1 == SFmode;
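  /* So, for instance, SFmode is tieable with DFmode: every register class
     that can hold DFmode (x87, SSE or general registers) can hold SFmode
     as well.  */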
17680
17681 /* If MODE2 is only appropriate for an SSE register, then tie with
17682 any other mode acceptable to SSE registers. */
17683 if (GET_MODE_SIZE (mode2) >= 8
17684 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17685 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17686
17687 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17688 with any other mode acceptable to MMX registers. */
17689 if (GET_MODE_SIZE (mode2) == 8
17690 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17691 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17692
17693 return false;
17694}
17695
17696/* Return the cost of moving data of mode M between a
17697 register and memory. A value of 2 is the default; this cost is
17698 relative to those in `REGISTER_MOVE_COST'.
17699
17700 If moving between registers and memory is more expensive than
17701 between two registers, you should define this macro to express the
17702 relative cost.
17703
17704    Also model the increased cost of moving QImode registers in
17705    non-Q_REGS classes.
17706  */
17707int
17708ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17709{
17710 if (FLOAT_CLASS_P (class))
17711 {
17712 int index;
17713 switch (mode)
17714 {
17715 case SFmode:
17716 index = 0;
17717 break;
17718 case DFmode:
17719 index = 1;
17720 break;
17721 case XFmode:
17722 index = 2;
17723 break;
17724 default:
17725 return 100;
17726 }
17727 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17728 }
17729 if (SSE_CLASS_P (class))
17730 {
17731 int index;
17732 switch (GET_MODE_SIZE (mode))
17733 {
17734 case 4:
17735 index = 0;
17736 break;
17737 case 8:
17738 index = 1;
17739 break;
17740 case 16:
17741 index = 2;
17742 break;
17743 default:
17744 return 100;
17745 }
17746 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17747 }
17748 if (MMX_CLASS_P (class))
17749 {
17750 int index;
17751 switch (GET_MODE_SIZE (mode))
17752 {
17753 case 4:
17754 index = 0;
17755 break;
17756 case 8:
17757 index = 1;
17758 break;
17759 default:
17760 return 100;
17761 }
17762 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17763 }
17764 switch (GET_MODE_SIZE (mode))
17765 {
17766 case 1:
17767 if (in)
17768 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17769 : ix86_cost->movzbl_load);
17770 else
17771 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17772 : ix86_cost->int_store[0] + 4);
17773 break;
17774 case 2:
17775 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17776 default:
17777 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17778 if (mode == TFmode)
17779 mode = XFmode;
17780 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17781 * (((int) GET_MODE_SIZE (mode)
17782 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17783 }
17784}
17785
17786/* Compute a (partial) cost for rtx X. Return true if the complete
17787 cost has been computed, and false if subexpressions should be
17788 scanned. In either case, *TOTAL contains the cost result. */
17789
17790static bool
17791ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17792{
17793 enum machine_mode mode = GET_MODE (x);
17794
17795 switch (code)
17796 {
17797 case CONST_INT:
17798 case CONST:
17799 case LABEL_REF:
17800 case SYMBOL_REF:
17801 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17802 *total = 3;
17803 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17804 *total = 2;
17805 else if (flag_pic && SYMBOLIC_CONST (x)
17806 && (!TARGET_64BIT
17807 		   || (GET_CODE (x) != LABEL_REF
17808 && (GET_CODE (x) != SYMBOL_REF
17809 || !SYMBOL_REF_LOCAL_P (x)))))
17810 *total = 1;
17811 else
17812 *total = 0;
17813 return true;
17814
17815 case CONST_DOUBLE:
17816 if (mode == VOIDmode)
17817 *total = 0;
17818 else
17819 switch (standard_80387_constant_p (x))
17820 {
17821 case 1: /* 0.0 */
17822 *total = 1;
17823 break;
17824 default: /* Other constants */
17825 *total = 2;
17826 break;
17827 case 0:
17828 case -1:
17829 /* Start with (MEM (SYMBOL_REF)), since that's where
17830 it'll probably end up. Add a penalty for size. */
17831 *total = (COSTS_N_INSNS (1)
17832 + (flag_pic != 0 && !TARGET_64BIT)
17833 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17834 break;
17835 }
17836 return true;
17837
17838 case ZERO_EXTEND:
17839       /* The zero extension is often completely free on x86_64, so make
17840 	 it as cheap as possible.  */
17841 if (TARGET_64BIT && mode == DImode
17842 && GET_MODE (XEXP (x, 0)) == SImode)
17843 *total = 1;
17844 else if (TARGET_ZERO_EXTEND_WITH_AND)
17845 *total = ix86_cost->add;
17846 else
17847 *total = ix86_cost->movzx;
17848 return false;
17849
17850 case SIGN_EXTEND:
17851 *total = ix86_cost->movsx;
17852 return false;
17853
17854 case ASHIFT:
17855 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17856 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17857 {
17858 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17859 if (value == 1)
17860 {
17861 *total = ix86_cost->add;
17862 return false;
17863 }
17864 if ((value == 2 || value == 3)
17865 && ix86_cost->lea <= ix86_cost->shift_const)
17866 {
17867 *total = ix86_cost->lea;
17868 return false;
17869 }
17870 }
17871 /* FALLTHRU */
17872
17873 case ROTATE:
17874 case ASHIFTRT:
17875 case LSHIFTRT:
17876 case ROTATERT:
17877 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17878 {
17879 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17880 {
17881 if (INTVAL (XEXP (x, 1)) > 32)
17882 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17883 else
17884 *total = ix86_cost->shift_const * 2;
17885 }
17886 else
17887 {
17888 if (GET_CODE (XEXP (x, 1)) == AND)
17889 *total = ix86_cost->shift_var * 2;
17890 else
17891 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17892 }
17893 }
17894 else
17895 {
17896 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17897 *total = ix86_cost->shift_const;
17898 else
17899 *total = ix86_cost->shift_var;
17900 }
17901 return false;
17902
17903 case MULT:
17904 if (FLOAT_MODE_P (mode))
17905 {
17906 *total = ix86_cost->fmul;
17907 return false;
17908 }
17909 else
17910 {
17911 rtx op0 = XEXP (x, 0);
17912 rtx op1 = XEXP (x, 1);
17913 int nbits;
17914 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17915 {
17916 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17917 for (nbits = 0; value != 0; value &= value - 1)
17918 nbits++;
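	  /* The loop above is a population count: each iteration clears the
	     lowest set bit of the constant, so NBITS ends up as the number of
	     one bits, which scales the per-bit multiply cost added below.  */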
17919 }
17920 else
17921 /* This is arbitrary. */
17922 nbits = 7;
17923
17924 /* Compute costs correctly for widening multiplication. */
17925 	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17926 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17927 == GET_MODE_SIZE (mode))
17928 {
17929 int is_mulwiden = 0;
17930 enum machine_mode inner_mode = GET_MODE (op0);
17931
17932 if (GET_CODE (op0) == GET_CODE (op1))
17933 is_mulwiden = 1, op1 = XEXP (op1, 0);
17934 else if (GET_CODE (op1) == CONST_INT)
17935 {
17936 if (GET_CODE (op0) == SIGN_EXTEND)
17937 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17938 == INTVAL (op1);
17939 else
17940 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17941 }
17942
17943 if (is_mulwiden)
17944 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17945 }
17946
17947 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17948 + nbits * ix86_cost->mult_bit
17949 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17950
17951 return true;
17952 }
17953
17954 case DIV:
17955 case UDIV:
17956 case MOD:
17957 case UMOD:
17958 if (FLOAT_MODE_P (mode))
17959 *total = ix86_cost->fdiv;
17960 else
17961 *total = ix86_cost->divide[MODE_INDEX (mode)];
17962 return false;
17963
17964 case PLUS:
17965 if (FLOAT_MODE_P (mode))
17966 *total = ix86_cost->fadd;
17967 else if (GET_MODE_CLASS (mode) == MODE_INT
17968 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17969 {
17970 if (GET_CODE (XEXP (x, 0)) == PLUS
17971 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17972 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17973 && CONSTANT_P (XEXP (x, 1)))
17974 {
17975 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17976 if (val == 2 || val == 4 || val == 8)
17977 {
17978 *total = ix86_cost->lea;
17979 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17980 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17981 outer_code);
17982 *total += rtx_cost (XEXP (x, 1), outer_code);
17983 return true;
17984 }
17985 }
17986 else if (GET_CODE (XEXP (x, 0)) == MULT
17987 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17988 {
17989 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17990 if (val == 2 || val == 4 || val == 8)
17991 {
17992 *total = ix86_cost->lea;
17993 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17994 *total += rtx_cost (XEXP (x, 1), outer_code);
17995 return true;
17996 }
17997 }
17998 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17999 {
18000 *total = ix86_cost->lea;
18001 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
18002 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
18003 *total += rtx_cost (XEXP (x, 1), outer_code);
18004 return true;
18005 }
18006 }
18007 /* FALLTHRU */
18008
18009 case MINUS:
18010 if (FLOAT_MODE_P (mode))
18011 {
18012 *total = ix86_cost->fadd;
18013 return false;
18014 }
18015 /* FALLTHRU */
18016
18017 case AND:
18018 case IOR:
18019 case XOR:
18020 if (!TARGET_64BIT && mode == DImode)
18021 {
18022 *total = (ix86_cost->add * 2
18023 + (rtx_cost (XEXP (x, 0), outer_code)
18024 << (GET_MODE (XEXP (x, 0)) != DImode))
18025 + (rtx_cost (XEXP (x, 1), outer_code)
18026 << (GET_MODE (XEXP (x, 1)) != DImode)));
18027 return true;
18028 }
18029 /* FALLTHRU */
18030
18031 case NEG:
18032 if (FLOAT_MODE_P (mode))
18033 {
18034 *total = ix86_cost->fchs;
18035 return false;
18036 }
18037 /* FALLTHRU */
18038
18039 case NOT:
18040 if (!TARGET_64BIT && mode == DImode)
18041 *total = ix86_cost->add * 2;
18042 else
18043 *total = ix86_cost->add;
18044 return false;
18045
18046 case COMPARE:
18047 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
18048 && XEXP (XEXP (x, 0), 1) == const1_rtx
18049 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
18050 && XEXP (x, 1) == const0_rtx)
18051 {
18052 /* This kind of construct is implemented using test[bwl].
18053 Treat it as if we had an AND. */
18054 *total = (ix86_cost->add
18055 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
18056 + rtx_cost (const1_rtx, outer_code));
18057 return true;
18058 }
18059 return false;
18060
18061 case FLOAT_EXTEND:
18062 if (!TARGET_SSE_MATH
18063 || mode == XFmode
18064 || (mode == DFmode && !TARGET_SSE2))
18065 	/* For standard 80387 constants, raise the cost to prevent
18066 	   compress_float_constant() from generating a load from memory.  */
18067 switch (standard_80387_constant_p (XEXP (x, 0)))
18068 {
18069 case -1:
18070 case 0:
18071 *total = 0;
18072 break;
18073 case 1: /* 0.0 */
18074 *total = 1;
18075 break;
18076 default:
18077 *total = (x86_ext_80387_constants & TUNEMASK
18078 || optimize_size
18079 ? 1 : 0);
18080 }
18081 return false;
18082
18083 case ABS:
18084 if (FLOAT_MODE_P (mode))
18085 *total = ix86_cost->fabs;
18086 return false;
18087
18088 case SQRT:
18089 if (FLOAT_MODE_P (mode))
18090 *total = ix86_cost->fsqrt;
18091 return false;
18092
18093 case UNSPEC:
18094 if (XINT (x, 1) == UNSPEC_TP)
18095 *total = 0;
18096 return false;
18097
18098 default:
18099 return false;
18100 }
18101}
18102
18103#if TARGET_MACHO
18104
18105static int current_machopic_label_num;
18106
18107/* Given a symbol name and its associated stub, write out the
18108 definition of the stub. */
18109
18110void
18111machopic_output_stub (FILE *file, const char *symb, const char *stub)
18112{
18113 unsigned int length;
18114 char *binder_name, *symbol_name, lazy_ptr_name[32];
18115 int label = ++current_machopic_label_num;
18116
18117 /* For 64-bit we shouldn't get here. */
18118 gcc_assert (!TARGET_64BIT);
18119
18120 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
18121 symb = (*targetm.strip_name_encoding) (symb);
18122
18123 length = strlen (stub);
18124 binder_name = alloca (length + 32);
18125 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
18126
18127 length = strlen (symb);
18128 symbol_name = alloca (length + 32);
18129 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
18130
18131 sprintf (lazy_ptr_name, "L%d$lz", label);
18132
18133 if (MACHOPIC_PURE)
18134 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
18135 else
18136 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
18137
18138 fprintf (file, "%s:\n", stub);
18139 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18140
18141 if (MACHOPIC_PURE)
18142 {
18143 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
18144 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
18145 fprintf (file, "\tjmp\t*%%edx\n");
18146 }
18147 else
18148 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
18149
18150 fprintf (file, "%s:\n", binder_name);
18151
18152 if (MACHOPIC_PURE)
18153 {
18154 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
18155 fprintf (file, "\tpushl\t%%eax\n");
18156 }
18157 else
18158 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
18159
18160 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
18161
18162 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
18163 fprintf (file, "%s:\n", lazy_ptr_name);
18164 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18165 fprintf (file, "\t.long %s\n", binder_name);
18166}
18167
18168void
18169darwin_x86_file_end (void)
18170{
18171 darwin_file_end ();
18172 ix86_file_end ();
18173}
18174#endif /* TARGET_MACHO */
18175
18176 /* Order the registers for the register allocator.  */
18177
18178void
18179x86_order_regs_for_local_alloc (void)
18180{
18181 int pos = 0;
18182 int i;
18183
18184 /* First allocate the local general purpose registers. */
18185 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18186 if (GENERAL_REGNO_P (i) && call_used_regs[i])
18187 reg_alloc_order [pos++] = i;
18188
18189 /* Global general purpose registers. */
18190 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18191 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
18192 reg_alloc_order [pos++] = i;
18193
18194 /* x87 registers come first in case we are doing FP math
18195 using them. */
18196 if (!TARGET_SSE_MATH)
18197 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18198 reg_alloc_order [pos++] = i;
18199
18200 /* SSE registers. */
18201 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18202 reg_alloc_order [pos++] = i;
18203 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18204 reg_alloc_order [pos++] = i;
18205
18206 /* x87 registers. */
18207 if (TARGET_SSE_MATH)
18208 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18209 reg_alloc_order [pos++] = i;
18210
18211 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
18212 reg_alloc_order [pos++] = i;
18213
18214   /* Initialize the rest of the array, as some registers are never
18215      allocated at all.  */
18216 while (pos < FIRST_PSEUDO_REGISTER)
18217 reg_alloc_order [pos++] = 0;
18218}
18219
18220/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
18221 struct attribute_spec.handler. */
18222static tree
18223ix86_handle_struct_attribute (tree *node, tree name,
18224 tree args ATTRIBUTE_UNUSED,
18225 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
18226{
18227 tree *type = NULL;
18228 if (DECL_P (*node))
18229 {
18230 if (TREE_CODE (*node) == TYPE_DECL)
18231 type = &TREE_TYPE (*node);
18232 }
18233 else
18234 type = node;
18235
18236 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
18237 || TREE_CODE (*type) == UNION_TYPE)))
18238 {
18239 warning (OPT_Wattributes, "%qs attribute ignored",
18240 IDENTIFIER_POINTER (name));
18241 *no_add_attrs = true;
18242 }
18243
18244 else if ((is_attribute_p ("ms_struct", name)
18245 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
18246 || ((is_attribute_p ("gcc_struct", name)
18247 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
18248 {
18249 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
18250 IDENTIFIER_POINTER (name));
18251 *no_add_attrs = true;
18252 }
18253
18254 return NULL_TREE;
18255}
18256
18257static bool
18258ix86_ms_bitfield_layout_p (tree record_type)
18259{
18260 return (TARGET_MS_BITFIELD_LAYOUT &&
18261 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
18262 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
18263}
18264
18265/* Returns an expression indicating where the this parameter is
18266 located on entry to the FUNCTION. */
18267
18268static rtx
18269x86_this_parameter (tree function)
18270{
18271 tree type = TREE_TYPE (function);
18272
18273 if (TARGET_64BIT)
18274 {
18275 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
18276 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
18277 }
18278
18279 if (ix86_function_regparm (type, function) > 0)
18280 {
18281 tree parm;
18282
18283 parm = TYPE_ARG_TYPES (type);
18284 /* Figure out whether or not the function has a variable number of
18285 arguments. */
18286 for (; parm; parm = TREE_CHAIN (parm))
18287 if (TREE_VALUE (parm) == void_type_node)
18288 break;
18289 /* If not, the this parameter is in the first argument. */
18290 if (parm)
18291 {
18292 int regno = 0;
18293 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
18294 regno = 2;
18295 return gen_rtx_REG (SImode, regno);
18296 }
18297 }
18298
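  /* Otherwise 'this' is passed on the stack.  On entry the return address
     occupies (%esp), so the first argument lives at 4(%esp); when the
     function returns an aggregate in memory, the hidden return-slot pointer
     takes that slot and 'this' is found at 8(%esp) instead.  */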
18299 if (aggregate_value_p (TREE_TYPE (type), type))
18300 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
18301 else
18302 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
18303}
18304
18305/* Determine whether x86_output_mi_thunk can succeed. */
18306
18307static bool
18308x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
18309 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
18310 HOST_WIDE_INT vcall_offset, tree function)
18311{
18312 /* 64-bit can handle anything. */
18313 if (TARGET_64BIT)
18314 return true;
18315
18316 /* For 32-bit, everything's fine if we have one free register. */
18317 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
18318 return true;
18319
18320 /* Need a free register for vcall_offset. */
18321 if (vcall_offset)
18322 return false;
18323
18324 /* Need a free register for GOT references. */
18325 if (flag_pic && !(*targetm.binds_local_p) (function))
18326 return false;
18327
18328 /* Otherwise ok. */
18329 return true;
18330}
18331
18332/* Output the assembler code for a thunk function. THUNK_DECL is the
18333 declaration for the thunk function itself, FUNCTION is the decl for
18334 the target function. DELTA is an immediate constant offset to be
18335 added to THIS. If VCALL_OFFSET is nonzero, the word at
18336 *(*this + vcall_offset) should be added to THIS. */
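
/* As a rough illustration (not the only case handled below): for a 32-bit,
   non-PIC thunk with only a DELTA adjustment and 'this' on the stack, the
   output amounts to something like

	addl	$DELTA, 4(%esp)
	jmp	function

   The code below also covers a register-passed 'this', 64-bit operands,
   vtable offsets and PIC tail calls.  */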
18337
18338static void
18339x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
18340 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
18341 HOST_WIDE_INT vcall_offset, tree function)
18342{
18343 rtx xops[3];
18344 rtx this = x86_this_parameter (function);
18345 rtx this_reg, tmp;
18346
18347 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
18348 pull it in now and let DELTA benefit. */
18349 if (REG_P (this))
18350 this_reg = this;
18351 else if (vcall_offset)
18352 {
18353 /* Put the this parameter into %eax. */
18354 xops[0] = this;
18355 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
18356 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18357 }
18358 else
18359 this_reg = NULL_RTX;
18360
18361 /* Adjust the this parameter by a fixed constant. */
18362 if (delta)
18363 {
18364 xops[0] = GEN_INT (delta);
18365 xops[1] = this_reg ? this_reg : this;
18366 if (TARGET_64BIT)
18367 {
18368 if (!x86_64_general_operand (xops[0], DImode))
18369 {
18370 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18371 xops[1] = tmp;
18372 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18373 xops[0] = tmp;
18374 xops[1] = this;
18375 }
18376 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18377 }
18378 else
18379 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18380 }
18381
18382 /* Adjust the this parameter by a value stored in the vtable. */
18383 if (vcall_offset)
18384 {
18385 if (TARGET_64BIT)
18386 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18387 else
18388 {
18389 int tmp_regno = 2 /* ECX */;
18390 if (lookup_attribute ("fastcall",
18391 TYPE_ATTRIBUTES (TREE_TYPE (function))))
18392 tmp_regno = 0 /* EAX */;
18393 tmp = gen_rtx_REG (SImode, tmp_regno);
18394 }
18395
18396 xops[0] = gen_rtx_MEM (Pmode, this_reg);
18397 xops[1] = tmp;
18398 if (TARGET_64BIT)
18399 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18400 else
18401 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18402
18403 /* Adjust the this parameter. */
18404 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18405 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18406 {
18407 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
18408 xops[0] = GEN_INT (vcall_offset);
18409 xops[1] = tmp2;
18410 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18411 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18412 }
18413 xops[1] = this_reg;
18414 if (TARGET_64BIT)
18415 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18416 else
18417 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18418 }
18419
18420 /* If necessary, drop THIS back to its stack slot. */
18421 if (this_reg && this_reg != this)
18422 {
18423 xops[0] = this_reg;
18424 xops[1] = this;
18425 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18426 }
18427
18428 xops[0] = XEXP (DECL_RTL (function), 0);
18429 if (TARGET_64BIT)
18430 {
18431 if (!flag_pic || (*targetm.binds_local_p) (function))
18432 output_asm_insn ("jmp\t%P0", xops);
18433 else
18434 {
18435 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18436 tmp = gen_rtx_CONST (Pmode, tmp);
18437 tmp = gen_rtx_MEM (QImode, tmp);
18438 xops[0] = tmp;
18439 output_asm_insn ("jmp\t%A0", xops);
18440 }
18441 }
18442 else
18443 {
18444 if (!flag_pic || (*targetm.binds_local_p) (function))
18445 output_asm_insn ("jmp\t%P0", xops);
18446 else
18447#if TARGET_MACHO
18448 if (TARGET_MACHO)
18449 {
18450 rtx sym_ref = XEXP (DECL_RTL (function), 0);
18451 tmp = (gen_rtx_SYMBOL_REF
18452 (Pmode,
18453 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18454 tmp = gen_rtx_MEM (QImode, tmp);
18455 xops[0] = tmp;
18456 output_asm_insn ("jmp\t%0", xops);
18457 }
18458 else
18459#endif /* TARGET_MACHO */
18460 {
18461 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18462 output_set_got (tmp, NULL_RTX);
18463
18464 xops[1] = tmp;
18465 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18466 output_asm_insn ("jmp\t{*}%1", xops);
18467 }
18468 }
18469}
18470
18471static void
18472x86_file_start (void)
18473{
18474 default_file_start ();
18475#if TARGET_MACHO
18476 darwin_file_start ();
18477#endif
18478 if (X86_FILE_START_VERSION_DIRECTIVE)
18479 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18480 if (X86_FILE_START_FLTUSED)
18481 fputs ("\t.global\t__fltused\n", asm_out_file);
18482 if (ix86_asm_dialect == ASM_INTEL)
18483 fputs ("\t.intel_syntax\n", asm_out_file);
18484}
18485
18486int
18487x86_field_alignment (tree field, int computed)
18488{
18489 enum machine_mode mode;
18490 tree type = TREE_TYPE (field);
18491
18492 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18493 return computed;
18494 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18495 ? get_inner_array_type (type) : type);
18496 if (mode == DFmode || mode == DCmode
18497 || GET_MODE_CLASS (mode) == MODE_INT
18498 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18499 return MIN (32, computed);
18500 return computed;
18501}
18502
18503/* Output assembler code to FILE to increment profiler label # LABELNO
18504 for profiling a function entry. */
18505void
18506x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18507{
18508 if (TARGET_64BIT)
18509 if (flag_pic)
18510 {
18511#ifndef NO_PROFILE_COUNTERS
18512 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18513#endif
18514 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18515 }
18516 else
18517 {
18518#ifndef NO_PROFILE_COUNTERS
18519 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18520#endif
18521 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18522 }
18523 else if (flag_pic)
18524 {
18525#ifndef NO_PROFILE_COUNTERS
18526 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18527 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18528#endif
18529 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18530 }
18531 else
18532 {
18533#ifndef NO_PROFILE_COUNTERS
18534 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18535 PROFILE_COUNT_REGISTER);
18536#endif
18537 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18538 }
18539}
18540
18541 /* We don't have exact information about the insn sizes, but we may safely
18542    assume that we know about all 1 byte insns and about memory address
18543    sizes.  This is enough to eliminate unnecessary padding in
18544    99% of cases.  */
18545
18546static int
18547min_insn_size (rtx insn)
18548{
18549 int l = 0;
18550
18551 if (!INSN_P (insn) || !active_insn_p (insn))
18552 return 0;
18553
18554   /* Discard alignments we've emitted, and jump instructions.  */
18555 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18556 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18557 return 0;
18558 if (GET_CODE (insn) == JUMP_INSN
18559 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18560 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18561 return 0;
18562
18563   /* Important case - calls are always 5 bytes.
18564      It is common to have many calls in a row.  */
18565 if (GET_CODE (insn) == CALL_INSN
18566 && symbolic_reference_mentioned_p (PATTERN (insn))
18567 && !SIBLING_CALL_P (insn))
18568 return 5;
18569 if (get_attr_length (insn) <= 1)
18570 return 1;
18571
18572   /* For normal instructions we may rely on the sizes of addresses
18573      and the presence of a symbol to require 4 bytes of encoding.  This is
18574      not the case for jumps, where references are PC relative.  */
18575 if (GET_CODE (insn) != JUMP_INSN)
18576 {
18577 l = get_attr_length_address (insn);
18578 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18579 l = 4;
18580 }
18581 if (l)
18582 return 1+l;
18583 else
18584 return 2;
18585}
18586
18587/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
18588 window. */
18589
18590static void
18591ix86_avoid_jump_misspredicts (void)
18592{
18593 rtx insn, start = get_insns ();
18594 int nbytes = 0, njumps = 0;
18595 int isjump = 0;
18596
18597   /* Look for all minimal intervals of instructions containing 4 jumps.
18598      The intervals are bounded by START and INSN.  NBYTES is the total
18599      size of the instructions in the interval, including INSN and not
18600      including START.  When NBYTES is smaller than 16 bytes, it is possible
18601      that the end of START and the end of INSN fall into the same 16 byte page.
18602 
18603      The smallest offset in the page at which INSN can start is the case where
18604      START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
18605      We add a p2align to the 16 byte window with maxskip
18606      17 - NBYTES + sizeof (INSN).  */
18607 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18608 {
18609
18610 nbytes += min_insn_size (insn);
18611 if (dump_file)
18612 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18613 INSN_UID (insn), min_insn_size (insn));
18614 if ((GET_CODE (insn) == JUMP_INSN
18615 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18616 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18617 || GET_CODE (insn) == CALL_INSN)
18618 njumps++;
18619 else
18620 continue;
18621
18622 while (njumps > 3)
18623 {
18624 start = NEXT_INSN (start);
18625 if ((GET_CODE (start) == JUMP_INSN
18626 && GET_CODE (PATTERN (start)) != ADDR_VEC
18627 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18628 || GET_CODE (start) == CALL_INSN)
18629 njumps--, isjump = 1;
18630 else
18631 isjump = 0;
18632 nbytes -= min_insn_size (start);
18633 }
18634 gcc_assert (njumps >= 0);
18635 if (dump_file)
18636 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18637 INSN_UID (start), INSN_UID (insn), nbytes);
18638
18639 if (njumps == 3 && isjump && nbytes < 16)
18640 {
18641 int padsize = 15 - nbytes + min_insn_size (insn);
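	  /* For example, if the interval is NBYTES == 12 and INSN itself is
	     2 bytes, we pad by 15 - 12 + 2 = 5 bytes.  */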
18642
18643 if (dump_file)
18644 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18645 INSN_UID (insn), padsize);
18646 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18647 }
18648 }
18649}
18650
18651 /* AMD Athlon works faster
18652    when RET is not the destination of a conditional jump and is not directly
18653    preceded by another jump instruction.  We avoid the penalty by inserting a
18654    NOP just before the RET instruction in such cases.  */
18655static void
18656ix86_pad_returns (void)
18657{
18658 edge e;
18659 edge_iterator ei;
18660
18661 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18662 {
18663 basic_block bb = e->src;
18664 rtx ret = BB_END (bb);
18665 rtx prev;
18666 bool replace = false;
18667
18668 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18669 || !maybe_hot_bb_p (bb))
18670 continue;
18671 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18672 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18673 break;
18674 if (prev && GET_CODE (prev) == CODE_LABEL)
18675 {
18676 edge e;
18677 edge_iterator ei;
18678
18679 FOR_EACH_EDGE (e, ei, bb->preds)
18680 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18681 && !(e->flags & EDGE_FALLTHRU))
18682 replace = true;
18683 }
18684 if (!replace)
18685 {
18686 prev = prev_active_insn (ret);
18687 if (prev
18688 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18689 || GET_CODE (prev) == CALL_INSN))
18690 replace = true;
18691 	  /* Empty functions get branch mispredicts even when the jump destination
18692 	     is not visible to us.  */
18693 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18694 replace = true;
18695 }
18696 if (replace)
18697 {
18698 emit_insn_before (gen_return_internal_long (), ret);
18699 delete_insn (ret);
18700 }
18701 }
18702}
18703
18704 /* Implement machine specific optimizations.  We implement padding of returns
18705    for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
18706static void
18707ix86_reorg (void)
18708{
18709 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18710 ix86_pad_returns ();
18711 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18712 ix86_avoid_jump_misspredicts ();
18713}
18714
18715 /* Return nonzero when a QImode register that must be represented via a REX
18716    prefix is used.  */
18717bool
18718x86_extended_QIreg_mentioned_p (rtx insn)
18719{
18720 int i;
18721 extract_insn_cached (insn);
18722 for (i = 0; i < recog_data.n_operands; i++)
18723 if (REG_P (recog_data.operand[i])
18724 && REGNO (recog_data.operand[i]) >= 4)
18725 return true;
18726 return false;
18727}
18728
18729 /* Return nonzero when P points to a register encoded via a REX prefix.
18730    Called via for_each_rtx.  */
18731static int
18732extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18733{
18734 unsigned int regno;
18735 if (!REG_P (*p))
18736 return 0;
18737 regno = REGNO (*p);
18738 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18739}
18740
18741 /* Return true when INSN mentions a register that must be encoded using a
18742    REX prefix.  */
18743bool
18744x86_extended_reg_mentioned_p (rtx insn)
18745{
18746 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18747}
18748
18749/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18750 optabs would emit if we didn't have TFmode patterns. */
18751
18752void
18753x86_emit_floatuns (rtx operands[2])
18754{
18755 rtx neglab, donelab, i0, i1, f0, in, out;
18756 enum machine_mode mode, inmode;
18757
18758 inmode = GET_MODE (operands[1]);
18759 gcc_assert (inmode == SImode || inmode == DImode);
18760
18761 out = operands[0];
18762 in = force_reg (inmode, operands[1]);
18763 mode = GET_MODE (out);
18764 neglab = gen_label_rtx ();
18765 donelab = gen_label_rtx ();
18766 i1 = gen_reg_rtx (Pmode);
18767 f0 = gen_reg_rtx (mode);
18768
18769 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18770
18771 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18772 emit_jump_insn (gen_jump (donelab));
18773 emit_barrier ();
18774
18775 emit_label (neglab);
18776
18777 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18778 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18779 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18780 expand_float (f0, i0, 0);
18781 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
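  /* In this branch the input had its top bit set: it was halved above
     (shifted right by one with the low bit OR-ed back in so that the final
     rounding still comes out right), converted as a signed value, and the
     result is doubled here.  */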
18782
18783 emit_label (donelab);
18784}
18785
18786/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18787    with all elements equal to VAL.  Return true if successful.  */
18788
18789static bool
18790ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18791 rtx target, rtx val)
18792{
18793 enum machine_mode smode, wsmode, wvmode;
18794 rtx x;
18795
18796 switch (mode)
18797 {
18798 case V2SImode:
18799 case V2SFmode:
18800 if (!mmx_ok)
18801 return false;
18802 /* FALLTHRU */
18803
18804 case V2DFmode:
18805 case V2DImode:
18806 case V4SFmode:
18807 case V4SImode:
18808 val = force_reg (GET_MODE_INNER (mode), val);
18809 x = gen_rtx_VEC_DUPLICATE (mode, val);
18810 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18811 return true;
18812
18813 case V4HImode:
18814 if (!mmx_ok)
18815 return false;
18816 if (TARGET_SSE || TARGET_3DNOW_A)
18817 {
18818 val = gen_lowpart (SImode, val);
18819 x = gen_rtx_TRUNCATE (HImode, val);
18820 x = gen_rtx_VEC_DUPLICATE (mode, x);
18821 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18822 return true;
18823 }
18824 else
18825 {
18826 smode = HImode;
18827 wsmode = SImode;
18828 wvmode = V2SImode;
18829 goto widen;
18830 }
18831
18832 case V8QImode:
18833 if (!mmx_ok)
18834 return false;
18835 smode = QImode;
18836 wsmode = HImode;
18837 wvmode = V4HImode;
18838 goto widen;
18839 case V8HImode:
18840 if (TARGET_SSE2)
18841 {
18842 rtx tmp1, tmp2;
18843 /* Extend HImode to SImode using a paradoxical SUBREG. */
18844 tmp1 = gen_reg_rtx (SImode);
18845 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18846 /* Insert the SImode value as low element of V4SImode vector. */
18847 tmp2 = gen_reg_rtx (V4SImode);
18848 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18849 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18850 CONST0_RTX (V4SImode),
18851 const1_rtx);
18852 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18853 /* Cast the V4SImode vector back to a V8HImode vector. */
18854 tmp1 = gen_reg_rtx (V8HImode);
18855 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18856 /* Duplicate the low short through the whole low SImode word. */
18857 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18858 /* Cast the V8HImode vector back to a V4SImode vector. */
18859 tmp2 = gen_reg_rtx (V4SImode);
18860 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18861 /* Replicate the low element of the V4SImode vector. */
18862 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18863 	  /* Cast the V4SImode vector back to V8HImode, and store in target. */
18864 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18865 return true;
18866 }
18867 smode = HImode;
18868 wsmode = SImode;
18869 wvmode = V4SImode;
18870 goto widen;
18871 case V16QImode:
18872 if (TARGET_SSE2)
18873 {
18874 rtx tmp1, tmp2;
18875 /* Extend QImode to SImode using a paradoxical SUBREG. */
18876 tmp1 = gen_reg_rtx (SImode);
18877 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18878 /* Insert the SImode value as low element of V4SImode vector. */
18879 tmp2 = gen_reg_rtx (V4SImode);
18880 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18881 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18882 CONST0_RTX (V4SImode),
18883 const1_rtx);
18884 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18885 /* Cast the V4SImode vector back to a V16QImode vector. */
18886 tmp1 = gen_reg_rtx (V16QImode);
18887 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18888 /* Duplicate the low byte through the whole low SImode word. */
18889 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18890 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18891 /* Cast the V16QImode vector back to a V4SImode vector. */
18892 tmp2 = gen_reg_rtx (V4SImode);
18893 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18894 /* Replicate the low element of the V4SImode vector. */
18895 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18896 	  /* Cast the V4SImode vector back to V16QImode, and store in target. */
18897 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18898 return true;
18899 }
18900 smode = QImode;
18901 wsmode = HImode;
18902 wvmode = V8HImode;
18903 goto widen;
18904 widen:
18905 /* Replicate the value once into the next wider mode and recurse. */
18906 val = convert_modes (wsmode, smode, val, true);
18907 x = expand_simple_binop (wsmode, ASHIFT, val,
18908 GEN_INT (GET_MODE_BITSIZE (smode)),
18909 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18910 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
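      /* For instance, a QImode value 0xab has now become the HImode value
	 0xabab; the recursive call below broadcasts this wider element
	 across the whole vector.  */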
18911
18912 x = gen_reg_rtx (wvmode);
18913 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18914 gcc_unreachable ();
18915 emit_move_insn (target, gen_lowpart (mode, x));
18916 return true;
18917
18918 default:
18919 return false;
18920 }
18921}
18922
18923 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18924    whose ONE_VAR element is VAR and whose other elements are zero.  Return
18925    true if successful.  */
18926
18927static bool
18928ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18929 rtx target, rtx var, int one_var)
18930{
18931 enum machine_mode vsimode;
18932 rtx new_target;
18933 rtx x, tmp;
18934
18935 switch (mode)
18936 {
18937 case V2SFmode:
18938 case V2SImode:
18939 if (!mmx_ok)
18940 return false;
18941 /* FALLTHRU */
18942
18943 case V2DFmode:
18944 case V2DImode:
18945 if (one_var != 0)
18946 return false;
18947 var = force_reg (GET_MODE_INNER (mode), var);
18948 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18949 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18950 return true;
18951
18952 case V4SFmode:
18953 case V4SImode:
18954 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18955 new_target = gen_reg_rtx (mode);
18956 else
18957 new_target = target;
18958 var = force_reg (GET_MODE_INNER (mode), var);
18959 x = gen_rtx_VEC_DUPLICATE (mode, var);
18960 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18961 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18962 if (one_var != 0)
18963 {
18964 /* We need to shuffle the value to the correct position, so
18965 create a new pseudo to store the intermediate result. */
18966
18967 /* With SSE2, we can use the integer shuffle insns. */
18968 if (mode != V4SFmode && TARGET_SSE2)
18969 {
18970 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18971 GEN_INT (1),
18972 GEN_INT (one_var == 1 ? 0 : 1),
18973 GEN_INT (one_var == 2 ? 0 : 1),
18974 GEN_INT (one_var == 3 ? 0 : 1)));
18975 if (target != new_target)
18976 emit_move_insn (target, new_target);
18977 return true;
18978 }
18979
18980 /* Otherwise convert the intermediate result to V4SFmode and
18981 use the SSE1 shuffle instructions. */
18982 if (mode != V4SFmode)
18983 {
18984 tmp = gen_reg_rtx (V4SFmode);
18985 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18986 }
18987 else
18988 tmp = new_target;
18989
18990 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18991 GEN_INT (1),
18992 GEN_INT (one_var == 1 ? 0 : 1),
18993 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18994 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18995
18996 if (mode != V4SFmode)
18997 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18998 else if (tmp != target)
18999 emit_move_insn (target, tmp);
19000 }
19001 else if (target != new_target)
19002 emit_move_insn (target, new_target);
19003 return true;
19004
19005 case V8HImode:
19006 case V16QImode:
19007 vsimode = V4SImode;
19008 goto widen;
19009 case V4HImode:
19010 case V8QImode:
19011 if (!mmx_ok)
19012 return false;
19013 vsimode = V2SImode;
19014 goto widen;
19015 widen:
19016 if (one_var != 0)
19017 return false;
19018
19019 /* Zero extend the variable element to SImode and recurse. */
19020 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
19021
19022 x = gen_reg_rtx (vsimode);
19023 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
19024 var, one_var))
19025 gcc_unreachable ();
19026
19027 emit_move_insn (target, gen_lowpart (mode, x));
19028 return true;
19029
19030 default:
19031 return false;
19032 }
19033}
19034
19035/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19036 consisting of the values in VALS. It is known that all elements
19037 except ONE_VAR are constants. Return true if successful. */
19038
19039static bool
19040ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
19041 rtx target, rtx vals, int one_var)
19042{
19043 rtx var = XVECEXP (vals, 0, one_var);
19044 enum machine_mode wmode;
19045 rtx const_vec, x;
19046
19047 const_vec = copy_rtx (vals);
19048 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
19049 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
19050
19051 switch (mode)
19052 {
19053 case V2DFmode:
19054 case V2DImode:
19055 case V2SFmode:
19056 case V2SImode:
19057 /* For the two element vectors, it's just as easy to use
19058 the general case. */
19059 return false;
19060
19061 case V4SFmode:
19062 case V4SImode:
19063 case V8HImode:
19064 case V4HImode:
19065 break;
19066
19067 case V16QImode:
19068 wmode = V8HImode;
19069 goto widen;
19070 case V8QImode:
19071 wmode = V4HImode;
19072 goto widen;
19073 widen:
19074 /* There's no way to set one QImode entry easily. Combine
19075 the variable value with its adjacent constant value, and
19076 promote to an HImode set. */
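      /* For instance, a variable byte at index 5 of a V16QImode vector is
	 combined with the constant byte at index 4 (one_var ^ 1) and written
	 as HImode element 2 (one_var >> 1) of the V8HImode view.  */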
19077 x = XVECEXP (vals, 0, one_var ^ 1);
19078 if (one_var & 1)
19079 {
19080 var = convert_modes (HImode, QImode, var, true);
19081 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
19082 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19083 x = GEN_INT (INTVAL (x) & 0xff);
19084 }
19085 else
19086 {
19087 var = convert_modes (HImode, QImode, var, true);
19088 x = gen_int_mode (INTVAL (x) << 8, HImode);
19089 }
19090 if (x != const0_rtx)
19091 var = expand_simple_binop (HImode, IOR, var, x, var,
19092 1, OPTAB_LIB_WIDEN);
19093
19094 x = gen_reg_rtx (wmode);
19095 emit_move_insn (x, gen_lowpart (wmode, const_vec));
19096 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
19097
19098 emit_move_insn (target, gen_lowpart (mode, x));
19099 return true;
19100
19101 default:
19102 return false;
19103 }
19104
19105 emit_move_insn (target, const_vec);
19106 ix86_expand_vector_set (mmx_ok, target, var, one_var);
19107 return true;
19108}
19109
19110/* A subroutine of ix86_expand_vector_init. Handle the most general case:
19111 all values variable, and none identical. */
19112
19113static void
19114ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
19115 rtx target, rtx vals)
19116{
19117 enum machine_mode half_mode = GET_MODE_INNER (mode);
19118 rtx op0 = NULL, op1 = NULL;
19119 bool use_vec_concat = false;
19120
19121 switch (mode)
19122 {
19123 case V2SFmode:
19124 case V2SImode:
19125 if (!mmx_ok && !TARGET_SSE)
19126 break;
19127 /* FALLTHRU */
19128
19129 case V2DFmode:
19130 case V2DImode:
19131 /* For the two element vectors, we always implement VEC_CONCAT. */
19132 op0 = XVECEXP (vals, 0, 0);
19133 op1 = XVECEXP (vals, 0, 1);
19134 use_vec_concat = true;
19135 break;
19136
19137 case V4SFmode:
19138 half_mode = V2SFmode;
19139 goto half;
19140 case V4SImode:
19141 half_mode = V2SImode;
19142 goto half;
19143 half:
19144 {
19145 rtvec v;
19146
19147 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
19148 Recurse to load the two halves. */
19149
19150 op0 = gen_reg_rtx (half_mode);
19151 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
19152 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
19153
19154 op1 = gen_reg_rtx (half_mode);
19155 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
19156 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
19157
19158 use_vec_concat = true;
19159 }
19160 break;
19161
19162 case V8HImode:
19163 case V16QImode:
19164 case V4HImode:
19165 case V8QImode:
19166 break;
19167
19168 default:
19169 gcc_unreachable ();
19170 }
19171
19172 if (use_vec_concat)
19173 {
19174 if (!register_operand (op0, half_mode))
19175 op0 = force_reg (half_mode, op0);
19176 if (!register_operand (op1, half_mode))
19177 op1 = force_reg (half_mode, op1);
19178
19179 emit_insn (gen_rtx_SET (VOIDmode, target,
19180 gen_rtx_VEC_CONCAT (mode, op0, op1)));
19181 }
19182 else
19183 {
19184 int i, j, n_elts, n_words, n_elt_per_word;
19185 enum machine_mode inner_mode;
19186 rtx words[4], shift;
19187
19188 inner_mode = GET_MODE_INNER (mode);
19189 n_elts = GET_MODE_NUNITS (mode);
19190 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
19191 n_elt_per_word = n_elts / n_words;
19192 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
19193
19194 for (i = 0; i < n_words; ++i)
19195 {
19196 rtx word = NULL_RTX;
19197
19198 for (j = 0; j < n_elt_per_word; ++j)
19199 {
19200 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
19201 elt = convert_modes (word_mode, inner_mode, elt, true);
19202
19203 if (j == 0)
19204 word = elt;
19205 else
19206 {
19207 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
19208 word, 1, OPTAB_LIB_WIDEN);
19209 word = expand_simple_binop (word_mode, IOR, word, elt,
19210 word, 1, OPTAB_LIB_WIDEN);
19211 }
19212 }
19213
19214 words[i] = word;
19215 }
19216
19217 if (n_words == 1)
19218 emit_move_insn (target, gen_lowpart (mode, words[0]));
19219 else if (n_words == 2)
19220 {
19221 rtx tmp = gen_reg_rtx (mode);
19222 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
19223 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
19224 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
19225 emit_move_insn (target, tmp);
19226 }
19227 else if (n_words == 4)
19228 {
19229 rtx tmp = gen_reg_rtx (V4SImode);
19230 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
19231 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
19232 emit_move_insn (target, gen_lowpart (mode, tmp));
19233 }
19234 else
19235 gcc_unreachable ();
19236 }
19237}
19238
19239/* Initialize vector TARGET via VALS. Suppress the use of MMX
19240 instructions unless MMX_OK is true. */
19241
19242void
19243ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
19244{
19245 enum machine_mode mode = GET_MODE (target);
19246 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19247 int n_elts = GET_MODE_NUNITS (mode);
19248 int n_var = 0, one_var = -1;
19249 bool all_same = true, all_const_zero = true;
19250 int i;
19251 rtx x;
19252
19253 for (i = 0; i < n_elts; ++i)
19254 {
19255 x = XVECEXP (vals, 0, i);
19256 if (!CONSTANT_P (x))
19257 n_var++, one_var = i;
19258 else if (x != CONST0_RTX (inner_mode))
19259 all_const_zero = false;
19260 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
19261 all_same = false;
19262 }
19263
19264 /* Constants are best loaded from the constant pool. */
19265 if (n_var == 0)
19266 {
19267 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
19268 return;
19269 }
19270
19271 /* If all values are identical, broadcast the value. */
19272 if (all_same
19273 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
19274 XVECEXP (vals, 0, 0)))
19275 return;
19276
19277 /* Values where only one field is non-constant are best loaded from
19278 the pool and overwritten via move later. */
19279 if (n_var == 1)
19280 {
19281 if (all_const_zero
19282 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
19283 XVECEXP (vals, 0, one_var),
19284 one_var))
19285 return;
19286
19287 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
19288 return;
19289 }
19290
19291 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
19292}
19293
19294void
19295ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
19296{
19297 enum machine_mode mode = GET_MODE (target);
19298 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19299 bool use_vec_merge = false;
19300 rtx tmp;
19301
19302 switch (mode)
19303 {
19304 case V2SFmode:
19305 case V2SImode:
19306 if (mmx_ok)
19307 {
19308 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
19309 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
19310 if (elt == 0)
19311 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
19312 else
19313 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
19314 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19315 return;
19316 }
19317 break;
19318
19319 case V2DFmode:
19320 case V2DImode:
19321 {
19322 rtx op0, op1;
19323
19324 /* For the two element vectors, we implement a VEC_CONCAT with
19325 the extraction of the other element. */
19326
19327 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
19328 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
19329
19330 if (elt == 0)
19331 op0 = val, op1 = tmp;
19332 else
19333 op0 = tmp, op1 = val;
19334
19335 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
19336 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19337 }
19338 return;
19339
19340 case V4SFmode:
19341 switch (elt)
19342 {
19343 case 0:
19344 use_vec_merge = true;
19345 break;
19346
19347 case 1:
19348 /* tmp = target = A B C D */
19349 tmp = copy_to_reg (target);
19350 /* target = A A B B */
19351 emit_insn (gen_sse_unpcklps (target, target, target));
19352 /* target = X A B B */
19353 ix86_expand_vector_set (false, target, val, 0);
19354 /* target = A X C D */
19355 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19356 GEN_INT (1), GEN_INT (0),
19357 GEN_INT (2+4), GEN_INT (3+4)));
19358 return;
19359
19360 case 2:
19361 /* tmp = target = A B C D */
19362 tmp = copy_to_reg (target);
19363 /* tmp = X B C D */
19364 ix86_expand_vector_set (false, tmp, val, 0);
19365 /* target = A B X D */
19366 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19367 GEN_INT (0), GEN_INT (1),
19368 GEN_INT (0+4), GEN_INT (3+4)));
19369 return;
19370
19371 case 3:
19372 /* tmp = target = A B C D */
19373 tmp = copy_to_reg (target);
19374 /* tmp = X B C D */
19375 ix86_expand_vector_set (false, tmp, val, 0);
19376 	  /* target = A B C X */
19377 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19378 GEN_INT (0), GEN_INT (1),
19379 GEN_INT (2+4), GEN_INT (0+4)));
19380 return;
19381
19382 default:
19383 gcc_unreachable ();
19384 }
19385 break;
19386
19387 case V4SImode:
19388 /* Element 0 handled by vec_merge below. */
19389 if (elt == 0)
19390 {
19391 use_vec_merge = true;
19392 break;
19393 }
19394
19395 if (TARGET_SSE2)
19396 {
19397 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19398 store into element 0, then shuffle them back. */
19399
19400 rtx order[4];
19401
19402 order[0] = GEN_INT (elt);
19403 order[1] = const1_rtx;
19404 order[2] = const2_rtx;
19405 order[3] = GEN_INT (3);
19406 order[elt] = const0_rtx;
19407
19408 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19409 order[1], order[2], order[3]));
19410
19411 ix86_expand_vector_set (false, target, val, 0);
19412
19413 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19414 order[1], order[2], order[3]));
19415 }
19416 else
19417 {
19418 /* For SSE1, we have to reuse the V4SF code. */
19419 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19420 gen_lowpart (SFmode, val), elt);
19421 }
19422 return;
19423
19424 case V8HImode:
19425 use_vec_merge = TARGET_SSE2;
19426 break;
19427 case V4HImode:
19428 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19429 break;
19430
19431 case V16QImode:
19432 case V8QImode:
19433 default:
19434 break;
19435 }
19436
19437 if (use_vec_merge)
19438 {
19439 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19440 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19441 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19442 }
19443 else
19444 {
19445 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19446
19447 emit_move_insn (mem, target);
19448
19449 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19450 emit_move_insn (tmp, val);
19451
19452 emit_move_insn (target, mem);
19453 }
19454}
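
/* For illustration, at the source level the operation implemented above
   is simply

     void vec_set (float v[4], float x, unsigned elt) { v[elt] = x; }

   The vec_merge path keeps everything in registers by duplicating VAL
   and merging it under the mask (1 << elt); the final else branch is
   the generic fallback that spills TARGET to a stack slot, stores VAL
   into the selected element, and reloads the whole vector.  */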
19455
19456void
19457ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19458{
19459 enum machine_mode mode = GET_MODE (vec);
19460 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19461 bool use_vec_extr = false;
19462 rtx tmp;
19463
19464 switch (mode)
19465 {
19466 case V2SImode:
19467 case V2SFmode:
19468 if (!mmx_ok)
19469 break;
19470 /* FALLTHRU */
19471
19472 case V2DFmode:
19473 case V2DImode:
19474 use_vec_extr = true;
19475 break;
19476
19477 case V4SFmode:
19478 switch (elt)
19479 {
19480 case 0:
19481 tmp = vec;
19482 break;
19483
19484 case 1:
19485 case 3:
19486 tmp = gen_reg_rtx (mode);
19487 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19488 GEN_INT (elt), GEN_INT (elt),
19489 GEN_INT (elt+4), GEN_INT (elt+4)));
19490 break;
19491
19492 case 2:
19493 tmp = gen_reg_rtx (mode);
19494 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19495 break;
19496
19497 default:
19498 gcc_unreachable ();
19499 }
19500 vec = tmp;
19501 use_vec_extr = true;
19502 elt = 0;
19503 break;
19504
19505 case V4SImode:
19506 if (TARGET_SSE2)
19507 {
19508 switch (elt)
19509 {
19510 case 0:
19511 tmp = vec;
19512 break;
19513
19514 case 1:
19515 case 3:
19516 tmp = gen_reg_rtx (mode);
19517 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19518 GEN_INT (elt), GEN_INT (elt),
19519 GEN_INT (elt), GEN_INT (elt)));
19520 break;
19521
19522 case 2:
19523 tmp = gen_reg_rtx (mode);
19524 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19525 break;
19526
19527 default:
19528 gcc_unreachable ();
19529 }
19530 vec = tmp;
19531 use_vec_extr = true;
19532 elt = 0;
19533 }
19534 else
19535 {
19536 /* For SSE1, we have to reuse the V4SF code. */
19537 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19538 gen_lowpart (V4SFmode, vec), elt);
19539 return;
19540 }
19541 break;
19542
19543 case V8HImode:
19544 use_vec_extr = TARGET_SSE2;
19545 break;
19546 case V4HImode:
19547 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19548 break;
19549
19550 case V16QImode:
19551 case V8QImode:
19552 /* ??? Could extract the appropriate HImode element and shift. */
19553 default:
19554 break;
19555 }
19556
19557 if (use_vec_extr)
19558 {
19559 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19560 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19561
19562 /* Let the rtl optimizers know about the zero extension performed. */
19563 if (inner_mode == HImode)
19564 {
19565 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19566 target = gen_lowpart (SImode, target);
19567 }
19568
19569 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19570 }
19571 else
19572 {
19573 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19574
19575 emit_move_insn (mem, vec);
19576
19577 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19578 emit_move_insn (target, tmp);
19579 }
19580}
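
/* For illustration: to extract element 3 of VEC = { a, b, c, d } the
   V4SF path above first broadcasts it to position 0,

     shufps  $0xff, %xmm0, %xmm0      # { d, d, d, d }

   and then lets the plain element-0 vec_select finish the job; element
   2 is moved into position 0 the same way via unpckhps.  */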
19581
19582/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19583 pattern to reduce; DEST is the destination; IN is the input vector. */
19584
19585void
19586ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19587{
19588 rtx tmp1, tmp2, tmp3;
19589
19590 tmp1 = gen_reg_rtx (V4SFmode);
19591 tmp2 = gen_reg_rtx (V4SFmode);
19592 tmp3 = gen_reg_rtx (V4SFmode);
19593
19594 emit_insn (gen_sse_movhlps (tmp1, in, in));
19595 emit_insn (fn (tmp2, tmp1, in));
19596
19597 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19598 GEN_INT (1), GEN_INT (1),
19599 GEN_INT (1+4), GEN_INT (1+4)));
19600 emit_insn (fn (dest, tmp2, tmp3));
19601}
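
/* For illustration, when FN generates an addition and
   IN = { a, b, c, d } the sequence above is

     tmp1 = movhlps (in, in)          -> { c, d, c, d }
     tmp2 = tmp1 + in                 -> { a+c, b+d, ... }
     tmp3 = broadcast of tmp2[1]      -> { b+d, b+d, b+d, b+d }
     dest = tmp2 + tmp3               -> dest[0] = a+b+c+d

   Only element 0 of DEST carries the reduction result; the remaining
   elements hold meaningless partial values.  */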
19602
19603/* Target hook for scalar_mode_supported_p. */
19604static bool
19605ix86_scalar_mode_supported_p (enum machine_mode mode)
19606{
19607 if (DECIMAL_FLOAT_MODE_P (mode))
19608 return true;
19609 else
19610 return default_scalar_mode_supported_p (mode);
19611}
19612
19613/* Implements target hook vector_mode_supported_p. */
19614static bool
19615ix86_vector_mode_supported_p (enum machine_mode mode)
19616{
19617 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19618 return true;
19619 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19620 return true;
19621 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19622 return true;
19623 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19624 return true;
19625 return false;
19626}
19627
19628/* Worker function for TARGET_MD_ASM_CLOBBERS.
19629
19630 We do this in the new i386 backend to maintain source compatibility
19631 with the old cc0-based compiler. */
19632
19633static tree
19634ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19635 tree inputs ATTRIBUTE_UNUSED,
19636 tree clobbers)
19637{
19638 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19639 clobbers);
19640 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19641 clobbers);
19642 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19643 clobbers);
19644 return clobbers;
19645}
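
/* For illustration: with this hook a user-level statement such as

     asm volatile ("cld; rep movsb" : : "S" (src), "D" (dst), "c" (len));

   behaves as if it also listed "flags", "fpsr" and "dirflag" in its
   clobber list, which is what code written for the old cc0-based
   backend implicitly relied on.  */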
19646
19647/* Return true if this goes in large data/bss. */
19648
19649static bool
19650ix86_in_large_data_p (tree exp)
19651{
19652 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19653 return false;
19654
19655 /* Functions are never large data. */
19656 if (TREE_CODE (exp) == FUNCTION_DECL)
19657 return false;
19658
19659 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19660 {
19661 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19662 if (strcmp (section, ".ldata") == 0
19663 || strcmp (section, ".lbss") == 0)
19664 return true;
19665 return false;
19666 }
19667 else
19668 {
19669 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19670
19671 /* If this is an incomplete type with size 0, then we can't put it
19672 in data because it might be too big when completed. */
19673 if (!size || size > ix86_section_threshold)
19674 return true;
19675 }
19676
19677 return false;
19678}
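
/* For illustration (assuming -mcmodel=medium on x86-64): a definition
   such as

     static char big_buffer[1 << 20];

   whose size exceeds ix86_section_threshold is treated as large data
   and ends up in .lbss, while objects at or below the threshold keep
   using the ordinary .data/.bss sections.  */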

/* Worker function for TARGET_ENCODE_SECTION_INFO.  Mark symbols that
   live in the large data/bss sections so far addresses are used.  */
19679static void
19680ix86_encode_section_info (tree decl, rtx rtl, int first)
19681{
19682 default_encode_section_info (decl, rtl, first);
19683
19684 if (TREE_CODE (decl) == VAR_DECL
19685 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19686 && ix86_in_large_data_p (decl))
19687 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19688}
19689
19690/* Worker function for REVERSE_CONDITION. */
19691
19692enum rtx_code
19693ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19694{
19695 return (mode != CCFPmode && mode != CCFPUmode
19696 ? reverse_condition (code)
19697 : reverse_condition_maybe_unordered (code));
19698}
19699
19700/* Output code to perform an x87 FP register move, from OPERANDS[1]
19701 to OPERANDS[0]. */
19702
19703const char *
19704output_387_reg_move (rtx insn, rtx *operands)
19705{
19706 if (REG_P (operands[1])
19707 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19708 {
19709 if (REGNO (operands[0]) == FIRST_STACK_REG)
19710 return output_387_ffreep (operands, 0);
19711 return "fstp\t%y0";
19712 }
19713 if (STACK_TOP_P (operands[0]))
19714 return "fld%z1\t%y1";
19715 return "fst\t%y0";
19716}
19717
19718/* Output code to perform a conditional jump to LABEL, if C2 flag in
19719 FP status register is set. */
19720
19721void
19722ix86_emit_fp_unordered_jump (rtx label)
19723{
19724 rtx reg = gen_reg_rtx (HImode);
19725 rtx temp;
19726
19727 emit_insn (gen_x86_fnstsw_1 (reg));
19728
19729 if (TARGET_USE_SAHF)
19730 {
19731 emit_insn (gen_x86_sahf_1 (reg));
19732
19733 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19734 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19735 }
19736 else
19737 {
19738 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19739
19740 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19741 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19742 }
19743
19744 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19745 gen_rtx_LABEL_REF (VOIDmode, label),
19746 pc_rtx);
19747 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19748 emit_jump_insn (temp);
19749}
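
/* For illustration: with TARGET_USE_SAHF the sequence emitted is
   roughly

     fnstsw  %ax
     sahf
     jp      label

   (C2 of the FP status word lands in bit 2 of %ah, which sahf copies
   into PF); without sahf it is

     fnstsw  %ax
     testb   $0x04, %ah
     jne     label
   */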
19750
19751/* Output code to perform a log1p XFmode calculation. */
19752
19753void ix86_emit_i387_log1p (rtx op0, rtx op1)
19754{
19755 rtx label1 = gen_label_rtx ();
19756 rtx label2 = gen_label_rtx ();
19757
19758 rtx tmp = gen_reg_rtx (XFmode);
19759 rtx tmp2 = gen_reg_rtx (XFmode);
19760
19761 emit_insn (gen_absxf2 (tmp, op1));
19762 emit_insn (gen_cmpxf (tmp,
19763 CONST_DOUBLE_FROM_REAL_VALUE (
19764 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19765 XFmode)));
19766 emit_jump_insn (gen_bge (label1));
19767
19768 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19769 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19770 emit_jump (label2);
19771
19772 emit_label (label1);
19773 emit_move_insn (tmp, CONST1_RTX (XFmode));
19774 emit_insn (gen_addxf3 (tmp, op1, tmp));
19775 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19776 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19777
19778 emit_label (label2);
19779}
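
/* The threshold above is 1 - sqrt(2)/2.  fyl2xp1 computes
   y * log2(x + 1) but is only specified for |x| < 1 - sqrt(2)/2, where
   it avoids the cancellation that forming 1 + x explicitly would cause.
   With y = ln(2) (the fldln2 constant) both branches therefore yield
   op0 = ln(2) * log2(1 + op1) = log(1 + op1); the second branch simply
   forms 1 + op1 first and uses the unrestricted fyl2x.  */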
19780
19781/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19782
19783static void
19784i386_solaris_elf_named_section (const char *name, unsigned int flags,
19785 tree decl)
19786{
19787 /* With Binutils 2.15, the "@unwind" marker must be specified on
19788 every occurrence of the ".eh_frame" section, not just the first
19789 one. */
19790 if (TARGET_64BIT
19791 && strcmp (name, ".eh_frame") == 0)
19792 {
19793 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19794 flags & SECTION_WRITE ? "aw" : "a");
19795 return;
19796 }
19797 default_elf_asm_named_section (name, flags, decl);
19798}
19799
19800/* Return the mangling of TYPE if it is an extended fundamental type. */
19801
19802static const char *
19803ix86_mangle_fundamental_type (tree type)
19804{
19805 switch (TYPE_MODE (type))
19806 {
19807 case TFmode:
19808 /* __float128 is "g". */
19809 return "g";
19810 case XFmode:
19811 /* "long double" or __float80 is "e". */
19812 return "e";
19813 default:
19814 return NULL;
19815 }
19816}
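
/* For illustration (Itanium C++ ABI): with these manglings a
   declaration such as void f (__float128) mangles to _Z1fg, and
   void f (__float80) to _Z1fe, matching the "g" and "e" codes
   returned above.  */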
19817
19818/* For 32-bit code we can save PIC register setup by using
19819 __stack_chk_fail_local hidden function instead of calling
19820   __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
19821 register, so it is better to call __stack_chk_fail directly. */
19822
19823static tree
19824ix86_stack_protect_fail (void)
19825{
19826 return TARGET_64BIT
19827 ? default_external_stack_protect_fail ()
19828 : default_hidden_stack_protect_fail ();
19829}
19830
19831/* Select a format to encode pointers in exception handling data. CODE
19832 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19833 true if the symbol may be affected by dynamic relocations.
19834
19835 ??? All x86 object file formats are capable of representing this.
19836 After all, the relocation needed is the same as for the call insn.
19837 Whether or not a particular assembler allows us to enter such, I
19838 guess we'll have to see. */
19839int
19840asm_preferred_eh_data_format (int code, int global)
19841{
19842 if (flag_pic)
19843 {
19844 int type = DW_EH_PE_sdata8;
19845 if (!TARGET_64BIT
19846 || ix86_cmodel == CM_SMALL_PIC
19847 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19848 type = DW_EH_PE_sdata4;
19849 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19850 }
19851 if (ix86_cmodel == CM_SMALL
19852 || (ix86_cmodel == CM_MEDIUM && code))
19853 return DW_EH_PE_udata4;
19854 return DW_EH_PE_absptr;
19855}
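
/* For illustration: -fPIC on 32-bit targets yields
   DW_EH_PE_pcrel | DW_EH_PE_sdata4 (with DW_EH_PE_indirect added for
   global symbols), small-model (and, for code labels, medium-model)
   non-PIC code gets DW_EH_PE_udata4, and everything else falls back to
   DW_EH_PE_absptr.  */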
19856
19857#include "gt-i386.h"
15421 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15422
15423 tree pchar_type_node = build_pointer_type (char_type_node);
15424 tree pcchar_type_node = build_pointer_type (
15425 build_type_variant (char_type_node, 1, 0));
15426 tree pfloat_type_node = build_pointer_type (float_type_node);
15427 tree pcfloat_type_node = build_pointer_type (
15428 build_type_variant (float_type_node, 1, 0));
15429 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15430 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15431 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15432
15433 /* Comparisons. */
15434 tree int_ftype_v4sf_v4sf
15435 = build_function_type_list (integer_type_node,
15436 V4SF_type_node, V4SF_type_node, NULL_TREE);
15437 tree v4si_ftype_v4sf_v4sf
15438 = build_function_type_list (V4SI_type_node,
15439 V4SF_type_node, V4SF_type_node, NULL_TREE);
15440 /* MMX/SSE/integer conversions. */
15441 tree int_ftype_v4sf
15442 = build_function_type_list (integer_type_node,
15443 V4SF_type_node, NULL_TREE);
15444 tree int64_ftype_v4sf
15445 = build_function_type_list (long_long_integer_type_node,
15446 V4SF_type_node, NULL_TREE);
15447 tree int_ftype_v8qi
15448 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15449 tree v4sf_ftype_v4sf_int
15450 = build_function_type_list (V4SF_type_node,
15451 V4SF_type_node, integer_type_node, NULL_TREE);
15452 tree v4sf_ftype_v4sf_int64
15453 = build_function_type_list (V4SF_type_node,
15454 V4SF_type_node, long_long_integer_type_node,
15455 NULL_TREE);
15456 tree v4sf_ftype_v4sf_v2si
15457 = build_function_type_list (V4SF_type_node,
15458 V4SF_type_node, V2SI_type_node, NULL_TREE);
15459
15460 /* Miscellaneous. */
15461 tree v8qi_ftype_v4hi_v4hi
15462 = build_function_type_list (V8QI_type_node,
15463 V4HI_type_node, V4HI_type_node, NULL_TREE);
15464 tree v4hi_ftype_v2si_v2si
15465 = build_function_type_list (V4HI_type_node,
15466 V2SI_type_node, V2SI_type_node, NULL_TREE);
15467 tree v4sf_ftype_v4sf_v4sf_int
15468 = build_function_type_list (V4SF_type_node,
15469 V4SF_type_node, V4SF_type_node,
15470 integer_type_node, NULL_TREE);
15471 tree v2si_ftype_v4hi_v4hi
15472 = build_function_type_list (V2SI_type_node,
15473 V4HI_type_node, V4HI_type_node, NULL_TREE);
15474 tree v4hi_ftype_v4hi_int
15475 = build_function_type_list (V4HI_type_node,
15476 V4HI_type_node, integer_type_node, NULL_TREE);
15477 tree v4hi_ftype_v4hi_di
15478 = build_function_type_list (V4HI_type_node,
15479 V4HI_type_node, long_long_unsigned_type_node,
15480 NULL_TREE);
15481 tree v2si_ftype_v2si_di
15482 = build_function_type_list (V2SI_type_node,
15483 V2SI_type_node, long_long_unsigned_type_node,
15484 NULL_TREE);
15485 tree void_ftype_void
15486 = build_function_type (void_type_node, void_list_node);
15487 tree void_ftype_unsigned
15488 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15489 tree void_ftype_unsigned_unsigned
15490 = build_function_type_list (void_type_node, unsigned_type_node,
15491 unsigned_type_node, NULL_TREE);
15492 tree void_ftype_pcvoid_unsigned_unsigned
15493 = build_function_type_list (void_type_node, const_ptr_type_node,
15494 unsigned_type_node, unsigned_type_node,
15495 NULL_TREE);
15496 tree unsigned_ftype_void
15497 = build_function_type (unsigned_type_node, void_list_node);
15498 tree v2si_ftype_v4sf
15499 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15500 /* Loads/stores. */
15501 tree void_ftype_v8qi_v8qi_pchar
15502 = build_function_type_list (void_type_node,
15503 V8QI_type_node, V8QI_type_node,
15504 pchar_type_node, NULL_TREE);
15505 tree v4sf_ftype_pcfloat
15506 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15507 /* @@@ the type is bogus */
15508 tree v4sf_ftype_v4sf_pv2si
15509 = build_function_type_list (V4SF_type_node,
15510 V4SF_type_node, pv2si_type_node, NULL_TREE);
15511 tree void_ftype_pv2si_v4sf
15512 = build_function_type_list (void_type_node,
15513 pv2si_type_node, V4SF_type_node, NULL_TREE);
15514 tree void_ftype_pfloat_v4sf
15515 = build_function_type_list (void_type_node,
15516 pfloat_type_node, V4SF_type_node, NULL_TREE);
15517 tree void_ftype_pdi_di
15518 = build_function_type_list (void_type_node,
15519 pdi_type_node, long_long_unsigned_type_node,
15520 NULL_TREE);
15521 tree void_ftype_pv2di_v2di
15522 = build_function_type_list (void_type_node,
15523 pv2di_type_node, V2DI_type_node, NULL_TREE);
15524 /* Normal vector unops. */
15525 tree v4sf_ftype_v4sf
15526 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15527 tree v16qi_ftype_v16qi
15528 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15529 tree v8hi_ftype_v8hi
15530 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15531 tree v4si_ftype_v4si
15532 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15533 tree v8qi_ftype_v8qi
15534 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15535 tree v4hi_ftype_v4hi
15536 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15537
15538 /* Normal vector binops. */
15539 tree v4sf_ftype_v4sf_v4sf
15540 = build_function_type_list (V4SF_type_node,
15541 V4SF_type_node, V4SF_type_node, NULL_TREE);
15542 tree v8qi_ftype_v8qi_v8qi
15543 = build_function_type_list (V8QI_type_node,
15544 V8QI_type_node, V8QI_type_node, NULL_TREE);
15545 tree v4hi_ftype_v4hi_v4hi
15546 = build_function_type_list (V4HI_type_node,
15547 V4HI_type_node, V4HI_type_node, NULL_TREE);
15548 tree v2si_ftype_v2si_v2si
15549 = build_function_type_list (V2SI_type_node,
15550 V2SI_type_node, V2SI_type_node, NULL_TREE);
15551 tree di_ftype_di_di
15552 = build_function_type_list (long_long_unsigned_type_node,
15553 long_long_unsigned_type_node,
15554 long_long_unsigned_type_node, NULL_TREE);
15555
15556 tree di_ftype_di_di_int
15557 = build_function_type_list (long_long_unsigned_type_node,
15558 long_long_unsigned_type_node,
15559 long_long_unsigned_type_node,
15560 integer_type_node, NULL_TREE);
15561
15562 tree v2si_ftype_v2sf
15563 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15564 tree v2sf_ftype_v2si
15565 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15566 tree v2si_ftype_v2si
15567 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15568 tree v2sf_ftype_v2sf
15569 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15570 tree v2sf_ftype_v2sf_v2sf
15571 = build_function_type_list (V2SF_type_node,
15572 V2SF_type_node, V2SF_type_node, NULL_TREE);
15573 tree v2si_ftype_v2sf_v2sf
15574 = build_function_type_list (V2SI_type_node,
15575 V2SF_type_node, V2SF_type_node, NULL_TREE);
15576 tree pint_type_node = build_pointer_type (integer_type_node);
15577 tree pdouble_type_node = build_pointer_type (double_type_node);
15578 tree pcdouble_type_node = build_pointer_type (
15579 build_type_variant (double_type_node, 1, 0));
15580 tree int_ftype_v2df_v2df
15581 = build_function_type_list (integer_type_node,
15582 V2DF_type_node, V2DF_type_node, NULL_TREE);
15583
15584 tree void_ftype_pcvoid
15585 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15586 tree v4sf_ftype_v4si
15587 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15588 tree v4si_ftype_v4sf
15589 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15590 tree v2df_ftype_v4si
15591 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15592 tree v4si_ftype_v2df
15593 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15594 tree v2si_ftype_v2df
15595 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15596 tree v4sf_ftype_v2df
15597 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15598 tree v2df_ftype_v2si
15599 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15600 tree v2df_ftype_v4sf
15601 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15602 tree int_ftype_v2df
15603 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15604 tree int64_ftype_v2df
15605 = build_function_type_list (long_long_integer_type_node,
15606 V2DF_type_node, NULL_TREE);
15607 tree v2df_ftype_v2df_int
15608 = build_function_type_list (V2DF_type_node,
15609 V2DF_type_node, integer_type_node, NULL_TREE);
15610 tree v2df_ftype_v2df_int64
15611 = build_function_type_list (V2DF_type_node,
15612 V2DF_type_node, long_long_integer_type_node,
15613 NULL_TREE);
15614 tree v4sf_ftype_v4sf_v2df
15615 = build_function_type_list (V4SF_type_node,
15616 V4SF_type_node, V2DF_type_node, NULL_TREE);
15617 tree v2df_ftype_v2df_v4sf
15618 = build_function_type_list (V2DF_type_node,
15619 V2DF_type_node, V4SF_type_node, NULL_TREE);
15620 tree v2df_ftype_v2df_v2df_int
15621 = build_function_type_list (V2DF_type_node,
15622 V2DF_type_node, V2DF_type_node,
15623 integer_type_node,
15624 NULL_TREE);
15625 tree v2df_ftype_v2df_pcdouble
15626 = build_function_type_list (V2DF_type_node,
15627 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15628 tree void_ftype_pdouble_v2df
15629 = build_function_type_list (void_type_node,
15630 pdouble_type_node, V2DF_type_node, NULL_TREE);
15631 tree void_ftype_pint_int
15632 = build_function_type_list (void_type_node,
15633 pint_type_node, integer_type_node, NULL_TREE);
15634 tree void_ftype_v16qi_v16qi_pchar
15635 = build_function_type_list (void_type_node,
15636 V16QI_type_node, V16QI_type_node,
15637 pchar_type_node, NULL_TREE);
15638 tree v2df_ftype_pcdouble
15639 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15640 tree v2df_ftype_v2df_v2df
15641 = build_function_type_list (V2DF_type_node,
15642 V2DF_type_node, V2DF_type_node, NULL_TREE);
15643 tree v16qi_ftype_v16qi_v16qi
15644 = build_function_type_list (V16QI_type_node,
15645 V16QI_type_node, V16QI_type_node, NULL_TREE);
15646 tree v8hi_ftype_v8hi_v8hi
15647 = build_function_type_list (V8HI_type_node,
15648 V8HI_type_node, V8HI_type_node, NULL_TREE);
15649 tree v4si_ftype_v4si_v4si
15650 = build_function_type_list (V4SI_type_node,
15651 V4SI_type_node, V4SI_type_node, NULL_TREE);
15652 tree v2di_ftype_v2di_v2di
15653 = build_function_type_list (V2DI_type_node,
15654 V2DI_type_node, V2DI_type_node, NULL_TREE);
15655 tree v2di_ftype_v2df_v2df
15656 = build_function_type_list (V2DI_type_node,
15657 V2DF_type_node, V2DF_type_node, NULL_TREE);
15658 tree v2df_ftype_v2df
15659 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15660 tree v2di_ftype_v2di_int
15661 = build_function_type_list (V2DI_type_node,
15662 V2DI_type_node, integer_type_node, NULL_TREE);
15663 tree v2di_ftype_v2di_v2di_int
15664 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15665 V2DI_type_node, integer_type_node, NULL_TREE);
15666 tree v4si_ftype_v4si_int
15667 = build_function_type_list (V4SI_type_node,
15668 V4SI_type_node, integer_type_node, NULL_TREE);
15669 tree v8hi_ftype_v8hi_int
15670 = build_function_type_list (V8HI_type_node,
15671 V8HI_type_node, integer_type_node, NULL_TREE);
15672 tree v4si_ftype_v8hi_v8hi
15673 = build_function_type_list (V4SI_type_node,
15674 V8HI_type_node, V8HI_type_node, NULL_TREE);
15675 tree di_ftype_v8qi_v8qi
15676 = build_function_type_list (long_long_unsigned_type_node,
15677 V8QI_type_node, V8QI_type_node, NULL_TREE);
15678 tree di_ftype_v2si_v2si
15679 = build_function_type_list (long_long_unsigned_type_node,
15680 V2SI_type_node, V2SI_type_node, NULL_TREE);
15681 tree v2di_ftype_v16qi_v16qi
15682 = build_function_type_list (V2DI_type_node,
15683 V16QI_type_node, V16QI_type_node, NULL_TREE);
15684 tree v2di_ftype_v4si_v4si
15685 = build_function_type_list (V2DI_type_node,
15686 V4SI_type_node, V4SI_type_node, NULL_TREE);
15687 tree int_ftype_v16qi
15688 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15689 tree v16qi_ftype_pcchar
15690 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15691 tree void_ftype_pchar_v16qi
15692 = build_function_type_list (void_type_node,
15693 pchar_type_node, V16QI_type_node, NULL_TREE);
15694
15695 tree v2di_ftype_v2di_unsigned_unsigned
15696 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15697 unsigned_type_node, unsigned_type_node,
15698 NULL_TREE);
15699 tree v2di_ftype_v2di_v2di_unsigned_unsigned
15700 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
15701 unsigned_type_node, unsigned_type_node,
15702 NULL_TREE);
15703 tree v2di_ftype_v2di_v16qi
15704 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
15705 NULL_TREE);
15706
15707 tree float80_type;
15708 tree float128_type;
15709 tree ftype;
15710
15711 /* The __float80 type. */
15712 if (TYPE_MODE (long_double_type_node) == XFmode)
15713 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15714 "__float80");
15715 else
15716 {
15717 /* The __float80 type. */
15718 float80_type = make_node (REAL_TYPE);
15719 TYPE_PRECISION (float80_type) = 80;
15720 layout_type (float80_type);
15721 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15722 }
15723
15724 if (TARGET_64BIT)
15725 {
15726 float128_type = make_node (REAL_TYPE);
15727 TYPE_PRECISION (float128_type) = 128;
15728 layout_type (float128_type);
15729 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15730 }
15731
15732 /* Add all builtins that are more or less simple operations on two
15733 operands. */
15734 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15735 {
15736 /* Use one of the operands; the target can have a different mode for
15737 mask-generating compares. */
15738 enum machine_mode mode;
15739 tree type;
15740
15741 if (d->name == 0)
15742 continue;
15743 mode = insn_data[d->icode].operand[1].mode;
15744
15745 switch (mode)
15746 {
15747 case V16QImode:
15748 type = v16qi_ftype_v16qi_v16qi;
15749 break;
15750 case V8HImode:
15751 type = v8hi_ftype_v8hi_v8hi;
15752 break;
15753 case V4SImode:
15754 type = v4si_ftype_v4si_v4si;
15755 break;
15756 case V2DImode:
15757 type = v2di_ftype_v2di_v2di;
15758 break;
15759 case V2DFmode:
15760 type = v2df_ftype_v2df_v2df;
15761 break;
15762 case V4SFmode:
15763 type = v4sf_ftype_v4sf_v4sf;
15764 break;
15765 case V8QImode:
15766 type = v8qi_ftype_v8qi_v8qi;
15767 break;
15768 case V4HImode:
15769 type = v4hi_ftype_v4hi_v4hi;
15770 break;
15771 case V2SImode:
15772 type = v2si_ftype_v2si_v2si;
15773 break;
15774 case DImode:
15775 type = di_ftype_di_di;
15776 break;
15777
15778 default:
15779 gcc_unreachable ();
15780 }
15781
15782 /* Override for comparisons. */
15783 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15784 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15785 type = v4si_ftype_v4sf_v4sf;
15786
15787 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15788 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15789 type = v2di_ftype_v2df_v2df;
15790
15791 def_builtin (d->mask, d->name, type, d->code);
15792 }
15793
15794 /* Add all builtins that are more or less simple operations on 1 operand. */
15795 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15796 {
15797 enum machine_mode mode;
15798 tree type;
15799
15800 if (d->name == 0)
15801 continue;
15802 mode = insn_data[d->icode].operand[1].mode;
15803
15804 switch (mode)
15805 {
15806 case V16QImode:
15807 type = v16qi_ftype_v16qi;
15808 break;
15809 case V8HImode:
15810 type = v8hi_ftype_v8hi;
15811 break;
15812 case V4SImode:
15813 type = v4si_ftype_v4si;
15814 break;
15815 case V2DFmode:
15816 type = v2df_ftype_v2df;
15817 break;
15818 case V4SFmode:
15819 type = v4sf_ftype_v4sf;
15820 break;
15821 case V8QImode:
15822 type = v8qi_ftype_v8qi;
15823 break;
15824 case V4HImode:
15825 type = v4hi_ftype_v4hi;
15826 break;
15827 case V2SImode:
15828 type = v2si_ftype_v2si;
15829 break;
15830
15831 default:
15832	  gcc_unreachable ();
15833 }
15834
15835 def_builtin (d->mask, d->name, type, d->code);
15836 }
15837
15838 /* Add the remaining MMX insns with somewhat more complicated types. */
15839 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15840 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15841 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15842 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15843
15844 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15845 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15846 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15847
15848 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15849 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15850
15851 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15852 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15853
15854 /* comi/ucomi insns. */
15855 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15856 if (d->mask == MASK_SSE2)
15857 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15858 else
15859 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15860
15861 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15862 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15863 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15864
15865 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15866 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15867 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15868 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15869 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15870 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15871 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15872 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15873 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15874 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15875 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15876
15877 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15878
15879 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15880 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15881
15882 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15883 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15884 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15885 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15886
15887 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15888 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15889 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15890 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15891
15892 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15893
15894 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15895
15896 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15897 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15898 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15899 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15900 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15901 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15902
15903 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15904
15905 /* Original 3DNow! */
15906 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15907 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15908 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15909 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15910 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15911 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15912 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15913 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15914 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15915 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15916 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15917 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15918 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15919 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15920 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15921 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15922 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15923 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15924 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15925 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15926
15927 /* 3DNow! extension as used in the Athlon CPU. */
15928 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15929 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15930 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15931 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15932 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15933 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15934
15935 /* SSE2 */
15936 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15937
15938 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15939 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15940
15941 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15942 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15943
15944 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15945 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15946 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15947 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15948 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15949
15950 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15951 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15952 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15953 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15954
15955 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15956 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15957
15958 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15959
15960 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15961 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15962
15963 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15964 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15965 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15966 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15967 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15968
15969 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15970
15971 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15972 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15973 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15974 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15975
15976 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15977 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15978 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15979
15980 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15981 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15982 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15983 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15984
15985 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15986 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15987 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15988
15989 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15990 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15991
15992 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15993 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15994
15995 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
15996 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
15997 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15998
15999 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
16000 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
16001 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
16002
16003 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
16004 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
16005
16006 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
16007 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
16008 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
16009 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
16010
16011 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
16012 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
16013 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
16014 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
16015
16016 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
16017 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
16018
16019 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
16020
16021 /* Prescott New Instructions. */
16022 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
16023 void_ftype_pcvoid_unsigned_unsigned,
16024 IX86_BUILTIN_MONITOR);
16025 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
16026 void_ftype_unsigned_unsigned,
16027 IX86_BUILTIN_MWAIT);
16028 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
16029 v4sf_ftype_v4sf,
16030 IX86_BUILTIN_MOVSHDUP);
16031 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
16032 v4sf_ftype_v4sf,
16033 IX86_BUILTIN_MOVSLDUP);
16034 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
16035 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
16036
16037 /* SSSE3. */
16038 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
16039 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
16040 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
16041 IX86_BUILTIN_PALIGNR);
16042
16043  /* AMDFAM10 SSE4A new built-ins.  */
16044 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
16045 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
16046 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
16047 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
16048 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
16049 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
16050 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
16051 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
16052 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
16053 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
16054 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
16055 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
16056
16057 /* Access to the vec_init patterns. */
16058 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
16059 integer_type_node, NULL_TREE);
16060 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
16061 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
16062
16063 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
16064 short_integer_type_node,
16065 short_integer_type_node,
16066 short_integer_type_node, NULL_TREE);
16067 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
16068 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
16069
16070 ftype = build_function_type_list (V8QI_type_node, char_type_node,
16071 char_type_node, char_type_node,
16072 char_type_node, char_type_node,
16073 char_type_node, char_type_node,
16074 char_type_node, NULL_TREE);
16075 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
16076 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
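
  /* For illustration, these are the builtins the MMX intrinsics expand
     to; e.g. _mm_set_pi32 (hi, lo) in mmintrin.h is roughly
     (__m64) __builtin_ia32_vec_init_v2si (lo, hi), which funnels into
     ix86_expand_vector_init.  */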
16077
16078 /* Access to the vec_extract patterns. */
16079 ftype = build_function_type_list (double_type_node, V2DF_type_node,
16080 integer_type_node, NULL_TREE);
16081 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
16082 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
16083
16084 ftype = build_function_type_list (long_long_integer_type_node,
16085 V2DI_type_node, integer_type_node,
16086 NULL_TREE);
16087 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
16088 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
16089
16090 ftype = build_function_type_list (float_type_node, V4SF_type_node,
16091 integer_type_node, NULL_TREE);
16092 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
16093 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
16094
16095 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16096 integer_type_node, NULL_TREE);
16097 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
16098 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
16099
16100 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16101 integer_type_node, NULL_TREE);
16102 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
16103 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
16104
16105 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
16106 integer_type_node, NULL_TREE);
16107 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
16108 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
16109
16110 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
16111 integer_type_node, NULL_TREE);
16112 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
16113 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
16114
16115 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
16116 integer_type_node, NULL_TREE);
16117 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
16118
16119 /* Access to the vec_set patterns. */
16120 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16121 intHI_type_node,
16122 integer_type_node, NULL_TREE);
16123 def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
16124 ftype, IX86_BUILTIN_VEC_SET_V8HI);
16125
16126 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
16127 intHI_type_node,
16128 integer_type_node, NULL_TREE);
16129 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
16130 ftype, IX86_BUILTIN_VEC_SET_V4HI);
16131}
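
/* For illustration, the vec_set builtins registered above back the
   element-insert intrinsics; e.g. _mm_insert_pi16 (a, d, n) in
   xmmintrin.h is roughly
   (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi) a, d, n), which is
   expanded by ix86_expand_vec_set_builtin below.  */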
16132
16133/* Errors in the source file can cause expand_expr to return const0_rtx
16134 where we expect a vector. To avoid crashing, use one of the vector
16135 clear instructions. */
16136static rtx
16137safe_vector_operand (rtx x, enum machine_mode mode)
16138{
16139 if (x == const0_rtx)
16140 x = CONST0_RTX (mode);
16141 return x;
16142}
16143
16144/* Subroutine of ix86_expand_builtin to take care of binop insns. */
16145
16146static rtx
16147ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
16148{
16149 rtx pat, xops[3];
16150 tree arg0 = TREE_VALUE (arglist);
16151 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16152 rtx op0 = expand_normal (arg0);
16153 rtx op1 = expand_normal (arg1);
16154 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16155 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16156 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16157
16158 if (VECTOR_MODE_P (mode0))
16159 op0 = safe_vector_operand (op0, mode0);
16160 if (VECTOR_MODE_P (mode1))
16161 op1 = safe_vector_operand (op1, mode1);
16162
16163 if (optimize || !target
16164 || GET_MODE (target) != tmode
16165 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16166 target = gen_reg_rtx (tmode);
16167
16168 if (GET_MODE (op1) == SImode && mode1 == TImode)
16169 {
16170 rtx x = gen_reg_rtx (V4SImode);
16171 emit_insn (gen_sse2_loadd (x, op1));
16172 op1 = gen_lowpart (TImode, x);
16173 }
16174
16175 /* The insn must want input operands in the same modes as the
16176 result. */
16177 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
16178 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
16179
16180 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16181 op0 = copy_to_mode_reg (mode0, op0);
16182 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16183 op1 = copy_to_mode_reg (mode1, op1);
16184
16185 /* ??? Using ix86_fixup_binary_operands is problematic when
16186 we've got mismatched modes. Fake it. */
16187
16188 xops[0] = target;
16189 xops[1] = op0;
16190 xops[2] = op1;
16191
16192 if (tmode == mode0 && tmode == mode1)
16193 {
16194 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
16195 op0 = xops[1];
16196 op1 = xops[2];
16197 }
16198 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
16199 {
16200 op0 = force_reg (mode0, op0);
16201 op1 = force_reg (mode1, op1);
16202 target = gen_reg_rtx (tmode);
16203 }
16204
16205 pat = GEN_FCN (icode) (target, op0, op1);
16206 if (! pat)
16207 return 0;
16208 emit_insn (pat);
16209 return target;
16210}
16211
16212/* Subroutine of ix86_expand_builtin to take care of stores. */
16213
16214static rtx
16215ix86_expand_store_builtin (enum insn_code icode, tree arglist)
16216{
16217 rtx pat;
16218 tree arg0 = TREE_VALUE (arglist);
16219 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16220 rtx op0 = expand_normal (arg0);
16221 rtx op1 = expand_normal (arg1);
16222 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
16223 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
16224
16225 if (VECTOR_MODE_P (mode1))
16226 op1 = safe_vector_operand (op1, mode1);
16227
16228 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16229 op1 = copy_to_mode_reg (mode1, op1);
16230
16231 pat = GEN_FCN (icode) (op0, op1);
16232 if (pat)
16233 emit_insn (pat);
16234 return 0;
16235}
16236
16237/* Subroutine of ix86_expand_builtin to take care of unop insns. */
16238
16239static rtx
16240ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16241 rtx target, int do_load)
16242{
16243 rtx pat;
16244 tree arg0 = TREE_VALUE (arglist);
16245 rtx op0 = expand_normal (arg0);
16246 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16247 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16248
16249 if (optimize || !target
16250 || GET_MODE (target) != tmode
16251 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16252 target = gen_reg_rtx (tmode);
16253 if (do_load)
16254 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16255 else
16256 {
16257 if (VECTOR_MODE_P (mode0))
16258 op0 = safe_vector_operand (op0, mode0);
16259
16260 if ((optimize && !register_operand (op0, mode0))
16261 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16262 op0 = copy_to_mode_reg (mode0, op0);
16263 }
16264
16265 pat = GEN_FCN (icode) (target, op0);
16266 if (! pat)
16267 return 0;
16268 emit_insn (pat);
16269 return target;
16270}
16271
16272/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16273 sqrtss, rsqrtss, rcpss. */
16274
16275static rtx
16276ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16277{
16278 rtx pat;
16279 tree arg0 = TREE_VALUE (arglist);
16280 rtx op1, op0 = expand_normal (arg0);
16281 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16282 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16283
16284 if (optimize || !target
16285 || GET_MODE (target) != tmode
16286 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16287 target = gen_reg_rtx (tmode);
16288
16289 if (VECTOR_MODE_P (mode0))
16290 op0 = safe_vector_operand (op0, mode0);
16291
16292 if ((optimize && !register_operand (op0, mode0))
16293 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16294 op0 = copy_to_mode_reg (mode0, op0);
16295
16296 op1 = op0;
16297 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16298 op1 = copy_to_mode_reg (mode0, op1);
16299
16300 pat = GEN_FCN (icode) (target, op0, op1);
16301 if (! pat)
16302 return 0;
16303 emit_insn (pat);
16304 return target;
16305}
16306
16307/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16308
16309static rtx
16310ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16311 rtx target)
16312{
16313 rtx pat;
16314 tree arg0 = TREE_VALUE (arglist);
16315 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16316 rtx op0 = expand_normal (arg0);
16317 rtx op1 = expand_normal (arg1);
16318 rtx op2;
16319 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16320 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16321 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16322 enum rtx_code comparison = d->comparison;
16323
16324 if (VECTOR_MODE_P (mode0))
16325 op0 = safe_vector_operand (op0, mode0);
16326 if (VECTOR_MODE_P (mode1))
16327 op1 = safe_vector_operand (op1, mode1);
16328
16329 /* Swap operands if we have a comparison that isn't available in
16330 hardware. */
16331 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16332 {
16333 rtx tmp = gen_reg_rtx (mode1);
16334 emit_move_insn (tmp, op1);
16335 op1 = op0;
16336 op0 = tmp;
16337 }
16338
16339 if (optimize || !target
16340 || GET_MODE (target) != tmode
16341 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16342 target = gen_reg_rtx (tmode);
16343
16344 if ((optimize && !register_operand (op0, mode0))
16345 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16346 op0 = copy_to_mode_reg (mode0, op0);
16347 if ((optimize && !register_operand (op1, mode1))
16348 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16349 op1 = copy_to_mode_reg (mode1, op1);
16350
16351 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16352 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16353 if (! pat)
16354 return 0;
16355 emit_insn (pat);
16356 return target;
16357}
16358
16359/* Subroutine of ix86_expand_builtin to take care of comi insns. */
16360
16361static rtx
16362ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16363 rtx target)
16364{
16365 rtx pat;
16366 tree arg0 = TREE_VALUE (arglist);
16367 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16368 rtx op0 = expand_normal (arg0);
16369 rtx op1 = expand_normal (arg1);
16370 rtx op2;
16371 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16372 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16373 enum rtx_code comparison = d->comparison;
16374
16375 if (VECTOR_MODE_P (mode0))
16376 op0 = safe_vector_operand (op0, mode0);
16377 if (VECTOR_MODE_P (mode1))
16378 op1 = safe_vector_operand (op1, mode1);
16379
16380 /* Swap operands if we have a comparison that isn't available in
16381 hardware. */
16382 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16383 {
16384 rtx tmp = op1;
16385 op1 = op0;
16386 op0 = tmp;
16387 }
16388
16389 target = gen_reg_rtx (SImode);
16390 emit_move_insn (target, const0_rtx);
16391 target = gen_rtx_SUBREG (QImode, target, 0);
16392
16393 if ((optimize && !register_operand (op0, mode0))
16394 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16395 op0 = copy_to_mode_reg (mode0, op0);
16396 if ((optimize && !register_operand (op1, mode1))
16397 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16398 op1 = copy_to_mode_reg (mode1, op1);
16399
16400 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16401 pat = GEN_FCN (d->icode) (op0, op1);
16402 if (! pat)
16403 return 0;
16404 emit_insn (pat);
16405 emit_insn (gen_rtx_SET (VOIDmode,
16406 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16407 gen_rtx_fmt_ee (comparison, QImode,
16408 SET_DEST (pat),
16409 const0_rtx)));
16410
16411 return SUBREG_REG (target);
16412}
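
/* For illustration, the code above turns e.g. _mm_comilt_ss (a, b) into
   roughly

     xorl     %eax, %eax
     comiss   %xmm1, %xmm0
     set<cc>  %al

   i.e. the SImode target is cleared first and only its low QImode part
   is written by the setcc, so the rtl optimizers can see that the upper
   bits are already zero.  */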
16413
16414/* Return the integer constant in ARG. Constrain it to be in the range
16415 of the subparts of VEC_TYPE; issue an error if not. */
16416
16417static int
16418get_element_number (tree vec_type, tree arg)
16419{
16420 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16421
16422 if (!host_integerp (arg, 1)
16423 || (elt = tree_low_cst (arg, 1), elt > max))
16424 {
16425 error ("selector must be an integer constant in the range 0..%wi", max);
16426 return 0;
16427 }
16428
16429 return elt;
16430}
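
/* For example, a V4HImode vector has four subparts, so a valid selector
   for one of the vec_ext/vec_set builtins on that type must be a constant
   in the range 0..3; anything else reaches the error above and element 0
   is used instead.  */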
16431
16432/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16433 ix86_expand_vector_init. We DO have language-level syntax for this, in
16434 the form of (type){ init-list }. Except that since we can't place emms
16435 instructions from inside the compiler, we can't allow the use of MMX
16436 registers unless the user explicitly asks for it. So we do *not* define
16437 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16438 we have builtins invoked by mmintrin.h that give us license to emit
16439 these sorts of instructions. */
16440
16441static rtx
16442ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16443{
16444 enum machine_mode tmode = TYPE_MODE (type);
16445 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16446 int i, n_elt = GET_MODE_NUNITS (tmode);
16447 rtvec v = rtvec_alloc (n_elt);
16448
16449 gcc_assert (VECTOR_MODE_P (tmode));
16450
16451 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16452 {
16453 rtx x = expand_normal (TREE_VALUE (arglist));
16454 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16455 }
16456
16457 gcc_assert (arglist == NULL);
16458
16459 if (!target || !register_operand (target, tmode))
16460 target = gen_reg_rtx (tmode);
16461
16462 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16463 return target;
16464}
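
/* As a concrete illustration: mmintrin.h builds _mm_set_pi32 and the other
   MMX constructors on __builtin_ia32_vec_init_v2si and friends, so a call
   such as __builtin_ia32_vec_init_v2si (a, b) arrives here with a
   two-element arglist and is handed to ix86_expand_vector_init as a
   V2SImode PARALLEL.  */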
16465
16466/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16467 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16468 had a language-level syntax for referencing vector elements. */
16469
16470static rtx
16471ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16472{
16473 enum machine_mode tmode, mode0;
16474 tree arg0, arg1;
16475 int elt;
16476 rtx op0;
16477
16478 arg0 = TREE_VALUE (arglist);
16479 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16480
16481 op0 = expand_normal (arg0);
16482 elt = get_element_number (TREE_TYPE (arg0), arg1);
16483
16484 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16485 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16486 gcc_assert (VECTOR_MODE_P (mode0));
16487
16488 op0 = force_reg (mode0, op0);
16489
16490 if (optimize || !target || !register_operand (target, tmode))
16491 target = gen_reg_rtx (tmode);
16492
16493 ix86_expand_vector_extract (true, target, op0, elt);
16494
16495 return target;
16496}
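
/* For example, __builtin_ia32_vec_ext_v4sf (v, 0), which is how intrinsics
   such as _mm_cvtss_f32 are typically implemented, reaches this point with
   tmode == SFmode and mode0 == V4SFmode and is expanded by
   ix86_expand_vector_extract into a scalar move (or a shuffle followed by
   one for nonzero element numbers).  */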
16497
16498/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16499 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16500 a language-level syntax for referencing vector elements. */
16501
16502static rtx
16503ix86_expand_vec_set_builtin (tree arglist)
16504{
16505 enum machine_mode tmode, mode1;
16506 tree arg0, arg1, arg2;
16507 int elt;
16508 rtx op0, op1, target;
16509
16510 arg0 = TREE_VALUE (arglist);
16511 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16512 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16513
16514 tmode = TYPE_MODE (TREE_TYPE (arg0));
16515 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16516 gcc_assert (VECTOR_MODE_P (tmode));
16517
16518 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16519 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16520 elt = get_element_number (TREE_TYPE (arg0), arg2);
16521
16522 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16523 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16524
16525 op0 = force_reg (tmode, op0);
16526 op1 = force_reg (mode1, op1);
16527
16528 /* OP0 is the source of these builtin functions and shouldn't be
16529 modified. Create a copy, use it and return it as target. */
16530 target = gen_reg_rtx (tmode);
16531 emit_move_insn (target, op0);
16532 ix86_expand_vector_set (true, target, op1, elt);
16533
16534 return target;
16535}
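
/* For example, __builtin_ia32_vec_set_v8hi (v, s, 3) (the builtin behind
   _mm_insert_epi16) copies V into a fresh register, overwrites element 3
   with S via ix86_expand_vector_set, and returns the copy, so the source
   operand itself is never modified, as the comment above requires.  */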
16536
16537/* Expand an expression EXP that calls a built-in function,
16538 with result going to TARGET if that's convenient
16539 (and in mode MODE if that's convenient).
16540 SUBTARGET may be used as the target for computing one of EXP's operands.
16541 IGNORE is nonzero if the value is to be ignored. */
16542
16543static rtx
16544ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16545 enum machine_mode mode ATTRIBUTE_UNUSED,
16546 int ignore ATTRIBUTE_UNUSED)
16547{
16548 const struct builtin_description *d;
16549 size_t i;
16550 enum insn_code icode;
16551 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16552 tree arglist = TREE_OPERAND (exp, 1);
16553 tree arg0, arg1, arg2, arg3;
16554 rtx op0, op1, op2, op3, pat;
16555 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
16556 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16557
16558 switch (fcode)
16559 {
16560 case IX86_BUILTIN_EMMS:
16561 emit_insn (gen_mmx_emms ());
16562 return 0;
16563
16564 case IX86_BUILTIN_SFENCE:
16565 emit_insn (gen_sse_sfence ());
16566 return 0;
16567
16568 case IX86_BUILTIN_MASKMOVQ:
16569 case IX86_BUILTIN_MASKMOVDQU:
16570 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16571 ? CODE_FOR_mmx_maskmovq
16572 : CODE_FOR_sse2_maskmovdqu);
16573 /* Note the arg order is different from the operand order. */
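      /* Concretely, the builtin is called as (data, mask, address) while the
	 maskmov insn pattern wants (mem destination, data, mask); arg0 below
	 is therefore the third argument and supplies the address wrapped in
	 the MEM operand.  */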
16574 arg1 = TREE_VALUE (arglist);
16575 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16576 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16577 op0 = expand_normal (arg0);
16578 op1 = expand_normal (arg1);
16579 op2 = expand_normal (arg2);
16580 mode0 = insn_data[icode].operand[0].mode;
16581 mode1 = insn_data[icode].operand[1].mode;
16582 mode2 = insn_data[icode].operand[2].mode;
16583
16584 op0 = force_reg (Pmode, op0);
16585 op0 = gen_rtx_MEM (mode1, op0);
16586
16587 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16588 op0 = copy_to_mode_reg (mode0, op0);
16589 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16590 op1 = copy_to_mode_reg (mode1, op1);
16591 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16592 op2 = copy_to_mode_reg (mode2, op2);
16593 pat = GEN_FCN (icode) (op0, op1, op2);
16594 if (! pat)
16595 return 0;
16596 emit_insn (pat);
16597 return 0;
16598
16599 case IX86_BUILTIN_SQRTSS:
16600 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16601 case IX86_BUILTIN_RSQRTSS:
16602 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16603 case IX86_BUILTIN_RCPSS:
16604 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16605
16606 case IX86_BUILTIN_LOADUPS:
16607 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16608
16609 case IX86_BUILTIN_STOREUPS:
16610 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16611
16612 case IX86_BUILTIN_LOADHPS:
16613 case IX86_BUILTIN_LOADLPS:
16614 case IX86_BUILTIN_LOADHPD:
16615 case IX86_BUILTIN_LOADLPD:
16616 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16617 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16618 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16619 : CODE_FOR_sse2_loadlpd);
16620 arg0 = TREE_VALUE (arglist);
16621 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16622 op0 = expand_normal (arg0);
16623 op1 = expand_normal (arg1);
16624 tmode = insn_data[icode].operand[0].mode;
16625 mode0 = insn_data[icode].operand[1].mode;
16626 mode1 = insn_data[icode].operand[2].mode;
16627
16628 op0 = force_reg (mode0, op0);
16629 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16630 if (optimize || target == 0
16631 || GET_MODE (target) != tmode
16632 || !register_operand (target, tmode))
16633 target = gen_reg_rtx (tmode);
16634 pat = GEN_FCN (icode) (target, op0, op1);
16635 if (! pat)
16636 return 0;
16637 emit_insn (pat);
16638 return target;
16639
16640 case IX86_BUILTIN_STOREHPS:
16641 case IX86_BUILTIN_STORELPS:
16642 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16643 : CODE_FOR_sse_storelps);
16644 arg0 = TREE_VALUE (arglist);
16645 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16646 op0 = expand_normal (arg0);
16647 op1 = expand_normal (arg1);
16648 mode0 = insn_data[icode].operand[0].mode;
16649 mode1 = insn_data[icode].operand[1].mode;
16650
16651 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16652 op1 = force_reg (mode1, op1);
16653
16654 pat = GEN_FCN (icode) (op0, op1);
16655 if (! pat)
16656 return 0;
16657 emit_insn (pat);
16658 return const0_rtx;
16659
16660 case IX86_BUILTIN_MOVNTPS:
16661 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16662 case IX86_BUILTIN_MOVNTQ:
16663 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16664
16665 case IX86_BUILTIN_LDMXCSR:
16666 op0 = expand_normal (TREE_VALUE (arglist));
16667 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16668 emit_move_insn (target, op0);
16669 emit_insn (gen_sse_ldmxcsr (target));
16670 return 0;
16671
16672 case IX86_BUILTIN_STMXCSR:
16673 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
16674 emit_insn (gen_sse_stmxcsr (target));
16675 return copy_to_mode_reg (SImode, target);
16676
16677 case IX86_BUILTIN_SHUFPS:
16678 case IX86_BUILTIN_SHUFPD:
16679 icode = (fcode == IX86_BUILTIN_SHUFPS
16680 ? CODE_FOR_sse_shufps
16681 : CODE_FOR_sse2_shufpd);
16682 arg0 = TREE_VALUE (arglist);
16683 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16684 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16685 op0 = expand_normal (arg0);
16686 op1 = expand_normal (arg1);
16687 op2 = expand_normal (arg2);
16688 tmode = insn_data[icode].operand[0].mode;
16689 mode0 = insn_data[icode].operand[1].mode;
16690 mode1 = insn_data[icode].operand[2].mode;
16691 mode2 = insn_data[icode].operand[3].mode;
16692
16693 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16694 op0 = copy_to_mode_reg (mode0, op0);
16695 if ((optimize && !register_operand (op1, mode1))
16696 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16697 op1 = copy_to_mode_reg (mode1, op1);
16698 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16699 {
16700 /* @@@ better error message */
16701 error ("mask must be an immediate");
16702 return gen_reg_rtx (tmode);
16703 }
16704 if (optimize || target == 0
16705 || GET_MODE (target) != tmode
16706 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16707 target = gen_reg_rtx (tmode);
16708 pat = GEN_FCN (icode) (target, op0, op1, op2);
16709 if (! pat)
16710 return 0;
16711 emit_insn (pat);
16712 return target;
16713
16714 case IX86_BUILTIN_PSHUFW:
16715 case IX86_BUILTIN_PSHUFD:
16716 case IX86_BUILTIN_PSHUFHW:
16717 case IX86_BUILTIN_PSHUFLW:
16718 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16719 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16720 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16721 : CODE_FOR_mmx_pshufw);
16722 arg0 = TREE_VALUE (arglist);
16723 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16724 op0 = expand_normal (arg0);
16725 op1 = expand_normal (arg1);
16726 tmode = insn_data[icode].operand[0].mode;
16727 mode1 = insn_data[icode].operand[1].mode;
16728 mode2 = insn_data[icode].operand[2].mode;
16729
16730 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16731 op0 = copy_to_mode_reg (mode1, op0);
16732 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16733 {
16734 /* @@@ better error message */
16735 error ("mask must be an immediate");
16736 return const0_rtx;
16737 }
16738 if (target == 0
16739 || GET_MODE (target) != tmode
16740 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16741 target = gen_reg_rtx (tmode);
16742 pat = GEN_FCN (icode) (target, op0, op1);
16743 if (! pat)
16744 return 0;
16745 emit_insn (pat);
16746 return target;
16747
16748 case IX86_BUILTIN_PSLLWI128:
16749 icode = CODE_FOR_ashlv8hi3;
16750 goto do_pshifti;
16751 case IX86_BUILTIN_PSLLDI128:
16752 icode = CODE_FOR_ashlv4si3;
16753 goto do_pshifti;
16754 case IX86_BUILTIN_PSLLQI128:
16755 icode = CODE_FOR_ashlv2di3;
16756 goto do_pshifti;
16757 case IX86_BUILTIN_PSRAWI128:
16758 icode = CODE_FOR_ashrv8hi3;
16759 goto do_pshifti;
16760 case IX86_BUILTIN_PSRADI128:
16761 icode = CODE_FOR_ashrv4si3;
16762 goto do_pshifti;
16763 case IX86_BUILTIN_PSRLWI128:
16764 icode = CODE_FOR_lshrv8hi3;
16765 goto do_pshifti;
16766 case IX86_BUILTIN_PSRLDI128:
16767 icode = CODE_FOR_lshrv4si3;
16768 goto do_pshifti;
16769 case IX86_BUILTIN_PSRLQI128:
16770 icode = CODE_FOR_lshrv2di3;
16771 goto do_pshifti;
16772 do_pshifti:
16773 arg0 = TREE_VALUE (arglist);
16774 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16775 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16776 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16777
16778 if (GET_CODE (op1) != CONST_INT)
16779 {
16780 error ("shift must be an immediate");
16781 return const0_rtx;
16782 }
16783 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
16784 op1 = GEN_INT (255);
16785
16786 tmode = insn_data[icode].operand[0].mode;
16787 mode1 = insn_data[icode].operand[1].mode;
16788 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16789 op0 = copy_to_reg (op0);
16790
16791 target = gen_reg_rtx (tmode);
16792 pat = GEN_FCN (icode) (target, op0, op1);
16793 if (!pat)
16794 return 0;
16795 emit_insn (pat);
16796 return target;
16797
16798 case IX86_BUILTIN_PSLLW128:
16799 icode = CODE_FOR_ashlv8hi3;
16800 goto do_pshift;
16801 case IX86_BUILTIN_PSLLD128:
16802 icode = CODE_FOR_ashlv4si3;
16803 goto do_pshift;
16804 case IX86_BUILTIN_PSLLQ128:
16805 icode = CODE_FOR_ashlv2di3;
16806 goto do_pshift;
16807 case IX86_BUILTIN_PSRAW128:
16808 icode = CODE_FOR_ashrv8hi3;
16809 goto do_pshift;
16810 case IX86_BUILTIN_PSRAD128:
16811 icode = CODE_FOR_ashrv4si3;
16812 goto do_pshift;
16813 case IX86_BUILTIN_PSRLW128:
16814 icode = CODE_FOR_lshrv8hi3;
16815 goto do_pshift;
16816 case IX86_BUILTIN_PSRLD128:
16817 icode = CODE_FOR_lshrv4si3;
16818 goto do_pshift;
16819 case IX86_BUILTIN_PSRLQ128:
16820 icode = CODE_FOR_lshrv2di3;
16821 goto do_pshift;
16822 do_pshift:
16823 arg0 = TREE_VALUE (arglist);
16824 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16825 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16826 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16827
16828 tmode = insn_data[icode].operand[0].mode;
16829 mode1 = insn_data[icode].operand[1].mode;
16830
16831 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16832 op0 = copy_to_reg (op0);
16833
16834 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
16835 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
16836 op1 = copy_to_reg (op1);
16837
16838 target = gen_reg_rtx (tmode);
16839 pat = GEN_FCN (icode) (target, op0, op1);
16840 if (!pat)
16841 return 0;
16842 emit_insn (pat);
16843 return target;
16844
16845 case IX86_BUILTIN_PSLLDQI128:
16846 case IX86_BUILTIN_PSRLDQI128:
16847 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16848 : CODE_FOR_sse2_lshrti3);
16849 arg0 = TREE_VALUE (arglist);
16850 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16851 op0 = expand_normal (arg0);
16852 op1 = expand_normal (arg1);
16853 tmode = insn_data[icode].operand[0].mode;
16854 mode1 = insn_data[icode].operand[1].mode;
16855 mode2 = insn_data[icode].operand[2].mode;
16856
16857 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16858 {
16859 op0 = copy_to_reg (op0);
16860 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16861 }
16862 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16863 {
16864 error ("shift must be an immediate");
16865 return const0_rtx;
16866 }
16867 target = gen_reg_rtx (V2DImode);
16868 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
16869 op0, op1);
16870 if (! pat)
16871 return 0;
16872 emit_insn (pat);
16873 return target;
16874
16875 case IX86_BUILTIN_FEMMS:
16876 emit_insn (gen_mmx_femms ());
16877 return NULL_RTX;
16878
16879 case IX86_BUILTIN_PAVGUSB:
16880 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16881
16882 case IX86_BUILTIN_PF2ID:
16883 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16884
16885 case IX86_BUILTIN_PFACC:
16886 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16887
16888 case IX86_BUILTIN_PFADD:
16889 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16890
16891 case IX86_BUILTIN_PFCMPEQ:
16892 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16893
16894 case IX86_BUILTIN_PFCMPGE:
16895 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16896
16897 case IX86_BUILTIN_PFCMPGT:
16898 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16899
16900 case IX86_BUILTIN_PFMAX:
16901 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16902
16903 case IX86_BUILTIN_PFMIN:
16904 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16905
16906 case IX86_BUILTIN_PFMUL:
16907 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16908
16909 case IX86_BUILTIN_PFRCP:
16910 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16911
16912 case IX86_BUILTIN_PFRCPIT1:
16913 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16914
16915 case IX86_BUILTIN_PFRCPIT2:
16916 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16917
16918 case IX86_BUILTIN_PFRSQIT1:
16919 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16920
16921 case IX86_BUILTIN_PFRSQRT:
16922 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16923
16924 case IX86_BUILTIN_PFSUB:
16925 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16926
16927 case IX86_BUILTIN_PFSUBR:
16928 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16929
16930 case IX86_BUILTIN_PI2FD:
16931 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16932
16933 case IX86_BUILTIN_PMULHRW:
16934 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16935
16936 case IX86_BUILTIN_PF2IW:
16937 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16938
16939 case IX86_BUILTIN_PFNACC:
16940 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16941
16942 case IX86_BUILTIN_PFPNACC:
16943 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16944
16945 case IX86_BUILTIN_PI2FW:
16946 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16947
16948 case IX86_BUILTIN_PSWAPDSI:
16949 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16950
16951 case IX86_BUILTIN_PSWAPDSF:
16952 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16953
16954 case IX86_BUILTIN_SQRTSD:
16955 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16956 case IX86_BUILTIN_LOADUPD:
16957 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16958 case IX86_BUILTIN_STOREUPD:
16959 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16960
16961 case IX86_BUILTIN_MFENCE:
16962 emit_insn (gen_sse2_mfence ());
16963 return 0;
16964 case IX86_BUILTIN_LFENCE:
16965 emit_insn (gen_sse2_lfence ());
16966 return 0;
16967
16968 case IX86_BUILTIN_CLFLUSH:
16969 arg0 = TREE_VALUE (arglist);
16970 op0 = expand_normal (arg0);
16971 icode = CODE_FOR_sse2_clflush;
16972 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16973 op0 = copy_to_mode_reg (Pmode, op0);
16974
16975 emit_insn (gen_sse2_clflush (op0));
16976 return 0;
16977
16978 case IX86_BUILTIN_MOVNTPD:
16979 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16980 case IX86_BUILTIN_MOVNTDQ:
16981 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16982 case IX86_BUILTIN_MOVNTI:
16983 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16984
16985 case IX86_BUILTIN_LOADDQU:
16986 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16987 case IX86_BUILTIN_STOREDQU:
16988 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16989
16990 case IX86_BUILTIN_MONITOR:
16991 arg0 = TREE_VALUE (arglist);
16992 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16993 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16994 op0 = expand_normal (arg0);
16995 op1 = expand_normal (arg1);
16996 op2 = expand_normal (arg2);
16997 if (!REG_P (op0))
16998 op0 = copy_to_mode_reg (Pmode, op0);
16999 if (!REG_P (op1))
17000 op1 = copy_to_mode_reg (SImode, op1);
17001 if (!REG_P (op2))
17002 op2 = copy_to_mode_reg (SImode, op2);
17003 if (!TARGET_64BIT)
17004 emit_insn (gen_sse3_monitor (op0, op1, op2));
17005 else
17006 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
17007 return 0;
17008
17009 case IX86_BUILTIN_MWAIT:
17010 arg0 = TREE_VALUE (arglist);
17011 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17012 op0 = expand_normal (arg0);
17013 op1 = expand_normal (arg1);
17014 if (!REG_P (op0))
17015 op0 = copy_to_mode_reg (SImode, op0);
17016 if (!REG_P (op1))
17017 op1 = copy_to_mode_reg (SImode, op1);
17018 emit_insn (gen_sse3_mwait (op0, op1));
17019 return 0;
17020
17021 case IX86_BUILTIN_LDDQU:
17022 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
17023 target, 1);
17024
17025 case IX86_BUILTIN_PALIGNR:
17026 case IX86_BUILTIN_PALIGNR128:
17027 if (fcode == IX86_BUILTIN_PALIGNR)
17028 {
17029 icode = CODE_FOR_ssse3_palignrdi;
17030 mode = DImode;
17031 }
17032 else
17033 {
17034 icode = CODE_FOR_ssse3_palignrti;
17035 mode = V2DImode;
17036 }
17037 arg0 = TREE_VALUE (arglist);
17038 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17039 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17040 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
17041 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
17042 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
17043 tmode = insn_data[icode].operand[0].mode;
17044 mode1 = insn_data[icode].operand[1].mode;
17045 mode2 = insn_data[icode].operand[2].mode;
17046 mode3 = insn_data[icode].operand[3].mode;
17047
17048 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17049 {
17050 op0 = copy_to_reg (op0);
17051 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17052 }
17053 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17054 {
17055 op1 = copy_to_reg (op1);
17056 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
17057 }
17058 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17059 {
17060 error ("shift must be an immediate");
17061 return const0_rtx;
17062 }
17063 target = gen_reg_rtx (mode);
17064 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
17065 op0, op1, op2);
17066 if (! pat)
17067 return 0;
17068 emit_insn (pat);
17069 return target;
17070
17071 case IX86_BUILTIN_MOVNTSD:
17072 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
17073
17074 case IX86_BUILTIN_MOVNTSS:
17075 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
17076
17077 case IX86_BUILTIN_INSERTQ:
17078 case IX86_BUILTIN_EXTRQ:
17079 icode = (fcode == IX86_BUILTIN_EXTRQ
17080 ? CODE_FOR_sse4a_extrq
17081 : CODE_FOR_sse4a_insertq);
17082 arg0 = TREE_VALUE (arglist);
17083 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17084 op0 = expand_normal (arg0);
17085 op1 = expand_normal (arg1);
17086 tmode = insn_data[icode].operand[0].mode;
17087 mode1 = insn_data[icode].operand[1].mode;
17088 mode2 = insn_data[icode].operand[2].mode;
17089 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17090 op0 = copy_to_mode_reg (mode1, op0);
17091 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17092 op1 = copy_to_mode_reg (mode2, op1);
17093 if (optimize || target == 0
17094 || GET_MODE (target) != tmode
17095 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17096 target = gen_reg_rtx (tmode);
17097 pat = GEN_FCN (icode) (target, op0, op1);
17098 if (! pat)
17099 return NULL_RTX;
17100 emit_insn (pat);
17101 return target;
17102
17103 case IX86_BUILTIN_EXTRQI:
17104 icode = CODE_FOR_sse4a_extrqi;
17105 arg0 = TREE_VALUE (arglist);
17106 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17107 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17108 op0 = expand_normal (arg0);
17109 op1 = expand_normal (arg1);
17110 op2 = expand_normal (arg2);
17111 tmode = insn_data[icode].operand[0].mode;
17112 mode1 = insn_data[icode].operand[1].mode;
17113 mode2 = insn_data[icode].operand[2].mode;
17114 mode3 = insn_data[icode].operand[3].mode;
17115 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17116 op0 = copy_to_mode_reg (mode1, op0);
17117 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17118 {
17119 error ("index mask must be an immediate");
17120 return gen_reg_rtx (tmode);
17121 }
17122 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17123 {
17124 error ("length mask must be an immediate");
17125 return gen_reg_rtx (tmode);
17126 }
17127 if (optimize || target == 0
17128 || GET_MODE (target) != tmode
17129 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17130 target = gen_reg_rtx (tmode);
17131 pat = GEN_FCN (icode) (target, op0, op1, op2);
17132 if (! pat)
17133 return NULL_RTX;
17134 emit_insn (pat);
17135 return target;
17136
17137 case IX86_BUILTIN_INSERTQI:
17138 icode = CODE_FOR_sse4a_insertqi;
17139 arg0 = TREE_VALUE (arglist);
17140 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17141 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17142 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
17143 op0 = expand_normal (arg0);
17144 op1 = expand_normal (arg1);
17145 op2 = expand_normal (arg2);
17146 op3 = expand_normal (arg3);
17147 tmode = insn_data[icode].operand[0].mode;
17148 mode1 = insn_data[icode].operand[1].mode;
17149 mode2 = insn_data[icode].operand[2].mode;
17150 mode3 = insn_data[icode].operand[3].mode;
17151 mode4 = insn_data[icode].operand[4].mode;
17152
17153 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17154 op0 = copy_to_mode_reg (mode1, op0);
17155
17156 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17157 op1 = copy_to_mode_reg (mode2, op1);
17158
17159 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17160 {
17161 error ("index mask must be an immediate");
17162 return gen_reg_rtx (tmode);
17163 }
17164 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
17165 {
17166 error ("length mask must be an immediate");
17167 return gen_reg_rtx (tmode);
17168 }
17169 if (optimize || target == 0
17170 || GET_MODE (target) != tmode
17171 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17172 target = gen_reg_rtx (tmode);
17173 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
17174 if (! pat)
17175 return NULL_RTX;
17176 emit_insn (pat);
17177 return target;
17178
17179 case IX86_BUILTIN_VEC_INIT_V2SI:
17180 case IX86_BUILTIN_VEC_INIT_V4HI:
17181 case IX86_BUILTIN_VEC_INIT_V8QI:
17182 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
17183
17184 case IX86_BUILTIN_VEC_EXT_V2DF:
17185 case IX86_BUILTIN_VEC_EXT_V2DI:
17186 case IX86_BUILTIN_VEC_EXT_V4SF:
17187 case IX86_BUILTIN_VEC_EXT_V4SI:
17188 case IX86_BUILTIN_VEC_EXT_V8HI:
17189 case IX86_BUILTIN_VEC_EXT_V16QI:
17190 case IX86_BUILTIN_VEC_EXT_V2SI:
17191 case IX86_BUILTIN_VEC_EXT_V4HI:
17192 return ix86_expand_vec_ext_builtin (arglist, target);
17193
17194 case IX86_BUILTIN_VEC_SET_V8HI:
17195 case IX86_BUILTIN_VEC_SET_V4HI:
17196 return ix86_expand_vec_set_builtin (arglist);
17197
17198 default:
17199 break;
17200 }
17201
17202 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17203 if (d->code == fcode)
17204 {
17205 /* Compares are treated specially. */
17206 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17207 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
17208 || d->icode == CODE_FOR_sse2_maskcmpv2df3
17209 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17210 return ix86_expand_sse_compare (d, arglist, target);
17211
17212 return ix86_expand_binop_builtin (d->icode, arglist, target);
17213 }
17214
17215 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17216 if (d->code == fcode)
17217 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
17218
17219 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17220 if (d->code == fcode)
17221 return ix86_expand_sse_comi (d, arglist, target);
17222
17223 gcc_unreachable ();
17224}
17225
17226/* Store OPERAND to memory after reload is completed.  This means
17227 that we can't easily use assign_stack_local. */
17228rtx
17229ix86_force_to_memory (enum machine_mode mode, rtx operand)
17230{
17231 rtx result;
17232
17233 gcc_assert (reload_completed);
17234 if (TARGET_RED_ZONE)
17235 {
17236 result = gen_rtx_MEM (mode,
17237 gen_rtx_PLUS (Pmode,
17238 stack_pointer_rtx,
17239 GEN_INT (-RED_ZONE_SIZE)));
17240 emit_move_insn (result, operand);
17241 }
17242 else if (!TARGET_RED_ZONE && TARGET_64BIT)
17243 {
17244 switch (mode)
17245 {
17246 case HImode:
17247 case SImode:
17248 operand = gen_lowpart (DImode, operand);
17249 /* FALLTHRU */
17250 case DImode:
17251 emit_insn (
17252 gen_rtx_SET (VOIDmode,
17253 gen_rtx_MEM (DImode,
17254 gen_rtx_PRE_DEC (DImode,
17255 stack_pointer_rtx)),
17256 operand));
17257 break;
17258 default:
17259 gcc_unreachable ();
17260 }
17261 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17262 }
17263 else
17264 {
17265 switch (mode)
17266 {
17267 case DImode:
17268 {
17269 rtx operands[2];
17270 split_di (&operand, 1, operands, operands + 1);
17271 emit_insn (
17272 gen_rtx_SET (VOIDmode,
17273 gen_rtx_MEM (SImode,
17274 gen_rtx_PRE_DEC (Pmode,
17275 stack_pointer_rtx)),
17276 operands[1]));
17277 emit_insn (
17278 gen_rtx_SET (VOIDmode,
17279 gen_rtx_MEM (SImode,
17280 gen_rtx_PRE_DEC (Pmode,
17281 stack_pointer_rtx)),
17282 operands[0]));
17283 }
17284 break;
17285 case HImode:
17286 /* Store HImodes as SImodes. */
17287 operand = gen_lowpart (SImode, operand);
17288 /* FALLTHRU */
17289 case SImode:
17290 emit_insn (
17291 gen_rtx_SET (VOIDmode,
17292 gen_rtx_MEM (GET_MODE (operand),
17293 gen_rtx_PRE_DEC (SImode,
17294 stack_pointer_rtx)),
17295 operand));
17296 break;
17297 default:
17298 gcc_unreachable ();
17299 }
17300 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17301 }
17302 return result;
17303}
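
/* Illustration of the three strategies above: with the red zone (the
   common x86-64 case) the operand is simply stored at sp - RED_ZONE_SIZE
   (128 bytes) and that MEM is returned; on 64-bit without a red zone it is
   pushed with a PRE_DEC of the stack pointer; on ia32 a DImode operand is
   split and pushed as two SImode halves.  The slot is released again by
   ix86_free_from_memory below.  */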
17304
17305/* Free the operand from memory. */
17306void
17307ix86_free_from_memory (enum machine_mode mode)
17308{
17309 if (!TARGET_RED_ZONE)
17310 {
17311 int size;
17312
17313 if (mode == DImode || TARGET_64BIT)
17314 size = 8;
17315 else
17316 size = 4;
17317 /* Use LEA to deallocate stack space. In peephole2 it will be converted
17318 to a pop or add instruction if registers are available. */
17319 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17320 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17321 GEN_INT (size))));
17322 }
17323}
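
/* For example, after a SImode operand was pushed on ia32 this emits
   (set (reg sp) (plus (reg sp) (const_int 4))); peephole2 may later turn
   that into an "add $4, %esp" or, when a scratch register is free, a pop,
   as the comment above notes.  */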
17324
17325/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
17326 QImode must go into class Q_REGS.
17327 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
17328 movdf to do mem-to-mem moves through integer regs. */
17329enum reg_class
17330ix86_preferred_reload_class (rtx x, enum reg_class class)
17331{
17332 enum machine_mode mode = GET_MODE (x);
17333
17334 /* We're only allowed to return a subclass of CLASS. Many of the
17335 following checks fail for NO_REGS, so eliminate that early. */
17336 if (class == NO_REGS)
17337 return NO_REGS;
17338
17339 /* All classes can load zeros. */
17340 if (x == CONST0_RTX (mode))
17341 return class;
17342
17343 /* Force constants into memory if we are loading a (nonzero) constant into
17344 an MMX or SSE register. This is because there are no MMX/SSE instructions
17345 to load from a constant. */
17346 if (CONSTANT_P (x)
17347 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
17348 return NO_REGS;
17349
17350 /* Prefer SSE regs only, if we can use them for math. */
17351 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
17352 return SSE_CLASS_P (class) ? class : NO_REGS;
17353
17354 /* Floating-point constants need more complex checks. */
17355 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
17356 {
17357 /* General regs can load everything. */
17358 if (reg_class_subset_p (class, GENERAL_REGS))
17359 return class;
17360
17361 /* Floats can load 0 and 1 plus some others. Note that we eliminated
17362 zero above. We only want to wind up preferring 80387 registers if
17363 we plan on doing computation with them. */
17364 if (TARGET_80387
17365 && standard_80387_constant_p (x))
17366 {
17367 /* Limit class to non-sse. */
17368 if (class == FLOAT_SSE_REGS)
17369 return FLOAT_REGS;
17370 if (class == FP_TOP_SSE_REGS)
17371 return FP_TOP_REG;
17372 if (class == FP_SECOND_SSE_REGS)
17373 return FP_SECOND_REG;
17374 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17375 return class;
17376 }
17377
17378 return NO_REGS;
17379 }
17380
17381 /* Generally when we see PLUS here, it's the function invariant
17382 (plus soft-fp const_int), which can only be computed into general
17383 regs. */
17384 if (GET_CODE (x) == PLUS)
17385 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17386
17387 /* QImode constants are easy to load, but non-constant QImode data
17388 must go into Q_REGS. */
17389 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17390 {
17391 if (reg_class_subset_p (class, Q_REGS))
17392 return class;
17393 if (reg_class_subset_p (Q_REGS, class))
17394 return Q_REGS;
17395 return NO_REGS;
17396 }
17397
17398 return class;
17399}
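
/* Concrete cases of the above, assuming plain 387 math: a nonzero
   CONST_DOUBLE headed for an SSE or MMX class yields NO_REGS, forcing the
   constant into memory, while the same constant headed for FLOAT_REGS is
   accepted only when standard_80387_constant_p recognizes it (0.0, 1.0 and
   the other fld-constant values).  */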
17400
17401/* Discourage putting floating-point values in SSE registers unless
17402 SSE math is being used, and likewise for the 387 registers. */
17403enum reg_class
17404ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17405{
17406 enum machine_mode mode = GET_MODE (x);
17407
17408 /* Restrict the output reload class to the register bank that we are doing
17409 math on. If we would like not to return a subset of CLASS, reject this
17410 alternative: if reload cannot do this, it will still use its choice. */
17411 mode = GET_MODE (x);
17412 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17413 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17414
17415 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17416 {
17417 if (class == FP_TOP_SSE_REGS)
17418 return FP_TOP_REG;
17419 else if (class == FP_SECOND_SSE_REGS)
17420 return FP_SECOND_REG;
17421 else
17422 return FLOAT_CLASS_P (class) ? class : NO_REGS;
17423 }
17424
17425 return class;
17426}
17427
17428/* If we are copying between general and FP registers, we need a memory
17429 location. The same is true for SSE and MMX registers.
17430
17431 The macro can't work reliably when one of the CLASSES is a class containing
17432 registers from multiple units (SSE, MMX, integer). We avoid this by never
17433 combining those units in a single alternative in the machine description.
17434 Ensure that this constraint holds to avoid unexpected surprises.
17435
17436 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17437 enforce these sanity checks. */
17438
17439int
17440ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17441 enum machine_mode mode, int strict)
17442{
17443 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17444 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17445 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17446 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17447 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17448 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17449 {
17450 gcc_assert (!strict);
17451 return true;
17452 }
17453
17454 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17455 return true;
17456
17457 /* ??? This is a lie. We do have moves between mmx/general, and for
17458 mmx/sse2. But by saying we need secondary memory we discourage the
17459 register allocator from using the mmx registers unless needed. */
17460 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17461 return true;
17462
17463 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17464 {
17465 /* SSE1 doesn't have any direct moves from other classes. */
17466 if (!TARGET_SSE2)
17467 return true;
17468
17469 /* If the target says that inter-unit moves are more expensive
17470 than moving through memory, then don't generate them. */
17471 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17472 return true;
17473
17474 /* Between SSE and general, we have moves no larger than word size. */
17475 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17476 return true;
17477
17478 /* ??? For the cost of one register reformat penalty, we could use
17479 the same instructions to move SFmode and DFmode data, but the
17480 relevant move patterns don't support those alternatives. */
17481 if (mode == SFmode || mode == DFmode)
17482 return true;
17483 }
17484
17485 return false;
17486}
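
/* For example, a DFmode copy between SSE_REGS and GENERAL_REGS always
   answers true here (on ia32 because of the word-size limit, and otherwise
   because of the explicit SFmode/DFmode check above), so reload routes such
   copies through a stack slot.  */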
17487
17488/* Return true if the registers in CLASS cannot represent the change from
17489 modes FROM to TO. */
17490
17491bool
17492ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17493 enum reg_class class)
17494{
17495 if (from == to)
17496 return false;
17497
17498 /* x87 registers can't do subreg at all, as all values are reformatted
17499 to extended precision. */
17500 if (MAYBE_FLOAT_CLASS_P (class))
17501 return true;
17502
17503 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17504 {
17505 /* Vector registers do not support QI or HImode loads. If we don't
17506 disallow a change to these modes, reload will assume it's ok to
17507 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17508 the vec_dupv4hi pattern. */
17509 if (GET_MODE_SIZE (from) < 4)
17510 return true;
17511
17512 /* Vector registers do not support subreg with nonzero offsets, which
17513 are otherwise valid for integer registers. Since we can't see
17514 whether we have a nonzero offset from here, prohibit all
17515 nonparadoxical subregs changing size. */
17516 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17517 return true;
17518 }
17519
17520 return false;
17521}
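
/* For example, with an SSE or MMX class a change from HImode is rejected
   outright (GET_MODE_SIZE (from) < 4) and a narrowing change such as
   DImode to SImode is rejected by the size test, while the x87 stack
   registers refuse any mode change at all.  */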
17522
17523/* Return the cost of moving data from a register in class CLASS1 to
17524 one in class CLASS2.
17525
17526 It is not required that the cost always equal 2 when FROM is the same as TO;
17527 on some machines it is expensive to move between registers if they are not
17528 general registers. */
17529
17530int
17531ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17532 enum reg_class class2)
17533{
17534 /* In case we require secondary memory, compute cost of the store followed
17535 by load. In order to avoid bad register allocation choices, we need
17536 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
17537
17538 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17539 {
17540 int cost = 1;
17541
17542 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17543 MEMORY_MOVE_COST (mode, class1, 1));
17544 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17545 MEMORY_MOVE_COST (mode, class2, 1));
17546
17547 /* In case of copying from a general purpose register we may emit multiple
17548 stores followed by a single load, causing a memory size mismatch stall.
17549 Count this as an arbitrarily high cost of 20. */
17550 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17551 cost += 20;
17552
17553 /* In the case of FP/MMX moves, the registers actually overlap, and we
17554 have to switch modes in order to treat them differently. */
17555 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17556 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17557 cost += 20;
17558
17559 return cost;
17560 }
17561
17562 /* Moves between SSE/MMX and integer unit are expensive. */
17563 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17564 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17565 return ix86_cost->mmxsse_to_integer;
17566 if (MAYBE_FLOAT_CLASS_P (class1))
17567 return ix86_cost->fp_move;
17568 if (MAYBE_SSE_CLASS_P (class1))
17569 return ix86_cost->sse_move;
17570 if (MAYBE_MMX_CLASS_P (class1))
17571 return ix86_cost->mmx_move;
17572 return 2;
17573}
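
/* A rough worked example of the secondary-memory branch above: an SImode
   move between MMX_REGS and FLOAT_REGS is costed as 1 plus the larger of
   the load/store costs for each class plus the extra 20 for the
   overlapping FP/MMX register files, which is deliberately steep and keeps
   the allocator away from such copies.  */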
17574
17575/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17576
17577bool
17578ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17579{
17580 /* Flags and only flags can only hold CCmode values. */
17581 if (CC_REGNO_P (regno))
17582 return GET_MODE_CLASS (mode) == MODE_CC;
17583 if (GET_MODE_CLASS (mode) == MODE_CC
17584 || GET_MODE_CLASS (mode) == MODE_RANDOM
17585 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17586 return 0;
17587 if (FP_REGNO_P (regno))
17588 return VALID_FP_MODE_P (mode);
17589 if (SSE_REGNO_P (regno))
17590 {
17591 /* We implement the move patterns for all vector modes into and
17592 out of SSE registers, even when no operation instructions
17593 are available. */
17594 return (VALID_SSE_REG_MODE (mode)
17595 || VALID_SSE2_REG_MODE (mode)
17596 || VALID_MMX_REG_MODE (mode)
17597 || VALID_MMX_REG_MODE_3DNOW (mode));
17598 }
17599 if (MMX_REGNO_P (regno))
17600 {
17601 /* We implement the move patterns for 3DNOW modes even in MMX mode,
17602 so if the register is available at all, then we can move data of
17603 the given mode into or out of it. */
17604 return (VALID_MMX_REG_MODE (mode)
17605 || VALID_MMX_REG_MODE_3DNOW (mode));
17606 }
17607
17608 if (mode == QImode)
17609 {
17610 /* Be careful with QImode values - they can be in non-QI regs,
17611 but then they do cause partial register stalls. */
17612 if (regno < 4 || TARGET_64BIT)
17613 return 1;
17614 if (!TARGET_PARTIAL_REG_STALL)
17615 return 1;
17616 return reload_in_progress || reload_completed;
17617 }
17618 /* We handle both integers and floats in the general purpose registers. */
17619 else if (VALID_INT_MODE_P (mode))
17620 return 1;
17621 else if (VALID_FP_MODE_P (mode))
17622 return 1;
17623 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
17624 on to use that value in smaller contexts, this can easily force a
17625 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
17626 supporting DImode, allow it. */
17627 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17628 return 1;
17629
17630 return 0;
17631}
17632
17633/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17634 tieable integer mode. */
17635
17636static bool
17637ix86_tieable_integer_mode_p (enum machine_mode mode)
17638{
17639 switch (mode)
17640 {
17641 case HImode:
17642 case SImode:
17643 return true;
17644
17645 case QImode:
17646 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17647
17648 case DImode:
17649 return TARGET_64BIT;
17650
17651 default:
17652 return false;
17653 }
17654}
17655
17656/* Return true if MODE1 is accessible in a register that can hold MODE2
17657 without copying. That is, all register classes that can hold MODE2
17658 can also hold MODE1. */
17659
17660bool
17661ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17662{
17663 if (mode1 == mode2)
17664 return true;
17665
17666 if (ix86_tieable_integer_mode_p (mode1)
17667 && ix86_tieable_integer_mode_p (mode2))
17668 return true;
17669
17670 /* MODE2 being XFmode implies fp stack or general regs, which means we
17671 can tie any smaller floating point modes to it. Note that we do not
17672 tie this with TFmode. */
17673 if (mode2 == XFmode)
17674 return mode1 == SFmode || mode1 == DFmode;
17675
17676 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17677 that we can tie it with SFmode. */
17678 if (mode2 == DFmode)
17679 return mode1 == SFmode;
17680
17681 /* If MODE2 is only appropriate for an SSE register, then tie with
17682 any other mode acceptable to SSE registers. */
17683 if (GET_MODE_SIZE (mode2) >= 8
17684 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17685 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17686
17687 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17688 with any other mode acceptable to MMX registers. */
17689 if (GET_MODE_SIZE (mode2) == 8
17690 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17691 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17692
17693 return false;
17694}
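
/* Examples of the rules above: (SFmode, DFmode) and (SFmode, XFmode) are
   tieable, since anything that can hold the wider FP mode can hold the
   narrower one, while (DImode, XFmode) is not, and on ia32 (DImode, SImode)
   is not either because DImode is not a tieable integer mode there.  */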
17695
17696/* Return the cost of moving data of mode M between a
17697 register and memory. A value of 2 is the default; this cost is
17698 relative to those in `REGISTER_MOVE_COST'.
17699
17700 If moving between registers and memory is more expensive than
17701 between two registers, you should define this macro to express the
17702 relative cost.
17703
17704 Also model the increased cost of moving QImode values in non-Q_REGS
17705 classes.
17706 */
17707int
17708ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17709{
17710 if (FLOAT_CLASS_P (class))
17711 {
17712 int index;
17713 switch (mode)
17714 {
17715 case SFmode:
17716 index = 0;
17717 break;
17718 case DFmode:
17719 index = 1;
17720 break;
17721 case XFmode:
17722 index = 2;
17723 break;
17724 default:
17725 return 100;
17726 }
17727 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17728 }
17729 if (SSE_CLASS_P (class))
17730 {
17731 int index;
17732 switch (GET_MODE_SIZE (mode))
17733 {
17734 case 4:
17735 index = 0;
17736 break;
17737 case 8:
17738 index = 1;
17739 break;
17740 case 16:
17741 index = 2;
17742 break;
17743 default:
17744 return 100;
17745 }
17746 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17747 }
17748 if (MMX_CLASS_P (class))
17749 {
17750 int index;
17751 switch (GET_MODE_SIZE (mode))
17752 {
17753 case 4:
17754 index = 0;
17755 break;
17756 case 8:
17757 index = 1;
17758 break;
17759 default:
17760 return 100;
17761 }
17762 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17763 }
17764 switch (GET_MODE_SIZE (mode))
17765 {
17766 case 1:
17767 if (in)
17768 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17769 : ix86_cost->movzbl_load);
17770 else
17771 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17772 : ix86_cost->int_store[0] + 4);
17773 break;
17774 case 2:
17775 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17776 default:
17777 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17778 if (mode == TFmode)
17779 mode = XFmode;
17780 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17781 * (((int) GET_MODE_SIZE (mode)
17782 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17783 }
17784}
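
/* For example, a DFmode load into FLOAT_REGS is charged fp_load[1], a
   16-byte vector load into SSE_REGS sse_load[2], and a QImode store from a
   non-Q class int_store[0] + 4, the extra 4 roughly reflecting the more
   expensive byte access from a register without a low-byte name.  */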
17785
17786/* Compute a (partial) cost for rtx X. Return true if the complete
17787 cost has been computed, and false if subexpressions should be
17788 scanned. In either case, *TOTAL contains the cost result. */
17789
17790static bool
17791ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17792{
17793 enum machine_mode mode = GET_MODE (x);
17794
17795 switch (code)
17796 {
17797 case CONST_INT:
17798 case CONST:
17799 case LABEL_REF:
17800 case SYMBOL_REF:
17801 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17802 *total = 3;
17803 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17804 *total = 2;
17805 else if (flag_pic && SYMBOLIC_CONST (x)
17806 && (!TARGET_64BIT
17807 || (GET_CODE (x) != LABEL_REF
17808 && (GET_CODE (x) != SYMBOL_REF
17809 || !SYMBOL_REF_LOCAL_P (x)))))
17810 *total = 1;
17811 else
17812 *total = 0;
17813 return true;
17814
17815 case CONST_DOUBLE:
17816 if (mode == VOIDmode)
17817 *total = 0;
17818 else
17819 switch (standard_80387_constant_p (x))
17820 {
17821 case 1: /* 0.0 */
17822 *total = 1;
17823 break;
17824 default: /* Other constants */
17825 *total = 2;
17826 break;
17827 case 0:
17828 case -1:
17829 /* Start with (MEM (SYMBOL_REF)), since that's where
17830 it'll probably end up. Add a penalty for size. */
17831 *total = (COSTS_N_INSNS (1)
17832 + (flag_pic != 0 && !TARGET_64BIT)
17833 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17834 break;
17835 }
17836 return true;
17837
17838 case ZERO_EXTEND:
17839 /* The zero extension is often completely free on x86_64, so make
17840 it as cheap as possible. */
17841 if (TARGET_64BIT && mode == DImode
17842 && GET_MODE (XEXP (x, 0)) == SImode)
17843 *total = 1;
17844 else if (TARGET_ZERO_EXTEND_WITH_AND)
17845 *total = ix86_cost->add;
17846 else
17847 *total = ix86_cost->movzx;
17848 return false;
17849
17850 case SIGN_EXTEND:
17851 *total = ix86_cost->movsx;
17852 return false;
17853
17854 case ASHIFT:
17855 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17856 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17857 {
17858 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17859 if (value == 1)
17860 {
17861 *total = ix86_cost->add;
17862 return false;
17863 }
17864 if ((value == 2 || value == 3)
17865 && ix86_cost->lea <= ix86_cost->shift_const)
17866 {
17867 *total = ix86_cost->lea;
17868 return false;
17869 }
17870 }
17871 /* FALLTHRU */
17872
17873 case ROTATE:
17874 case ASHIFTRT:
17875 case LSHIFTRT:
17876 case ROTATERT:
17877 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17878 {
17879 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17880 {
17881 if (INTVAL (XEXP (x, 1)) > 32)
17882 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17883 else
17884 *total = ix86_cost->shift_const * 2;
17885 }
17886 else
17887 {
17888 if (GET_CODE (XEXP (x, 1)) == AND)
17889 *total = ix86_cost->shift_var * 2;
17890 else
17891 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17892 }
17893 }
17894 else
17895 {
17896 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17897 *total = ix86_cost->shift_const;
17898 else
17899 *total = ix86_cost->shift_var;
17900 }
17901 return false;
17902
17903 case MULT:
17904 if (FLOAT_MODE_P (mode))
17905 {
17906 *total = ix86_cost->fmul;
17907 return false;
17908 }
17909 else
17910 {
17911 rtx op0 = XEXP (x, 0);
17912 rtx op1 = XEXP (x, 1);
17913 int nbits;
17914 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17915 {
17916 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17917 for (nbits = 0; value != 0; value &= value - 1)
17918 nbits++;
17919 }
17920 else
17921 /* This is arbitrary. */
17922 nbits = 7;
17923
17924 /* Compute costs correctly for widening multiplication. */
17925 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
17926 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17927 == GET_MODE_SIZE (mode))
17928 {
17929 int is_mulwiden = 0;
17930 enum machine_mode inner_mode = GET_MODE (op0);
17931
17932 if (GET_CODE (op0) == GET_CODE (op1))
17933 is_mulwiden = 1, op1 = XEXP (op1, 0);
17934 else if (GET_CODE (op1) == CONST_INT)
17935 {
17936 if (GET_CODE (op0) == SIGN_EXTEND)
17937 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17938 == INTVAL (op1);
17939 else
17940 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17941 }
17942
17943 if (is_mulwiden)
17944 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17945 }
17946
17947 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17948 + nbits * ix86_cost->mult_bit
17949 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17950
17951 return true;
17952 }
17953
17954 case DIV:
17955 case UDIV:
17956 case MOD:
17957 case UMOD:
17958 if (FLOAT_MODE_P (mode))
17959 *total = ix86_cost->fdiv;
17960 else
17961 *total = ix86_cost->divide[MODE_INDEX (mode)];
17962 return false;
17963
17964 case PLUS:
17965 if (FLOAT_MODE_P (mode))
17966 *total = ix86_cost->fadd;
17967 else if (GET_MODE_CLASS (mode) == MODE_INT
17968 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17969 {
17970 if (GET_CODE (XEXP (x, 0)) == PLUS
17971 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17972 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17973 && CONSTANT_P (XEXP (x, 1)))
17974 {
17975 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17976 if (val == 2 || val == 4 || val == 8)
17977 {
17978 *total = ix86_cost->lea;
17979 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17980 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17981 outer_code);
17982 *total += rtx_cost (XEXP (x, 1), outer_code);
17983 return true;
17984 }
17985 }
17986 else if (GET_CODE (XEXP (x, 0)) == MULT
17987 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17988 {
17989 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17990 if (val == 2 || val == 4 || val == 8)
17991 {
17992 *total = ix86_cost->lea;
17993 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17994 *total += rtx_cost (XEXP (x, 1), outer_code);
17995 return true;
17996 }
17997 }
17998 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17999 {
18000 *total = ix86_cost->lea;
18001 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
18002 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
18003 *total += rtx_cost (XEXP (x, 1), outer_code);
18004 return true;
18005 }
18006 }
18007 /* FALLTHRU */
18008
18009 case MINUS:
18010 if (FLOAT_MODE_P (mode))
18011 {
18012 *total = ix86_cost->fadd;
18013 return false;
18014 }
18015 /* FALLTHRU */
18016
18017 case AND:
18018 case IOR:
18019 case XOR:
18020 if (!TARGET_64BIT && mode == DImode)
18021 {
18022 *total = (ix86_cost->add * 2
18023 + (rtx_cost (XEXP (x, 0), outer_code)
18024 << (GET_MODE (XEXP (x, 0)) != DImode))
18025 + (rtx_cost (XEXP (x, 1), outer_code)
18026 << (GET_MODE (XEXP (x, 1)) != DImode)));
18027 return true;
18028 }
18029 /* FALLTHRU */
18030
18031 case NEG:
18032 if (FLOAT_MODE_P (mode))
18033 {
18034 *total = ix86_cost->fchs;
18035 return false;
18036 }
18037 /* FALLTHRU */
18038
18039 case NOT:
18040 if (!TARGET_64BIT && mode == DImode)
18041 *total = ix86_cost->add * 2;
18042 else
18043 *total = ix86_cost->add;
18044 return false;
18045
18046 case COMPARE:
18047 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
18048 && XEXP (XEXP (x, 0), 1) == const1_rtx
18049 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
18050 && XEXP (x, 1) == const0_rtx)
18051 {
18052 /* This kind of construct is implemented using test[bwl].
18053 Treat it as if we had an AND. */
18054 *total = (ix86_cost->add
18055 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
18056 + rtx_cost (const1_rtx, outer_code));
18057 return true;
18058 }
18059 return false;
18060
18061 case FLOAT_EXTEND:
18062 if (!TARGET_SSE_MATH
18063 || mode == XFmode
18064 || (mode == DFmode && !TARGET_SSE2))
18065 /* For standard 80387 constants, raise the cost to prevent
18066 compress_float_constant() from generating a load from memory. */
18067 switch (standard_80387_constant_p (XEXP (x, 0)))
18068 {
18069 case -1:
18070 case 0:
18071 *total = 0;
18072 break;
18073 case 1: /* 0.0 */
18074 *total = 1;
18075 break;
18076 default:
18077 *total = (x86_ext_80387_constants & TUNEMASK
18078 || optimize_size
18079 ? 1 : 0);
18080 }
18081 return false;
18082
18083 case ABS:
18084 if (FLOAT_MODE_P (mode))
18085 *total = ix86_cost->fabs;
18086 return false;
18087
18088 case SQRT:
18089 if (FLOAT_MODE_P (mode))
18090 *total = ix86_cost->fsqrt;
18091 return false;
18092
18093 case UNSPEC:
18094 if (XINT (x, 1) == UNSPEC_TP)
18095 *total = 0;
18096 return false;
18097
18098 default:
18099 return false;
18100 }
18101}
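
/* A small illustration of the shift costing above: a left shift by 1 is
   costed like an add, a shift by 2 or 3 may be costed as an lea when that
   is no more expensive than a constant shift, and a DImode shift on ia32
   is billed as roughly two SImode shifts, with extra cost for variable or
   larger-than-32 counts.  */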
18102
18103#if TARGET_MACHO
18104
18105static int current_machopic_label_num;
18106
18107/* Given a symbol name and its associated stub, write out the
18108 definition of the stub. */
18109
18110void
18111machopic_output_stub (FILE *file, const char *symb, const char *stub)
18112{
18113 unsigned int length;
18114 char *binder_name, *symbol_name, lazy_ptr_name[32];
18115 int label = ++current_machopic_label_num;
18116
18117 /* For 64-bit we shouldn't get here. */
18118 gcc_assert (!TARGET_64BIT);
18119
18120 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
18121 symb = (*targetm.strip_name_encoding) (symb);
18122
18123 length = strlen (stub);
18124 binder_name = alloca (length + 32);
18125 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
18126
18127 length = strlen (symb);
18128 symbol_name = alloca (length + 32);
18129 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
18130
18131 sprintf (lazy_ptr_name, "L%d$lz", label);
18132
18133 if (MACHOPIC_PURE)
18134 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
18135 else
18136 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
18137
18138 fprintf (file, "%s:\n", stub);
18139 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18140
18141 if (MACHOPIC_PURE)
18142 {
18143 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
18144 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
18145 fprintf (file, "\tjmp\t*%%edx\n");
18146 }
18147 else
18148 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
18149
18150 fprintf (file, "%s:\n", binder_name);
18151
18152 if (MACHOPIC_PURE)
18153 {
18154 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
18155 fprintf (file, "\tpushl\t%%eax\n");
18156 }
18157 else
18158 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
18159
18160 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
18161
18162 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
18163 fprintf (file, "%s:\n", lazy_ptr_name);
18164 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18165 fprintf (file, "\t.long %s\n", binder_name);
18166}
18167
18168void
18169darwin_x86_file_end (void)
18170{
18171 darwin_file_end ();
18172 ix86_file_end ();
18173}
18174#endif /* TARGET_MACHO */
18175
18176/* Order the registers for the register allocator. */
18177
18178void
18179x86_order_regs_for_local_alloc (void)
18180{
18181 int pos = 0;
18182 int i;
18183
18184 /* First allocate the local general purpose registers. */
18185 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18186 if (GENERAL_REGNO_P (i) && call_used_regs[i])
18187 reg_alloc_order [pos++] = i;
18188
18189 /* Global general purpose registers. */
18190 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18191 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
18192 reg_alloc_order [pos++] = i;
18193
18194 /* x87 registers come first in case we are doing FP math
18195 using them. */
18196 if (!TARGET_SSE_MATH)
18197 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18198 reg_alloc_order [pos++] = i;
18199
18200 /* SSE registers. */
18201 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18202 reg_alloc_order [pos++] = i;
18203 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18204 reg_alloc_order [pos++] = i;
18205
18206 /* x87 registers. */
18207 if (TARGET_SSE_MATH)
18208 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18209 reg_alloc_order [pos++] = i;
18210
18211 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
18212 reg_alloc_order [pos++] = i;
18213
18214 /* Initialize the rest of the array, as we do not allocate some
18215 registers at all. */
18216 while (pos < FIRST_PSEUDO_REGISTER)
18217 reg_alloc_order [pos++] = 0;
18218}
18219
18220/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
18221 struct attribute_spec.handler. */
18222static tree
18223ix86_handle_struct_attribute (tree *node, tree name,
18224 tree args ATTRIBUTE_UNUSED,
18225 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
18226{
18227 tree *type = NULL;
18228 if (DECL_P (*node))
18229 {
18230 if (TREE_CODE (*node) == TYPE_DECL)
18231 type = &TREE_TYPE (*node);
18232 }
18233 else
18234 type = node;
18235
18236 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
18237 || TREE_CODE (*type) == UNION_TYPE)))
18238 {
18239 warning (OPT_Wattributes, "%qs attribute ignored",
18240 IDENTIFIER_POINTER (name));
18241 *no_add_attrs = true;
18242 }
18243
18244 else if ((is_attribute_p ("ms_struct", name)
18245 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
18246 || ((is_attribute_p ("gcc_struct", name)
18247 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
18248 {
18249 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
18250 IDENTIFIER_POINTER (name));
18251 *no_add_attrs = true;
18252 }
18253
18254 return NULL_TREE;
18255}
18256
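/* Return true if bit-fields in RECORD_TYPE should use the MS layout:
   either -mms-bitfields is in effect and the type is not marked with the
   gcc_struct attribute, or the type is marked with ms_struct.  */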
18257static bool
18258ix86_ms_bitfield_layout_p (tree record_type)
18259{
18260 return (TARGET_MS_BITFIELD_LAYOUT &&
18261 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
18262 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
18263}
18264
18265/* Returns an expression indicating where the this parameter is
18266 located on entry to the FUNCTION. */
18267
18268static rtx
18269x86_this_parameter (tree function)
18270{
18271 tree type = TREE_TYPE (function);
18272
18273 if (TARGET_64BIT)
18274 {
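      /* The `this' pointer arrives in the first integer argument register
	 unless the function returns its value in memory; in that case the
	 first register carries the hidden return-slot pointer and `this'
	 is in the second one.  */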
18275 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
18276 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
18277 }
18278
18279 if (ix86_function_regparm (type, function) > 0)
18280 {
18281 tree parm;
18282
18283 parm = TYPE_ARG_TYPES (type);
18284 /* Figure out whether or not the function has a variable number of
18285 arguments. */
18286 for (; parm; parm = TREE_CHAIN (parm))
18287 if (TREE_VALUE (parm) == void_type_node)
18288 break;
18289 /* If not, the this parameter is in the first argument. */
18290 if (parm)
18291 {
18292 int regno = 0;
18293 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
18294 regno = 2;
18295 return gen_rtx_REG (SImode, regno);
18296 }
18297 }
18298
18299 if (aggregate_value_p (TREE_TYPE (type), type))
18300 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
18301 else
18302 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
18303}
18304
18305/* Determine whether x86_output_mi_thunk can succeed. */
18306
18307static bool
18308x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
18309 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
18310 HOST_WIDE_INT vcall_offset, tree function)
18311{
18312 /* 64-bit can handle anything. */
18313 if (TARGET_64BIT)
18314 return true;
18315
18316 /* For 32-bit, everything's fine if we have one free register. */
18317 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
18318 return true;
18319
18320 /* Need a free register for vcall_offset. */
18321 if (vcall_offset)
18322 return false;
18323
18324 /* Need a free register for GOT references. */
18325 if (flag_pic && !(*targetm.binds_local_p) (function))
18326 return false;
18327
18328 /* Otherwise ok. */
18329 return true;
18330}
18331
18332/* Output the assembler code for a thunk function. THUNK_DECL is the
18333 declaration for the thunk function itself, FUNCTION is the decl for
18334 the target function. DELTA is an immediate constant offset to be
18335 added to THIS. If VCALL_OFFSET is nonzero, the word at
18336 *(*this + vcall_offset) should be added to THIS. */
18337
18338static void
18339x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
18340 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
18341 HOST_WIDE_INT vcall_offset, tree function)
18342{
18343 rtx xops[3];
18344 rtx this = x86_this_parameter (function);
18345 rtx this_reg, tmp;
18346
18347 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
18348 pull it in now and let DELTA benefit. */
18349 if (REG_P (this))
18350 this_reg = this;
18351 else if (vcall_offset)
18352 {
18353 /* Put the this parameter into %eax. */
18354 xops[0] = this;
18355 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
18356 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18357 }
18358 else
18359 this_reg = NULL_RTX;
18360
18361 /* Adjust the this parameter by a fixed constant. */
18362 if (delta)
18363 {
18364 xops[0] = GEN_INT (delta);
18365 xops[1] = this_reg ? this_reg : this;
18366 if (TARGET_64BIT)
18367 {
18368 if (!x86_64_general_operand (xops[0], DImode))
18369 {
18370 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18371 xops[1] = tmp;
18372 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18373 xops[0] = tmp;
18374 xops[1] = this;
18375 }
18376 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18377 }
18378 else
18379 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18380 }
18381
18382 /* Adjust the this parameter by a value stored in the vtable. */
18383 if (vcall_offset)
18384 {
18385 if (TARGET_64BIT)
18386 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18387 else
18388 {
18389 int tmp_regno = 2 /* ECX */;
18390 if (lookup_attribute ("fastcall",
18391 TYPE_ATTRIBUTES (TREE_TYPE (function))))
18392 tmp_regno = 0 /* EAX */;
18393 tmp = gen_rtx_REG (SImode, tmp_regno);
18394 }
18395
18396 xops[0] = gen_rtx_MEM (Pmode, this_reg);
18397 xops[1] = tmp;
18398 if (TARGET_64BIT)
18399 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18400 else
18401 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18402
18403 /* Adjust the this parameter. */
18404 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18405 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18406 {
18407 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
18408 xops[0] = GEN_INT (vcall_offset);
18409 xops[1] = tmp2;
18410 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18411 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18412 }
18413 xops[1] = this_reg;
18414 if (TARGET_64BIT)
18415 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18416 else
18417 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18418 }
18419
18420 /* If necessary, drop THIS back to its stack slot. */
18421 if (this_reg && this_reg != this)
18422 {
18423 xops[0] = this_reg;
18424 xops[1] = this;
18425 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18426 }
18427
18428 xops[0] = XEXP (DECL_RTL (function), 0);
18429 if (TARGET_64BIT)
18430 {
18431 if (!flag_pic || (*targetm.binds_local_p) (function))
18432 output_asm_insn ("jmp\t%P0", xops);
18433 else
18434 {
18435 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18436 tmp = gen_rtx_CONST (Pmode, tmp);
18437 tmp = gen_rtx_MEM (QImode, tmp);
18438 xops[0] = tmp;
18439 output_asm_insn ("jmp\t%A0", xops);
18440 }
18441 }
18442 else
18443 {
18444 if (!flag_pic || (*targetm.binds_local_p) (function))
18445 output_asm_insn ("jmp\t%P0", xops);
18446 else
18447#if TARGET_MACHO
18448 if (TARGET_MACHO)
18449 {
18450 rtx sym_ref = XEXP (DECL_RTL (function), 0);
18451 tmp = (gen_rtx_SYMBOL_REF
18452 (Pmode,
18453 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18454 tmp = gen_rtx_MEM (QImode, tmp);
18455 xops[0] = tmp;
18456 output_asm_insn ("jmp\t%0", xops);
18457 }
18458 else
18459#endif /* TARGET_MACHO */
18460 {
18461 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18462 output_set_got (tmp, NULL_RTX);
18463
18464 xops[1] = tmp;
18465 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18466 output_asm_insn ("jmp\t{*}%1", xops);
18467 }
18468 }
18469}
18470
18471static void
18472x86_file_start (void)
18473{
18474 default_file_start ();
18475#if TARGET_MACHO
18476 darwin_file_start ();
18477#endif
18478 if (X86_FILE_START_VERSION_DIRECTIVE)
18479 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18480 if (X86_FILE_START_FLTUSED)
18481 fputs ("\t.global\t__fltused\n", asm_out_file);
18482 if (ix86_asm_dialect == ASM_INTEL)
18483 fputs ("\t.intel_syntax\n", asm_out_file);
18484}
18485
18486int
18487x86_field_alignment (tree field, int computed)
18488{
18489 enum machine_mode mode;
18490 tree type = TREE_TYPE (field);
18491
18492 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18493 return computed;
18494 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18495 ? get_inner_array_type (type) : type);
18496 if (mode == DFmode || mode == DCmode
18497 || GET_MODE_CLASS (mode) == MODE_INT
18498 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18499 return MIN (32, computed);
18500 return computed;
18501}
18502
18503/* Output assembler code to FILE to increment profiler label # LABELNO
18504 for profiling a function entry. */
18505void
18506x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18507{
18508 if (TARGET_64BIT)
18509 if (flag_pic)
18510 {
18511#ifndef NO_PROFILE_COUNTERS
18512 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18513#endif
18514 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18515 }
18516 else
18517 {
18518#ifndef NO_PROFILE_COUNTERS
18519 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18520#endif
18521 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18522 }
18523 else if (flag_pic)
18524 {
18525#ifndef NO_PROFILE_COUNTERS
18526 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18527 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18528#endif
18529 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18530 }
18531 else
18532 {
18533#ifndef NO_PROFILE_COUNTERS
18534 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18535 PROFILE_COUNT_REGISTER);
18536#endif
18537 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18538 }
18539}
18540
18541/* We don't have exact information about the insn sizes, but we may safely
18542 assume that we are informed about all 1-byte insns and memory address
18543 sizes. This is enough to eliminate unnecessary padding in 99% of
18544 cases. */
18545
18546static int
18547min_insn_size (rtx insn)
18548{
18549 int l = 0;
18550
18551 if (!INSN_P (insn) || !active_insn_p (insn))
18552 return 0;
18553
18554 /* Discard alignments we've emitted, and jump instructions. */
18555 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18556 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18557 return 0;
18558 if (GET_CODE (insn) == JUMP_INSN
18559 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18560 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18561 return 0;
18562
18563 /* Important case: calls are always 5 bytes.
18564 It is common to have many calls in a row. */
18565 if (GET_CODE (insn) == CALL_INSN
18566 && symbolic_reference_mentioned_p (PATTERN (insn))
18567 && !SIBLING_CALL_P (insn))
18568 return 5;
18569 if (get_attr_length (insn) <= 1)
18570 return 1;
18571
18572 /* For normal instructions we may rely on the sizes of addresses
18573 and the presence of a symbol to require 4 bytes of encoding.
18574 This is not the case for jumps, where references are PC-relative. */
18575 if (GET_CODE (insn) != JUMP_INSN)
18576 {
18577 l = get_attr_length_address (insn);
18578 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18579 l = 4;
18580 }
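  /* Assume at least one opcode byte plus the address bytes determined
     above; with no known address bytes, assume a minimum of 2 bytes.  */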
18581 if (l)
18582 return 1+l;
18583 else
18584 return 2;
18585}
18586
18587/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
18588 16-byte window. */
18589
18590static void
18591ix86_avoid_jump_misspredicts (void)
18592{
18593 rtx insn, start = get_insns ();
18594 int nbytes = 0, njumps = 0;
18595 int isjump = 0;
18596
18597 /* Look for all minimal intervals of instructions containing 4 jumps.
18598 The intervals are bounded by START and INSN. NBYTES is the total
18599 size of the instructions in the interval, including INSN but not
18600 including START. When NBYTES is smaller than 16, it is possible
18601 that the ends of START and INSN fall in the same 16-byte page.
18602 
18603 The smallest offset in the page at which INSN can start is the case
18604 where START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
18605 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
18606 */
18607 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18608 {
18609
18610 nbytes += min_insn_size (insn);
18611 if (dump_file)
18612 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
18613 INSN_UID (insn), min_insn_size (insn));
18614 if ((GET_CODE (insn) == JUMP_INSN
18615 && GET_CODE (PATTERN (insn)) != ADDR_VEC
18616 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18617 || GET_CODE (insn) == CALL_INSN)
18618 njumps++;
18619 else
18620 continue;
18621
18622 while (njumps > 3)
18623 {
18624 start = NEXT_INSN (start);
18625 if ((GET_CODE (start) == JUMP_INSN
18626 && GET_CODE (PATTERN (start)) != ADDR_VEC
18627 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18628 || GET_CODE (start) == CALL_INSN)
18629 njumps--, isjump = 1;
18630 else
18631 isjump = 0;
18632 nbytes -= min_insn_size (start);
18633 }
18634 gcc_assert (njumps >= 0);
18635 if (dump_file)
18636 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18637 INSN_UID (start), INSN_UID (insn), nbytes);
18638
18639 if (njumps == 3 && isjump && nbytes < 16)
18640 {
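	  /* Three jumps fit in fewer than 16 bytes and the insn just before
	     START is also a jump; pad before INSN so that all four cannot
	     land in the same 16-byte window.  */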
18641 int padsize = 15 - nbytes + min_insn_size (insn);
18642
18643 if (dump_file)
18644 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18645 INSN_UID (insn), padsize);
18646 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18647 }
18648 }
18649}
18650
18651/* The AMD Athlon works faster
18652 when RET is not the destination of a conditional jump or directly preceded
18653 by another jump instruction. We avoid the penalty by inserting a NOP just
18654 before RET instructions in such cases. */
18655static void
18656ix86_pad_returns (void)
18657{
18658 edge e;
18659 edge_iterator ei;
18660
18661 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18662 {
18663 basic_block bb = e->src;
18664 rtx ret = BB_END (bb);
18665 rtx prev;
18666 bool replace = false;
18667
18668 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18669 || !maybe_hot_bb_p (bb))
18670 continue;
18671 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18672 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18673 break;
18674 if (prev && GET_CODE (prev) == CODE_LABEL)
18675 {
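	/* The RET is the target of a label; check whether any predecessor
	   reaches that label by a jump rather than by falling through, in
	   which case the RET directly follows a jump.  */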
18676 edge e;
18677 edge_iterator ei;
18678
18679 FOR_EACH_EDGE (e, ei, bb->preds)
18680 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18681 && !(e->flags & EDGE_FALLTHRU))
18682 replace = true;
18683 }
18684 if (!replace)
18685 {
18686 prev = prev_active_insn (ret);
18687 if (prev
18688 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18689 || GET_CODE (prev) == CALL_INSN))
18690 replace = true;
18691 /* Empty functions get a branch mispredict even when the jump destination
18692 is not visible to us. */
18693 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18694 replace = true;
18695 }
18696 if (replace)
18697 {
18698 emit_insn_before (gen_return_internal_long (), ret);
18699 delete_insn (ret);
18700 }
18701 }
18702}
18703
18704/* Implement machine specific optimizations. We implement padding of returns
18705 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
18706static void
18707ix86_reorg (void)
18708{
18709 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18710 ix86_pad_returns ();
18711 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18712 ix86_avoid_jump_misspredicts ();
18713}
18714
18715/* Return nonzero when a QImode register that must be represented via a REX
18716 prefix is used. */
18717bool
18718x86_extended_QIreg_mentioned_p (rtx insn)
18719{
18720 int i;
18721 extract_insn_cached (insn);
18722 for (i = 0; i < recog_data.n_operands; i++)
18723 if (REG_P (recog_data.operand[i])
18724 && REGNO (recog_data.operand[i]) >= 4)
18725 return true;
18726 return false;
18727}
18728
18729/* Return nonzero when P points to a register encoded via a REX prefix.
18730 Called via for_each_rtx. */
18731static int
18732extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18733{
18734 unsigned int regno;
18735 if (!REG_P (*p))
18736 return 0;
18737 regno = REGNO (*p);
18738 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18739}
18740
18741/* Return true when INSN mentions a register that must be encoded using a
18742 REX prefix. */
18743bool
18744x86_extended_reg_mentioned_p (rtx insn)
18745{
18746 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18747}
18748
18749/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18750 optabs would emit if we didn't have TFmode patterns. */
18751
18752void
18753x86_emit_floatuns (rtx operands[2])
18754{
18755 rtx neglab, donelab, i0, i1, f0, in, out;
18756 enum machine_mode mode, inmode;
18757
18758 inmode = GET_MODE (operands[1]);
18759 gcc_assert (inmode == SImode || inmode == DImode);
18760
18761 out = operands[0];
18762 in = force_reg (inmode, operands[1]);
18763 mode = GET_MODE (out);
18764 neglab = gen_label_rtx ();
18765 donelab = gen_label_rtx ();
18766 i1 = gen_reg_rtx (Pmode);
18767 f0 = gen_reg_rtx (mode);
18768
18769 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18770
18771 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18772 emit_jump_insn (gen_jump (donelab));
18773 emit_barrier ();
18774
18775 emit_label (neglab);
18776
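  /* The input has its high bit set.  Halve it, ORing the lost low bit back
     in so rounding is preserved, convert to FP, then double the result.  */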
18777 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18778 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18779 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18780 expand_float (f0, i0, 0);
18781 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18782
18783 emit_label (donelab);
18784}
18785
18786/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18787 with all elements equal to VAR. Return true if successful. */
18788
18789static bool
18790ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18791 rtx target, rtx val)
18792{
18793 enum machine_mode smode, wsmode, wvmode;
18794 rtx x;
18795
18796 switch (mode)
18797 {
18798 case V2SImode:
18799 case V2SFmode:
18800 if (!mmx_ok)
18801 return false;
18802 /* FALLTHRU */
18803
18804 case V2DFmode:
18805 case V2DImode:
18806 case V4SFmode:
18807 case V4SImode:
18808 val = force_reg (GET_MODE_INNER (mode), val);
18809 x = gen_rtx_VEC_DUPLICATE (mode, val);
18810 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18811 return true;
18812
18813 case V4HImode:
18814 if (!mmx_ok)
18815 return false;
18816 if (TARGET_SSE || TARGET_3DNOW_A)
18817 {
18818 val = gen_lowpart (SImode, val);
18819 x = gen_rtx_TRUNCATE (HImode, val);
18820 x = gen_rtx_VEC_DUPLICATE (mode, x);
18821 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18822 return true;
18823 }
18824 else
18825 {
18826 smode = HImode;
18827 wsmode = SImode;
18828 wvmode = V2SImode;
18829 goto widen;
18830 }
18831
18832 case V8QImode:
18833 if (!mmx_ok)
18834 return false;
18835 smode = QImode;
18836 wsmode = HImode;
18837 wvmode = V4HImode;
18838 goto widen;
18839 case V8HImode:
18840 if (TARGET_SSE2)
18841 {
18842 rtx tmp1, tmp2;
18843 /* Extend HImode to SImode using a paradoxical SUBREG. */
18844 tmp1 = gen_reg_rtx (SImode);
18845 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18846 /* Insert the SImode value as low element of V4SImode vector. */
18847 tmp2 = gen_reg_rtx (V4SImode);
18848 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18849 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18850 CONST0_RTX (V4SImode),
18851 const1_rtx);
18852 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18853 /* Cast the V4SImode vector back to a V8HImode vector. */
18854 tmp1 = gen_reg_rtx (V8HImode);
18855 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18856 /* Duplicate the low short through the whole low SImode word. */
18857 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18858 /* Cast the V8HImode vector back to a V4SImode vector. */
18859 tmp2 = gen_reg_rtx (V4SImode);
18860 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18861 /* Replicate the low element of the V4SImode vector. */
18862 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18863 /* Cast the V4SImode vector back to V8HImode, and store in target. */
18864 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18865 return true;
18866 }
18867 smode = HImode;
18868 wsmode = SImode;
18869 wvmode = V4SImode;
18870 goto widen;
18871 case V16QImode:
18872 if (TARGET_SSE2)
18873 {
18874 rtx tmp1, tmp2;
18875 /* Extend QImode to SImode using a paradoxical SUBREG. */
18876 tmp1 = gen_reg_rtx (SImode);
18877 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18878 /* Insert the SImode value as low element of V4SImode vector. */
18879 tmp2 = gen_reg_rtx (V4SImode);
18880 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18881 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18882 CONST0_RTX (V4SImode),
18883 const1_rtx);
18884 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18885 /* Cast the V4SImode vector back to a V16QImode vector. */
18886 tmp1 = gen_reg_rtx (V16QImode);
18887 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18888 /* Duplicate the low byte through the whole low SImode word. */
18889 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18890 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18891 /* Cast the V16QImode vector back to a V4SImode vector. */
18892 tmp2 = gen_reg_rtx (V4SImode);
18893 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18894 /* Replicate the low element of the V4SImode vector. */
18895 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18896 /* Cast the V4SImode vector back to V16QImode, and store in target. */
18897 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18898 return true;
18899 }
18900 smode = QImode;
18901 wsmode = HImode;
18902 wvmode = V8HImode;
18903 goto widen;
18904 widen:
18905 /* Replicate the value once into the next wider mode and recurse. */
18906 val = convert_modes (wsmode, smode, val, true);
18907 x = expand_simple_binop (wsmode, ASHIFT, val,
18908 GEN_INT (GET_MODE_BITSIZE (smode)),
18909 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18910 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18911
18912 x = gen_reg_rtx (wvmode);
18913 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18914 gcc_unreachable ();
18915 emit_move_insn (target, gen_lowpart (mode, x));
18916 return true;
18917
18918 default:
18919 return false;
18920 }
18921}
18922
18923/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18924 whose ONE_VAR element is VAR and whose other elements are zero. Return
18925 true if successful. */
18926
18927static bool
18928ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18929 rtx target, rtx var, int one_var)
18930{
18931 enum machine_mode vsimode;
18932 rtx new_target;
18933 rtx x, tmp;
18934
18935 switch (mode)
18936 {
18937 case V2SFmode:
18938 case V2SImode:
18939 if (!mmx_ok)
18940 return false;
18941 /* FALLTHRU */
18942
18943 case V2DFmode:
18944 case V2DImode:
18945 if (one_var != 0)
18946 return false;
18947 var = force_reg (GET_MODE_INNER (mode), var);
18948 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18949 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18950 return true;
18951
18952 case V4SFmode:
18953 case V4SImode:
18954 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18955 new_target = gen_reg_rtx (mode);
18956 else
18957 new_target = target;
18958 var = force_reg (GET_MODE_INNER (mode), var);
18959 x = gen_rtx_VEC_DUPLICATE (mode, var);
18960 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18961 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18962 if (one_var != 0)
18963 {
18964 /* We need to shuffle the value to the correct position, so
18965 create a new pseudo to store the intermediate result. */
18966
18967 /* With SSE2, we can use the integer shuffle insns. */
18968 if (mode != V4SFmode && TARGET_SSE2)
18969 {
18970 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18971 GEN_INT (1),
18972 GEN_INT (one_var == 1 ? 0 : 1),
18973 GEN_INT (one_var == 2 ? 0 : 1),
18974 GEN_INT (one_var == 3 ? 0 : 1)));
18975 if (target != new_target)
18976 emit_move_insn (target, new_target);
18977 return true;
18978 }
18979
18980 /* Otherwise convert the intermediate result to V4SFmode and
18981 use the SSE1 shuffle instructions. */
18982 if (mode != V4SFmode)
18983 {
18984 tmp = gen_reg_rtx (V4SFmode);
18985 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18986 }
18987 else
18988 tmp = new_target;
18989
18990 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18991 GEN_INT (1),
18992 GEN_INT (one_var == 1 ? 0 : 1),
18993 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18994 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18995
18996 if (mode != V4SFmode)
18997 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18998 else if (tmp != target)
18999 emit_move_insn (target, tmp);
19000 }
19001 else if (target != new_target)
19002 emit_move_insn (target, new_target);
19003 return true;
19004
19005 case V8HImode:
19006 case V16QImode:
19007 vsimode = V4SImode;
19008 goto widen;
19009 case V4HImode:
19010 case V8QImode:
19011 if (!mmx_ok)
19012 return false;
19013 vsimode = V2SImode;
19014 goto widen;
19015 widen:
19016 if (one_var != 0)
19017 return false;
19018
19019 /* Zero extend the variable element to SImode and recurse. */
19020 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
19021
19022 x = gen_reg_rtx (vsimode);
19023 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
19024 var, one_var))
19025 gcc_unreachable ();
19026
19027 emit_move_insn (target, gen_lowpart (mode, x));
19028 return true;
19029
19030 default:
19031 return false;
19032 }
19033}
19034
19035/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19036 consisting of the values in VALS. It is known that all elements
19037 except ONE_VAR are constants. Return true if successful. */
19038
19039static bool
19040ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
19041 rtx target, rtx vals, int one_var)
19042{
19043 rtx var = XVECEXP (vals, 0, one_var);
19044 enum machine_mode wmode;
19045 rtx const_vec, x;
19046
19047 const_vec = copy_rtx (vals);
19048 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
19049 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
19050
19051 switch (mode)
19052 {
19053 case V2DFmode:
19054 case V2DImode:
19055 case V2SFmode:
19056 case V2SImode:
19057 /* For the two element vectors, it's just as easy to use
19058 the general case. */
19059 return false;
19060
19061 case V4SFmode:
19062 case V4SImode:
19063 case V8HImode:
19064 case V4HImode:
19065 break;
19066
19067 case V16QImode:
19068 wmode = V8HImode;
19069 goto widen;
19070 case V8QImode:
19071 wmode = V4HImode;
19072 goto widen;
19073 widen:
19074 /* There's no way to set one QImode entry easily. Combine
19075 the variable value with its adjacent constant value, and
19076 promote to an HImode set. */
19077 x = XVECEXP (vals, 0, one_var ^ 1);
19078 if (one_var & 1)
19079 {
19080 var = convert_modes (HImode, QImode, var, true);
19081 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
19082 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19083 x = GEN_INT (INTVAL (x) & 0xff);
19084 }
19085 else
19086 {
19087 var = convert_modes (HImode, QImode, var, true);
19088 x = gen_int_mode (INTVAL (x) << 8, HImode);
19089 }
19090 if (x != const0_rtx)
19091 var = expand_simple_binop (HImode, IOR, var, x, var,
19092 1, OPTAB_LIB_WIDEN);
19093
19094 x = gen_reg_rtx (wmode);
19095 emit_move_insn (x, gen_lowpart (wmode, const_vec));
19096 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
19097
19098 emit_move_insn (target, gen_lowpart (mode, x));
19099 return true;
19100
19101 default:
19102 return false;
19103 }
19104
19105 emit_move_insn (target, const_vec);
19106 ix86_expand_vector_set (mmx_ok, target, var, one_var);
19107 return true;
19108}
19109
19110/* A subroutine of ix86_expand_vector_init. Handle the most general case:
19111 all values variable, and none identical. */
19112
19113static void
19114ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
19115 rtx target, rtx vals)
19116{
19117 enum machine_mode half_mode = GET_MODE_INNER (mode);
19118 rtx op0 = NULL, op1 = NULL;
19119 bool use_vec_concat = false;
19120
19121 switch (mode)
19122 {
19123 case V2SFmode:
19124 case V2SImode:
19125 if (!mmx_ok && !TARGET_SSE)
19126 break;
19127 /* FALLTHRU */
19128
19129 case V2DFmode:
19130 case V2DImode:
19131 /* For the two element vectors, we always implement VEC_CONCAT. */
19132 op0 = XVECEXP (vals, 0, 0);
19133 op1 = XVECEXP (vals, 0, 1);
19134 use_vec_concat = true;
19135 break;
19136
19137 case V4SFmode:
19138 half_mode = V2SFmode;
19139 goto half;
19140 case V4SImode:
19141 half_mode = V2SImode;
19142 goto half;
19143 half:
19144 {
19145 rtvec v;
19146
19147 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
19148 Recurse to load the two halves. */
19149
19150 op0 = gen_reg_rtx (half_mode);
19151 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
19152 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
19153
19154 op1 = gen_reg_rtx (half_mode);
19155 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
19156 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
19157
19158 use_vec_concat = true;
19159 }
19160 break;
19161
19162 case V8HImode:
19163 case V16QImode:
19164 case V4HImode:
19165 case V8QImode:
19166 break;
19167
19168 default:
19169 gcc_unreachable ();
19170 }
19171
19172 if (use_vec_concat)
19173 {
19174 if (!register_operand (op0, half_mode))
19175 op0 = force_reg (half_mode, op0);
19176 if (!register_operand (op1, half_mode))
19177 op1 = force_reg (half_mode, op1);
19178
19179 emit_insn (gen_rtx_SET (VOIDmode, target,
19180 gen_rtx_VEC_CONCAT (mode, op0, op1)));
19181 }
19182 else
19183 {
19184 int i, j, n_elts, n_words, n_elt_per_word;
19185 enum machine_mode inner_mode;
19186 rtx words[4], shift;
19187
19188 inner_mode = GET_MODE_INNER (mode);
19189 n_elts = GET_MODE_NUNITS (mode);
19190 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
19191 n_elt_per_word = n_elts / n_words;
19192 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
19193
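      /* Build each word in an integer register: start from the highest
	 numbered element of the word and shift lower numbered elements in,
	 so element 0 ends up in the least significant bits; then assemble
	 the words into the vector.  */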
19194 for (i = 0; i < n_words; ++i)
19195 {
19196 rtx word = NULL_RTX;
19197
19198 for (j = 0; j < n_elt_per_word; ++j)
19199 {
19200 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
19201 elt = convert_modes (word_mode, inner_mode, elt, true);
19202
19203 if (j == 0)
19204 word = elt;
19205 else
19206 {
19207 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
19208 word, 1, OPTAB_LIB_WIDEN);
19209 word = expand_simple_binop (word_mode, IOR, word, elt,
19210 word, 1, OPTAB_LIB_WIDEN);
19211 }
19212 }
19213
19214 words[i] = word;
19215 }
19216
19217 if (n_words == 1)
19218 emit_move_insn (target, gen_lowpart (mode, words[0]));
19219 else if (n_words == 2)
19220 {
19221 rtx tmp = gen_reg_rtx (mode);
19222 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
19223 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
19224 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
19225 emit_move_insn (target, tmp);
19226 }
19227 else if (n_words == 4)
19228 {
19229 rtx tmp = gen_reg_rtx (V4SImode);
19230 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
19231 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
19232 emit_move_insn (target, gen_lowpart (mode, tmp));
19233 }
19234 else
19235 gcc_unreachable ();
19236 }
19237}
19238
19239/* Initialize vector TARGET via VALS. Suppress the use of MMX
19240 instructions unless MMX_OK is true. */
19241
19242void
19243ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
19244{
19245 enum machine_mode mode = GET_MODE (target);
19246 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19247 int n_elts = GET_MODE_NUNITS (mode);
19248 int n_var = 0, one_var = -1;
19249 bool all_same = true, all_const_zero = true;
19250 int i;
19251 rtx x;
19252
19253 for (i = 0; i < n_elts; ++i)
19254 {
19255 x = XVECEXP (vals, 0, i);
19256 if (!CONSTANT_P (x))
19257 n_var++, one_var = i;
19258 else if (x != CONST0_RTX (inner_mode))
19259 all_const_zero = false;
19260 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
19261 all_same = false;
19262 }
19263
19264 /* Constants are best loaded from the constant pool. */
19265 if (n_var == 0)
19266 {
19267 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
19268 return;
19269 }
19270
19271 /* If all values are identical, broadcast the value. */
19272 if (all_same
19273 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
19274 XVECEXP (vals, 0, 0)))
19275 return;
19276
19277 /* Values where only one field is non-constant are best loaded from
19278 the pool and overwritten via move later. */
19279 if (n_var == 1)
19280 {
19281 if (all_const_zero
19282 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
19283 XVECEXP (vals, 0, one_var),
19284 one_var))
19285 return;
19286
19287 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
19288 return;
19289 }
19290
19291 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
19292}
19293
19294void
19295ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
19296{
19297 enum machine_mode mode = GET_MODE (target);
19298 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19299 bool use_vec_merge = false;
19300 rtx tmp;
19301
19302 switch (mode)
19303 {
19304 case V2SFmode:
19305 case V2SImode:
19306 if (mmx_ok)
19307 {
19308 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
19309 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
19310 if (elt == 0)
19311 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
19312 else
19313 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
19314 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19315 return;
19316 }
19317 break;
19318
19319 case V2DFmode:
19320 case V2DImode:
19321 {
19322 rtx op0, op1;
19323
19324 /* For the two element vectors, we implement a VEC_CONCAT with
19325 the extraction of the other element. */
19326
19327 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
19328 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
19329
19330 if (elt == 0)
19331 op0 = val, op1 = tmp;
19332 else
19333 op0 = tmp, op1 = val;
19334
19335 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
19336 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19337 }
19338 return;
19339
19340 case V4SFmode:
19341 switch (elt)
19342 {
19343 case 0:
19344 use_vec_merge = true;
19345 break;
19346
19347 case 1:
19348 /* tmp = target = A B C D */
19349 tmp = copy_to_reg (target);
19350 /* target = A A B B */
19351 emit_insn (gen_sse_unpcklps (target, target, target));
19352 /* target = X A B B */
19353 ix86_expand_vector_set (false, target, val, 0);
19354 /* target = A X C D */
19355 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19356 GEN_INT (1), GEN_INT (0),
19357 GEN_INT (2+4), GEN_INT (3+4)));
19358 return;
19359
19360 case 2:
19361 /* tmp = target = A B C D */
19362 tmp = copy_to_reg (target);
19363 /* tmp = X B C D */
19364 ix86_expand_vector_set (false, tmp, val, 0);
19365 /* target = A B X D */
19366 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19367 GEN_INT (0), GEN_INT (1),
19368 GEN_INT (0+4), GEN_INT (3+4)));
19369 return;
19370
19371 case 3:
19372 /* tmp = target = A B C D */
19373 tmp = copy_to_reg (target);
19374 /* tmp = X B C D */
19375 ix86_expand_vector_set (false, tmp, val, 0);
19376 /* target = A B C X */
19377 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19378 GEN_INT (0), GEN_INT (1),
19379 GEN_INT (2+4), GEN_INT (0+4)));
19380 return;
19381
19382 default:
19383 gcc_unreachable ();
19384 }
19385 break;
19386
19387 case V4SImode:
19388 /* Element 0 handled by vec_merge below. */
19389 if (elt == 0)
19390 {
19391 use_vec_merge = true;
19392 break;
19393 }
19394
19395 if (TARGET_SSE2)
19396 {
19397 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19398 store into element 0, then shuffle them back. */
19399
19400 rtx order[4];
19401
19402 order[0] = GEN_INT (elt);
19403 order[1] = const1_rtx;
19404 order[2] = const2_rtx;
19405 order[3] = GEN_INT (3);
19406 order[elt] = const0_rtx;
19407
19408 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19409 order[1], order[2], order[3]));
19410
19411 ix86_expand_vector_set (false, target, val, 0);
19412
19413 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19414 order[1], order[2], order[3]));
19415 }
19416 else
19417 {
19418 /* For SSE1, we have to reuse the V4SF code. */
19419 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19420 gen_lowpart (SFmode, val), elt);
19421 }
19422 return;
19423
19424 case V8HImode:
19425 use_vec_merge = TARGET_SSE2;
19426 break;
19427 case V4HImode:
19428 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19429 break;
19430
19431 case V16QImode:
19432 case V8QImode:
19433 default:
19434 break;
19435 }
19436
19437 if (use_vec_merge)
19438 {
19439 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19440 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19441 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19442 }
19443 else
19444 {
19445 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19446
19447 emit_move_insn (mem, target);
19448
19449 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19450 emit_move_insn (tmp, val);
19451
19452 emit_move_insn (target, mem);
19453 }
19454}
19455
19456void
19457ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19458{
19459 enum machine_mode mode = GET_MODE (vec);
19460 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19461 bool use_vec_extr = false;
19462 rtx tmp;
19463
19464 switch (mode)
19465 {
19466 case V2SImode:
19467 case V2SFmode:
19468 if (!mmx_ok)
19469 break;
19470 /* FALLTHRU */
19471
19472 case V2DFmode:
19473 case V2DImode:
19474 use_vec_extr = true;
19475 break;
19476
19477 case V4SFmode:
19478 switch (elt)
19479 {
19480 case 0:
19481 tmp = vec;
19482 break;
19483
19484 case 1:
19485 case 3:
19486 tmp = gen_reg_rtx (mode);
19487 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19488 GEN_INT (elt), GEN_INT (elt),
19489 GEN_INT (elt+4), GEN_INT (elt+4)));
19490 break;
19491
19492 case 2:
19493 tmp = gen_reg_rtx (mode);
19494 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19495 break;
19496
19497 default:
19498 gcc_unreachable ();
19499 }
19500 vec = tmp;
19501 use_vec_extr = true;
19502 elt = 0;
19503 break;
19504
19505 case V4SImode:
19506 if (TARGET_SSE2)
19507 {
19508 switch (elt)
19509 {
19510 case 0:
19511 tmp = vec;
19512 break;
19513
19514 case 1:
19515 case 3:
19516 tmp = gen_reg_rtx (mode);
19517 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19518 GEN_INT (elt), GEN_INT (elt),
19519 GEN_INT (elt), GEN_INT (elt)));
19520 break;
19521
19522 case 2:
19523 tmp = gen_reg_rtx (mode);
19524 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19525 break;
19526
19527 default:
19528 gcc_unreachable ();
19529 }
19530 vec = tmp;
19531 use_vec_extr = true;
19532 elt = 0;
19533 }
19534 else
19535 {
19536 /* For SSE1, we have to reuse the V4SF code. */
19537 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19538 gen_lowpart (V4SFmode, vec), elt);
19539 return;
19540 }
19541 break;
19542
19543 case V8HImode:
19544 use_vec_extr = TARGET_SSE2;
19545 break;
19546 case V4HImode:
19547 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19548 break;
19549
19550 case V16QImode:
19551 case V8QImode:
19552 /* ??? Could extract the appropriate HImode element and shift. */
19553 default:
19554 break;
19555 }
19556
19557 if (use_vec_extr)
19558 {
19559 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19560 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19561
19562 /* Let the rtl optimizers know about the zero extension performed. */
19563 if (inner_mode == HImode)
19564 {
19565 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19566 target = gen_lowpart (SImode, target);
19567 }
19568
19569 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19570 }
19571 else
19572 {
19573 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19574
19575 emit_move_insn (mem, vec);
19576
19577 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19578 emit_move_insn (target, tmp);
19579 }
19580}
19581
19582/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19583 pattern to reduce; DEST is the destination; IN is the input vector. */
19584
19585void
19586ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19587{
19588 rtx tmp1, tmp2, tmp3;
19589
19590 tmp1 = gen_reg_rtx (V4SFmode);
19591 tmp2 = gen_reg_rtx (V4SFmode);
19592 tmp3 = gen_reg_rtx (V4SFmode);
19593
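  /* Reduce pairwise: combine the high half of IN with its low half, then
     combine the two surviving partial results; the fully reduced value
     ends up in the low element of DEST.  */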
19594 emit_insn (gen_sse_movhlps (tmp1, in, in));
19595 emit_insn (fn (tmp2, tmp1, in));
19596
19597 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19598 GEN_INT (1), GEN_INT (1),
19599 GEN_INT (1+4), GEN_INT (1+4)));
19600 emit_insn (fn (dest, tmp2, tmp3));
19601}
19602
19603/* Target hook for scalar_mode_supported_p. */
19604static bool
19605ix86_scalar_mode_supported_p (enum machine_mode mode)
19606{
19607 if (DECIMAL_FLOAT_MODE_P (mode))
19608 return true;
19609 else
19610 return default_scalar_mode_supported_p (mode);
19611}
19612
19613/* Implements target hook vector_mode_supported_p. */
19614static bool
19615ix86_vector_mode_supported_p (enum machine_mode mode)
19616{
19617 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19618 return true;
19619 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19620 return true;
19621 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
19622 return true;
19623 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19624 return true;
19625 return false;
19626}
19627
19628/* Worker function for TARGET_MD_ASM_CLOBBERS.
19629
19630 We do this in the new i386 backend to maintain source compatibility
19631 with the old cc0-based compiler. */
19632
19633static tree
19634ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19635 tree inputs ATTRIBUTE_UNUSED,
19636 tree clobbers)
19637{
19638 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19639 clobbers);
19640 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19641 clobbers);
19642 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19643 clobbers);
19644 return clobbers;
19645}
19646
19647/* Return true if this goes in large data/bss. */
19648
19649static bool
19650ix86_in_large_data_p (tree exp)
19651{
19652 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19653 return false;
19654
19655 /* Functions are never large data. */
19656 if (TREE_CODE (exp) == FUNCTION_DECL)
19657 return false;
19658
19659 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19660 {
19661 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19662 if (strcmp (section, ".ldata") == 0
19663 || strcmp (section, ".lbss") == 0)
19664 return true;
19665 return false;
19666 }
19667 else
19668 {
19669 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19670
19671 /* If this is an incomplete type with size 0, then we can't put it
19672 in data because it might be too big when completed. */
19673 if (!size || size > ix86_section_threshold)
19674 return true;
19675 }
19676
19677 return false;
19678}
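
/* Worker for TARGET_ENCODE_SECTION_INFO.  On top of the default handling,
   mark static and external variables placed in the large data sections so
   that references to them use far addressing.  */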
19679static void
19680ix86_encode_section_info (tree decl, rtx rtl, int first)
19681{
19682 default_encode_section_info (decl, rtl, first);
19683
19684 if (TREE_CODE (decl) == VAR_DECL
19685 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19686 && ix86_in_large_data_p (decl))
19687 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19688}
19689
19690/* Worker function for REVERSE_CONDITION. */
19691
19692enum rtx_code
19693ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19694{
19695 return (mode != CCFPmode && mode != CCFPUmode
19696 ? reverse_condition (code)
19697 : reverse_condition_maybe_unordered (code));
19698}
19699
19700/* Output code to perform an x87 FP register move, from OPERANDS[1]
19701 to OPERANDS[0]. */
19702
19703const char *
19704output_387_reg_move (rtx insn, rtx *operands)
19705{
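  /* If the source register dies in this insn, pop it off the x87 stack
     instead of copying it.  */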
19706 if (REG_P (operands[1])
19707 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19708 {
19709 if (REGNO (operands[0]) == FIRST_STACK_REG)
19710 return output_387_ffreep (operands, 0);
19711 return "fstp\t%y0";
19712 }
19713 if (STACK_TOP_P (operands[0]))
19714 return "fld%z1\t%y1";
19715 return "fst\t%y0";
19716}
19717
19718/* Output code to perform a conditional jump to LABEL, if the C2 flag in
19719 the FP status register is set. */
19720
19721void
19722ix86_emit_fp_unordered_jump (rtx label)
19723{
19724 rtx reg = gen_reg_rtx (HImode);
19725 rtx temp;
19726
19727 emit_insn (gen_x86_fnstsw_1 (reg));
19728
19729 if (TARGET_USE_SAHF)
19730 {
19731 emit_insn (gen_x86_sahf_1 (reg));
19732
19733 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19734 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19735 }
19736 else
19737 {
19738 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19739
19740 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19741 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19742 }
19743
19744 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19745 gen_rtx_LABEL_REF (VOIDmode, label),
19746 pc_rtx);
19747 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19748 emit_jump_insn (temp);
19749}
19750
19751/* Output code to perform a log1p XFmode calculation. */
19752
19753void ix86_emit_i387_log1p (rtx op0, rtx op1)
19754{
19755 rtx label1 = gen_label_rtx ();
19756 rtx label2 = gen_label_rtx ();
19757
19758 rtx tmp = gen_reg_rtx (XFmode);
19759 rtx tmp2 = gen_reg_rtx (XFmode);
19760
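  /* fyl2xp1 is only valid for |op1| smaller than 1 - sqrt(2)/2 (about
     0.29289); for larger inputs compute ln2 * log2 (1 + op1) instead.  */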
19761 emit_insn (gen_absxf2 (tmp, op1));
19762 emit_insn (gen_cmpxf (tmp,
19763 CONST_DOUBLE_FROM_REAL_VALUE (
19764 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19765 XFmode)));
19766 emit_jump_insn (gen_bge (label1));
19767
19768 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19769 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19770 emit_jump (label2);
19771
19772 emit_label (label1);
19773 emit_move_insn (tmp, CONST1_RTX (XFmode));
19774 emit_insn (gen_addxf3 (tmp, op1, tmp));
19775 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19776 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19777
19778 emit_label (label2);
19779}
19780
19781/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19782
19783static void
19784i386_solaris_elf_named_section (const char *name, unsigned int flags,
19785 tree decl)
19786{
19787 /* With Binutils 2.15, the "@unwind" marker must be specified on
19788 every occurrence of the ".eh_frame" section, not just the first
19789 one. */
19790 if (TARGET_64BIT
19791 && strcmp (name, ".eh_frame") == 0)
19792 {
19793 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19794 flags & SECTION_WRITE ? "aw" : "a");
19795 return;
19796 }
19797 default_elf_asm_named_section (name, flags, decl);
19798}
19799
19800/* Return the mangling of TYPE if it is an extended fundamental type. */
19801
19802static const char *
19803ix86_mangle_fundamental_type (tree type)
19804{
19805 switch (TYPE_MODE (type))
19806 {
19807 case TFmode:
19808 /* __float128 is "g". */
19809 return "g";
19810 case XFmode:
19811 /* "long double" or __float80 is "e". */
19812 return "e";
19813 default:
19814 return NULL;
19815 }
19816}
19817
19818/* For 32-bit code we can save PIC register setup by using the
19819 __stack_chk_fail_local hidden function instead of calling
19820 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
19821 register, so it is better to call __stack_chk_fail directly. */
19822
19823static tree
19824ix86_stack_protect_fail (void)
19825{
19826 return TARGET_64BIT
19827 ? default_external_stack_protect_fail ()
19828 : default_hidden_stack_protect_fail ();
19829}
19830
19831/* Select a format to encode pointers in exception handling data. CODE
19832 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19833 true if the symbol may be affected by dynamic relocations.
19834
19835 ??? All x86 object file formats are capable of representing this.
19836 After all, the relocation needed is the same as for the call insn.
19837 Whether or not a particular assembler allows us to enter such, I
19838 guess we'll have to see. */
19839int
19840asm_preferred_eh_data_format (int code, int global)
19841{
19842 if (flag_pic)
19843 {
19844 int type = DW_EH_PE_sdata8;
19845 if (!TARGET_64BIT
19846 || ix86_cmodel == CM_SMALL_PIC
19847 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19848 type = DW_EH_PE_sdata4;
19849 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19850 }
19851 if (ix86_cmodel == CM_SMALL
19852 || (ix86_cmodel == CM_MEDIUM && code))
19853 return DW_EH_PE_udata4;
19854 return DW_EH_PE_absptr;
19855}
19856
19857#include "gt-i386.h"