i386.c (146908) i386.c (161660)
1/* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING. If not, write to
19the Free Software Foundation, 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
21
22
23/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 146908 2005-06-03 04:02:20Z kan $ */
23/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 161660 2006-08-26 21:37:21Z kan $ */
24
25
26#include "config.h"
27#include "system.h"
28#include "coretypes.h"
29#include "tm.h"
30#include "rtl.h"
31#include "tree.h"
32#include "tm_p.h"
33#include "regs.h"
34#include "hard-reg-set.h"
35#include "real.h"
36#include "insn-config.h"
37#include "conditions.h"
38#include "output.h"
39#include "insn-attr.h"
40#include "flags.h"
41#include "except.h"
42#include "function.h"
43#include "recog.h"
44#include "expr.h"
45#include "optabs.h"
46#include "toplev.h"
47#include "basic-block.h"
48#include "ggc.h"
49#include "target.h"
50#include "target-def.h"
51#include "langhooks.h"
52#include "cgraph.h"
53
54#ifndef CHECK_STACK_LIMIT
55#define CHECK_STACK_LIMIT (-1)
56#endif
57
58/* Return index of given mode in mult and division cost tables. */
59#define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
64 : 4)
65
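/* Illustrative sketch, not part of the original file: how a five-entry cost
   row is typically read through MODE_INDEX.  The struct tag and the field
   name "mult_init" are assumptions about the processor_costs layout declared
   in i386.h; only MODE_INDEX itself comes from above.  MODE_INDEX (SImode)
   expands to 2, so the SImode column is the third entry of a row, and any
   mode wider than DImode falls into the catch-all index 4.  */
static int
example_mult_init_cost (const struct processor_costs *cost,
                        enum machine_mode mode)
{
  return cost->mult_init[MODE_INDEX (mode)];
}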
66/* Processor costs (relative to an add) */
67static const
 68struct processor_costs size_cost = {	/* costs for tuning for size */
69 2, /* cost of an add instruction */
70 3, /* cost of a lea instruction */
71 2, /* variable shift costs */
72 3, /* constant shift costs */
73 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
74 0, /* cost of multiply per each bit set */
75 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
76 3, /* cost of movsx */
77 3, /* cost of movzx */
78 0, /* "large" insn */
79 2, /* MOVE_RATIO */
80 2, /* cost for loading QImode using movzbl */
81 {2, 2, 2}, /* cost of loading integer registers
82 in QImode, HImode and SImode.
83 Relative to reg-reg move (2). */
84 {2, 2, 2}, /* cost of storing integer registers */
85 2, /* cost of reg,reg fld/fst */
86 {2, 2, 2}, /* cost of loading fp registers
87 in SFmode, DFmode and XFmode */
 88 {2, 2, 2},				/* cost of storing fp registers */
89 3, /* cost of moving MMX register */
90 {3, 3}, /* cost of loading MMX registers
91 in SImode and DImode */
92 {3, 3}, /* cost of storing MMX registers
93 in SImode and DImode */
94 3, /* cost of moving SSE register */
95 {3, 3, 3}, /* cost of loading SSE registers
96 in SImode, DImode and TImode */
97 {3, 3, 3}, /* cost of storing SSE registers
98 in SImode, DImode and TImode */
99 3, /* MMX or SSE register to integer */
100 0, /* size of prefetch block */
101 0, /* number of parallel prefetches */
102 1, /* Branch cost */
103 2, /* cost of FADD and FSUB insns. */
104 2, /* cost of FMUL instruction. */
105 2, /* cost of FDIV instruction. */
106 2, /* cost of FABS instruction. */
107 2, /* cost of FCHS instruction. */
108 2, /* cost of FSQRT instruction. */
109};
110
111/* Processor costs (relative to an add) */
112static const
113struct processor_costs i386_cost = { /* 386 specific costs */
114 1, /* cost of an add instruction */
115 1, /* cost of a lea instruction */
116 3, /* variable shift costs */
117 2, /* constant shift costs */
118 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
119 1, /* cost of multiply per each bit set */
120 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
121 3, /* cost of movsx */
122 2, /* cost of movzx */
123 15, /* "large" insn */
124 3, /* MOVE_RATIO */
125 4, /* cost for loading QImode using movzbl */
126 {2, 4, 2}, /* cost of loading integer registers
127 in QImode, HImode and SImode.
128 Relative to reg-reg move (2). */
129 {2, 4, 2}, /* cost of storing integer registers */
130 2, /* cost of reg,reg fld/fst */
131 {8, 8, 8}, /* cost of loading fp registers
132 in SFmode, DFmode and XFmode */
 133 {8, 8, 8},				/* cost of storing fp registers */
134 2, /* cost of moving MMX register */
135 {4, 8}, /* cost of loading MMX registers
136 in SImode and DImode */
137 {4, 8}, /* cost of storing MMX registers
138 in SImode and DImode */
139 2, /* cost of moving SSE register */
140 {4, 8, 16}, /* cost of loading SSE registers
141 in SImode, DImode and TImode */
142 {4, 8, 16}, /* cost of storing SSE registers
143 in SImode, DImode and TImode */
144 3, /* MMX or SSE register to integer */
145 0, /* size of prefetch block */
146 0, /* number of parallel prefetches */
147 1, /* Branch cost */
148 23, /* cost of FADD and FSUB insns. */
149 27, /* cost of FMUL instruction. */
150 88, /* cost of FDIV instruction. */
151 22, /* cost of FABS instruction. */
152 24, /* cost of FCHS instruction. */
153 122, /* cost of FSQRT instruction. */
154};
155
156static const
157struct processor_costs i486_cost = { /* 486 specific costs */
158 1, /* cost of an add instruction */
159 1, /* cost of a lea instruction */
160 3, /* variable shift costs */
161 2, /* constant shift costs */
162 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
163 1, /* cost of multiply per each bit set */
164 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
165 3, /* cost of movsx */
166 2, /* cost of movzx */
167 15, /* "large" insn */
168 3, /* MOVE_RATIO */
169 4, /* cost for loading QImode using movzbl */
170 {2, 4, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 4, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {8, 8, 8}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
 177 {8, 8, 8},				/* cost of storing fp registers */
178 2, /* cost of moving MMX register */
179 {4, 8}, /* cost of loading MMX registers
180 in SImode and DImode */
181 {4, 8}, /* cost of storing MMX registers
182 in SImode and DImode */
183 2, /* cost of moving SSE register */
184 {4, 8, 16}, /* cost of loading SSE registers
185 in SImode, DImode and TImode */
186 {4, 8, 16}, /* cost of storing SSE registers
187 in SImode, DImode and TImode */
188 3, /* MMX or SSE register to integer */
189 0, /* size of prefetch block */
190 0, /* number of parallel prefetches */
191 1, /* Branch cost */
192 8, /* cost of FADD and FSUB insns. */
193 16, /* cost of FMUL instruction. */
194 73, /* cost of FDIV instruction. */
195 3, /* cost of FABS instruction. */
196 3, /* cost of FCHS instruction. */
197 83, /* cost of FSQRT instruction. */
198};
199
200static const
201struct processor_costs pentium_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 4, /* variable shift costs */
205 1, /* constant shift costs */
206 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
209 3, /* cost of movsx */
210 2, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 6, /* cost for loading QImode using movzbl */
214 {2, 4, 2}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 4, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
 221 {4, 4, 6},				/* cost of storing fp registers */
222 8, /* cost of moving MMX register */
223 {8, 8}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {8, 8}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {4, 8, 16}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {4, 8, 16}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 0, /* size of prefetch block */
234 0, /* number of parallel prefetches */
235 2, /* Branch cost */
236 3, /* cost of FADD and FSUB insns. */
237 3, /* cost of FMUL instruction. */
238 39, /* cost of FDIV instruction. */
239 1, /* cost of FABS instruction. */
240 1, /* cost of FCHS instruction. */
241 70, /* cost of FSQRT instruction. */
242};
243
244static const
245struct processor_costs pentiumpro_cost = {
246 1, /* cost of an add instruction */
247 1, /* cost of a lea instruction */
248 1, /* variable shift costs */
249 1, /* constant shift costs */
250 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
251 0, /* cost of multiply per each bit set */
252 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
253 1, /* cost of movsx */
254 1, /* cost of movzx */
255 8, /* "large" insn */
256 6, /* MOVE_RATIO */
257 2, /* cost for loading QImode using movzbl */
258 {4, 4, 4}, /* cost of loading integer registers
259 in QImode, HImode and SImode.
260 Relative to reg-reg move (2). */
261 {2, 2, 2}, /* cost of storing integer registers */
262 2, /* cost of reg,reg fld/fst */
263 {2, 2, 6}, /* cost of loading fp registers
264 in SFmode, DFmode and XFmode */
 265 {4, 4, 6},				/* cost of storing fp registers */
266 2, /* cost of moving MMX register */
267 {2, 2}, /* cost of loading MMX registers
268 in SImode and DImode */
269 {2, 2}, /* cost of storing MMX registers
270 in SImode and DImode */
271 2, /* cost of moving SSE register */
272 {2, 2, 8}, /* cost of loading SSE registers
273 in SImode, DImode and TImode */
274 {2, 2, 8}, /* cost of storing SSE registers
275 in SImode, DImode and TImode */
276 3, /* MMX or SSE register to integer */
277 32, /* size of prefetch block */
278 6, /* number of parallel prefetches */
279 2, /* Branch cost */
280 3, /* cost of FADD and FSUB insns. */
281 5, /* cost of FMUL instruction. */
282 56, /* cost of FDIV instruction. */
283 2, /* cost of FABS instruction. */
284 2, /* cost of FCHS instruction. */
285 56, /* cost of FSQRT instruction. */
286};
287
288static const
289struct processor_costs k6_cost = {
290 1, /* cost of an add instruction */
291 2, /* cost of a lea instruction */
292 1, /* variable shift costs */
293 1, /* constant shift costs */
294 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
295 0, /* cost of multiply per each bit set */
296 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
297 2, /* cost of movsx */
298 2, /* cost of movzx */
299 8, /* "large" insn */
300 4, /* MOVE_RATIO */
301 3, /* cost for loading QImode using movzbl */
302 {4, 5, 4}, /* cost of loading integer registers
303 in QImode, HImode and SImode.
304 Relative to reg-reg move (2). */
305 {2, 3, 2}, /* cost of storing integer registers */
306 4, /* cost of reg,reg fld/fst */
307 {6, 6, 6}, /* cost of loading fp registers
308 in SFmode, DFmode and XFmode */
 309 {4, 4, 4},				/* cost of storing fp registers */
310 2, /* cost of moving MMX register */
311 {2, 2}, /* cost of loading MMX registers
312 in SImode and DImode */
313 {2, 2}, /* cost of storing MMX registers
314 in SImode and DImode */
315 2, /* cost of moving SSE register */
316 {2, 2, 8}, /* cost of loading SSE registers
317 in SImode, DImode and TImode */
318 {2, 2, 8}, /* cost of storing SSE registers
319 in SImode, DImode and TImode */
320 6, /* MMX or SSE register to integer */
321 32, /* size of prefetch block */
322 1, /* number of parallel prefetches */
323 1, /* Branch cost */
324 2, /* cost of FADD and FSUB insns. */
325 2, /* cost of FMUL instruction. */
326 56, /* cost of FDIV instruction. */
327 2, /* cost of FABS instruction. */
328 2, /* cost of FCHS instruction. */
329 56, /* cost of FSQRT instruction. */
330};
331
332static const
333struct processor_costs athlon_cost = {
334 1, /* cost of an add instruction */
335 2, /* cost of a lea instruction */
336 1, /* variable shift costs */
337 1, /* constant shift costs */
338 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
339 0, /* cost of multiply per each bit set */
340 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
341 1, /* cost of movsx */
342 1, /* cost of movzx */
343 8, /* "large" insn */
344 9, /* MOVE_RATIO */
345 4, /* cost for loading QImode using movzbl */
346 {3, 4, 3}, /* cost of loading integer registers
347 in QImode, HImode and SImode.
348 Relative to reg-reg move (2). */
349 {3, 4, 3}, /* cost of storing integer registers */
350 4, /* cost of reg,reg fld/fst */
351 {4, 4, 12}, /* cost of loading fp registers
352 in SFmode, DFmode and XFmode */
 353 {6, 6, 8},				/* cost of storing fp registers */
354 2, /* cost of moving MMX register */
355 {4, 4}, /* cost of loading MMX registers
356 in SImode and DImode */
357 {4, 4}, /* cost of storing MMX registers
358 in SImode and DImode */
359 2, /* cost of moving SSE register */
360 {4, 4, 6}, /* cost of loading SSE registers
361 in SImode, DImode and TImode */
362 {4, 4, 5}, /* cost of storing SSE registers
363 in SImode, DImode and TImode */
364 5, /* MMX or SSE register to integer */
365 64, /* size of prefetch block */
366 6, /* number of parallel prefetches */
367 2, /* Branch cost */
368 4, /* cost of FADD and FSUB insns. */
369 4, /* cost of FMUL instruction. */
370 24, /* cost of FDIV instruction. */
371 2, /* cost of FABS instruction. */
372 2, /* cost of FCHS instruction. */
373 35, /* cost of FSQRT instruction. */
374};
375
376static const
377struct processor_costs k8_cost = {
378 1, /* cost of an add instruction */
379 2, /* cost of a lea instruction */
380 1, /* variable shift costs */
381 1, /* constant shift costs */
382 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
383 0, /* cost of multiply per each bit set */
384 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
385 1, /* cost of movsx */
386 1, /* cost of movzx */
387 8, /* "large" insn */
388 9, /* MOVE_RATIO */
389 4, /* cost for loading QImode using movzbl */
390 {3, 4, 3}, /* cost of loading integer registers
391 in QImode, HImode and SImode.
392 Relative to reg-reg move (2). */
393 {3, 4, 3}, /* cost of storing integer registers */
394 4, /* cost of reg,reg fld/fst */
395 {4, 4, 12}, /* cost of loading fp registers
396 in SFmode, DFmode and XFmode */
 397 {6, 6, 8},				/* cost of storing fp registers */
398 2, /* cost of moving MMX register */
399 {3, 3}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {4, 4}, /* cost of storing MMX registers
402 in SImode and DImode */
403 2, /* cost of moving SSE register */
404 {4, 3, 6}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {4, 4, 5}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 5, /* MMX or SSE register to integer */
409 64, /* size of prefetch block */
410 6, /* number of parallel prefetches */
411 2, /* Branch cost */
412 4, /* cost of FADD and FSUB insns. */
413 4, /* cost of FMUL instruction. */
414 19, /* cost of FDIV instruction. */
415 2, /* cost of FABS instruction. */
416 2, /* cost of FCHS instruction. */
417 35, /* cost of FSQRT instruction. */
418};
419
420static const
421struct processor_costs pentium4_cost = {
422 1, /* cost of an add instruction */
423 1, /* cost of a lea instruction */
424 4, /* variable shift costs */
425 4, /* constant shift costs */
426 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
427 0, /* cost of multiply per each bit set */
428 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
429 1, /* cost of movsx */
430 1, /* cost of movzx */
431 16, /* "large" insn */
432 6, /* MOVE_RATIO */
433 2, /* cost for loading QImode using movzbl */
434 {4, 5, 4}, /* cost of loading integer registers
435 in QImode, HImode and SImode.
436 Relative to reg-reg move (2). */
437 {2, 3, 2}, /* cost of storing integer registers */
438 2, /* cost of reg,reg fld/fst */
439 {2, 2, 6}, /* cost of loading fp registers
440 in SFmode, DFmode and XFmode */
 441 {4, 4, 6},				/* cost of storing fp registers */
442 2, /* cost of moving MMX register */
443 {2, 2}, /* cost of loading MMX registers
444 in SImode and DImode */
445 {2, 2}, /* cost of storing MMX registers
446 in SImode and DImode */
447 12, /* cost of moving SSE register */
448 {12, 12, 12}, /* cost of loading SSE registers
449 in SImode, DImode and TImode */
450 {2, 2, 8}, /* cost of storing SSE registers
451 in SImode, DImode and TImode */
452 10, /* MMX or SSE register to integer */
453 64, /* size of prefetch block */
454 6, /* number of parallel prefetches */
455 2, /* Branch cost */
456 5, /* cost of FADD and FSUB insns. */
457 7, /* cost of FMUL instruction. */
458 43, /* cost of FDIV instruction. */
459 2, /* cost of FABS instruction. */
460 2, /* cost of FCHS instruction. */
461 43, /* cost of FSQRT instruction. */
462};
463
464const struct processor_costs *ix86_cost = &pentium_cost;
465
466/* Processor feature/optimization bitmasks. */
467#define m_386 (1<<PROCESSOR_I386)
468#define m_486 (1<<PROCESSOR_I486)
469#define m_PENT (1<<PROCESSOR_PENTIUM)
470#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
471#define m_K6 (1<<PROCESSOR_K6)
472#define m_ATHLON (1<<PROCESSOR_ATHLON)
473#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
474#define m_K8 (1<<PROCESSOR_K8)
475#define m_ATHLON_K8 (m_K8 | m_ATHLON)
476
477const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
478const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
479const int x86_zero_extend_with_and = m_486 | m_PENT;
480const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
481const int x86_double_with_add = ~m_386;
482const int x86_use_bit_test = m_386;
483const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
484const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
485const int x86_3dnow_a = m_ATHLON_K8;
486const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
487const int x86_branch_hints = m_PENT4;
488const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
489const int x86_partial_reg_stall = m_PPRO;
490const int x86_use_loop = m_K6;
491const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
492const int x86_use_mov0 = m_K6;
493const int x86_use_cltd = ~(m_PENT | m_K6);
494const int x86_read_modify_write = ~m_PENT;
495const int x86_read_modify = ~(m_PENT | m_PPRO);
496const int x86_split_long_moves = m_PPRO;
497const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
498const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
499const int x86_single_stringop = m_386 | m_PENT4;
500const int x86_qimode_math = ~(0);
501const int x86_promote_qi_regs = 0;
502const int x86_himode_math = ~(m_PPRO);
503const int x86_promote_hi_regs = m_PPRO;
504const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
505const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
506const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
507const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
508const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
509const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
510const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
511const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
512const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
513const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
514const int x86_decompose_lea = m_PENT4;
515const int x86_shift1 = ~m_486;
516const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
517const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
518/* Set for machines where the type and dependencies are resolved on SSE register
519 parts instead of whole registers, so we may maintain just lower part of
520 scalar values in proper format leaving the upper part undefined. */
521const int x86_sse_partial_regs = m_ATHLON_K8;
 522/* Athlon optimizes the partial-register FPS special case, thus avoiding the
 523 need for extra instructions beforehand. */
524const int x86_sse_partial_regs_for_cvtsd2ss = 0;
525const int x86_sse_typeless_stores = m_ATHLON_K8;
526const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
527const int x86_use_ffreep = m_ATHLON_K8;
528const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
529
530/* ??? HACK! The following is a lie. SSE can hold e.g. SImode, and
531 indeed *must* be able to hold SImode so that SSE2 shifts are able
532 to work right. But this can result in some mighty surprising
533 register allocation when building kernels. Turning this off should
 534 make us less likely to suddenly select an SSE register. */
535const int x86_inter_unit_moves = 0; /* ~(m_ATHLON_K8) */
536
537const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
538
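/* Illustrative sketch, not part of the original file: these masks are
   normally consumed through TARGET_* macros in i386.h that test the bit of
   the processor selected by -mtune.  A hypothetical equivalent test looks
   like this; the function name is made up for exposition.  */
static int
example_tuning_flag_set (int feature_mask, enum processor_type cpu)
{
  /* E.g. example_tuning_flag_set (x86_use_leave, PROCESSOR_K6) is nonzero,
     since m_K6 is part of x86_use_leave above.  */
  return (feature_mask & (1 << cpu)) != 0;
}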
 539/* In case the average insn count for a single function invocation is
540 lower than this constant, emit fast (but longer) prologue and
541 epilogue code. */
542#define FAST_PROLOGUE_INSN_COUNT 20
543
 544/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
545static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
546static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
547static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
548
549/* Array of the smallest class containing reg number REGNO, indexed by
550 REGNO. Used by REGNO_REG_CLASS in i386.h. */
551
552enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
553{
554 /* ax, dx, cx, bx */
555 AREG, DREG, CREG, BREG,
556 /* si, di, bp, sp */
557 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
558 /* FP registers */
559 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
560 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
561 /* arg pointer */
562 NON_Q_REGS,
563 /* flags, fpsr, dirflag, frame */
564 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
565 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
566 SSE_REGS, SSE_REGS,
567 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
568 MMX_REGS, MMX_REGS,
569 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
570 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
571 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
572 SSE_REGS, SSE_REGS,
573};
574
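/* Illustrative sketch, not part of the original file: REGNO_REG_CLASS in
   i386.h is assumed to index this table directly, so hard register 0 (%eax)
   maps to AREG and register 7 (%esp) to NON_Q_REGS.  The function name is
   hypothetical.  */
static enum reg_class
example_regno_class (unsigned int regno)
{
  return regclass_map[regno];
}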
575/* The "default" register map used in 32bit mode. */
576
577int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
578{
579 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
580 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
581 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
582 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
583 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
584 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
585 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
586};
587
588static int const x86_64_int_parameter_registers[6] =
589{
590 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
591 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
592};
593
594static int const x86_64_int_return_registers[4] =
595{
 596 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
597};
598
599/* The "default" register map used in 64bit mode. */
600int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
601{
602 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
603 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
604 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
605 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
606 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
607 8,9,10,11,12,13,14,15, /* extended integer registers */
608 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
609};
610
611/* Define the register numbers to be used in Dwarf debugging information.
612 The SVR4 reference port C compiler uses the following register numbers
613 in its Dwarf output code:
614 0 for %eax (gcc regno = 0)
615 1 for %ecx (gcc regno = 2)
616 2 for %edx (gcc regno = 1)
617 3 for %ebx (gcc regno = 3)
618 4 for %esp (gcc regno = 7)
619 5 for %ebp (gcc regno = 6)
620 6 for %esi (gcc regno = 4)
621 7 for %edi (gcc regno = 5)
622 The following three DWARF register numbers are never generated by
623 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
624 believes these numbers have these meanings.
625 8 for %eip (no gcc equivalent)
626 9 for %eflags (gcc regno = 17)
627 10 for %trapno (no gcc equivalent)
628 It is not at all clear how we should number the FP stack registers
629 for the x86 architecture. If the version of SDB on x86/svr4 were
630 a bit less brain dead with respect to floating-point then we would
631 have a precedent to follow with respect to DWARF register numbers
632 for x86 FP registers, but the SDB on x86/svr4 is so completely
633 broken with respect to FP registers that it is hardly worth thinking
634 of it as something to strive for compatibility with.
635 The version of x86/svr4 SDB I have at the moment does (partially)
636 seem to believe that DWARF register number 11 is associated with
637 the x86 register %st(0), but that's about all. Higher DWARF
638 register numbers don't seem to be associated with anything in
639 particular, and even for DWARF regno 11, SDB only seems to under-
640 stand that it should say that a variable lives in %st(0) (when
641 asked via an `=' command) if we said it was in DWARF regno 11,
642 but SDB still prints garbage when asked for the value of the
643 variable in question (via a `/' command).
644 (Also note that the labels SDB prints for various FP stack regs
645 when doing an `x' command are all wrong.)
646 Note that these problems generally don't affect the native SVR4
647 C compiler because it doesn't allow the use of -O with -g and
648 because when it is *not* optimizing, it allocates a memory
649 location for each floating-point variable, and the memory
650 location is what gets described in the DWARF AT_location
651 attribute for the variable in question.
652 Regardless of the severe mental illness of the x86/svr4 SDB, we
653 do something sensible here and we use the following DWARF
654 register numbers. Note that these are all stack-top-relative
655 numbers.
656 11 for %st(0) (gcc regno = 8)
657 12 for %st(1) (gcc regno = 9)
658 13 for %st(2) (gcc regno = 10)
659 14 for %st(3) (gcc regno = 11)
660 15 for %st(4) (gcc regno = 12)
661 16 for %st(5) (gcc regno = 13)
662 17 for %st(6) (gcc regno = 14)
663 18 for %st(7) (gcc regno = 15)
664*/
665int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
666{
667 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
668 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
669 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
670 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
671 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
672 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
673 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
674};
675
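/* Illustrative sketch, not part of the original file: debug output is
   assumed to go through DBX_REGISTER_NUMBER in i386.h, which picks one of
   the maps above.  Reading the SVR4/DWARF map directly: gcc register 2
   (%ecx) is emitted as DWARF register 1, and gcc register 8 (%st(0)) as
   DWARF register 11, matching the numbering described in the comment above.  */
static int
example_svr4_dwarf_regno (unsigned int gcc_regno)
{
  return svr4_dbx_register_map[gcc_regno];
}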
676/* Test and compare insns in i386.md store the information needed to
677 generate branch and scc insns here. */
678
679rtx ix86_compare_op0 = NULL_RTX;
680rtx ix86_compare_op1 = NULL_RTX;
681
682#define MAX_386_STACK_LOCALS 3
683/* Size of the register save area. */
684#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
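/* Worked example (illustrative, assuming the 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8 from i386.h): the va_arg
   register save area is 6*8 + 8*16 = 176 bytes -- 48 bytes for the integer
   argument registers plus 128 bytes for the eight XMM registers.  */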
685
686/* Define the structure for the machine field in struct function. */
687
688struct stack_local_entry GTY(())
689{
690 unsigned short mode;
691 unsigned short n;
692 rtx rtl;
693 struct stack_local_entry *next;
694};
695
696/* Structure describing stack frame layout.
697 Stack grows downward:
698
699 [arguments]
700 <- ARG_POINTER
701 saved pc
702
703 saved frame pointer if frame_pointer_needed
704 <- HARD_FRAME_POINTER
705 [saved regs]
706
707 [padding1] \
708 )
709 [va_arg registers] (
710 > to_allocate <- FRAME_POINTER
711 [frame] (
712 )
713 [padding2] /
714 */
715struct ix86_frame
716{
717 int nregs;
718 int padding1;
719 int va_arg_size;
720 HOST_WIDE_INT frame;
721 int padding2;
722 int outgoing_arguments_size;
723 int red_zone_size;
724
725 HOST_WIDE_INT to_allocate;
726 /* The offsets relative to ARG_POINTER. */
727 HOST_WIDE_INT frame_pointer_offset;
728 HOST_WIDE_INT hard_frame_pointer_offset;
729 HOST_WIDE_INT stack_pointer_offset;
730
731 /* When save_regs_using_mov is set, emit prologue using
732 move instead of push instructions. */
733 bool save_regs_using_mov;
734};
735
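/* Illustrative sketch, not part of the original file: a hypothetical dump of
   a computed frame, just to show how the fields relate to the picture above.
   The exact arithmetic lives in ix86_compute_frame_layout further down.  */
static void
example_dump_frame (FILE *file, const struct ix86_frame *frame)
{
  fprintf (file, "%d saved regs, " HOST_WIDE_INT_PRINT_DEC " bytes to allocate\n",
           frame->nregs, frame->to_allocate);
  fprintf (file, "hard frame pointer at ARG_POINTER-" HOST_WIDE_INT_PRINT_DEC "\n",
           frame->hard_frame_pointer_offset);
}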
736/* Used to enable/disable debugging features. */
737const char *ix86_debug_arg_string, *ix86_debug_addr_string;
738/* Code model option as passed by user. */
739const char *ix86_cmodel_string;
740/* Parsed value. */
741enum cmodel ix86_cmodel;
742/* Asm dialect. */
743const char *ix86_asm_string;
744enum asm_dialect ix86_asm_dialect = ASM_ATT;
 745/* TLS dialect. */
746const char *ix86_tls_dialect_string;
747enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
748
749/* Which unit we are generating floating point math for. */
750enum fpmath_unit ix86_fpmath;
751
 752/* Which CPU we are scheduling for. */
753enum processor_type ix86_tune;
754/* Which instruction set architecture to use. */
755enum processor_type ix86_arch;
756
757/* Strings to hold which cpu and instruction set architecture to use. */
758const char *ix86_tune_string; /* for -mtune=<xxx> */
759const char *ix86_arch_string; /* for -march=<xxx> */
760const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
761
762/* # of registers to use to pass arguments. */
763const char *ix86_regparm_string;
764
 765/* True if the SSE prefetch instruction is not a NOP. */
766int x86_prefetch_sse;
767
768/* ix86_regparm_string as a number */
769int ix86_regparm;
770
771/* Alignment to use for loops and jumps: */
772
773/* Power of two alignment for loops. */
774const char *ix86_align_loops_string;
775
776/* Power of two alignment for non-loop jumps. */
777const char *ix86_align_jumps_string;
778
779/* Power of two alignment for stack boundary in bytes. */
780const char *ix86_preferred_stack_boundary_string;
781
782/* Preferred alignment for stack boundary in bits. */
783int ix86_preferred_stack_boundary;
784
785/* Values 1-5: see jump.c */
786int ix86_branch_cost;
787const char *ix86_branch_cost_string;
788
789/* Power of two alignment for functions. */
790const char *ix86_align_funcs_string;
791
792/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
793static char internal_label_prefix[16];
794static int internal_label_prefix_len;
795
796static int local_symbolic_operand (rtx, enum machine_mode);
797static int tls_symbolic_operand_1 (rtx, enum tls_model);
798static void output_pic_addr_const (FILE *, rtx, int);
799static void put_condition_code (enum rtx_code, enum machine_mode,
800 int, int, FILE *);
801static const char *get_some_local_dynamic_name (void);
802static int get_some_local_dynamic_name_1 (rtx *, void *);
803static rtx maybe_get_pool_constant (rtx);
804static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
805static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
806 rtx *);
807static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
808static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
809 enum machine_mode);
810static rtx get_thread_pointer (int);
811static rtx legitimize_tls_address (rtx, enum tls_model, int);
812static void get_pc_thunk_name (char [32], unsigned int);
813static rtx gen_push (rtx);
814static int memory_address_length (rtx addr);
815static int ix86_flags_dependant (rtx, rtx, enum attr_type);
816static int ix86_agi_dependant (rtx, rtx, enum attr_type);
817static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
818static void ix86_dump_ppro_packet (FILE *);
819static void ix86_reorder_insn (rtx *, rtx *);
820static struct machine_function * ix86_init_machine_status (void);
821static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
822static int ix86_nsaved_regs (void);
823static void ix86_emit_save_regs (void);
824static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
825static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
826static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
827static void ix86_sched_reorder_ppro (rtx *, rtx *);
828static HOST_WIDE_INT ix86_GOT_alias_set (void);
829static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
830static rtx ix86_expand_aligntest (rtx, int);
831static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
832static int ix86_issue_rate (void);
833static int ix86_adjust_cost (rtx, rtx, rtx, int);
834static void ix86_sched_init (FILE *, int, int);
835static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
836static int ix86_variable_issue (FILE *, int, rtx, int);
837static int ia32_use_dfa_pipeline_interface (void);
838static int ia32_multipass_dfa_lookahead (void);
839static void ix86_init_mmx_sse_builtins (void);
840static rtx x86_this_parameter (tree);
841static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
842 HOST_WIDE_INT, tree);
843static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
844static void x86_file_start (void);
845static void ix86_reorg (void);
846static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
847static tree ix86_build_builtin_va_list (void);
848
849struct ix86_address
850{
851 rtx base, index, disp;
852 HOST_WIDE_INT scale;
853 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
854};
855
856static int ix86_decompose_address (rtx, struct ix86_address *);
857static int ix86_address_cost (rtx);
858static bool ix86_cannot_force_const_mem (rtx);
859static rtx ix86_delegitimize_address (rtx);
860
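/* Illustrative sketch, not part of the original file: what a successful
   decomposition is expected to yield for an address like 12(%ebx,%esi,4),
   i.e. (plus (plus (mult (reg %esi) (const_int 4)) (reg %ebx)) (const_int 12)):
   base = %ebx, index = %esi, scale = 4, disp = (const_int 12), seg = SEG_DEFAULT.
   The return value is nonzero when the address fits the x86 addressing modes.
   The wrapper name is made up for exposition.  */
static int
example_decompose_address (rtx addr, struct ix86_address *parts)
{
  return ix86_decompose_address (addr, parts);
}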
861struct builtin_description;
862static rtx ix86_expand_sse_comi (const struct builtin_description *,
863 tree, rtx);
864static rtx ix86_expand_sse_compare (const struct builtin_description *,
865 tree, rtx);
866static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
867static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
868static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
869static rtx ix86_expand_store_builtin (enum insn_code, tree);
870static rtx safe_vector_operand (rtx, enum machine_mode);
871static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
872static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
873 enum rtx_code *, enum rtx_code *);
874static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
875static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
876static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
877static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
878static int ix86_fp_comparison_cost (enum rtx_code code);
879static unsigned int ix86_select_alt_pic_regnum (void);
880static int ix86_save_reg (unsigned int, int);
881static void ix86_compute_frame_layout (struct ix86_frame *);
882static int ix86_comp_type_attributes (tree, tree);
883static int ix86_function_regparm (tree, tree);
884const struct attribute_spec ix86_attribute_table[];
885static bool ix86_function_ok_for_sibcall (tree, tree);
886static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
887static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
888static int ix86_value_regno (enum machine_mode);
889static bool contains_128bit_aligned_vector_p (tree);
890static bool ix86_ms_bitfield_layout_p (tree);
891static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
892static int extended_reg_mentioned_1 (rtx *, void *);
893static bool ix86_rtx_costs (rtx, int, int, int *);
894static int min_insn_size (rtx);
895static void k8_avoid_jump_misspredicts (void);
896
897#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
898static void ix86_svr3_asm_out_constructor (rtx, int);
899#endif
900
 901/* Register class used for passing a given 64-bit part of the argument.
 902 These represent classes as documented by the psABI, with the exception of
 903 the SSESF and SSEDF classes, which are basically the SSE class: gcc just
 904 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
 905
 906 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
 907 whenever possible (the upper half of the eightbyte is just padding).
908 */
909enum x86_64_reg_class
910 {
911 X86_64_NO_CLASS,
912 X86_64_INTEGER_CLASS,
913 X86_64_INTEGERSI_CLASS,
914 X86_64_SSE_CLASS,
915 X86_64_SSESF_CLASS,
916 X86_64_SSEDF_CLASS,
917 X86_64_SSEUP_CLASS,
918 X86_64_X87_CLASS,
919 X86_64_X87UP_CLASS,
920 X86_64_MEMORY_CLASS
921 };
922static const char * const x86_64_reg_class_name[] =
923 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
924
925#define MAX_CLASSES 4
926static int classify_argument (enum machine_mode, tree,
927 enum x86_64_reg_class [MAX_CLASSES], int);
928static int examine_argument (enum machine_mode, tree, int, int *, int *);
929static rtx construct_container (enum machine_mode, tree, int, int, int,
930 const int *, int);
931static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
932 enum x86_64_reg_class);
933
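/* Illustrative sketch, not part of the original file: classifying a 16-byte
   aggregate such as struct { double d; int i; }.  Under the psABI rules the
   first eightbyte (the double) is expected to classify as X86_64_SSEDF_CLASS
   and the second (the int plus padding) as X86_64_INTEGERSI_CLASS, so the
   value travels in one SSE register and one general-purpose register.  The
   wrapper name is hypothetical.  */
static int
example_classify_aggregate (tree type,
                            enum x86_64_reg_class classes[MAX_CLASSES])
{
  return classify_argument (TYPE_MODE (type), type, classes, 0);
}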
934/* Table of constants used by fldpi, fldln2, etc.... */
935static REAL_VALUE_TYPE ext_80387_constants_table [5];
936static bool ext_80387_constants_init = 0;
937static void init_ext_80387_constants (void);
938
939/* Initialize the GCC target structure. */
940#undef TARGET_ATTRIBUTE_TABLE
941#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
942#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
943# undef TARGET_MERGE_DECL_ATTRIBUTES
944# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
945#endif
946
947#undef TARGET_COMP_TYPE_ATTRIBUTES
948#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
949
950#undef TARGET_INIT_BUILTINS
951#define TARGET_INIT_BUILTINS ix86_init_builtins
952
953#undef TARGET_EXPAND_BUILTIN
954#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
955
956#undef TARGET_ASM_FUNCTION_EPILOGUE
957#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
958
959#undef TARGET_ASM_OPEN_PAREN
960#define TARGET_ASM_OPEN_PAREN ""
961#undef TARGET_ASM_CLOSE_PAREN
962#define TARGET_ASM_CLOSE_PAREN ""
963
964#undef TARGET_ASM_ALIGNED_HI_OP
965#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
966#undef TARGET_ASM_ALIGNED_SI_OP
967#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
968#ifdef ASM_QUAD
969#undef TARGET_ASM_ALIGNED_DI_OP
970#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
971#endif
972
973#undef TARGET_ASM_UNALIGNED_HI_OP
974#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
975#undef TARGET_ASM_UNALIGNED_SI_OP
976#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
977#undef TARGET_ASM_UNALIGNED_DI_OP
978#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
979
980#undef TARGET_SCHED_ADJUST_COST
981#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
982#undef TARGET_SCHED_ISSUE_RATE
983#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
984#undef TARGET_SCHED_VARIABLE_ISSUE
985#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
986#undef TARGET_SCHED_INIT
987#define TARGET_SCHED_INIT ix86_sched_init
988#undef TARGET_SCHED_REORDER
989#define TARGET_SCHED_REORDER ix86_sched_reorder
990#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
991#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
992 ia32_use_dfa_pipeline_interface
993#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
994#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
995 ia32_multipass_dfa_lookahead
996
997#undef TARGET_FUNCTION_OK_FOR_SIBCALL
998#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
999
1000#ifdef HAVE_AS_TLS
1001#undef TARGET_HAVE_TLS
1002#define TARGET_HAVE_TLS true
1003#endif
1004#undef TARGET_CANNOT_FORCE_CONST_MEM
1005#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1006
1007#undef TARGET_DELEGITIMIZE_ADDRESS
1008#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1009
1010#undef TARGET_MS_BITFIELD_LAYOUT_P
1011#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1012
1013#undef TARGET_ASM_OUTPUT_MI_THUNK
1014#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1015#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1016#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1017
1018#undef TARGET_ASM_FILE_START
1019#define TARGET_ASM_FILE_START x86_file_start
1020
1021#undef TARGET_RTX_COSTS
1022#define TARGET_RTX_COSTS ix86_rtx_costs
1023#undef TARGET_ADDRESS_COST
1024#define TARGET_ADDRESS_COST ix86_address_cost
1025
1026#undef TARGET_FIXED_CONDITION_CODE_REGS
1027#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1028#undef TARGET_CC_MODES_COMPATIBLE
1029#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1030
1031#undef TARGET_MACHINE_DEPENDENT_REORG
1032#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1033
1034#undef TARGET_BUILD_BUILTIN_VA_LIST
1035#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1036
1037struct gcc_target targetm = TARGET_INITIALIZER;
1038
1039/* The svr4 ABI for the i386 says that records and unions are returned
1040 in memory. */
1041#ifndef DEFAULT_PCC_STRUCT_RETURN
1042#define DEFAULT_PCC_STRUCT_RETURN 1
1043#endif
1044
1045/* Sometimes certain combinations of command options do not make
1046 sense on a particular target machine. You can define a macro
1047 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1048 defined, is executed once just after all the command options have
1049 been parsed.
1050
1051 Don't use this macro to turn on various extra optimizations for
1052 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1053
1054void
1055override_options (void)
1056{
1057 int i;
1058 /* Comes from final.c -- no real reason to change it. */
1059#define MAX_CODE_ALIGN 16
1060
1061 static struct ptt
1062 {
1063 const struct processor_costs *cost; /* Processor costs */
1064 const int target_enable; /* Target flags to enable. */
1065 const int target_disable; /* Target flags to disable. */
1066 const int align_loop; /* Default alignments. */
1067 const int align_loop_max_skip;
1068 const int align_jump;
1069 const int align_jump_max_skip;
1070 const int align_func;
1071 }
1072 const processor_target_table[PROCESSOR_max] =
1073 {
1074 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1075 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1076 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1077 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1078 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1079 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1080 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1081 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1082 };
1083
1084 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1085 static struct pta
1086 {
1087 const char *const name; /* processor name or nickname. */
1088 const enum processor_type processor;
1089 const enum pta_flags
1090 {
1091 PTA_SSE = 1,
1092 PTA_SSE2 = 2,
1093 PTA_SSE3 = 4,
1094 PTA_MMX = 8,
1095 PTA_PREFETCH_SSE = 16,
1096 PTA_3DNOW = 32,
1097 PTA_3DNOW_A = 64,
1098 PTA_64BIT = 128
1099 } flags;
1100 }
1101 const processor_alias_table[] =
1102 {
1103 {"i386", PROCESSOR_I386, 0},
1104 {"i486", PROCESSOR_I486, 0},
1105 {"i586", PROCESSOR_PENTIUM, 0},
1106 {"pentium", PROCESSOR_PENTIUM, 0},
1107 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1108 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1109 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1110 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1111 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1112 {"i686", PROCESSOR_PENTIUMPRO, 0},
1113 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1114 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1115 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1116 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1117 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1118 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1119 | PTA_MMX | PTA_PREFETCH_SSE},
1120 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1121 | PTA_MMX | PTA_PREFETCH_SSE},
1122 {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
1123 | PTA_MMX | PTA_PREFETCH_SSE},
1124 {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1125 | PTA_MMX | PTA_PREFETCH_SSE},
1126 {"k6", PROCESSOR_K6, PTA_MMX},
1127 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1128 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1129 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1130 | PTA_3DNOW_A},
1131 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1132 | PTA_3DNOW | PTA_3DNOW_A},
1133 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1134 | PTA_3DNOW_A | PTA_SSE},
1135 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1136 | PTA_3DNOW_A | PTA_SSE},
1137 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1138 | PTA_3DNOW_A | PTA_SSE},
1139 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1140 | PTA_SSE | PTA_SSE2 },
1141 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1142 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1143 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1144 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1145 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1146 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1147 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1148 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1149 };
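    /* Illustrative worked example, not part of the original table: with
       -march=athlon-xp the entry above selects PROCESSOR_ATHLON together with
       PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       (bit value 8 + 16 + 32 + 64 + 1 = 121), so the loop below turns on
       MASK_MMX, MASK_3DNOW, MASK_3DNOW_A and MASK_SSE unless the user gave
       the corresponding -mno-* option explicitly.  */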
1150
1151 int const pta_size = ARRAY_SIZE (processor_alias_table);
1152
1153 /* Set the default values for switches whose default depends on TARGET_64BIT
1154 in case they weren't overwritten by command line options. */
1155 if (TARGET_64BIT)
1156 {
1157 if (flag_omit_frame_pointer == 2)
1158 flag_omit_frame_pointer = 1;
1159 if (flag_asynchronous_unwind_tables == 2)
1160 flag_asynchronous_unwind_tables = 1;
1161 if (flag_pcc_struct_return == 2)
1162 flag_pcc_struct_return = 0;
1163 }
1164 else
1165 {
1166 if (flag_omit_frame_pointer == 2)
1167 flag_omit_frame_pointer = 0;
1168 if (flag_asynchronous_unwind_tables == 2)
1169 flag_asynchronous_unwind_tables = 0;
1170 if (flag_pcc_struct_return == 2)
1171 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1172 }
1173
1174#ifdef SUBTARGET_OVERRIDE_OPTIONS
1175 SUBTARGET_OVERRIDE_OPTIONS;
1176#endif
1177
1178 if (!ix86_tune_string && ix86_arch_string)
1179 ix86_tune_string = ix86_arch_string;
1180 if (!ix86_tune_string)
1181 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1182 if (!ix86_arch_string)
1183 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1184
1185 if (ix86_cmodel_string != 0)
1186 {
1187 if (!strcmp (ix86_cmodel_string, "small"))
1188 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1189 else if (flag_pic)
1190 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1191 else if (!strcmp (ix86_cmodel_string, "32"))
1192 ix86_cmodel = CM_32;
1193 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1194 ix86_cmodel = CM_KERNEL;
1195 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1196 ix86_cmodel = CM_MEDIUM;
1197 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1198 ix86_cmodel = CM_LARGE;
1199 else
1200 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1201 }
1202 else
1203 {
1204 ix86_cmodel = CM_32;
1205 if (TARGET_64BIT)
1206 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1207 }
1208 if (ix86_asm_string != 0)
1209 {
1210 if (!strcmp (ix86_asm_string, "intel"))
1211 ix86_asm_dialect = ASM_INTEL;
1212 else if (!strcmp (ix86_asm_string, "att"))
1213 ix86_asm_dialect = ASM_ATT;
1214 else
1215 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1216 }
1217 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1218 error ("code model `%s' not supported in the %s bit mode",
1219 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1220 if (ix86_cmodel == CM_LARGE)
1221 sorry ("code model `large' not supported yet");
1222 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1223 sorry ("%i-bit mode not compiled in",
1224 (target_flags & MASK_64BIT) ? 64 : 32);
1225
1226 for (i = 0; i < pta_size; i++)
1227 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1228 {
1229 ix86_arch = processor_alias_table[i].processor;
1230 /* Default cpu tuning to the architecture. */
1231 ix86_tune = ix86_arch;
1232 if (processor_alias_table[i].flags & PTA_MMX
1233 && !(target_flags_explicit & MASK_MMX))
1234 target_flags |= MASK_MMX;
1235 if (processor_alias_table[i].flags & PTA_3DNOW
1236 && !(target_flags_explicit & MASK_3DNOW))
1237 target_flags |= MASK_3DNOW;
1238 if (processor_alias_table[i].flags & PTA_3DNOW_A
1239 && !(target_flags_explicit & MASK_3DNOW_A))
1240 target_flags |= MASK_3DNOW_A;
1241 if (processor_alias_table[i].flags & PTA_SSE
1242 && !(target_flags_explicit & MASK_SSE))
1243 target_flags |= MASK_SSE;
1244 if (processor_alias_table[i].flags & PTA_SSE2
1245 && !(target_flags_explicit & MASK_SSE2))
1246 target_flags |= MASK_SSE2;
1247 if (processor_alias_table[i].flags & PTA_SSE3
1248 && !(target_flags_explicit & MASK_SSE3))
1249 target_flags |= MASK_SSE3;
1250 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1251 x86_prefetch_sse = true;
1252 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1253 error ("CPU you selected does not support x86-64 instruction set");
1254 break;
1255 }
1256
1257 if (i == pta_size)
1258 error ("bad value (%s) for -march= switch", ix86_arch_string);
1259
1260 for (i = 0; i < pta_size; i++)
1261 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1262 {
1263 ix86_tune = processor_alias_table[i].processor;
1264 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1265 error ("CPU you selected does not support x86-64 instruction set");
1266
1267 /* Intel CPUs have always interpreted SSE prefetch instructions as
1268 NOPs; so, we can enable SSE prefetch instructions even when
1269 -mtune (rather than -march) points us to a processor that has them.
1270 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1271 higher processors. */
1272 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1273 x86_prefetch_sse = true;
1274 break;
1275 }
1276 if (i == pta_size)
1277 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1278
1279 if (optimize_size)
1280 ix86_cost = &size_cost;
1281 else
1282 ix86_cost = processor_target_table[ix86_tune].cost;
1283 target_flags |= processor_target_table[ix86_tune].target_enable;
1284 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1285
1286 /* Arrange to set up i386_stack_locals for all functions. */
1287 init_machine_status = ix86_init_machine_status;
1288
1289 /* Validate -mregparm= value. */
1290 if (ix86_regparm_string)
1291 {
1292 i = atoi (ix86_regparm_string);
1293 if (i < 0 || i > REGPARM_MAX)
1294 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1295 else
1296 ix86_regparm = i;
1297 }
1298 else
1299 if (TARGET_64BIT)
1300 ix86_regparm = REGPARM_MAX;
1301
1302 /* If the user has provided any of the -malign-* options,
1303 warn and use that value only if -falign-* is not set.
1304 Remove this code in GCC 3.2 or later. */
1305 if (ix86_align_loops_string)
1306 {
1307 warning ("-malign-loops is obsolete, use -falign-loops");
1308 if (align_loops == 0)
1309 {
1310 i = atoi (ix86_align_loops_string);
1311 if (i < 0 || i > MAX_CODE_ALIGN)
1312 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1313 else
1314 align_loops = 1 << i;
1315 }
1316 }
1317
1318 if (ix86_align_jumps_string)
1319 {
1320 warning ("-malign-jumps is obsolete, use -falign-jumps");
1321 if (align_jumps == 0)
1322 {
1323 i = atoi (ix86_align_jumps_string);
1324 if (i < 0 || i > MAX_CODE_ALIGN)
 1325	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1326 else
1327 align_jumps = 1 << i;
1328 }
1329 }
1330
1331 if (ix86_align_funcs_string)
1332 {
1333 warning ("-malign-functions is obsolete, use -falign-functions");
1334 if (align_functions == 0)
1335 {
1336 i = atoi (ix86_align_funcs_string);
1337 if (i < 0 || i > MAX_CODE_ALIGN)
 1338	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1339 else
1340 align_functions = 1 << i;
1341 }
1342 }
1343
1344 /* Default align_* from the processor table. */
1345 if (align_loops == 0)
1346 {
1347 align_loops = processor_target_table[ix86_tune].align_loop;
1348 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1349 }
1350 if (align_jumps == 0)
1351 {
1352 align_jumps = processor_target_table[ix86_tune].align_jump;
1353 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1354 }
1355 if (align_functions == 0)
1356 {
1357 align_functions = processor_target_table[ix86_tune].align_func;
1358 }
1359
1360 /* Validate -mpreferred-stack-boundary= value, or provide default.
1361 The default of 128 bits is for Pentium III's SSE __m128, but we
1362 don't want additional code to keep the stack aligned when
1363 optimizing for code size. */
1364 ix86_preferred_stack_boundary = (optimize_size
1365 ? TARGET_64BIT ? 128 : 32
1366 : 128);
1367 if (ix86_preferred_stack_boundary_string)
1368 {
1369 i = atoi (ix86_preferred_stack_boundary_string);
1370 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1371 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1372 TARGET_64BIT ? 4 : 2);
1373 else
1374 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1375 }
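  /* Worked example (illustrative): -mpreferred-stack-boundary=4 gives i = 4,
     so the preferred boundary becomes (1 << 4) * BITS_PER_UNIT = 16 bytes,
     i.e. the 128-bit default mentioned above.  */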
1376
1377 /* Validate -mbranch-cost= value, or provide default. */
1378 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1379 if (ix86_branch_cost_string)
1380 {
1381 i = atoi (ix86_branch_cost_string);
1382 if (i < 0 || i > 5)
1383 error ("-mbranch-cost=%d is not between 0 and 5", i);
1384 else
1385 ix86_branch_cost = i;
1386 }
1387
1388 if (ix86_tls_dialect_string)
1389 {
1390 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1391 ix86_tls_dialect = TLS_DIALECT_GNU;
1392 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1393 ix86_tls_dialect = TLS_DIALECT_SUN;
1394 else
1395 error ("bad value (%s) for -mtls-dialect= switch",
1396 ix86_tls_dialect_string);
1397 }
1398
1399 /* Keep nonleaf frame pointers. */
1400 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1401 flag_omit_frame_pointer = 1;
1402
1403 /* If we're doing fast math, we don't care about comparison order
1404 wrt NaNs. This lets us use a shorter comparison sequence. */
1405 if (flag_unsafe_math_optimizations)
1406 target_flags &= ~MASK_IEEE_FP;
1407
1408 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1409 since the insns won't need emulation. */
1410 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1411 target_flags &= ~MASK_NO_FANCY_MATH_387;
1412
1413 /* Turn on SSE2 builtins for -msse3. */
1414 if (TARGET_SSE3)
1415 target_flags |= MASK_SSE2;
1416
1417 /* Turn on SSE builtins for -msse2. */
1418 if (TARGET_SSE2)
1419 target_flags |= MASK_SSE;
1420
1421 if (TARGET_64BIT)
1422 {
1423 if (TARGET_ALIGN_DOUBLE)
1424 error ("-malign-double makes no sense in the 64bit mode");
1425 if (TARGET_RTD)
1426 error ("-mrtd calling convention not supported in the 64bit mode");
1427 /* Enable by default the SSE and MMX builtins. */
1428 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1429 ix86_fpmath = FPMATH_SSE;
1430 }
1431 else
1432 {
1433 ix86_fpmath = FPMATH_387;
 1434      /* The i386 ABI does not specify a red zone.  It still makes sense to use
 1435	 it when the programmer takes care to keep the stack from being clobbered.  */
1436 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1437 target_flags |= MASK_NO_RED_ZONE;
1438 }
1439
1440 if (ix86_fpmath_string != 0)
1441 {
1442 if (! strcmp (ix86_fpmath_string, "387"))
1443 ix86_fpmath = FPMATH_387;
1444 else if (! strcmp (ix86_fpmath_string, "sse"))
1445 {
1446 if (!TARGET_SSE)
1447 {
1448 warning ("SSE instruction set disabled, using 387 arithmetics");
1449 ix86_fpmath = FPMATH_387;
1450 }
1451 else
1452 ix86_fpmath = FPMATH_SSE;
1453 }
1454 else if (! strcmp (ix86_fpmath_string, "387,sse")
1455 || ! strcmp (ix86_fpmath_string, "sse,387"))
1456 {
1457 if (!TARGET_SSE)
1458 {
1459 warning ("SSE instruction set disabled, using 387 arithmetics");
1460 ix86_fpmath = FPMATH_387;
1461 }
1462 else if (!TARGET_80387)
1463 {
1464 warning ("387 instruction set disabled, using SSE arithmetics");
1465 ix86_fpmath = FPMATH_SSE;
1466 }
1467 else
1468 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1469 }
1470 else
1471 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1472 }
1473
1474 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1475 on by -msse. */
1476 if (TARGET_SSE)
1477 {
1478 target_flags |= MASK_MMX;
1479 x86_prefetch_sse = true;
1480 }
1481
1482 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1483 if (TARGET_3DNOW)
1484 {
1485 target_flags |= MASK_MMX;
1486 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1487 extensions it adds. */
1488 if (x86_3dnow_a & (1 << ix86_arch))
1489 target_flags |= MASK_3DNOW_A;
1490 }
1491 if ((x86_accumulate_outgoing_args & TUNEMASK)
1492 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1493 && !optimize_size)
1494 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1495
1496 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1497 {
1498 char *p;
1499 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1500 p = strchr (internal_label_prefix, 'X');
1501 internal_label_prefix_len = p - internal_label_prefix;
1502 *p = '\0';
1503 }
1504}
1505
1506void
1507optimization_options (int level, int size ATTRIBUTE_UNUSED)
1508{
1509 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1510 make the problem with not enough registers even worse. */
1511#ifdef INSN_SCHEDULING
1512 if (level > 1)
1513 flag_schedule_insns = 0;
1514#endif
1515
1516 /* The default values of these switches depend on TARGET_64BIT,
1517 which is not known at this moment. Mark these values with 2 and
1518 let the user override them. In case there is no command line option
1519 specifying them, we will set the defaults in override_options. */
1520 if (optimize >= 1)
1521 flag_omit_frame_pointer = 2;
1522 flag_pcc_struct_return = 2;
1523 flag_asynchronous_unwind_tables = 2;
1524}
1525
1526/* Table of valid machine attributes. */
1527const struct attribute_spec ix86_attribute_table[] =
1528{
1529 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1530 /* Stdcall attribute says callee is responsible for popping arguments
1531 if they are not variable. */
1532 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1533 /* Fastcall attribute says callee is responsible for popping arguments
1534 if they are not variable. */
1535 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1536 /* Cdecl attribute says the callee is a normal C declaration */
1537 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1538 /* Regparm attribute specifies how many integer arguments are to be
1539 passed in registers. */
1540 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1541#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1542 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1543 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1544 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1545#endif
1546 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1547 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1548 { NULL, 0, 0, false, false, false, NULL }
1549};
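/* Editor's illustration (not part of the original source): the attributes
   registered above appear in user code as, for example,

       int __attribute__((stdcall))    f (int a, int b);         -- callee pops args
       int __attribute__((fastcall))   g (int a, int b);         -- a, b in ECX, EDX
       int __attribute__((regparm(3))) h (int a, int b, int c);  -- EAX, EDX, ECX

   The handlers below reject incompatible combinations (fastcall together
   with stdcall or regparm).  */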
1550
1551/* Decide whether we can make a sibling call to a function. DECL is the
1552 declaration of the function being targeted by the call and EXP is the
1553 CALL_EXPR representing the call. */
1554
1555static bool
1556ix86_function_ok_for_sibcall (tree decl, tree exp)
1557{
1558 /* If we are generating position-independent code, we cannot sibcall
1559 optimize any indirect call, or a direct call to a global function,
1560 as the PLT requires %ebx be live. */
1561 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1562 return false;
1563
1564 /* If we are returning floats on the 80387 register stack, we cannot
1565 make a sibcall from a function that doesn't return a float to a
1566 function that does or, conversely, from a function that does return
1567 a float to a function that doesn't; the necessary stack adjustment
1568 would not be executed. */
1569 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1570 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1571 return false;
1572
1573 /* If this call is indirect, we'll need to be able to use a call-clobbered
1574 register for the address of the target function. Make sure that all
1575 such registers are not used for passing parameters. */
1576 if (!decl && !TARGET_64BIT)
1577 {
1578 tree type;
1579
1580 /* We're looking at the CALL_EXPR, we need the type of the function. */
1581 type = TREE_OPERAND (exp, 0); /* pointer expression */
1582 type = TREE_TYPE (type); /* pointer type */
1583 type = TREE_TYPE (type); /* function type */
1584
1585 if (ix86_function_regparm (type, NULL) >= 3)
1586 {
1587 /* ??? Need to count the actual number of registers to be used,
1588 not the possible number of registers. Fix later. */
1589 return false;
1590 }
1591 }
1592
1593 /* Otherwise okay. That also includes certain types of indirect calls. */
1594 return true;
1595}
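/* Editor's illustration (not part of the original source): with -fpic on
   ia32, a tail call such as

       extern int bar (int);
       int foo (int x) { return bar (x); }

   is not sibcall-optimized, because the call to the global `bar' goes
   through the PLT and the PLT sequence needs %ebx to hold the GOT pointer.  */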
1596
1597/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1598 arguments as in struct attribute_spec.handler. */
1599static tree
1600ix86_handle_cdecl_attribute (tree *node, tree name,
1601 tree args ATTRIBUTE_UNUSED,
1602 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1603{
1604 if (TREE_CODE (*node) != FUNCTION_TYPE
1605 && TREE_CODE (*node) != METHOD_TYPE
1606 && TREE_CODE (*node) != FIELD_DECL
1607 && TREE_CODE (*node) != TYPE_DECL)
1608 {
1609 warning ("`%s' attribute only applies to functions",
1610 IDENTIFIER_POINTER (name));
1611 *no_add_attrs = true;
1612 }
1613 else
1614 {
1615 if (is_attribute_p ("fastcall", name))
1616 {
1617 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1618 {
1619 error ("fastcall and stdcall attributes are not compatible");
1620 }
1621 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1622 {
1623 error ("fastcall and regparm attributes are not compatible");
1624 }
1625 }
1626 else if (is_attribute_p ("stdcall", name))
1627 {
1628 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1629 {
1630 error ("fastcall and stdcall attributes are not compatible");
1631 }
1632 }
1633 }
1634
1635 if (TARGET_64BIT)
1636 {
1637 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1638 *no_add_attrs = true;
1639 }
1640
1641 return NULL_TREE;
1642}
1643
1644/* Handle a "regparm" attribute;
1645 arguments as in struct attribute_spec.handler. */
1646static tree
1647ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1648 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1649{
1650 if (TREE_CODE (*node) != FUNCTION_TYPE
1651 && TREE_CODE (*node) != METHOD_TYPE
1652 && TREE_CODE (*node) != FIELD_DECL
1653 && TREE_CODE (*node) != TYPE_DECL)
1654 {
1655 warning ("`%s' attribute only applies to functions",
1656 IDENTIFIER_POINTER (name));
1657 *no_add_attrs = true;
1658 }
1659 else
1660 {
1661 tree cst;
1662
1663 cst = TREE_VALUE (args);
1664 if (TREE_CODE (cst) != INTEGER_CST)
1665 {
1666 warning ("`%s' attribute requires an integer constant argument",
1667 IDENTIFIER_POINTER (name));
1668 *no_add_attrs = true;
1669 }
1670 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1671 {
1672 warning ("argument to `%s' attribute larger than %d",
1673 IDENTIFIER_POINTER (name), REGPARM_MAX);
1674 *no_add_attrs = true;
1675 }
1676
1677 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1678 {
1679 error ("fastcall and regparm attributes are not compatible");
1680 }
1681 }
1682
1683 return NULL_TREE;
1684}
1685
1686/* Return 0 if the attributes for two types are incompatible, 1 if they
1687 are compatible, and 2 if they are nearly compatible (which causes a
1688 warning to be generated). */
1689
1690static int
1691ix86_comp_type_attributes (tree type1, tree type2)
1692{
1693 /* Check for mismatch of non-default calling convention. */
1694 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1695
1696 if (TREE_CODE (type1) != FUNCTION_TYPE)
1697 return 1;
1698
1699 /* Check for mismatched fastcall types */
1700 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1701 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1702 return 0;
1703
1704 /* Check for mismatched return types (cdecl vs stdcall). */
1705 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1706 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1707 return 0;
1708 if (ix86_function_regparm (type1, NULL)
1709 != ix86_function_regparm (type2, NULL))
1710 return 0;
1711 return 1;
1712}
1713
1714 /* Return the regparm value for a function with the indicated TYPE and DECL.
1715 DECL may be NULL when calling function indirectly
1716 or considering a libcall. */
1717
1718static int
1719ix86_function_regparm (tree type, tree decl)
1720{
1721 tree attr;
1722 int regparm = ix86_regparm;
1723 bool user_convention = false;
1724
1725 if (!TARGET_64BIT)
1726 {
1727 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1728 if (attr)
1729 {
1730 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1731 user_convention = true;
1732 }
1733
1734 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1735 {
1736 regparm = 2;
1737 user_convention = true;
1738 }
1739
1740 /* Use register calling convention for local functions when possible. */
1741 if (!TARGET_64BIT && !user_convention && decl
1742 && flag_unit_at_a_time && !profile_flag)
1743 {
1744 struct cgraph_local_info *i = cgraph_local_info (decl);
1745 if (i && i->local)
1746 {
1747 /* We can't use regparm(3) for nested functions as these use
1748 static chain pointer in third argument. */
1749 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1750 regparm = 2;
1751 else
1752 regparm = 3;
1753 }
1754 }
1755 }
1756 return regparm;
1757}
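/* Editor's summary of the cases above (not part of the original source):

       void __attribute__((regparm(2))) f (int, int);  -- regparm = 2 (explicit)
       void __attribute__((fastcall))   g (int, int);  -- regparm = 2 (ECX, EDX)
       static void h (int, int, int);                  -- regparm = 3 when `h' is
                                                          local and -funit-at-a-time
                                                          is in effect; only 2 if it
                                                          is a nested function, since
                                                          the static chain occupies
                                                          the third register.  */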
1758
1759/* Return true if EAX is live at the start of the function. Used by
1760 ix86_expand_prologue to determine if we need special help before
1761 calling allocate_stack_worker. */
1762
1763static bool
1764ix86_eax_live_at_start_p (void)
1765{
1766 /* Cheat. Don't bother working forward from ix86_function_regparm
1767 to the function type to whether an actual argument is located in
1768 eax. Instead just look at cfg info, which is still close enough
1769 to correct at this point. This gives false positives for broken
1770 functions that might use uninitialized data that happens to be
1771 allocated in eax, but who cares? */
1772 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1773}
1774
1775/* Value is the number of bytes of arguments automatically
1776 popped when returning from a subroutine call.
1777 FUNDECL is the declaration node of the function (as a tree),
1778 FUNTYPE is the data type of the function (as a tree),
1779 or for a library call it is an identifier node for the subroutine name.
1780 SIZE is the number of bytes of arguments passed on the stack.
1781
1782 On the 80386, the RTD insn may be used to pop them if the number
1783 of args is fixed, but if the number is variable then the caller
1784 must pop them all. RTD can't be used for library calls now
1785 because the library is compiled with the Unix compiler.
1786 Use of RTD is a selectable option, since it is incompatible with
1787 standard Unix calling sequences. If the option is not selected,
1788 the caller must always pop the args.
1789
1790 The attribute stdcall is equivalent to RTD on a per module basis. */
1791
1792int
1793ix86_return_pops_args (tree fundecl, tree funtype, int size)
1794{
1795 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1796
1797 /* Cdecl functions override -mrtd, and never pop the stack. */
1798 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1799
1800 /* Stdcall and fastcall functions will pop the stack if not
1801 variable args. */
1802 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1803 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1804 rtd = 1;
1805
1806 if (rtd
1807 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1808 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1809 == void_type_node)))
1810 return size;
1811 }
1812
1813 /* Lose any fake structure return argument if it is passed on the stack. */
1814 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1815 && !TARGET_64BIT)
1816 {
1817 int nregs = ix86_function_regparm (funtype, fundecl);
1818
1819 if (!nregs)
1820 return GET_MODE_SIZE (Pmode);
1821 }
1822
1823 return 0;
1824}
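/* Editor's illustration (not part of the original source):

       int __attribute__((stdcall)) f (int a, int b);  -- fixed args: the callee
                                                          pops SIZE = 8 bytes, i.e.
                                                          returns with `ret $8'
       int g (int a, int b);                           -- plain cdecl (no -mrtd):
                                                          returns 0, the caller
                                                          adjusts %esp

   Varargs functions never pop their arguments, regardless of -mrtd or the
   stdcall/fastcall attributes.  */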
1825
1826/* Argument support functions. */
1827
1828/* Return true when register may be used to pass function parameters. */
1829bool
1830ix86_function_arg_regno_p (int regno)
1831{
1832 int i;
1833 if (!TARGET_64BIT)
1834 return (regno < REGPARM_MAX
1835 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1836 if (SSE_REGNO_P (regno) && TARGET_SSE)
1837 return true;
1838 /* RAX is used as hidden argument to va_arg functions. */
1839 if (!regno)
1840 return true;
1841 for (i = 0; i < REGPARM_MAX; i++)
1842 if (regno == x86_64_int_parameter_registers[i])
1843 return true;
1844 return false;
1845}
1846
1847/* Initialize a variable CUM of type CUMULATIVE_ARGS
1848 for a call to a function whose data type is FNTYPE.
1849 For a library call, FNTYPE is 0. */
1850
1851void
1852init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1853 tree fntype, /* tree ptr for function decl */
1854 rtx libname, /* SYMBOL_REF of library name or 0 */
1855 tree fndecl)
1856{
1857 static CUMULATIVE_ARGS zero_cum;
1858 tree param, next_param;
1859
1860 if (TARGET_DEBUG_ARG)
1861 {
1862 fprintf (stderr, "\ninit_cumulative_args (");
1863 if (fntype)
1864 fprintf (stderr, "fntype code = %s, ret code = %s",
1865 tree_code_name[(int) TREE_CODE (fntype)],
1866 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1867 else
1868 fprintf (stderr, "no fntype");
1869
1870 if (libname)
1871 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1872 }
1873
1874 *cum = zero_cum;
1875
1876 /* Set up the number of registers to use for passing arguments. */
1877 if (fntype)
1878 cum->nregs = ix86_function_regparm (fntype, fndecl);
1879 else
1880 cum->nregs = ix86_regparm;
1881 cum->sse_nregs = SSE_REGPARM_MAX;
1882 cum->mmx_nregs = MMX_REGPARM_MAX;
1883 cum->warn_sse = true;
1884 cum->warn_mmx = true;
1885 cum->maybe_vaarg = false;
1886
1887 /* Use ecx and edx registers if function has fastcall attribute */
1888 if (fntype && !TARGET_64BIT)
1889 {
1890 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1891 {
1892 cum->nregs = 2;
1893 cum->fastcall = 1;
1894 }
1895 }
1896
1897
1898 /* Determine if this function has variable arguments. This is
1899 indicated by the last argument being 'void_type_node' if there
1900 are no variable arguments. If there are variable arguments, then
1901 we won't pass anything in registers. */
1902
1903 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1904 {
1905 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1906 param != 0; param = next_param)
1907 {
1908 next_param = TREE_CHAIN (param);
1909 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1910 {
1911 if (!TARGET_64BIT)
1912 {
1913 cum->nregs = 0;
1914 cum->sse_nregs = 0;
1915 cum->mmx_nregs = 0;
1916 cum->warn_sse = 0;
1917 cum->warn_mmx = 0;
1918 cum->fastcall = 0;
1919 }
1920 cum->maybe_vaarg = true;
1921 }
1922 }
1923 }
1924 if ((!fntype && !libname)
1925 || (fntype && !TYPE_ARG_TYPES (fntype)))
1926 cum->maybe_vaarg = 1;
1927
1928 if (TARGET_DEBUG_ARG)
1929 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1930
1931 return;
1932}
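/* Editor's illustration (not part of the original source): for

       void __attribute__((fastcall)) f (int a, int b, int c);

   the code above leaves cum->nregs = 2 and cum->fastcall = 1, so function_arg
   later places `a' in ECX, `b' in EDX, and pushes `c' on the stack; a
   trailing `...' in the prototype instead clears all the register counts.  */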
1933
1934 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1935 of this code is to classify each eightbyte of an incoming argument by register
1936 class and assign registers accordingly. */
1937
1938/* Return the union class of CLASS1 and CLASS2.
1939 See the x86-64 PS ABI for details. */
1940
1941static enum x86_64_reg_class
1942merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1943{
1944 /* Rule #1: If both classes are equal, this is the resulting class. */
1945 if (class1 == class2)
1946 return class1;
1947
1948 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1949 the other class. */
1950 if (class1 == X86_64_NO_CLASS)
1951 return class2;
1952 if (class2 == X86_64_NO_CLASS)
1953 return class1;
1954
1955 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1956 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1957 return X86_64_MEMORY_CLASS;
1958
1959 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1960 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1961 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1962 return X86_64_INTEGERSI_CLASS;
1963 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1964 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1965 return X86_64_INTEGER_CLASS;
1966
1967 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1968 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1969 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1970 return X86_64_MEMORY_CLASS;
1971
1972 /* Rule #6: Otherwise class SSE is used. */
1973 return X86_64_SSE_CLASS;
1974}
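/* Editor's illustration of the rules above (not part of the original source):

       merge_classes (X86_64_NO_CLASS,      X86_64_SSE_CLASS)    -> SSE      (rule 2)
       merge_classes (X86_64_INTEGER_CLASS, X86_64_SSEDF_CLASS)  -> INTEGER  (rule 4)
       merge_classes (X86_64_X87_CLASS,     X86_64_SSE_CLASS)    -> MEMORY   (rule 5)
       merge_classes (X86_64_SSESF_CLASS,   X86_64_SSEDF_CLASS)  -> SSE      (rule 6)  */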
1975
1976/* Classify the argument of type TYPE and mode MODE.
1977 CLASSES will be filled by the register class used to pass each word
1978 of the operand. The number of words is returned. In case the parameter
1979 should be passed in memory, 0 is returned. As a special case for zero
1980 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1981
1982 BIT_OFFSET is used internally for handling records; it specifies the
1983 offset in bits modulo 256 to avoid overflow cases.
1984
1985 See the x86-64 PS ABI for details.
1986*/
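/* Editor's illustration (not part of the original source): on x86-64

       struct s1 { double d; int i; };   -- 16 bytes, two eightbytes classified
                                            as { SSEDF, INTEGER }: one SSE and
                                            one integer register
       struct s2 { char buf[24]; };      -- larger than 16 bytes, the function
                                            returns 0 and the argument is
                                            passed in memory

   and any eightbyte that merges to MEMORY likewise forces the whole
   argument onto the stack.  */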
1987
1988static int
1989classify_argument (enum machine_mode mode, tree type,
1990 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1991{
1992 HOST_WIDE_INT bytes =
1993 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1994 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1995
1996 /* Variable sized entities are always passed/returned in memory. */
1997 if (bytes < 0)
1998 return 0;
1999
2000 if (mode != VOIDmode
2001 && MUST_PASS_IN_STACK (mode, type))
2002 return 0;
2003
2004 if (type && AGGREGATE_TYPE_P (type))
2005 {
2006 int i;
2007 tree field;
2008 enum x86_64_reg_class subclasses[MAX_CLASSES];
2009
2010 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2011 if (bytes > 16)
2012 return 0;
2013
2014 for (i = 0; i < words; i++)
2015 classes[i] = X86_64_NO_CLASS;
2016
2017 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2018 signal the memory class, so handle this as a special case. */
2019 if (!words)
2020 {
2021 classes[0] = X86_64_NO_CLASS;
2022 return 1;
2023 }
2024
2025 /* Classify each field of record and merge classes. */
2026 if (TREE_CODE (type) == RECORD_TYPE)
2027 {
2028 /* For classes first merge in the field of the subclasses. */
2029 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2030 {
2031 tree bases = TYPE_BINFO_BASETYPES (type);
2032 int n_bases = TREE_VEC_LENGTH (bases);
2033 int i;
2034
2035 for (i = 0; i < n_bases; ++i)
2036 {
2037 tree binfo = TREE_VEC_ELT (bases, i);
2038 int num;
2039 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2040 tree type = BINFO_TYPE (binfo);
2041
2042 num = classify_argument (TYPE_MODE (type),
2043 type, subclasses,
2044 (offset + bit_offset) % 256);
2045 if (!num)
2046 return 0;
2047 for (i = 0; i < num; i++)
2048 {
2049 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2050 classes[i + pos] =
2051 merge_classes (subclasses[i], classes[i + pos]);
2052 }
2053 }
2054 }
2055 /* And now merge the fields of structure. */
2056 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2057 {
2058 if (TREE_CODE (field) == FIELD_DECL)
2059 {
2060 int num;
2061
2062 /* Bitfields are always classified as integer. Handle them
2063 early, since later code would consider them to be
2064 misaligned integers. */
2065 if (DECL_BIT_FIELD (field))
2066 {
2067 for (i = int_bit_position (field) / 8 / 8;
2068 i < (int_bit_position (field)
2069 + tree_low_cst (DECL_SIZE (field), 0)
2070 + 63) / 8 / 8; i++)
2071 classes[i] =
2072 merge_classes (X86_64_INTEGER_CLASS,
2073 classes[i]);
2074 }
2075 else
2076 {
2077 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2078 TREE_TYPE (field), subclasses,
2079 (int_bit_position (field)
2080 + bit_offset) % 256);
2081 if (!num)
2082 return 0;
2083 for (i = 0; i < num; i++)
2084 {
2085 int pos =
2086 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2087 classes[i + pos] =
2088 merge_classes (subclasses[i], classes[i + pos]);
2089 }
2090 }
2091 }
2092 }
2093 }
2094 /* Arrays are handled as small records. */
2095 else if (TREE_CODE (type) == ARRAY_TYPE)
2096 {
2097 int num;
2098 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2099 TREE_TYPE (type), subclasses, bit_offset);
2100 if (!num)
2101 return 0;
2102
2103 /* The partial classes are now full classes. */
2104 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2105 subclasses[0] = X86_64_SSE_CLASS;
2106 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2107 subclasses[0] = X86_64_INTEGER_CLASS;
2108
2109 for (i = 0; i < words; i++)
2110 classes[i] = subclasses[i % num];
2111 }
2112 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2113 else if (TREE_CODE (type) == UNION_TYPE
2114 || TREE_CODE (type) == QUAL_UNION_TYPE)
2115 {
2116 /* For classes first merge in the field of the subclasses. */
2117 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2118 {
2119 tree bases = TYPE_BINFO_BASETYPES (type);
2120 int n_bases = TREE_VEC_LENGTH (bases);
2121 int i;
2122
2123 for (i = 0; i < n_bases; ++i)
2124 {
2125 tree binfo = TREE_VEC_ELT (bases, i);
2126 int num;
2127 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2128 tree type = BINFO_TYPE (binfo);
2129
2130 num = classify_argument (TYPE_MODE (type),
2131 type, subclasses,
2132 (offset + (bit_offset % 64)) % 256);
2133 if (!num)
2134 return 0;
2135 for (i = 0; i < num; i++)
2136 {
2137 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2138 classes[i + pos] =
2139 merge_classes (subclasses[i], classes[i + pos]);
2140 }
2141 }
2142 }
2143 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2144 {
2145 if (TREE_CODE (field) == FIELD_DECL)
2146 {
2147 int num;
2148 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2149 TREE_TYPE (field), subclasses,
2150 bit_offset);
2151 if (!num)
2152 return 0;
2153 for (i = 0; i < num; i++)
2154 classes[i] = merge_classes (subclasses[i], classes[i]);
2155 }
2156 }
2157 }
2158 else if (TREE_CODE (type) == SET_TYPE)
2159 {
2160 if (bytes <= 4)
2161 {
2162 classes[0] = X86_64_INTEGERSI_CLASS;
2163 return 1;
2164 }
2165 else if (bytes <= 8)
2166 {
2167 classes[0] = X86_64_INTEGER_CLASS;
2168 return 1;
2169 }
2170 else if (bytes <= 12)
2171 {
2172 classes[0] = X86_64_INTEGER_CLASS;
2173 classes[1] = X86_64_INTEGERSI_CLASS;
2174 return 2;
2175 }
2176 else
2177 {
2178 classes[0] = X86_64_INTEGER_CLASS;
2179 classes[1] = X86_64_INTEGER_CLASS;
2180 return 2;
2181 }
2182 }
2183 else
2184 abort ();
2185
2186 /* Final merger cleanup. */
2187 for (i = 0; i < words; i++)
2188 {
2189 /* If one class is MEMORY, everything should be passed in
2190 memory. */
2191 if (classes[i] == X86_64_MEMORY_CLASS)
2192 return 0;
2193
2194 /* The X86_64_SSEUP_CLASS should be always preceded by
2195 X86_64_SSE_CLASS. */
2196 if (classes[i] == X86_64_SSEUP_CLASS
2197 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2198 classes[i] = X86_64_SSE_CLASS;
2199
2200 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2201 if (classes[i] == X86_64_X87UP_CLASS
2202 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2203 classes[i] = X86_64_SSE_CLASS;
2204 }
2205 return words;
2206 }
2207
2208 /* Compute the alignment needed. We align all types to their natural boundaries,
2209 with the exception of XFmode, which the code below treats as 128-bit aligned. */
2210 if (mode != VOIDmode && mode != BLKmode)
2211 {
2212 int mode_alignment = GET_MODE_BITSIZE (mode);
2213
2214 if (mode == XFmode)
2215 mode_alignment = 128;
2216 else if (mode == XCmode)
2217 mode_alignment = 256;
2218 if (COMPLEX_MODE_P (mode))
2219 mode_alignment /= 2;
2220 /* Misaligned fields are always returned in memory. */
2221 if (bit_offset % mode_alignment)
2222 return 0;
2223 }
2224
2225 /* Classification of atomic types. */
2226 switch (mode)
2227 {
2228 case DImode:
2229 case SImode:
2230 case HImode:
2231 case QImode:
2232 case CSImode:
2233 case CHImode:
2234 case CQImode:
2235 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2236 classes[0] = X86_64_INTEGERSI_CLASS;
2237 else
2238 classes[0] = X86_64_INTEGER_CLASS;
2239 return 1;
2240 case CDImode:
2241 case TImode:
2242 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2243 return 2;
2244 case CTImode:
2245 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2246 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2247 return 4;
2248 case SFmode:
2249 if (!(bit_offset % 64))
2250 classes[0] = X86_64_SSESF_CLASS;
2251 else
2252 classes[0] = X86_64_SSE_CLASS;
2253 return 1;
2254 case DFmode:
2255 classes[0] = X86_64_SSEDF_CLASS;
2256 return 1;
2257 case XFmode:
2258 classes[0] = X86_64_X87_CLASS;
2259 classes[1] = X86_64_X87UP_CLASS;
2260 return 2;
2261 case TFmode:
2262 case TCmode:
2263 return 0;
2264 case XCmode:
2265 classes[0] = X86_64_X87_CLASS;
2266 classes[1] = X86_64_X87UP_CLASS;
2267 classes[2] = X86_64_X87_CLASS;
2268 classes[3] = X86_64_X87UP_CLASS;
2269 return 4;
2270 case DCmode:
2271 classes[0] = X86_64_SSEDF_CLASS;
2272 classes[1] = X86_64_SSEDF_CLASS;
2273 return 2;
2274 case SCmode:
2275 classes[0] = X86_64_SSE_CLASS;
2276 return 1;
2277 case V4SFmode:
2278 case V4SImode:
2279 case V16QImode:
2280 case V8HImode:
2281 case V2DFmode:
2282 case V2DImode:
2283 classes[0] = X86_64_SSE_CLASS;
2284 classes[1] = X86_64_SSEUP_CLASS;
2285 return 2;
2286 case V2SFmode:
2287 case V2SImode:
2288 case V4HImode:
2289 case V8QImode:
2290 return 0;
2291 case BLKmode:
2292 case VOIDmode:
2293 return 0;
2294 default:
2295 abort ();
2296 }
2297}
2298
2299 /* Examine the argument and set the number of registers required in each
2300 class. Return 0 iff the parameter should be passed in memory. */
2301static int
2302examine_argument (enum machine_mode mode, tree type, int in_return,
2303 int *int_nregs, int *sse_nregs)
2304{
2305 enum x86_64_reg_class class[MAX_CLASSES];
2306 int n = classify_argument (mode, type, class, 0);
2307
2308 *int_nregs = 0;
2309 *sse_nregs = 0;
2310 if (!n)
2311 return 0;
2312 for (n--; n >= 0; n--)
2313 switch (class[n])
2314 {
2315 case X86_64_INTEGER_CLASS:
2316 case X86_64_INTEGERSI_CLASS:
2317 (*int_nregs)++;
2318 break;
2319 case X86_64_SSE_CLASS:
2320 case X86_64_SSESF_CLASS:
2321 case X86_64_SSEDF_CLASS:
2322 (*sse_nregs)++;
2323 break;
2324 case X86_64_NO_CLASS:
2325 case X86_64_SSEUP_CLASS:
2326 break;
2327 case X86_64_X87_CLASS:
2328 case X86_64_X87UP_CLASS:
2329 if (!in_return)
2330 return 0;
2331 break;
2332 case X86_64_MEMORY_CLASS:
2333 abort ();
2334 }
2335 return 1;
2336}
2337/* Construct container for the argument used by GCC interface. See
2338 FUNCTION_ARG for the detailed description. */
2339static rtx
2340construct_container (enum machine_mode mode, tree type, int in_return,
2341 int nintregs, int nsseregs, const int * intreg,
2342 int sse_regno)
2343{
2344 enum machine_mode tmpmode;
2345 int bytes =
2346 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2347 enum x86_64_reg_class class[MAX_CLASSES];
2348 int n;
2349 int i;
2350 int nexps = 0;
2351 int needed_sseregs, needed_intregs;
2352 rtx exp[MAX_CLASSES];
2353 rtx ret;
2354
2355 n = classify_argument (mode, type, class, 0);
2356 if (TARGET_DEBUG_ARG)
2357 {
2358 if (!n)
2359 fprintf (stderr, "Memory class\n");
2360 else
2361 {
2362 fprintf (stderr, "Classes:");
2363 for (i = 0; i < n; i++)
2364 {
2365 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2366 }
2367 fprintf (stderr, "\n");
2368 }
2369 }
2370 if (!n)
2371 return NULL;
2372 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2373 return NULL;
2374 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2375 return NULL;
2376
2377 /* First construct simple cases. Avoid SCmode, since we want to use
2378 single register to pass this type. */
2379 if (n == 1 && mode != SCmode)
2380 switch (class[0])
2381 {
2382 case X86_64_INTEGER_CLASS:
2383 case X86_64_INTEGERSI_CLASS:
2384 return gen_rtx_REG (mode, intreg[0]);
2385 case X86_64_SSE_CLASS:
2386 case X86_64_SSESF_CLASS:
2387 case X86_64_SSEDF_CLASS:
2388 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2389 case X86_64_X87_CLASS:
2390 return gen_rtx_REG (mode, FIRST_STACK_REG);
2391 case X86_64_NO_CLASS:
2392 /* Zero sized array, struct or class. */
2393 return NULL;
2394 default:
2395 abort ();
2396 }
2397 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2398 && mode != BLKmode)
2399 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2400 if (n == 2
2401 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2402 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2403 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2404 && class[1] == X86_64_INTEGER_CLASS
2405 && (mode == CDImode || mode == TImode || mode == TFmode)
2406 && intreg[0] + 1 == intreg[1])
2407 return gen_rtx_REG (mode, intreg[0]);
2408 if (n == 4
2409 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2410 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2411 && mode != BLKmode)
2412 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2413
2414 /* Otherwise figure out the entries of the PARALLEL. */
2415 for (i = 0; i < n; i++)
2416 {
2417 switch (class[i])
2418 {
2419 case X86_64_NO_CLASS:
2420 break;
2421 case X86_64_INTEGER_CLASS:
2422 case X86_64_INTEGERSI_CLASS:
2423 /* Merge TImodes on aligned occasions here too. */
2424 if (i * 8 + 8 > bytes)
2425 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2426 else if (class[i] == X86_64_INTEGERSI_CLASS)
2427 tmpmode = SImode;
2428 else
2429 tmpmode = DImode;
2430 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2431 if (tmpmode == BLKmode)
2432 tmpmode = DImode;
2433 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2434 gen_rtx_REG (tmpmode, *intreg),
2435 GEN_INT (i*8));
2436 intreg++;
2437 break;
2438 case X86_64_SSESF_CLASS:
2439 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2440 gen_rtx_REG (SFmode,
2441 SSE_REGNO (sse_regno)),
2442 GEN_INT (i*8));
2443 sse_regno++;
2444 break;
2445 case X86_64_SSEDF_CLASS:
2446 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2447 gen_rtx_REG (DFmode,
2448 SSE_REGNO (sse_regno)),
2449 GEN_INT (i*8));
2450 sse_regno++;
2451 break;
2452 case X86_64_SSE_CLASS:
2453 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2454 tmpmode = TImode;
2455 else
2456 tmpmode = DImode;
2457 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2458 gen_rtx_REG (tmpmode,
2459 SSE_REGNO (sse_regno)),
2460 GEN_INT (i*8));
2461 if (tmpmode == TImode)
2462 i++;
2463 sse_regno++;
2464 break;
2465 default:
2466 abort ();
2467 }
2468 }
2469 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2470 for (i = 0; i < nexps; i++)
2471 XVECEXP (ret, 0, i) = exp [i];
2472 return ret;
2473}
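/* Editor's illustration (not part of the original source): a DCmode
   (complex double) argument classifies as { SSEDF, SSEDF }, so with two SSE
   registers available the function builds roughly

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DF xmm1) (const_int 8))])

   while a single DImode integer argument is simply (reg:DI ...) taken from
   intreg[0], the next free integer parameter register.  */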
2474
2475/* Update the data in CUM to advance over an argument
2476 of mode MODE and data type TYPE.
2477 (TYPE is null for libcalls where that information may not be available.) */
2478
2479void
2480function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2481 enum machine_mode mode, /* current arg mode */
2482 tree type, /* type of the argument or 0 if lib support */
2483 int named) /* whether or not the argument was named */
2484{
2485 int bytes =
2486 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2487 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2488
2489 if (TARGET_DEBUG_ARG)
2490 fprintf (stderr,
2491 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2492 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2493 if (TARGET_64BIT)
2494 {
2495 int int_nregs, sse_nregs;
2496 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2497 cum->words += words;
2498 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2499 {
2500 cum->nregs -= int_nregs;
2501 cum->sse_nregs -= sse_nregs;
2502 cum->regno += int_nregs;
2503 cum->sse_regno += sse_nregs;
2504 }
2505 else
2506 cum->words += words;
2507 }
2508 else
2509 {
2510 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2511 && (!type || !AGGREGATE_TYPE_P (type)))
2512 {
2513 cum->sse_words += words;
2514 cum->sse_nregs -= 1;
2515 cum->sse_regno += 1;
2516 if (cum->sse_nregs <= 0)
2517 {
2518 cum->sse_nregs = 0;
2519 cum->sse_regno = 0;
2520 }
2521 }
2522 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2523 && (!type || !AGGREGATE_TYPE_P (type)))
2524 {
2525 cum->mmx_words += words;
2526 cum->mmx_nregs -= 1;
2527 cum->mmx_regno += 1;
2528 if (cum->mmx_nregs <= 0)
2529 {
2530 cum->mmx_nregs = 0;
2531 cum->mmx_regno = 0;
2532 }
2533 }
2534 else
2535 {
2536 cum->words += words;
2537 cum->nregs -= words;
2538 cum->regno += words;
2539
2540 if (cum->nregs <= 0)
2541 {
2542 cum->nregs = 0;
2543 cum->regno = 0;
2544 }
2545 }
2546 }
2547 return;
2548}
2549
2550/* A subroutine of function_arg. We want to pass a parameter whose nominal
2551 type is MODE in REGNO. We try to minimize ABI variation, so MODE may not
2552 actually be valid for REGNO with the current ISA. In this case, ALT_MODE
2553 is used instead. It must be the same size as MODE, and must be known to
2554 be valid for REGNO. Finally, ORIG_MODE is the original mode of the
2555 parameter, as seen by the type system. This may be different from MODE
2556 when we're mucking with things minimizing ABI variations.
2557
2558 Returns a REG or a PARALLEL as appropriate. */
2559
2560static rtx
2561gen_reg_or_parallel (enum machine_mode mode, enum machine_mode alt_mode,
2562 enum machine_mode orig_mode, unsigned int regno)
2563{
2564 rtx tmp;
2565
2566 if (HARD_REGNO_MODE_OK (regno, mode))
2567 tmp = gen_rtx_REG (mode, regno);
2568 else
2569 {
2570 tmp = gen_rtx_REG (alt_mode, regno);
2571 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2572 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2573 }
2574
2575 return tmp;
2576}
2577
2578/* Define where to put the arguments to a function.
2579 Value is zero to push the argument on the stack,
2580 or a hard register in which to store the argument.
2581
2582 MODE is the argument's machine mode.
2583 TYPE is the data type of the argument (as a tree).
2584 This is null for libcalls where that information may
2585 not be available.
2586 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2587 the preceding args and about the function being called.
2588 NAMED is nonzero if this argument is a named parameter
2589 (otherwise it is an extra parameter matching an ellipsis). */
2590
2591rtx
2592function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2593 tree type, int named)
2594{
2595 enum machine_mode mode = orig_mode;
2596 rtx ret = NULL_RTX;
2597 int bytes =
2598 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2599 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2600 static bool warnedsse, warnedmmx;
2601
2602 /* Handle a hidden AL argument containing number of registers for varargs
2603 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2604 any AL settings. */
2605 if (mode == VOIDmode)
2606 {
2607 if (TARGET_64BIT)
2608 return GEN_INT (cum->maybe_vaarg
2609 ? (cum->sse_nregs < 0
2610 ? SSE_REGPARM_MAX
2611 : cum->sse_regno)
2612 : -1);
2613 else
2614 return constm1_rtx;
2615 }
2616 if (TARGET_64BIT)
2617 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2618 &x86_64_int_parameter_registers [cum->regno],
2619 cum->sse_regno);
2620 else
2621 switch (mode)
2622 {
2623 /* For now, pass fp/complex values on the stack. */
2624 default:
2625 break;
2626
2627 case BLKmode:
2628 if (bytes < 0)
2629 break;
2630 /* FALLTHRU */
2631 case DImode:
2632 case SImode:
2633 case HImode:
2634 case QImode:
2635 if (words <= cum->nregs)
2636 {
2637 int regno = cum->regno;
2638
2639 /* Fastcall allocates the first two DWORD (SImode) or
2640 smaller arguments to ECX and EDX. */
2641 if (cum->fastcall)
2642 {
2643 if (mode == BLKmode || mode == DImode)
2644 break;
2645
2646 /* ECX not EAX is the first allocated register. */
2647 if (regno == 0)
2648 regno = 2;
2649 }
2650 ret = gen_rtx_REG (mode, regno);
2651 }
2652 break;
2653 case TImode:
2654 case V16QImode:
2655 case V8HImode:
2656 case V4SImode:
2657 case V2DImode:
2658 case V4SFmode:
2659 case V2DFmode:
2660 if (!type || !AGGREGATE_TYPE_P (type))
2661 {
2662 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2663 {
2664 warnedsse = true;
2665 warning ("SSE vector argument without SSE enabled "
2666 "changes the ABI");
2667 }
2668 if (cum->sse_nregs)
2669 ret = gen_reg_or_parallel (mode, TImode, orig_mode,
2670 cum->sse_regno + FIRST_SSE_REG);
2671 }
2672 break;
2673 case V8QImode:
2674 case V4HImode:
2675 case V2SImode:
2676 case V2SFmode:
2677 if (!type || !AGGREGATE_TYPE_P (type))
2678 {
2679 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2680 {
2681 warnedmmx = true;
2682 warning ("MMX vector argument without MMX enabled "
2683 "changes the ABI");
2684 }
2685 if (cum->mmx_nregs)
2686 ret = gen_reg_or_parallel (mode, DImode, orig_mode,
2687 cum->mmx_regno + FIRST_MMX_REG);
2688 }
2689 break;
2690 }
2691
2692 if (TARGET_DEBUG_ARG)
2693 {
2694 fprintf (stderr,
2695 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2696 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2697
2698 if (ret)
2699 print_simple_rtl (stderr, ret);
2700 else
2701 fprintf (stderr, ", stack");
2702
2703 fprintf (stderr, " )\n");
2704 }
2705
2706 return ret;
2707}
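/* Editor's illustration (not part of the original source): on ia32,

       void __attribute__((fastcall)) f (int a, int b, int c);

   makes the routine above return ECX for `a', EDX for `b' and NULL for `c'
   (which therefore goes on the stack); on x86-64 the same three plain int
   arguments are placed in EDI, ESI and EDX by construct_container.  */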
2708
2709/* A C expression that indicates when an argument must be passed by
2710 reference. If nonzero for an argument, a copy of that argument is
2711 made in memory and a pointer to the argument is passed instead of
2712 the argument itself. The pointer is passed in whatever way is
2713 appropriate for passing a pointer to that type. */
2714
2715int
2716function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2717 enum machine_mode mode ATTRIBUTE_UNUSED,
2718 tree type, int named ATTRIBUTE_UNUSED)
2719{
2720 if (!TARGET_64BIT)
2721 return 0;
2722
2723 if (type && int_size_in_bytes (type) == -1)
2724 {
2725 if (TARGET_DEBUG_ARG)
2726 fprintf (stderr, "function_arg_pass_by_reference\n");
2727 return 1;
2728 }
2729
2730 return 0;
2731}
2732
2733 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2734 passing ABI. */
2735static bool
2736contains_128bit_aligned_vector_p (tree type)
2737{
2738 enum machine_mode mode = TYPE_MODE (type);
2739 if (SSE_REG_MODE_P (mode)
2740 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2741 return true;
2742 if (TYPE_ALIGN (type) < 128)
2743 return false;
2744
2745 if (AGGREGATE_TYPE_P (type))
2746 {
2747 /* Walk the aggregates recursively. */
2748 if (TREE_CODE (type) == RECORD_TYPE
2749 || TREE_CODE (type) == UNION_TYPE
2750 || TREE_CODE (type) == QUAL_UNION_TYPE)
2751 {
2752 tree field;
2753
2754 if (TYPE_BINFO (type) != NULL
2755 && TYPE_BINFO_BASETYPES (type) != NULL)
2756 {
2757 tree bases = TYPE_BINFO_BASETYPES (type);
2758 int n_bases = TREE_VEC_LENGTH (bases);
2759 int i;
2760
2761 for (i = 0; i < n_bases; ++i)
2762 {
2763 tree binfo = TREE_VEC_ELT (bases, i);
2764 tree type = BINFO_TYPE (binfo);
2765
2766 if (contains_128bit_aligned_vector_p (type))
2767 return true;
2768 }
2769 }
2770 /* And now merge the fields of structure. */
2771 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2772 {
2773 if (TREE_CODE (field) == FIELD_DECL
2774 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2775 return true;
2776 }
2777 }
2778 /* Just in case some languages pass arrays by value. */
2779 else if (TREE_CODE (type) == ARRAY_TYPE)
2780 {
2781 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2782 return true;
2783 }
2784 else
2785 abort ();
2786 }
2787 return false;
2788}
2789
2790/* Gives the alignment boundary, in bits, of an argument with the
2791 specified mode and type. */
2792
2793int
2794ix86_function_arg_boundary (enum machine_mode mode, tree type)
2795{
2796 int align;
2797 if (type)
2798 align = TYPE_ALIGN (type);
2799 else
2800 align = GET_MODE_ALIGNMENT (mode);
2801 if (align < PARM_BOUNDARY)
2802 align = PARM_BOUNDARY;
2803 if (!TARGET_64BIT)
2804 {
2805 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2806 make an exception for SSE modes since these require 128bit
2807 alignment.
2808
2809 The handling here differs from field_alignment. ICC aligns MMX
2810 arguments to 4 byte boundaries, while structure fields are aligned
2811 to 8 byte boundaries. */
2812 if (!type)
2813 {
2814 if (!SSE_REG_MODE_P (mode))
2815 align = PARM_BOUNDARY;
2816 }
2817 else
2818 {
2819 if (!contains_128bit_aligned_vector_p (type))
2820 align = PARM_BOUNDARY;
2821 }
2822 }
2823 if (align > 128)
2824 align = 128;
2825 return align;
2826}
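/* Editor's illustration (not part of the original source), assuming the
   usual 32-bit PARM_BOUNDARY on ia32:

       int                  -> 32   (PARM_BOUNDARY)
       double               -> 32   (no 128-bit vector inside)
       __m128 (V4SFmode)    -> 128
       struct { __m128 v; } -> 128  (detected by contains_128bit_aligned_vector_p)  */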
2827
2828/* Return true if N is a possible register number of function value. */
2829bool
2830ix86_function_value_regno_p (int regno)
2831{
2832 if (!TARGET_64BIT)
2833 {
2834 return ((regno) == 0
2835 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2836 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2837 }
2838 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2839 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2840 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2841}
2842
2843/* Define how to find the value returned by a function.
2844 VALTYPE is the data type of the value (as a tree).
2845 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2846 otherwise, FUNC is 0. */
2847rtx
2848ix86_function_value (tree valtype)
2849{
2850 if (TARGET_64BIT)
2851 {
2852 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2853 REGPARM_MAX, SSE_REGPARM_MAX,
2854 x86_64_int_return_registers, 0);
2855 /* For zero sized structures, construct_container returns NULL, but we need
2856 to keep the rest of the compiler happy by returning a meaningful value. */
2857 if (!ret)
2858 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2859 return ret;
2860 }
2861 else
2862 return gen_rtx_REG (TYPE_MODE (valtype),
2863 ix86_value_regno (TYPE_MODE (valtype)));
2864}
2865
2866/* Return nonzero iff TYPE is returned in memory. */
2867int
2868ix86_return_in_memory (tree type)
2869{
2870 int needed_intregs, needed_sseregs, size;
2871 enum machine_mode mode = TYPE_MODE (type);
2872
2873 if (TARGET_64BIT)
2874 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2875
2876 if (mode == BLKmode)
2877 return 1;
2878
2879 size = int_size_in_bytes (type);
2880
2881 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2882 return 0;
2883
2884 if (VECTOR_MODE_P (mode) || mode == TImode)
2885 {
2886 /* User-created vectors small enough to fit in EAX. */
2887 if (size < 8)
2888 return 0;
2889
2890 /* MMX/3dNow values are returned on the stack, since we've
2891 got to EMMS/FEMMS before returning. */
2892 if (size == 8)
2893 return 1;
2894
2895 /* SSE values are returned in XMM0. */
2896 /* ??? Except when it doesn't exist? We have a choice of
2897 either (1) being abi incompatible with a -march switch,
2898 or (2) generating an error here. Given no good solution,
2899 I think the safest thing is one warning. The user won't
2900 be able to use -Werror, but.... */
2901 if (size == 16)
2902 {
2903 static bool warned;
2904
2905 if (TARGET_SSE)
2906 return 0;
2907
2908 if (!warned)
2909 {
2910 warned = true;
2911 warning ("SSE vector return without SSE enabled "
2912 "changes the ABI");
2913 }
2914 return 1;
2915 }
2916 }
2917
2918 if (mode == XFmode)
2919 return 0;
2920
2921 if (size > 12)
2922 return 1;
2923 return 0;
2924}
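/* Editor's illustration of the ia32 cases above (not part of the original
   source):

       __m64  (8-byte MMX vector)   -- memory, so the caller need not EMMS
       __m128 (16-byte SSE vector)  -- XMM0 when SSE is enabled; otherwise
                                       memory plus a one-time ABI warning
       long double (XFmode)         -- %st(0), never memory
       any BLKmode aggregate        -- memory  */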
2925
2926/* Define how to find the value returned by a library function
2927 assuming the value has mode MODE. */
2928rtx
2929ix86_libcall_value (enum machine_mode mode)
2930{
2931 if (TARGET_64BIT)
2932 {
2933 switch (mode)
2934 {
2935 case SFmode:
2936 case SCmode:
2937 case DFmode:
2938 case DCmode:
2939 return gen_rtx_REG (mode, FIRST_SSE_REG);
2940 case XFmode:
2941 case XCmode:
2942 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2943 case TFmode:
2944 case TCmode:
2945 return NULL;
2946 default:
2947 return gen_rtx_REG (mode, 0);
2948 }
2949 }
2950 else
2951 return gen_rtx_REG (mode, ix86_value_regno (mode));
2952}
2953
2954/* Given a mode, return the register to use for a return value. */
2955
2956static int
2957ix86_value_regno (enum machine_mode mode)
2958{
2959 /* Floating point return values in %st(0). */
2960 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2961 return FIRST_FLOAT_REG;
2962 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2963 we prevent this case when sse is not available. */
2964 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2965 return FIRST_SSE_REG;
2966 /* Everything else in %eax. */
2967 return 0;
2968}
2969
2970/* Create the va_list data type. */
2971
2972static tree
2973ix86_build_builtin_va_list (void)
2974{
2975 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2976
2977 /* For i386 we use plain pointer to argument area. */
2978 if (!TARGET_64BIT)
2979 return build_pointer_type (char_type_node);
2980
2981 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2982 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2983
2984 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2985 unsigned_type_node);
2986 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2987 unsigned_type_node);
2988 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2989 ptr_type_node);
2990 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2991 ptr_type_node);
2992
2993 DECL_FIELD_CONTEXT (f_gpr) = record;
2994 DECL_FIELD_CONTEXT (f_fpr) = record;
2995 DECL_FIELD_CONTEXT (f_ovf) = record;
2996 DECL_FIELD_CONTEXT (f_sav) = record;
2997
2998 TREE_CHAIN (record) = type_decl;
2999 TYPE_NAME (record) = type_decl;
3000 TYPE_FIELDS (record) = f_gpr;
3001 TREE_CHAIN (f_gpr) = f_fpr;
3002 TREE_CHAIN (f_fpr) = f_ovf;
3003 TREE_CHAIN (f_ovf) = f_sav;
3004
3005 layout_type (record);
3006
3007 /* The correct type is an array type of one element. */
3008 return build_array_type (record, build_index_type (size_zero_node));
3009}
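/* Editor's note (not part of the original source): the record built above
   corresponds to the familiar x86-64 declaration

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;

   wrapped, as the last line implements, in an array type of one element.  */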
3010
3011/* Perform any actions needed for a function that is receiving a
3012 variable number of arguments.
3013
3014 CUM is as above.
3015
3016 MODE and TYPE are the mode and type of the current parameter.
3017
3018 PRETEND_SIZE is a variable that should be set to the amount of stack
3019 that must be pushed by the prolog to pretend that our caller pushed
3020 it.
3021
3022 Normally, this macro will push all remaining incoming registers on the
3023 stack and set PRETEND_SIZE to the length of the registers pushed. */
3024
3025void
3026ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3027 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3028 int no_rtl)
3029{
3030 CUMULATIVE_ARGS next_cum;
3031 rtx save_area = NULL_RTX, mem;
3032 rtx label;
3033 rtx label_ref;
3034 rtx tmp_reg;
3035 rtx nsse_reg;
3036 int set;
3037 tree fntype;
3038 int stdarg_p;
3039 int i;
3040
3041 if (!TARGET_64BIT)
3042 return;
3043
3044 /* Indicate to allocate space on the stack for varargs save area. */
3045 ix86_save_varrargs_registers = 1;
3046
3047 cfun->stack_alignment_needed = 128;
3048
3049 fntype = TREE_TYPE (current_function_decl);
3050 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3051 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3052 != void_type_node));
3053
3054 /* For varargs, we do not want to skip the dummy va_dcl argument.
3055 For stdargs, we do want to skip the last named argument. */
3056 next_cum = *cum;
3057 if (stdarg_p)
3058 function_arg_advance (&next_cum, mode, type, 1);
3059
3060 if (!no_rtl)
3061 save_area = frame_pointer_rtx;
3062
3063 set = get_varargs_alias_set ();
3064
3065 for (i = next_cum.regno; i < ix86_regparm; i++)
3066 {
3067 mem = gen_rtx_MEM (Pmode,
3068 plus_constant (save_area, i * UNITS_PER_WORD));
3069 set_mem_alias_set (mem, set);
3070 emit_move_insn (mem, gen_rtx_REG (Pmode,
3071 x86_64_int_parameter_registers[i]));
3072 }
3073
3074 if (next_cum.sse_nregs)
3075 {
3076 /* Now emit code to save SSE registers. The AX parameter contains the number
3077 of SSE parameter registers used to call this function. We use the
3078 sse_prologue_save insn template, which produces a computed jump across
3079 the SSE saves. We need some preparation work to get this working. */
3080
3081 label = gen_label_rtx ();
3082 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3083
3084 /* Compute the address to jump to:
3085 label - eax*4 + named_sse_arguments*4 (the factor matches the GEN_INT (4) below). */
3086 tmp_reg = gen_reg_rtx (Pmode);
3087 nsse_reg = gen_reg_rtx (Pmode);
3088 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3089 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3090 gen_rtx_MULT (Pmode, nsse_reg,
3091 GEN_INT (4))));
3092 if (next_cum.sse_regno)
3093 emit_move_insn
3094 (nsse_reg,
3095 gen_rtx_CONST (DImode,
3096 gen_rtx_PLUS (DImode,
3097 label_ref,
3098 GEN_INT (next_cum.sse_regno * 4))));
3099 else
3100 emit_move_insn (nsse_reg, label_ref);
3101 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3102
3103 /* Compute the address of the memory block we save into. We always use a
3104 pointer pointing 127 bytes after the first byte to store - this is needed
3105 to keep the instruction size limited to 4 bytes. */
3106 tmp_reg = gen_reg_rtx (Pmode);
3107 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3108 plus_constant (save_area,
3109 8 * REGPARM_MAX + 127)));
3110 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3111 set_mem_alias_set (mem, set);
3112 set_mem_align (mem, BITS_PER_WORD);
3113
3114 /* And finally do the dirty job! */
3115 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3116 GEN_INT (next_cum.sse_regno), label));
3117 }
3118
3119}
3120
3121/* Implement va_start. */
3122
3123void
3124ix86_va_start (tree valist, rtx nextarg)
3125{
3126 HOST_WIDE_INT words, n_gpr, n_fpr;
3127 tree f_gpr, f_fpr, f_ovf, f_sav;
3128 tree gpr, fpr, ovf, sav, t;
3129
3130 /* Only 64bit target needs something special. */
3131 if (!TARGET_64BIT)
3132 {
3133 std_expand_builtin_va_start (valist, nextarg);
3134 return;
3135 }
3136
3137 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3138 f_fpr = TREE_CHAIN (f_gpr);
3139 f_ovf = TREE_CHAIN (f_fpr);
3140 f_sav = TREE_CHAIN (f_ovf);
3141
3142 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3143 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3144 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3145 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3146 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3147
3148 /* Count number of gp and fp argument registers used. */
3149 words = current_function_args_info.words;
3150 n_gpr = current_function_args_info.regno;
3151 n_fpr = current_function_args_info.sse_regno;
3152
3153 if (TARGET_DEBUG_ARG)
3154 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3155 (int) words, (int) n_gpr, (int) n_fpr);
3156
3157 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3158 build_int_2 (n_gpr * 8, 0));
3159 TREE_SIDE_EFFECTS (t) = 1;
3160 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3161
3162 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3163 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3164 TREE_SIDE_EFFECTS (t) = 1;
3165 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3166
3167 /* Find the overflow area. */
3168 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3169 if (words != 0)
3170 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3171 build_int_2 (words * UNITS_PER_WORD, 0));
3172 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3173 TREE_SIDE_EFFECTS (t) = 1;
3174 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3175
3176 /* Find the register save area.
3177 The prologue of the function saves it right above the stack frame. */
3178 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3179 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3180 TREE_SIDE_EFFECTS (t) = 1;
3181 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3182}
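/* Editor's illustration (not part of the original source): for
   `int f (int a, ...)' the expansion above stores roughly

       gp_offset         = 1 * 8                      -- one GP register used by `a'
       fp_offset         = 0 * 16 + 8 * REGPARM_MAX   -- no SSE registers used yet
       overflow_arg_area = virtual incoming args (+ 0 words)
       reg_save_area     = frame pointer

   so va_arg can later decide between the register save area and the
   overflow area.  */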
3183
3184/* Implement va_arg. */
3185rtx
3186ix86_va_arg (tree valist, tree type)
3187{
3188 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3189 tree f_gpr, f_fpr, f_ovf, f_sav;
3190 tree gpr, fpr, ovf, sav, t;
3191 int size, rsize;
3192 rtx lab_false, lab_over = NULL_RTX;
3193 rtx addr_rtx, r;
3194 rtx container;
3195 int indirect_p = 0;
3196
3197 /* Only 64bit target needs something special. */
3198 if (!TARGET_64BIT)
3199 {
3200 return std_expand_builtin_va_arg (valist, type);
3201 }
3202
3203 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3204 f_fpr = TREE_CHAIN (f_gpr);
3205 f_ovf = TREE_CHAIN (f_fpr);
3206 f_sav = TREE_CHAIN (f_ovf);
3207
3208 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3209 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3210 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3211 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3212 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3213
3214 size = int_size_in_bytes (type);
3215 if (size == -1)
3216 {
3217 /* Passed by reference. */
3218 indirect_p = 1;
3219 type = build_pointer_type (type);
3220 size = int_size_in_bytes (type);
3221 }
3222 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3223
3224 container = construct_container (TYPE_MODE (type), type, 0,
3225 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3226 /*
3227 * Pull the value out of the saved registers ...
3228 */
3229
3230 addr_rtx = gen_reg_rtx (Pmode);
3231
3232 if (container)
3233 {
3234 rtx int_addr_rtx, sse_addr_rtx;
3235 int needed_intregs, needed_sseregs;
3236 int need_temp;
3237
3238 lab_over = gen_label_rtx ();
3239 lab_false = gen_label_rtx ();
3240
3241 examine_argument (TYPE_MODE (type), type, 0,
3242 &needed_intregs, &needed_sseregs);
3243
3244
3245 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3246 || TYPE_ALIGN (type) > 128);
3247
3248 /* In case we are passing a structure, verify that it is a consecutive block
3249 on the register save area. If not, we need to do moves. */
3250 if (!need_temp && !REG_P (container))
3251 {
3252 /* Verify that all registers are strictly consecutive */
3253 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3254 {
3255 int i;
3256
3257 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3258 {
3259 rtx slot = XVECEXP (container, 0, i);
3260 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3261 || INTVAL (XEXP (slot, 1)) != i * 16)
3262 need_temp = 1;
3263 }
3264 }
3265 else
3266 {
3267 int i;
3268
3269 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3270 {
3271 rtx slot = XVECEXP (container, 0, i);
3272 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3273 || INTVAL (XEXP (slot, 1)) != i * 8)
3274 need_temp = 1;
3275 }
3276 }
3277 }
3278 if (!need_temp)
3279 {
3280 int_addr_rtx = addr_rtx;
3281 sse_addr_rtx = addr_rtx;
3282 }
3283 else
3284 {
3285 int_addr_rtx = gen_reg_rtx (Pmode);
3286 sse_addr_rtx = gen_reg_rtx (Pmode);
3287 }
3288 /* First ensure that we fit completely in registers. */
3289 if (needed_intregs)
3290 {
3291 emit_cmp_and_jump_insns (expand_expr
3292 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3293 GEN_INT ((REGPARM_MAX - needed_intregs +
3294 1) * 8), GE, const1_rtx, SImode,
3295 1, lab_false);
3296 }
3297 if (needed_sseregs)
3298 {
3299 emit_cmp_and_jump_insns (expand_expr
3300 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3301 GEN_INT ((SSE_REGPARM_MAX -
3302 needed_sseregs + 1) * 16 +
3303 REGPARM_MAX * 8), GE, const1_rtx,
3304 SImode, 1, lab_false);
3305 }
3306
3307 /* Compute index to start of area used for integer regs. */
3308 if (needed_intregs)
3309 {
3310 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3311 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3312 if (r != int_addr_rtx)
3313 emit_move_insn (int_addr_rtx, r);
3314 }
3315 if (needed_sseregs)
3316 {
3317 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3318 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3319 if (r != sse_addr_rtx)
3320 emit_move_insn (sse_addr_rtx, r);
3321 }
3322 if (need_temp)
3323 {
3324 int i;
3325 rtx mem;
3326 rtx x;
3327
3328 /* Never use the memory itself, as it has the alias set. */
3329 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3330 mem = gen_rtx_MEM (BLKmode, x);
3331 force_operand (x, addr_rtx);
3332 set_mem_alias_set (mem, get_varargs_alias_set ());
3333 set_mem_align (mem, BITS_PER_UNIT);
3334
3335 for (i = 0; i < XVECLEN (container, 0); i++)
3336 {
3337 rtx slot = XVECEXP (container, 0, i);
3338 rtx reg = XEXP (slot, 0);
3339 enum machine_mode mode = GET_MODE (reg);
3340 rtx src_addr;
3341 rtx src_mem;
3342 int src_offset;
3343 rtx dest_mem;
3344
3345 if (SSE_REGNO_P (REGNO (reg)))
3346 {
3347 src_addr = sse_addr_rtx;
3348 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3349 }
3350 else
3351 {
3352 src_addr = int_addr_rtx;
3353 src_offset = REGNO (reg) * 8;
3354 }
3355 src_mem = gen_rtx_MEM (mode, src_addr);
3356 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3357 src_mem = adjust_address (src_mem, mode, src_offset);
3358 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3359 emit_move_insn (dest_mem, src_mem);
3360 }
3361 }
3362
3363 if (needed_intregs)
3364 {
3365 t =
3366 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3367 build_int_2 (needed_intregs * 8, 0));
3368 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3369 TREE_SIDE_EFFECTS (t) = 1;
3370 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3371 }
3372 if (needed_sseregs)
3373 {
3374 t =
3375 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3376 build_int_2 (needed_sseregs * 16, 0));
3377 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3378 TREE_SIDE_EFFECTS (t) = 1;
3379 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3380 }
3381
3382 emit_jump_insn (gen_jump (lab_over));
3383 emit_barrier ();
3384 emit_label (lab_false);
3385 }
3386
3387 /* ... otherwise out of the overflow area. */
3388
3389 /* Care for on-stack alignment if needed. */
3390 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3391 t = ovf;
3392 else
3393 {
3394 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3395 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3396 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3397 }
3398 t = save_expr (t);
3399
3400 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3401 if (r != addr_rtx)
3402 emit_move_insn (addr_rtx, r);
3403
3404 t =
3405 build (PLUS_EXPR, TREE_TYPE (t), t,
3406 build_int_2 (rsize * UNITS_PER_WORD, 0));
3407 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3408 TREE_SIDE_EFFECTS (t) = 1;
3409 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3410
3411 if (container)
3412 emit_label (lab_over);
3413
3414 if (indirect_p)
3415 {
3416 r = gen_rtx_MEM (Pmode, addr_rtx);
3417 set_mem_alias_set (r, get_varargs_alias_set ());
3418 emit_move_insn (addr_rtx, r);
3419 }
3420
3421 return addr_rtx;
3422}
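/* Editor's note, not part of the original source: a sketch of the 64-bit
   va_list layout that ix86_va_arg above manipulates, following the SysV
   AMD64 ABI (field names as in the ABI; the f_gpr/f_fpr/f_ovf/f_sav
   fields walked above correspond to them in this order):

       typedef struct {
         unsigned int gp_offset;      /+ next integer reg slot, 0..48   +/
         unsigned int fp_offset;      /+ next SSE reg slot, 48..176     +/
         void *overflow_arg_area;     /+ next stack-passed argument     +/
         void *reg_save_area;         /+ filled by the prologue         +/
       } va_list[1];

   The cmp-and-jump code above takes the register path while gp_offset and
   fp_offset show that enough registers remain, and otherwise jumps to
   lab_false and reads the value from overflow_arg_area.  */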
3423
3424/* Return nonzero if OP is either a i387 or SSE fp register. */
3425int
3426any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3427{
3428 return ANY_FP_REG_P (op);
3429}
3430
3431/* Return nonzero if OP is an i387 fp register. */
3432int
3433fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3434{
3435 return FP_REG_P (op);
3436}
3437
3438/* Return nonzero if OP is a non-fp register_operand. */
3439int
3440register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3441{
3442 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3443}
3444
3445/* Return nonzero if OP is a register operand other than an
3446 i387 fp register. */
3447int
3448register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3449{
3450 return register_operand (op, mode) && !FP_REG_P (op);
3451}
3452
3453/* Return nonzero if OP is general operand representable on x86_64. */
3454
3455int
3456x86_64_general_operand (rtx op, enum machine_mode mode)
3457{
3458 if (!TARGET_64BIT)
3459 return general_operand (op, mode);
3460 if (nonimmediate_operand (op, mode))
3461 return 1;
3462 return x86_64_sign_extended_value (op);
3463}
3464
3465/* Return nonzero if OP is general operand representable on x86_64
3466 as either sign extended or zero extended constant. */
3467
3468int
3469x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3470{
3471 if (!TARGET_64BIT)
3472 return general_operand (op, mode);
3473 if (nonimmediate_operand (op, mode))
3474 return 1;
3475 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3476}
3477
3478/* Return nonzero if OP is nonmemory operand representable on x86_64. */
3479
3480int
3481x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3482{
3483 if (!TARGET_64BIT)
3484 return nonmemory_operand (op, mode);
3485 if (register_operand (op, mode))
3486 return 1;
3487 return x86_64_sign_extended_value (op);
3488}
3489
3490/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3491
3492int
3493x86_64_movabs_operand (rtx op, enum machine_mode mode)
3494{
3495 if (!TARGET_64BIT || !flag_pic)
3496 return nonmemory_operand (op, mode);
3497 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3498 return 1;
3499 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3500 return 1;
3501 return 0;
3502}
3503
3504/* Return nonzero if OPNUM's MEM should be matched
3505 in movabs* patterns. */
3506
3507int
3508ix86_check_movabs (rtx insn, int opnum)
3509{
3510 rtx set, mem;
3511
3512 set = PATTERN (insn);
3513 if (GET_CODE (set) == PARALLEL)
3514 set = XVECEXP (set, 0, 0);
3515 if (GET_CODE (set) != SET)
3516 abort ();
3517 mem = XEXP (set, opnum);
3518 while (GET_CODE (mem) == SUBREG)
3519 mem = SUBREG_REG (mem);
3520 if (GET_CODE (mem) != MEM)
3521 abort ();
3522 return (volatile_ok || !MEM_VOLATILE_P (mem));
3523}
3524
3525 /* Return nonzero if OP is a nonmemory operand representable on x86_64 as either a sign extended or zero extended constant. */
3526
3527int
3528x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3529{
3530 if (!TARGET_64BIT)
3531 return nonmemory_operand (op, mode);
3532 if (register_operand (op, mode))
3533 return 1;
3534 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3535}
3536
3537/* Return nonzero if OP is immediate operand representable on x86_64. */
3538
3539int
3540x86_64_immediate_operand (rtx op, enum machine_mode mode)
3541{
3542 if (!TARGET_64BIT)
3543 return immediate_operand (op, mode);
3544 return x86_64_sign_extended_value (op);
3545}
3546
3547 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended constant. */
3548
3549int
3550x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3551{
3552 return x86_64_zero_extended_value (op);
3553}
3554
3555/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3556 for shift & compare patterns, as shifting by 0 does not change flags),
3557 else return zero. */
3558
3559int
3560const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3561{
3562 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3563}
3564
3565/* Returns 1 if OP is either a symbol reference or a sum of a symbol
3566 reference and a constant. */
3567
3568int
3569symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3570{
3571 switch (GET_CODE (op))
3572 {
3573 case SYMBOL_REF:
3574 case LABEL_REF:
3575 return 1;
3576
3577 case CONST:
3578 op = XEXP (op, 0);
3579 if (GET_CODE (op) == SYMBOL_REF
3580 || GET_CODE (op) == LABEL_REF
3581 || (GET_CODE (op) == UNSPEC
3582 && (XINT (op, 1) == UNSPEC_GOT
3583 || XINT (op, 1) == UNSPEC_GOTOFF
3584 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3585 return 1;
3586 if (GET_CODE (op) != PLUS
3587 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3588 return 0;
3589
3590 op = XEXP (op, 0);
3591 if (GET_CODE (op) == SYMBOL_REF
3592 || GET_CODE (op) == LABEL_REF)
3593 return 1;
3594 /* Only @GOTOFF gets offsets. */
3595 if (GET_CODE (op) != UNSPEC
3596 || XINT (op, 1) != UNSPEC_GOTOFF)
3597 return 0;
3598
3599 op = XVECEXP (op, 0, 0);
3600 if (GET_CODE (op) == SYMBOL_REF
3601 || GET_CODE (op) == LABEL_REF)
3602 return 1;
3603 return 0;
3604
3605 default:
3606 return 0;
3607 }
3608}
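/* Editor's note, not part of the original source: examples of the RTL
   shapes symbolic_operand accepts, as an illustration of the cases above:
       (symbol_ref "foo")
       (label_ref 25)
       (const (plus (symbol_ref "foo") (const_int 8)))
       (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
       (const (plus (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF) (const_int 8)))
   Offsets are accepted only on plain symbols/labels and on @GOTOFF
   references; offsetted @GOT or @GOTPCREL forms are rejected.  */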
3609
3610/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3611
3612int
3613pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3614{
3615 if (GET_CODE (op) != CONST)
3616 return 0;
3617 op = XEXP (op, 0);
3618 if (TARGET_64BIT)
3619 {
3620 if (GET_CODE (op) == UNSPEC
3621 && XINT (op, 1) == UNSPEC_GOTPCREL)
3622 return 1;
3623 if (GET_CODE (op) == PLUS
3624 && GET_CODE (XEXP (op, 0)) == UNSPEC
3625 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3626 return 1;
3627 }
3628 else
3629 {
3630 if (GET_CODE (op) == UNSPEC)
3631 return 1;
3632 if (GET_CODE (op) != PLUS
3633 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3634 return 0;
3635 op = XEXP (op, 0);
3636 if (GET_CODE (op) == UNSPEC)
3637 return 1;
3638 }
3639 return 0;
3640}
3641
3642/* Return true if OP is a symbolic operand that resolves locally. */
3643
3644static int
3645local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3646{
3647 if (GET_CODE (op) == CONST
3648 && GET_CODE (XEXP (op, 0)) == PLUS
3649 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3650 op = XEXP (XEXP (op, 0), 0);
3651
3652 if (GET_CODE (op) == LABEL_REF)
3653 return 1;
3654
3655 if (GET_CODE (op) != SYMBOL_REF)
3656 return 0;
3657
3658 if (SYMBOL_REF_LOCAL_P (op))
3659 return 1;
3660
3661 /* There is, however, a not insubstantial body of code in the rest of
3662 the compiler that assumes it can just stick the results of
3663 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3664 /* ??? This is a hack. Should update the body of the compiler to
3665 always create a DECL and invoke targetm.encode_section_info. */
3666 if (strncmp (XSTR (op, 0), internal_label_prefix,
3667 internal_label_prefix_len) == 0)
3668 return 1;
3669
3670 return 0;
3671}
3672
3673/* Test for various thread-local symbols. */
3674
3675int
3676tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3677{
3678 if (GET_CODE (op) != SYMBOL_REF)
3679 return 0;
3680 return SYMBOL_REF_TLS_MODEL (op);
3681}
3682
3683static inline int
3684tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3685{
3686 if (GET_CODE (op) != SYMBOL_REF)
3687 return 0;
3688 return SYMBOL_REF_TLS_MODEL (op) == kind;
3689}
3690
3691int
3692global_dynamic_symbolic_operand (rtx op,
3693 enum machine_mode mode ATTRIBUTE_UNUSED)
3694{
3695 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3696}
3697
3698int
3699local_dynamic_symbolic_operand (rtx op,
3700 enum machine_mode mode ATTRIBUTE_UNUSED)
3701{
3702 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3703}
3704
3705int
3706initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3707{
3708 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3709}
3710
3711int
3712local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3713{
3714 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3715}
3716
3717/* Test for a valid operand for a call instruction. Don't allow the
3718 arg pointer register or virtual regs since they may decay into
3719 reg + const, which the patterns can't handle. */
3720
3721int
3722call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3723{
3724 /* Disallow indirect through a virtual register. This leads to
3725 compiler aborts when trying to eliminate them. */
3726 if (GET_CODE (op) == REG
3727 && (op == arg_pointer_rtx
3728 || op == frame_pointer_rtx
3729 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3730 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3731 return 0;
3732
3733 /* Disallow `call 1234'. Due to varying assembler lameness this
3734 gets either rejected or translated to `call .+1234'. */
3735 if (GET_CODE (op) == CONST_INT)
3736 return 0;
3737
3738 /* Explicitly allow SYMBOL_REF even if pic. */
3739 if (GET_CODE (op) == SYMBOL_REF)
3740 return 1;
3741
3742 /* Otherwise we can allow any general_operand in the address. */
3743 return general_operand (op, Pmode);
3744}
3745
3746/* Test for a valid operand for a call instruction. Don't allow the
3747 arg pointer register or virtual regs since they may decay into
3748 reg + const, which the patterns can't handle. */
3749
3750int
3751sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3752{
3753 /* Disallow indirect through a virtual register. This leads to
3754 compiler aborts when trying to eliminate them. */
3755 if (GET_CODE (op) == REG
3756 && (op == arg_pointer_rtx
3757 || op == frame_pointer_rtx
3758 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3759 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3760 return 0;
3761
3762 /* Explicitly allow SYMBOL_REF even if pic. */
3763 if (GET_CODE (op) == SYMBOL_REF)
3764 return 1;
3765
3766 /* Otherwise we can only allow register operands. */
3767 return register_operand (op, Pmode);
3768}
3769
3770int
3771constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3772{
3773 if (GET_CODE (op) == CONST
3774 && GET_CODE (XEXP (op, 0)) == PLUS
3775 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3776 op = XEXP (XEXP (op, 0), 0);
3777 return GET_CODE (op) == SYMBOL_REF;
3778}
3779
3780/* Match exactly zero and one. */
3781
3782int
3783const0_operand (rtx op, enum machine_mode mode)
3784{
3785 return op == CONST0_RTX (mode);
3786}
3787
3788int
3789const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3790{
3791 return op == const1_rtx;
3792}
3793
3794/* Match 2, 4, or 8. Used for leal multiplicands. */
3795
3796int
3797const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3798{
3799 return (GET_CODE (op) == CONST_INT
3800 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3801}
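/* Editor's note, not part of the original source: const248_operand
   matches the scale factors expressible in a SIB byte, e.g. the "4" in
   "leal (%eax,%ebx,4), %ecx"; a scale of 1 needs no multiply and is
   handled as a plain index register.  */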
3802
3803int
3804const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3805{
3806 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3807}
3808
3809int
3810const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3811{
3812 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3813}
3814
3815int
3816const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3817{
3818 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3819}
3820
3821int
3822const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3823{
3824 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3825}
3826
3827
3828/* True if this is a constant appropriate for an increment or decrement. */
3829
3830int
3831incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3832{
3833 /* On Pentium4, the inc and dec operations cause an extra dependency on the flags
3834 register, since the carry flag is not set. */
3835 if (TARGET_PENTIUM4 && !optimize_size)
3836 return 0;
3837 return op == const1_rtx || op == constm1_rtx;
3838}
3839
3840/* Return nonzero if OP is acceptable as operand of DImode shift
3841 expander. */
3842
3843int
3844shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3845{
3846 if (TARGET_64BIT)
3847 return nonimmediate_operand (op, mode);
3848 else
3849 return register_operand (op, mode);
3850}
3851
3852/* Return false if this is the stack pointer, or any other fake
3853 register eliminable to the stack pointer. Otherwise, this is
3854 a register operand.
3855
3856 This is used to prevent esp from being used as an index reg,
3857 which would only happen in pathological cases. */
3858
3859int
3860reg_no_sp_operand (rtx op, enum machine_mode mode)
3861{
3862 rtx t = op;
3863 if (GET_CODE (t) == SUBREG)
3864 t = SUBREG_REG (t);
3865 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3866 return 0;
3867
3868 return register_operand (op, mode);
3869}
3870
3871int
3872mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3873{
3874 return MMX_REG_P (op);
3875}
3876
3877/* Return false if this is any eliminable register. Otherwise
3878 general_operand. */
3879
3880int
3881general_no_elim_operand (rtx op, enum machine_mode mode)
3882{
3883 rtx t = op;
3884 if (GET_CODE (t) == SUBREG)
3885 t = SUBREG_REG (t);
3886 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3887 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3888 || t == virtual_stack_dynamic_rtx)
3889 return 0;
3890 if (REG_P (t)
3891 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3892 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3893 return 0;
3894
3895 return general_operand (op, mode);
3896}
3897
3898/* Return false if this is any eliminable register. Otherwise
3899 register_operand or const_int. */
3900
3901int
3902nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3903{
3904 rtx t = op;
3905 if (GET_CODE (t) == SUBREG)
3906 t = SUBREG_REG (t);
3907 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3908 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3909 || t == virtual_stack_dynamic_rtx)
3910 return 0;
3911
3912 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3913}
3914
3915/* Return false if this is any eliminable register or stack register,
3916 otherwise work like register_operand. */
3917
3918int
3919index_register_operand (rtx op, enum machine_mode mode)
3920{
3921 rtx t = op;
3922 if (GET_CODE (t) == SUBREG)
3923 t = SUBREG_REG (t);
3924 if (!REG_P (t))
3925 return 0;
3926 if (t == arg_pointer_rtx
3927 || t == frame_pointer_rtx
3928 || t == virtual_incoming_args_rtx
3929 || t == virtual_stack_vars_rtx
3930 || t == virtual_stack_dynamic_rtx
3931 || REGNO (t) == STACK_POINTER_REGNUM)
3932 return 0;
3933
3934 return general_operand (op, mode);
3935}
3936
3937/* Return true if op is a Q_REGS class register. */
3938
3939int
3940q_regs_operand (rtx op, enum machine_mode mode)
3941{
3942 if (mode != VOIDmode && GET_MODE (op) != mode)
3943 return 0;
3944 if (GET_CODE (op) == SUBREG)
3945 op = SUBREG_REG (op);
3946 return ANY_QI_REG_P (op);
3947}
3948
3949 /* Return true if op is a flags register. */
3950
3951int
3952flags_reg_operand (rtx op, enum machine_mode mode)
3953{
3954 if (mode != VOIDmode && GET_MODE (op) != mode)
3955 return 0;
3956 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3957}
3958
3959/* Return true if op is a NON_Q_REGS class register. */
3960
3961int
3962non_q_regs_operand (rtx op, enum machine_mode mode)
3963{
3964 if (mode != VOIDmode && GET_MODE (op) != mode)
3965 return 0;
3966 if (GET_CODE (op) == SUBREG)
3967 op = SUBREG_REG (op);
3968 return NON_QI_REG_P (op);
3969}
3970
3971int
3972zero_extended_scalar_load_operand (rtx op,
3973 enum machine_mode mode ATTRIBUTE_UNUSED)
3974{
3975 unsigned n_elts;
3976 if (GET_CODE (op) != MEM)
3977 return 0;
3978 op = maybe_get_pool_constant (op);
3979 if (!op)
3980 return 0;
3981 if (GET_CODE (op) != CONST_VECTOR)
3982 return 0;
3983 n_elts =
3984 (GET_MODE_SIZE (GET_MODE (op)) /
3985 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3986 for (n_elts--; n_elts > 0; n_elts--)
3987 {
3988 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3989 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3990 return 0;
3991 }
3992 return 1;
3993}
3994
3995 /* Return 1 when OP is an operand acceptable for a standard SSE move. */
3996int
3997vector_move_operand (rtx op, enum machine_mode mode)
3998{
3999 if (nonimmediate_operand (op, mode))
4000 return 1;
4001 if (GET_MODE (op) != mode && mode != VOIDmode)
4002 return 0;
4003 return (op == CONST0_RTX (GET_MODE (op)));
4004}
4005
4006 /* Return true if op is a valid address, and does not contain
4007 a segment override. */
4008
4009int
4010no_seg_address_operand (rtx op, enum machine_mode mode)
4011{
4012 struct ix86_address parts;
4013
4014 if (! address_operand (op, mode))
4015 return 0;
4016
4017 if (! ix86_decompose_address (op, &parts))
4018 abort ();
4019
4020 return parts.seg == SEG_DEFAULT;
4021}
4022
4023/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4024 insns. */
4025int
4026sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4027{
4028 enum rtx_code code = GET_CODE (op);
4029 switch (code)
4030 {
4031 /* Operations supported directly. */
4032 case EQ:
4033 case LT:
4034 case LE:
4035 case UNORDERED:
4036 case NE:
4037 case UNGE:
4038 case UNGT:
4039 case ORDERED:
4040 return 1;
4041 /* These are equivalent to ones above in non-IEEE comparisons. */
4042 case UNEQ:
4043 case UNLT:
4044 case UNLE:
4045 case LTGT:
4046 case GE:
4047 case GT:
4048 return !TARGET_IEEE_FP;
4049 default:
4050 return 0;
4051 }
4052}
4053/* Return 1 if OP is a valid comparison operator in valid mode. */
4054int
4055ix86_comparison_operator (rtx op, enum machine_mode mode)
4056{
4057 enum machine_mode inmode;
4058 enum rtx_code code = GET_CODE (op);
4059 if (mode != VOIDmode && GET_MODE (op) != mode)
4060 return 0;
4061 if (GET_RTX_CLASS (code) != '<')
4062 return 0;
4063 inmode = GET_MODE (XEXP (op, 0));
4064
4065 if (inmode == CCFPmode || inmode == CCFPUmode)
4066 {
4067 enum rtx_code second_code, bypass_code;
4068 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4069 return (bypass_code == NIL && second_code == NIL);
4070 }
4071 switch (code)
4072 {
4073 case EQ: case NE:
4074 return 1;
4075 case LT: case GE:
4076 if (inmode == CCmode || inmode == CCGCmode
4077 || inmode == CCGOCmode || inmode == CCNOmode)
4078 return 1;
4079 return 0;
4080 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4081 if (inmode == CCmode)
4082 return 1;
4083 return 0;
4084 case GT: case LE:
4085 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4086 return 1;
4087 return 0;
4088 default:
4089 return 0;
4090 }
4091}
4092
4093/* Return 1 if OP is a valid comparison operator testing carry flag
4094 to be set. */
4095int
4096ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4097{
4098 enum machine_mode inmode;
4099 enum rtx_code code = GET_CODE (op);
4100
4101 if (mode != VOIDmode && GET_MODE (op) != mode)
4102 return 0;
4103 if (GET_RTX_CLASS (code) != '<')
4104 return 0;
4105 inmode = GET_MODE (XEXP (op, 0));
4106 if (GET_CODE (XEXP (op, 0)) != REG
4107 || REGNO (XEXP (op, 0)) != 17
4108 || XEXP (op, 1) != const0_rtx)
4109 return 0;
4110
4111 if (inmode == CCFPmode || inmode == CCFPUmode)
4112 {
4113 enum rtx_code second_code, bypass_code;
4114
4115 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4116 if (bypass_code != NIL || second_code != NIL)
4117 return 0;
4118 code = ix86_fp_compare_code_to_integer (code);
4119 }
4120 else if (inmode != CCmode)
4121 return 0;
4122 return code == LTU;
4123}
4124
4125/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4126
4127int
4128fcmov_comparison_operator (rtx op, enum machine_mode mode)
4129{
4130 enum machine_mode inmode;
4131 enum rtx_code code = GET_CODE (op);
4132
4133 if (mode != VOIDmode && GET_MODE (op) != mode)
4134 return 0;
4135 if (GET_RTX_CLASS (code) != '<')
4136 return 0;
4137 inmode = GET_MODE (XEXP (op, 0));
4138 if (inmode == CCFPmode || inmode == CCFPUmode)
4139 {
4140 enum rtx_code second_code, bypass_code;
4141
4142 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4143 if (bypass_code != NIL || second_code != NIL)
4144 return 0;
4145 code = ix86_fp_compare_code_to_integer (code);
4146 }
4147 /* The i387 supports just a limited set of condition codes. */
4148 switch (code)
4149 {
4150 case LTU: case GTU: case LEU: case GEU:
4151 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4152 return 1;
4153 return 0;
4154 case ORDERED: case UNORDERED:
4155 case EQ: case NE:
4156 return 1;
4157 default:
4158 return 0;
4159 }
4160}
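/* Editor's note, not part of the original source: the restriction above
   mirrors the hardware. fcmov only exists in variants keyed to ZF, CF
   and PF (fcmove/fcmovne, fcmovb/fcmovnb, fcmovbe/fcmovnbe,
   fcmovu/fcmovnu), so only EQ/NE, the unsigned orderings and
   ORDERED/UNORDERED can be issued directly; signed comparisons have to
   be rewritten before reaching this predicate.  */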
4161
4162/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4163
4164int
4165promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4166{
4167 switch (GET_CODE (op))
4168 {
4169 case MULT:
4170 /* Modern CPUs have the same latency for HImode and SImode multiply,
4171 but the 386 and 486 do HImode multiply faster. */
4172 return ix86_tune > PROCESSOR_I486;
4173 case PLUS:
4174 case AND:
4175 case IOR:
4176 case XOR:
4177 case ASHIFT:
4178 return 1;
4179 default:
4180 return 0;
4181 }
4182}
4183
4184/* Nearly general operand, but accept any const_double, since we wish
4185 to be able to drop them into memory rather than have them get pulled
4186 into registers. */
4187
4188int
4189cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4190{
4191 if (mode != VOIDmode && mode != GET_MODE (op))
4192 return 0;
4193 if (GET_CODE (op) == CONST_DOUBLE)
4194 return 1;
4195 return general_operand (op, mode);
4196}
4197
4198/* Match an SI or HImode register for a zero_extract. */
4199
4200int
4201ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4202{
4203 int regno;
4204 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4205 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4206 return 0;
4207
4208 if (!register_operand (op, VOIDmode))
4209 return 0;
4210
4211 /* Be careful to accept only registers having upper parts. */
4212 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4213 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4214}
4215
4216/* Return 1 if this is a valid binary floating-point operation.
4217 OP is the expression matched, and MODE is its mode. */
4218
4219int
4220binary_fp_operator (rtx op, enum machine_mode mode)
4221{
4222 if (mode != VOIDmode && mode != GET_MODE (op))
4223 return 0;
4224
4225 switch (GET_CODE (op))
4226 {
4227 case PLUS:
4228 case MINUS:
4229 case MULT:
4230 case DIV:
4231 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4232
4233 default:
4234 return 0;
4235 }
4236}
4237
4238int
4239mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4240{
4241 return GET_CODE (op) == MULT;
4242}
4243
4244int
4245div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4246{
4247 return GET_CODE (op) == DIV;
4248}
4249
4250int
4251arith_or_logical_operator (rtx op, enum machine_mode mode)
4252{
4253 return ((mode == VOIDmode || GET_MODE (op) == mode)
4254 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4255 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4256}
4257
4258/* Returns 1 if OP is memory operand with a displacement. */
4259
4260int
4261memory_displacement_operand (rtx op, enum machine_mode mode)
4262{
4263 struct ix86_address parts;
4264
4265 if (! memory_operand (op, mode))
4266 return 0;
4267
4268 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4269 abort ();
4270
4271 return parts.disp != NULL_RTX;
4272}
4273
4274/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4275 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4276
4277 ??? It seems likely that this will only work because cmpsi is an
4278 expander, and no actual insns use this. */
4279
4280int
4281cmpsi_operand (rtx op, enum machine_mode mode)
4282{
4283 if (nonimmediate_operand (op, mode))
4284 return 1;
4285
4286 if (GET_CODE (op) == AND
4287 && GET_MODE (op) == SImode
4288 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4289 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4290 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4291 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4292 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4293 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4294 return 1;
4295
4296 return 0;
4297}
4298
4299/* Returns 1 if OP is memory operand that can not be represented by the
4300 modRM array. */
4301
4302int
4303long_memory_operand (rtx op, enum machine_mode mode)
4304{
4305 if (! memory_operand (op, mode))
4306 return 0;
4307
4308 return memory_address_length (op) != 0;
4309}
4310
4311/* Return nonzero if the rtx is known aligned. */
4312
4313int
4314aligned_operand (rtx op, enum machine_mode mode)
4315{
4316 struct ix86_address parts;
4317
4318 if (!general_operand (op, mode))
4319 return 0;
4320
4321 /* Registers and immediate operands are always "aligned". */
4322 if (GET_CODE (op) != MEM)
4323 return 1;
4324
4325 /* Don't even try to do any aligned optimizations with volatiles. */
4326 if (MEM_VOLATILE_P (op))
4327 return 0;
4328
4329 op = XEXP (op, 0);
4330
4331 /* Pushes and pops are only valid on the stack pointer. */
4332 if (GET_CODE (op) == PRE_DEC
4333 || GET_CODE (op) == POST_INC)
4334 return 1;
4335
4336 /* Decode the address. */
4337 if (! ix86_decompose_address (op, &parts))
4338 abort ();
4339
4340 /* Look for some component that isn't known to be aligned. */
4341 if (parts.index)
4342 {
4343 if (parts.scale < 4
4344 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4345 return 0;
4346 }
4347 if (parts.base)
4348 {
4349 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4350 return 0;
4351 }
4352 if (parts.disp)
4353 {
4354 if (GET_CODE (parts.disp) != CONST_INT
4355 || (INTVAL (parts.disp) & 3) != 0)
4356 return 0;
4357 }
4358
4359 /* Didn't find one -- this must be an aligned address. */
4360 return 1;
4361}
4362
4363int
4364compare_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4365{
4366 return GET_CODE (op) == COMPARE;
4367}
4368
4369/* Initialize the table of extra 80387 mathematical constants. */
4370
4371static void
4372init_ext_80387_constants (void)
4373{
4374 static const char * cst[5] =
4375 {
4376 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4377 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4378 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4379 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4380 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4381 };
4382 int i;
4383
4384 for (i = 0; i < 5; i++)
4385 {
4386 real_from_string (&ext_80387_constants_table[i], cst[i]);
4387 /* Ensure each constant is rounded to XFmode precision. */
4388 real_convert (&ext_80387_constants_table[i],
4389 XFmode, &ext_80387_constants_table[i]);
4390 }
4391
4392 ext_80387_constants_init = 1;
4393}
4394
4395/* Return true if the constant is something that can be loaded with
4396 a special instruction. */
4397
4398int
4399standard_80387_constant_p (rtx x)
4400{
4401 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4402 return -1;
4403
4404 if (x == CONST0_RTX (GET_MODE (x)))
4405 return 1;
4406 if (x == CONST1_RTX (GET_MODE (x)))
4407 return 2;
4408
4409 /* For XFmode constants, try to find a special 80387 instruction on
4410 those CPUs that benefit from them. */
4411 if (GET_MODE (x) == XFmode
4412 && x86_ext_80387_constants & TUNEMASK)
4413 {
4414 REAL_VALUE_TYPE r;
4415 int i;
4416
4417 if (! ext_80387_constants_init)
4418 init_ext_80387_constants ();
4419
4420 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4421 for (i = 0; i < 5; i++)
4422 if (real_identical (&r, &ext_80387_constants_table[i]))
4423 return i + 3;
4424 }
4425
4426 return 0;
4427}
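/* Editor's note, not part of the original source: the return-value
   convention shared by standard_80387_constant_p above and
   standard_80387_constant_opcode/standard_80387_constant_rtx below is:
      -1  not a recognized FP constant
       0  FP constant, but no single-instruction load
       1  +0.0      (fldz)         2  +1.0       (fld1)
       3  log10(2)  (fldlg2)       4  ln(2)      (fldln2)
       5  log2(e)   (fldl2e)       6  log2(10)   (fldl2t)
       7  pi        (fldpi)  */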
4428
4429/* Return the opcode of the special instruction to be used to load
4430 the constant X. */
4431
4432const char *
4433standard_80387_constant_opcode (rtx x)
4434{
4435 switch (standard_80387_constant_p (x))
4436 {
4437 case 1:
4438 return "fldz";
4439 case 2:
4440 return "fld1";
4441 case 3:
4442 return "fldlg2";
4443 case 4:
4444 return "fldln2";
4445 case 5:
4446 return "fldl2e";
4447 case 6:
4448 return "fldl2t";
4449 case 7:
4450 return "fldpi";
4451 }
4452 abort ();
4453}
4454
4455/* Return the CONST_DOUBLE representing the 80387 constant that is
4456 loaded by the specified special instruction. The argument IDX
4457 matches the return value from standard_80387_constant_p. */
4458
4459rtx
4460standard_80387_constant_rtx (int idx)
4461{
4462 int i;
4463
4464 if (! ext_80387_constants_init)
4465 init_ext_80387_constants ();
4466
4467 switch (idx)
4468 {
4469 case 3:
4470 case 4:
4471 case 5:
4472 case 6:
4473 case 7:
4474 i = idx - 3;
4475 break;
4476
4477 default:
4478 abort ();
4479 }
4480
4481 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4482 XFmode);
4483}
4484
4485/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4486 */
4487int
4488standard_sse_constant_p (rtx x)
4489{
4490 if (x == const0_rtx)
4491 return 1;
4492 return (x == CONST0_RTX (GET_MODE (x)));
4493}
4494
4495/* Returns 1 if OP contains a symbol reference */
4496
4497int
4498symbolic_reference_mentioned_p (rtx op)
4499{
4500 const char *fmt;
4501 int i;
4502
4503 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4504 return 1;
4505
4506 fmt = GET_RTX_FORMAT (GET_CODE (op));
4507 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4508 {
4509 if (fmt[i] == 'E')
4510 {
4511 int j;
4512
4513 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4514 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4515 return 1;
4516 }
4517
4518 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4519 return 1;
4520 }
4521
4522 return 0;
4523}
4524
4525/* Return 1 if it is appropriate to emit `ret' instructions in the
4526 body of a function. Do this only if the epilogue is simple, needing a
4527 couple of insns. Prior to reloading, we can't tell how many registers
4528 must be saved, so return 0 then. Return 0 if there is no frame
4529 marker to de-allocate.
4530
4531 If NON_SAVING_SETJMP is defined and true, then it is not possible
4532 for the epilogue to be simple, so return 0. This is a special case
4533 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4534 until final, but jump_optimize may need to know sooner if a
4535 `return' is OK. */
4536
4537int
4538ix86_can_use_return_insn_p (void)
4539{
4540 struct ix86_frame frame;
4541
4542#ifdef NON_SAVING_SETJMP
4543 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4544 return 0;
4545#endif
4546
4547 if (! reload_completed || frame_pointer_needed)
4548 return 0;
4549
4550 /* Don't allow more than 32768 bytes of pop, since that's all we can do
4551 with one instruction. */
4552 if (current_function_pops_args
4553 && current_function_args_size >= 32768)
4554 return 0;
4555
4556 ix86_compute_frame_layout (&frame);
4557 return frame.to_allocate == 0 && frame.nregs == 0;
4558}
4559
4560/* Return 1 if VALUE can be stored in the sign extended immediate field. */
4561int
4562x86_64_sign_extended_value (rtx value)
4563{
4564 switch (GET_CODE (value))
4565 {
4566 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4567 to be at least 32 and thus all acceptable constants are
4568 represented as CONST_INT. */
4569 case CONST_INT:
4570 if (HOST_BITS_PER_WIDE_INT == 32)
4571 return 1;
4572 else
4573 {
4574 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4575 return trunc_int_for_mode (val, SImode) == val;
4576 }
4577 break;
4578
4579 /* For certain code models, the symbolic references are known to fit:
4580 in the CM_SMALL_PIC model we know it fits if it is local to the shared
4581 library. Don't count TLS SYMBOL_REFs here, since they should fit
4582 only if inside of UNSPEC handled below. */
4583 case SYMBOL_REF:
4584 /* TLS symbols are not constant. */
4585 if (tls_symbolic_operand (value, Pmode))
4586 return false;
4587 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4588
4589 /* For certain code models, the code is near as well. */
4590 case LABEL_REF:
4591 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4592 || ix86_cmodel == CM_KERNEL);
4593
4594 /* We also may accept the offsetted memory references in certain special
4595 cases. */
4596 case CONST:
4597 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4598 switch (XINT (XEXP (value, 0), 1))
4599 {
4600 case UNSPEC_GOTPCREL:
4601 case UNSPEC_DTPOFF:
4602 case UNSPEC_GOTNTPOFF:
4603 case UNSPEC_NTPOFF:
4604 return 1;
4605 default:
4606 break;
4607 }
4608 if (GET_CODE (XEXP (value, 0)) == PLUS)
4609 {
4610 rtx op1 = XEXP (XEXP (value, 0), 0);
4611 rtx op2 = XEXP (XEXP (value, 0), 1);
4612 HOST_WIDE_INT offset;
4613
4614 if (ix86_cmodel == CM_LARGE)
4615 return 0;
4616 if (GET_CODE (op2) != CONST_INT)
4617 return 0;
4618 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4619 switch (GET_CODE (op1))
4620 {
4621 case SYMBOL_REF:
4622 /* For CM_SMALL assume that the latest object is 16MB before the
4623 end of the 31-bit boundary. We may also accept pretty
4624 large negative constants, knowing that all objects are
4625 in the positive half of the address space. */
4626 if (ix86_cmodel == CM_SMALL
4627 && offset < 16*1024*1024
4628 && trunc_int_for_mode (offset, SImode) == offset)
4629 return 1;
4630 /* For CM_KERNEL we know that all objects reside in the
4631 negative half of the 32-bit address space. We may not
4632 accept negative offsets, since they may be just off
4633 and we may accept pretty large positive ones. */
4634 if (ix86_cmodel == CM_KERNEL
4635 && offset > 0
4636 && trunc_int_for_mode (offset, SImode) == offset)
4637 return 1;
4638 break;
4639 case LABEL_REF:
4640 /* These conditions are similar to SYMBOL_REF ones, just the
4641 constraints for code models differ. */
4642 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4643 && offset < 16*1024*1024
4644 && trunc_int_for_mode (offset, SImode) == offset)
4645 return 1;
4646 if (ix86_cmodel == CM_KERNEL
4647 && offset > 0
4648 && trunc_int_for_mode (offset, SImode) == offset)
4649 return 1;
4650 break;
4651 case UNSPEC:
4652 switch (XINT (op1, 1))
4653 {
4654 case UNSPEC_DTPOFF:
4655 case UNSPEC_NTPOFF:
4656 if (offset > 0
4657 && trunc_int_for_mode (offset, SImode) == offset)
4658 return 1;
4659 }
4660 break;
4661 default:
4662 return 0;
4663 }
4664 }
4665 return 0;
4666 default:
4667 return 0;
4668 }
4669}
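/* Editor's note, not part of the original source: concrete 64-bit
   examples of the distinction drawn by x86_64_sign_extended_value above
   and x86_64_zero_extended_value below:
       0x000000007fffffff  sign- and zero-extendable  (fits either form)
       0xffffffff80000000  sign-extendable only       (imm32, sign extended)
       0x0000000080000000  zero-extendable only       (32-bit move zero extends)
       0x0000000100000000  neither -- needs a full 64-bit movabs.  */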
4670
4671/* Return 1 if VALUE can be stored in the zero extended immediate field. */
4672int
4673x86_64_zero_extended_value (rtx value)
4674{
4675 switch (GET_CODE (value))
4676 {
4677 case CONST_DOUBLE:
4678 if (HOST_BITS_PER_WIDE_INT == 32)
4679 return (GET_MODE (value) == VOIDmode
4680 && !CONST_DOUBLE_HIGH (value));
4681 else
4682 return 0;
4683 case CONST_INT:
4684 if (HOST_BITS_PER_WIDE_INT == 32)
4685 return INTVAL (value) >= 0;
4686 else
4687 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4688 break;
4689
4690 /* For certain code models, the symbolic references are known to fit. */
4691 case SYMBOL_REF:
4692 /* TLS symbols are not constant. */
4693 if (tls_symbolic_operand (value, Pmode))
4694 return false;
4695 return ix86_cmodel == CM_SMALL;
4696
4697 /* For certain code models, the code is near as well. */
4698 case LABEL_REF:
4699 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4700
4701 /* We also may accept the offsetted memory references in certain special
4702 cases. */
4703 case CONST:
4704 if (GET_CODE (XEXP (value, 0)) == PLUS)
4705 {
4706 rtx op1 = XEXP (XEXP (value, 0), 0);
4707 rtx op2 = XEXP (XEXP (value, 0), 1);
4708
4709 if (ix86_cmodel == CM_LARGE)
4710 return 0;
4711 switch (GET_CODE (op1))
4712 {
4713 case SYMBOL_REF:
4714 return 0;
4715 /* For small code model we may accept pretty large positive
4716 offsets, since one bit is available for free. Negative
4717 offsets are limited by the size of NULL pointer area
4718 specified by the ABI. */
4719 if (ix86_cmodel == CM_SMALL
4720 && GET_CODE (op2) == CONST_INT
4721 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4722 && (trunc_int_for_mode (INTVAL (op2), SImode)
4723 == INTVAL (op2)))
4724 return 1;
4725 /* ??? For the kernel, we may accept adjustment of
4726 -0x10000000, since we know that it will just convert
4727 negative address space to positive, but perhaps this
4728 is not worthwhile. */
4729 break;
4730 case LABEL_REF:
4731 /* These conditions are similar to SYMBOL_REF ones, just the
4732 constraints for code models differ. */
4733 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4734 && GET_CODE (op2) == CONST_INT
4735 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4736 && (trunc_int_for_mode (INTVAL (op2), SImode)
4737 == INTVAL (op2)))
4738 return 1;
4739 break;
4740 default:
4741 return 0;
4742 }
4743 }
4744 return 0;
4745 default:
4746 return 0;
4747 }
4748}
4749
4750/* Value should be nonzero if functions must have frame pointers.
4751 Zero means the frame pointer need not be set up (and parms may
4752 be accessed via the stack pointer) in functions that seem suitable. */
4753
4754int
4755ix86_frame_pointer_required (void)
4756{
4757 /* If we accessed previous frames, then the generated code expects
4758 to be able to access the saved ebp value in our frame. */
4759 if (cfun->machine->accesses_prev_frame)
4760 return 1;
4761
4762 /* Several x86 os'es need a frame pointer for other reasons,
4763 usually pertaining to setjmp. */
4764 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4765 return 1;
4766
4767 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4768 the frame pointer by default. Turn it back on now if we've not
4769 got a leaf function. */
4770 if (TARGET_OMIT_LEAF_FRAME_POINTER
4771 && (!current_function_is_leaf))
4772 return 1;
4773
4774 if (current_function_profile)
4775 return 1;
4776
4777 return 0;
4778}
4779
4780/* Record that the current function accesses previous call frames. */
4781
4782void
4783ix86_setup_frame_addresses (void)
4784{
4785 cfun->machine->accesses_prev_frame = 1;
4786}
4787
4788#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4789# define USE_HIDDEN_LINKONCE 1
4790#else
4791# define USE_HIDDEN_LINKONCE 0
4792#endif
4793
4794static int pic_labels_used;
4795
4796/* Fills in the label name that should be used for a pc thunk for
4797 the given register. */
4798
4799static void
4800get_pc_thunk_name (char name[32], unsigned int regno)
4801{
4802 if (USE_HIDDEN_LINKONCE)
4803 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4804 else
4805 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4806}
4807
4808
4809 /* This function emits, for -fpic, the pc thunk routines that load a
4810 register with the return address of the caller and then return. */
4811
4812void
4813ix86_file_end (void)
4814{
4815 rtx xops[2];
4816 int regno;
4817
4818 for (regno = 0; regno < 8; ++regno)
4819 {
4820 char name[32];
4821
4822 if (! ((pic_labels_used >> regno) & 1))
4823 continue;
4824
4825 get_pc_thunk_name (name, regno);
4826
4827 if (USE_HIDDEN_LINKONCE)
4828 {
4829 tree decl;
4830
4831 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4832 error_mark_node);
4833 TREE_PUBLIC (decl) = 1;
4834 TREE_STATIC (decl) = 1;
4835 DECL_ONE_ONLY (decl) = 1;
4836
4837 (*targetm.asm_out.unique_section) (decl, 0);
4838 named_section (decl, NULL, 0);
4839
4840 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4841 fputs ("\t.hidden\t", asm_out_file);
4842 assemble_name (asm_out_file, name);
4843 fputc ('\n', asm_out_file);
4844 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4845 }
4846 else
4847 {
4848 text_section ();
4849 ASM_OUTPUT_LABEL (asm_out_file, name);
4850 }
4851
4852 xops[0] = gen_rtx_REG (SImode, regno);
4853 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4854 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4855 output_asm_insn ("ret", xops);
4856 }
4857
4858 if (NEED_INDICATE_EXEC_STACK)
4859 file_end_indicate_exec_stack ();
4860}
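/* Editor's note, not part of the original source: each pc thunk emitted
   by ix86_file_end above is simply

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   (with bx/%ebx replaced by whichever registers were recorded in
   pic_labels_used), giving position-independent code a way to obtain its
   own address for the GOT setup performed by output_set_got below.  */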
4861
4862/* Emit code for the SET_GOT patterns. */
4863
4864const char *
4865output_set_got (rtx dest)
4866{
4867 rtx xops[3];
4868
4869 xops[0] = dest;
4870 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4871
4872 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4873 {
4874 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4875
4876 if (!flag_pic)
4877 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4878 else
4879 output_asm_insn ("call\t%a2", xops);
4880
4881#if TARGET_MACHO
4882 /* Output the "canonical" label name ("Lxx$pb") here too. This
4883 is what will be referred to by the Mach-O PIC subsystem. */
4884 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4885#endif
4886 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4887 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4888
4889 if (flag_pic)
4890 output_asm_insn ("pop{l}\t%0", xops);
4891 }
4892 else
4893 {
4894 char name[32];
4895 get_pc_thunk_name (name, REGNO (dest));
4896 pic_labels_used |= 1 << REGNO (dest);
4897
4898 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4899 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4900 output_asm_insn ("call\t%X2", xops);
4901 }
4902
4903 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4904 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4905 else if (!TARGET_MACHO)
24
25
26#include "config.h"
27#include "system.h"
28#include "coretypes.h"
29#include "tm.h"
30#include "rtl.h"
31#include "tree.h"
32#include "tm_p.h"
33#include "regs.h"
34#include "hard-reg-set.h"
35#include "real.h"
36#include "insn-config.h"
37#include "conditions.h"
38#include "output.h"
39#include "insn-attr.h"
40#include "flags.h"
41#include "except.h"
42#include "function.h"
43#include "recog.h"
44#include "expr.h"
45#include "optabs.h"
46#include "toplev.h"
47#include "basic-block.h"
48#include "ggc.h"
49#include "target.h"
50#include "target-def.h"
51#include "langhooks.h"
52#include "cgraph.h"
53
54#ifndef CHECK_STACK_LIMIT
55#define CHECK_STACK_LIMIT (-1)
56#endif
57
58/* Return index of given mode in mult and division cost tables. */
59#define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
64 : 4)
65
66/* Processor costs (relative to an add) */
67static const
68struct processor_costs size_cost = { /* costs for tunning for size */
69 2, /* cost of an add instruction */
70 3, /* cost of a lea instruction */
71 2, /* variable shift costs */
72 3, /* constant shift costs */
73 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
74 0, /* cost of multiply per each bit set */
75 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
76 3, /* cost of movsx */
77 3, /* cost of movzx */
78 0, /* "large" insn */
79 2, /* MOVE_RATIO */
80 2, /* cost for loading QImode using movzbl */
81 {2, 2, 2}, /* cost of loading integer registers
82 in QImode, HImode and SImode.
83 Relative to reg-reg move (2). */
84 {2, 2, 2}, /* cost of storing integer registers */
85 2, /* cost of reg,reg fld/fst */
86 {2, 2, 2}, /* cost of loading fp registers
87 in SFmode, DFmode and XFmode */
88 {2, 2, 2}, /* cost of loading integer registers */
89 3, /* cost of moving MMX register */
90 {3, 3}, /* cost of loading MMX registers
91 in SImode and DImode */
92 {3, 3}, /* cost of storing MMX registers
93 in SImode and DImode */
94 3, /* cost of moving SSE register */
95 {3, 3, 3}, /* cost of loading SSE registers
96 in SImode, DImode and TImode */
97 {3, 3, 3}, /* cost of storing SSE registers
98 in SImode, DImode and TImode */
99 3, /* MMX or SSE register to integer */
100 0, /* size of prefetch block */
101 0, /* number of parallel prefetches */
102 1, /* Branch cost */
103 2, /* cost of FADD and FSUB insns. */
104 2, /* cost of FMUL instruction. */
105 2, /* cost of FDIV instruction. */
106 2, /* cost of FABS instruction. */
107 2, /* cost of FCHS instruction. */
108 2, /* cost of FSQRT instruction. */
109};
110
111/* Processor costs (relative to an add) */
112static const
113struct processor_costs i386_cost = { /* 386 specific costs */
114 1, /* cost of an add instruction */
115 1, /* cost of a lea instruction */
116 3, /* variable shift costs */
117 2, /* constant shift costs */
118 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
119 1, /* cost of multiply per each bit set */
120 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
121 3, /* cost of movsx */
122 2, /* cost of movzx */
123 15, /* "large" insn */
124 3, /* MOVE_RATIO */
125 4, /* cost for loading QImode using movzbl */
126 {2, 4, 2}, /* cost of loading integer registers
127 in QImode, HImode and SImode.
128 Relative to reg-reg move (2). */
129 {2, 4, 2}, /* cost of storing integer registers */
130 2, /* cost of reg,reg fld/fst */
131 {8, 8, 8}, /* cost of loading fp registers
132 in SFmode, DFmode and XFmode */
133 {8, 8, 8}, /* cost of loading integer registers */
134 2, /* cost of moving MMX register */
135 {4, 8}, /* cost of loading MMX registers
136 in SImode and DImode */
137 {4, 8}, /* cost of storing MMX registers
138 in SImode and DImode */
139 2, /* cost of moving SSE register */
140 {4, 8, 16}, /* cost of loading SSE registers
141 in SImode, DImode and TImode */
142 {4, 8, 16}, /* cost of storing SSE registers
143 in SImode, DImode and TImode */
144 3, /* MMX or SSE register to integer */
145 0, /* size of prefetch block */
146 0, /* number of parallel prefetches */
147 1, /* Branch cost */
148 23, /* cost of FADD and FSUB insns. */
149 27, /* cost of FMUL instruction. */
150 88, /* cost of FDIV instruction. */
151 22, /* cost of FABS instruction. */
152 24, /* cost of FCHS instruction. */
153 122, /* cost of FSQRT instruction. */
154};
155
156static const
157struct processor_costs i486_cost = { /* 486 specific costs */
158 1, /* cost of an add instruction */
159 1, /* cost of a lea instruction */
160 3, /* variable shift costs */
161 2, /* constant shift costs */
162 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
163 1, /* cost of multiply per each bit set */
164 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
165 3, /* cost of movsx */
166 2, /* cost of movzx */
167 15, /* "large" insn */
168 3, /* MOVE_RATIO */
169 4, /* cost for loading QImode using movzbl */
170 {2, 4, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 4, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {8, 8, 8}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {8, 8, 8}, /* cost of loading integer registers */
178 2, /* cost of moving MMX register */
179 {4, 8}, /* cost of loading MMX registers
180 in SImode and DImode */
181 {4, 8}, /* cost of storing MMX registers
182 in SImode and DImode */
183 2, /* cost of moving SSE register */
184 {4, 8, 16}, /* cost of loading SSE registers
185 in SImode, DImode and TImode */
186 {4, 8, 16}, /* cost of storing SSE registers
187 in SImode, DImode and TImode */
188 3, /* MMX or SSE register to integer */
189 0, /* size of prefetch block */
190 0, /* number of parallel prefetches */
191 1, /* Branch cost */
192 8, /* cost of FADD and FSUB insns. */
193 16, /* cost of FMUL instruction. */
194 73, /* cost of FDIV instruction. */
195 3, /* cost of FABS instruction. */
196 3, /* cost of FCHS instruction. */
197 83, /* cost of FSQRT instruction. */
198};
199
200static const
201struct processor_costs pentium_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 4, /* variable shift costs */
205 1, /* constant shift costs */
206 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
209 3, /* cost of movsx */
210 2, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 6, /* cost for loading QImode using movzbl */
214 {2, 4, 2}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 4, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of loading integer registers */
222 8, /* cost of moving MMX register */
223 {8, 8}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {8, 8}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {4, 8, 16}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {4, 8, 16}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 0, /* size of prefetch block */
234 0, /* number of parallel prefetches */
235 2, /* Branch cost */
236 3, /* cost of FADD and FSUB insns. */
237 3, /* cost of FMUL instruction. */
238 39, /* cost of FDIV instruction. */
239 1, /* cost of FABS instruction. */
240 1, /* cost of FCHS instruction. */
241 70, /* cost of FSQRT instruction. */
242};
243
244static const
245struct processor_costs pentiumpro_cost = {
246 1, /* cost of an add instruction */
247 1, /* cost of a lea instruction */
248 1, /* variable shift costs */
249 1, /* constant shift costs */
250 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
251 0, /* cost of multiply per each bit set */
252 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
253 1, /* cost of movsx */
254 1, /* cost of movzx */
255 8, /* "large" insn */
256 6, /* MOVE_RATIO */
257 2, /* cost for loading QImode using movzbl */
258 {4, 4, 4}, /* cost of loading integer registers
259 in QImode, HImode and SImode.
260 Relative to reg-reg move (2). */
261 {2, 2, 2}, /* cost of storing integer registers */
262 2, /* cost of reg,reg fld/fst */
263 {2, 2, 6}, /* cost of loading fp registers
264 in SFmode, DFmode and XFmode */
265 {4, 4, 6}, /* cost of loading integer registers */
266 2, /* cost of moving MMX register */
267 {2, 2}, /* cost of loading MMX registers
268 in SImode and DImode */
269 {2, 2}, /* cost of storing MMX registers
270 in SImode and DImode */
271 2, /* cost of moving SSE register */
272 {2, 2, 8}, /* cost of loading SSE registers
273 in SImode, DImode and TImode */
274 {2, 2, 8}, /* cost of storing SSE registers
275 in SImode, DImode and TImode */
276 3, /* MMX or SSE register to integer */
277 32, /* size of prefetch block */
278 6, /* number of parallel prefetches */
279 2, /* Branch cost */
280 3, /* cost of FADD and FSUB insns. */
281 5, /* cost of FMUL instruction. */
282 56, /* cost of FDIV instruction. */
283 2, /* cost of FABS instruction. */
284 2, /* cost of FCHS instruction. */
285 56, /* cost of FSQRT instruction. */
286};
287
288static const
289struct processor_costs k6_cost = {
290 1, /* cost of an add instruction */
291 2, /* cost of a lea instruction */
292 1, /* variable shift costs */
293 1, /* constant shift costs */
294 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
295 0, /* cost of multiply per each bit set */
296 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
297 2, /* cost of movsx */
298 2, /* cost of movzx */
299 8, /* "large" insn */
300 4, /* MOVE_RATIO */
301 3, /* cost for loading QImode using movzbl */
302 {4, 5, 4}, /* cost of loading integer registers
303 in QImode, HImode and SImode.
304 Relative to reg-reg move (2). */
305 {2, 3, 2}, /* cost of storing integer registers */
306 4, /* cost of reg,reg fld/fst */
307 {6, 6, 6}, /* cost of loading fp registers
308 in SFmode, DFmode and XFmode */
309 {4, 4, 4}, /* cost of loading integer registers */
310 2, /* cost of moving MMX register */
311 {2, 2}, /* cost of loading MMX registers
312 in SImode and DImode */
313 {2, 2}, /* cost of storing MMX registers
314 in SImode and DImode */
315 2, /* cost of moving SSE register */
316 {2, 2, 8}, /* cost of loading SSE registers
317 in SImode, DImode and TImode */
318 {2, 2, 8}, /* cost of storing SSE registers
319 in SImode, DImode and TImode */
320 6, /* MMX or SSE register to integer */
321 32, /* size of prefetch block */
322 1, /* number of parallel prefetches */
323 1, /* Branch cost */
324 2, /* cost of FADD and FSUB insns. */
325 2, /* cost of FMUL instruction. */
326 56, /* cost of FDIV instruction. */
327 2, /* cost of FABS instruction. */
328 2, /* cost of FCHS instruction. */
329 56, /* cost of FSQRT instruction. */
330};
331
332static const
333struct processor_costs athlon_cost = {
334 1, /* cost of an add instruction */
335 2, /* cost of a lea instruction */
336 1, /* variable shift costs */
337 1, /* constant shift costs */
338 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
339 0, /* cost of multiply per each bit set */
340 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
341 1, /* cost of movsx */
342 1, /* cost of movzx */
343 8, /* "large" insn */
344 9, /* MOVE_RATIO */
345 4, /* cost for loading QImode using movzbl */
346 {3, 4, 3}, /* cost of loading integer registers
347 in QImode, HImode and SImode.
348 Relative to reg-reg move (2). */
349 {3, 4, 3}, /* cost of storing integer registers */
350 4, /* cost of reg,reg fld/fst */
351 {4, 4, 12}, /* cost of loading fp registers
352 in SFmode, DFmode and XFmode */
353 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
354 2, /* cost of moving MMX register */
355 {4, 4}, /* cost of loading MMX registers
356 in SImode and DImode */
357 {4, 4}, /* cost of storing MMX registers
358 in SImode and DImode */
359 2, /* cost of moving SSE register */
360 {4, 4, 6}, /* cost of loading SSE registers
361 in SImode, DImode and TImode */
362 {4, 4, 5}, /* cost of storing SSE registers
363 in SImode, DImode and TImode */
364 5, /* MMX or SSE register to integer */
365 64, /* size of prefetch block */
366 6, /* number of parallel prefetches */
367 2, /* Branch cost */
368 4, /* cost of FADD and FSUB insns. */
369 4, /* cost of FMUL instruction. */
370 24, /* cost of FDIV instruction. */
371 2, /* cost of FABS instruction. */
372 2, /* cost of FCHS instruction. */
373 35, /* cost of FSQRT instruction. */
374};
375
376static const
377struct processor_costs k8_cost = {
378 1, /* cost of an add instruction */
379 2, /* cost of a lea instruction */
380 1, /* variable shift costs */
381 1, /* constant shift costs */
382 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
383 0, /* cost of multiply per each bit set */
384 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
385 1, /* cost of movsx */
386 1, /* cost of movzx */
387 8, /* "large" insn */
388 9, /* MOVE_RATIO */
389 4, /* cost for loading QImode using movzbl */
390 {3, 4, 3}, /* cost of loading integer registers
391 in QImode, HImode and SImode.
392 Relative to reg-reg move (2). */
393 {3, 4, 3}, /* cost of storing integer registers */
394 4, /* cost of reg,reg fld/fst */
395 {4, 4, 12}, /* cost of loading fp registers
396 in SFmode, DFmode and XFmode */
397 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
398 2, /* cost of moving MMX register */
399 {3, 3}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {4, 4}, /* cost of storing MMX registers
402 in SImode and DImode */
403 2, /* cost of moving SSE register */
404 {4, 3, 6}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {4, 4, 5}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 5, /* MMX or SSE register to integer */
409 64, /* size of prefetch block */
410 6, /* number of parallel prefetches */
411 2, /* Branch cost */
412 4, /* cost of FADD and FSUB insns. */
413 4, /* cost of FMUL instruction. */
414 19, /* cost of FDIV instruction. */
415 2, /* cost of FABS instruction. */
416 2, /* cost of FCHS instruction. */
417 35, /* cost of FSQRT instruction. */
418};
419
420static const
421struct processor_costs pentium4_cost = {
422 1, /* cost of an add instruction */
423 1, /* cost of a lea instruction */
424 4, /* variable shift costs */
425 4, /* constant shift costs */
426 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
427 0, /* cost of multiply per each bit set */
428 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
429 1, /* cost of movsx */
430 1, /* cost of movzx */
431 16, /* "large" insn */
432 6, /* MOVE_RATIO */
433 2, /* cost for loading QImode using movzbl */
434 {4, 5, 4}, /* cost of loading integer registers
435 in QImode, HImode and SImode.
436 Relative to reg-reg move (2). */
437 {2, 3, 2}, /* cost of storing integer registers */
438 2, /* cost of reg,reg fld/fst */
439 {2, 2, 6}, /* cost of loading fp registers
440 in SFmode, DFmode and XFmode */
441 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
442 2, /* cost of moving MMX register */
443 {2, 2}, /* cost of loading MMX registers
444 in SImode and DImode */
445 {2, 2}, /* cost of storing MMX registers
446 in SImode and DImode */
447 12, /* cost of moving SSE register */
448 {12, 12, 12}, /* cost of loading SSE registers
449 in SImode, DImode and TImode */
450 {2, 2, 8}, /* cost of storing SSE registers
451 in SImode, DImode and TImode */
452 10, /* MMX or SSE register to integer */
453 64, /* size of prefetch block */
454 6, /* number of parallel prefetches */
455 2, /* Branch cost */
456 5, /* cost of FADD and FSUB insns. */
457 7, /* cost of FMUL instruction. */
458 43, /* cost of FDIV instruction. */
459 2, /* cost of FABS instruction. */
460 2, /* cost of FCHS instruction. */
461 43, /* cost of FSQRT instruction. */
462};
463
464const struct processor_costs *ix86_cost = &pentium_cost;
465
466/* Processor feature/optimization bitmasks. */
467#define m_386 (1<<PROCESSOR_I386)
468#define m_486 (1<<PROCESSOR_I486)
469#define m_PENT (1<<PROCESSOR_PENTIUM)
470#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
471#define m_K6 (1<<PROCESSOR_K6)
472#define m_ATHLON (1<<PROCESSOR_ATHLON)
473#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
474#define m_K8 (1<<PROCESSOR_K8)
475#define m_ATHLON_K8 (m_K8 | m_ATHLON)
476
477const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
478const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
479const int x86_zero_extend_with_and = m_486 | m_PENT;
480const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
481const int x86_double_with_add = ~m_386;
482const int x86_use_bit_test = m_386;
483const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
484const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
485const int x86_3dnow_a = m_ATHLON_K8;
486const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
487const int x86_branch_hints = m_PENT4;
488const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
489const int x86_partial_reg_stall = m_PPRO;
490const int x86_use_loop = m_K6;
491const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
492const int x86_use_mov0 = m_K6;
493const int x86_use_cltd = ~(m_PENT | m_K6);
494const int x86_read_modify_write = ~m_PENT;
495const int x86_read_modify = ~(m_PENT | m_PPRO);
496const int x86_split_long_moves = m_PPRO;
497const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
498const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
499const int x86_single_stringop = m_386 | m_PENT4;
500const int x86_qimode_math = ~(0);
501const int x86_promote_qi_regs = 0;
502const int x86_himode_math = ~(m_PPRO);
503const int x86_promote_hi_regs = m_PPRO;
504const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
505const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
506const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
507const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
508const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
509const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
510const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
511const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
512const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
513const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
514const int x86_decompose_lea = m_PENT4;
515const int x86_shift1 = ~m_486;
516const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
517const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
518 /* Set for machines where the type and dependencies are resolved on SSE register
519 parts instead of whole registers, so we may maintain just the lower part of
520 scalar values in the proper format, leaving the upper part undefined. */
521const int x86_sse_partial_regs = m_ATHLON_K8;
522 /* Athlon optimizes the partial-register FPS special case, thus avoiding the
523 need for extra instructions beforehand. */
524const int x86_sse_partial_regs_for_cvtsd2ss = 0;
525const int x86_sse_typeless_stores = m_ATHLON_K8;
526const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
527const int x86_use_ffreep = m_ATHLON_K8;
528const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
529
530/* ??? HACK! The following is a lie. SSE can hold e.g. SImode, and
531 indeed *must* be able to hold SImode so that SSE2 shifts are able
532 to work right. But this can result in some mighty surprising
533 register allocation when building kernels. Turning this off should
534 make us less likely to all of a sudden select an SSE register. */
535const int x86_inter_unit_moves = 0; /* ~(m_ATHLON_K8) */
536
537const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
538
539/* In case the average insn count for single function invocation is
540 lower than this constant, emit fast (but longer) prologue and
541 epilogue code. */
542#define FAST_PROLOGUE_INSN_COUNT 20
543
544 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
545static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
546static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
547static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
548
549/* Array of the smallest class containing reg number REGNO, indexed by
550 REGNO. Used by REGNO_REG_CLASS in i386.h. */
551
552enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
553{
554 /* ax, dx, cx, bx */
555 AREG, DREG, CREG, BREG,
556 /* si, di, bp, sp */
557 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
558 /* FP registers */
559 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
560 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
561 /* arg pointer */
562 NON_Q_REGS,
563 /* flags, fpsr, dirflag, frame */
564 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
565 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
566 SSE_REGS, SSE_REGS,
567 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
568 MMX_REGS, MMX_REGS,
569 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
570 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
571 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
572 SSE_REGS, SSE_REGS,
573};
574
575/* The "default" register map used in 32bit mode. */
576
577int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
578{
579 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
580 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
581 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
582 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
583 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
584 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
585 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
586};
587
588static int const x86_64_int_parameter_registers[6] =
589{
590 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
591 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
592};
593
594static int const x86_64_int_return_registers[4] =
595{
596 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
597};
598
599/* The "default" register map used in 64bit mode. */
600int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
601{
602 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
603 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
604 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
605 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
606 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
607 8,9,10,11,12,13,14,15, /* extended integer registers */
608 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
609};
610
611/* Define the register numbers to be used in Dwarf debugging information.
612 The SVR4 reference port C compiler uses the following register numbers
613 in its Dwarf output code:
614 0 for %eax (gcc regno = 0)
615 1 for %ecx (gcc regno = 2)
616 2 for %edx (gcc regno = 1)
617 3 for %ebx (gcc regno = 3)
618 4 for %esp (gcc regno = 7)
619 5 for %ebp (gcc regno = 6)
620 6 for %esi (gcc regno = 4)
621 7 for %edi (gcc regno = 5)
622 The following three DWARF register numbers are never generated by
623 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
624 believes these numbers have these meanings.
625 8 for %eip (no gcc equivalent)
626 9 for %eflags (gcc regno = 17)
627 10 for %trapno (no gcc equivalent)
628 It is not at all clear how we should number the FP stack registers
629 for the x86 architecture. If the version of SDB on x86/svr4 were
630 a bit less brain dead with respect to floating-point then we would
631 have a precedent to follow with respect to DWARF register numbers
632 for x86 FP registers, but the SDB on x86/svr4 is so completely
633 broken with respect to FP registers that it is hardly worth thinking
634 of it as something to strive for compatibility with.
635 The version of x86/svr4 SDB I have at the moment does (partially)
636 seem to believe that DWARF register number 11 is associated with
637 the x86 register %st(0), but that's about all. Higher DWARF
638 register numbers don't seem to be associated with anything in
639 particular, and even for DWARF regno 11, SDB only seems to under-
640 stand that it should say that a variable lives in %st(0) (when
641 asked via an `=' command) if we said it was in DWARF regno 11,
642 but SDB still prints garbage when asked for the value of the
643 variable in question (via a `/' command).
644 (Also note that the labels SDB prints for various FP stack regs
645 when doing an `x' command are all wrong.)
646 Note that these problems generally don't affect the native SVR4
647 C compiler because it doesn't allow the use of -O with -g and
648 because when it is *not* optimizing, it allocates a memory
649 location for each floating-point variable, and the memory
650 location is what gets described in the DWARF AT_location
651 attribute for the variable in question.
652 Regardless of the severe mental illness of the x86/svr4 SDB, we
653 do something sensible here and we use the following DWARF
654 register numbers. Note that these are all stack-top-relative
655 numbers.
656 11 for %st(0) (gcc regno = 8)
657 12 for %st(1) (gcc regno = 9)
658 13 for %st(2) (gcc regno = 10)
659 14 for %st(3) (gcc regno = 11)
660 15 for %st(4) (gcc regno = 12)
661 16 for %st(5) (gcc regno = 13)
662 17 for %st(6) (gcc regno = 14)
663 18 for %st(7) (gcc regno = 15)
664*/
665int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
666{
667 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
668 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
669 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
670 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
671 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
672 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
673 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
674};
675
676/* Test and compare insns in i386.md store the information needed to
677 generate branch and scc insns here. */
678
679rtx ix86_compare_op0 = NULL_RTX;
680rtx ix86_compare_op1 = NULL_RTX;
681
682#define MAX_386_STACK_LOCALS 3
683/* Size of the register save area. */
684#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
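/* Illustrative arithmetic, assuming the usual 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8: 6 * 8 + 8 * 16 = 176 bytes
   of register save area.  */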
685
686/* Define the structure for the machine field in struct function. */
687
688struct stack_local_entry GTY(())
689{
690 unsigned short mode;
691 unsigned short n;
692 rtx rtl;
693 struct stack_local_entry *next;
694};
695
696/* Structure describing stack frame layout.
697 Stack grows downward:
698
699 [arguments]
700 <- ARG_POINTER
701 saved pc
702
703 saved frame pointer if frame_pointer_needed
704 <- HARD_FRAME_POINTER
705 [saved regs]
706
707 [padding1] \
708 )
709 [va_arg registers] (
710 > to_allocate <- FRAME_POINTER
711 [frame] (
712 )
713 [padding2] /
714 */
715struct ix86_frame
716{
717 int nregs;
718 int padding1;
719 int va_arg_size;
720 HOST_WIDE_INT frame;
721 int padding2;
722 int outgoing_arguments_size;
723 int red_zone_size;
724
725 HOST_WIDE_INT to_allocate;
726 /* The offsets relative to ARG_POINTER. */
727 HOST_WIDE_INT frame_pointer_offset;
728 HOST_WIDE_INT hard_frame_pointer_offset;
729 HOST_WIDE_INT stack_pointer_offset;
730
731 /* When save_regs_using_mov is set, emit prologue using
732 move instead of push instructions. */
733 bool save_regs_using_mov;
734};
735
736/* Used to enable/disable debugging features. */
737const char *ix86_debug_arg_string, *ix86_debug_addr_string;
738/* Code model option as passed by user. */
739const char *ix86_cmodel_string;
740/* Parsed value. */
741enum cmodel ix86_cmodel;
742/* Asm dialect. */
743const char *ix86_asm_string;
744enum asm_dialect ix86_asm_dialect = ASM_ATT;
745 /* TLS dialect. */
746const char *ix86_tls_dialect_string;
747enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
748
749/* Which unit we are generating floating point math for. */
750enum fpmath_unit ix86_fpmath;
751
752/* Which cpu are we scheduling for. */
753enum processor_type ix86_tune;
754/* Which instruction set architecture to use. */
755enum processor_type ix86_arch;
756
757/* Strings to hold which cpu and instruction set architecture to use. */
758const char *ix86_tune_string; /* for -mtune=<xxx> */
759const char *ix86_arch_string; /* for -march=<xxx> */
760const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
761
762/* # of registers to use to pass arguments. */
763const char *ix86_regparm_string;
764
765/* true if sse prefetch instruction is not NOOP. */
766int x86_prefetch_sse;
767
768/* ix86_regparm_string as a number */
769int ix86_regparm;
770
771/* Alignment to use for loops and jumps: */
772
773/* Power of two alignment for loops. */
774const char *ix86_align_loops_string;
775
776/* Power of two alignment for non-loop jumps. */
777const char *ix86_align_jumps_string;
778
779/* Power of two alignment for stack boundary in bytes. */
780const char *ix86_preferred_stack_boundary_string;
781
782/* Preferred alignment for stack boundary in bits. */
783int ix86_preferred_stack_boundary;
784
785/* Values 1-5: see jump.c */
786int ix86_branch_cost;
787const char *ix86_branch_cost_string;
788
789/* Power of two alignment for functions. */
790const char *ix86_align_funcs_string;
791
792/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
793static char internal_label_prefix[16];
794static int internal_label_prefix_len;
795
796static int local_symbolic_operand (rtx, enum machine_mode);
797static int tls_symbolic_operand_1 (rtx, enum tls_model);
798static void output_pic_addr_const (FILE *, rtx, int);
799static void put_condition_code (enum rtx_code, enum machine_mode,
800 int, int, FILE *);
801static const char *get_some_local_dynamic_name (void);
802static int get_some_local_dynamic_name_1 (rtx *, void *);
803static rtx maybe_get_pool_constant (rtx);
804static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
805static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
806 rtx *);
807static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
808static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
809 enum machine_mode);
810static rtx get_thread_pointer (int);
811static rtx legitimize_tls_address (rtx, enum tls_model, int);
812static void get_pc_thunk_name (char [32], unsigned int);
813static rtx gen_push (rtx);
814static int memory_address_length (rtx addr);
815static int ix86_flags_dependant (rtx, rtx, enum attr_type);
816static int ix86_agi_dependant (rtx, rtx, enum attr_type);
817static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
818static void ix86_dump_ppro_packet (FILE *);
819static void ix86_reorder_insn (rtx *, rtx *);
820static struct machine_function * ix86_init_machine_status (void);
821static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
822static int ix86_nsaved_regs (void);
823static void ix86_emit_save_regs (void);
824static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
825static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
826static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
827static void ix86_sched_reorder_ppro (rtx *, rtx *);
828static HOST_WIDE_INT ix86_GOT_alias_set (void);
829static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
830static rtx ix86_expand_aligntest (rtx, int);
831static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
832static int ix86_issue_rate (void);
833static int ix86_adjust_cost (rtx, rtx, rtx, int);
834static void ix86_sched_init (FILE *, int, int);
835static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
836static int ix86_variable_issue (FILE *, int, rtx, int);
837static int ia32_use_dfa_pipeline_interface (void);
838static int ia32_multipass_dfa_lookahead (void);
839static void ix86_init_mmx_sse_builtins (void);
840static rtx x86_this_parameter (tree);
841static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
842 HOST_WIDE_INT, tree);
843static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
844static void x86_file_start (void);
845static void ix86_reorg (void);
846static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
847static tree ix86_build_builtin_va_list (void);
848
849struct ix86_address
850{
851 rtx base, index, disp;
852 HOST_WIDE_INT scale;
853 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
854};
855
856static int ix86_decompose_address (rtx, struct ix86_address *);
857static int ix86_address_cost (rtx);
858static bool ix86_cannot_force_const_mem (rtx);
859static rtx ix86_delegitimize_address (rtx);
860
861struct builtin_description;
862static rtx ix86_expand_sse_comi (const struct builtin_description *,
863 tree, rtx);
864static rtx ix86_expand_sse_compare (const struct builtin_description *,
865 tree, rtx);
866static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
867static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
868static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
869static rtx ix86_expand_store_builtin (enum insn_code, tree);
870static rtx safe_vector_operand (rtx, enum machine_mode);
871static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
872static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
873 enum rtx_code *, enum rtx_code *);
874static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
875static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
876static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
877static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
878static int ix86_fp_comparison_cost (enum rtx_code code);
879static unsigned int ix86_select_alt_pic_regnum (void);
880static int ix86_save_reg (unsigned int, int);
881static void ix86_compute_frame_layout (struct ix86_frame *);
882static int ix86_comp_type_attributes (tree, tree);
883static int ix86_function_regparm (tree, tree);
884const struct attribute_spec ix86_attribute_table[];
885static bool ix86_function_ok_for_sibcall (tree, tree);
886static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
887static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
888static int ix86_value_regno (enum machine_mode);
889static bool contains_128bit_aligned_vector_p (tree);
890static bool ix86_ms_bitfield_layout_p (tree);
891static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
892static int extended_reg_mentioned_1 (rtx *, void *);
893static bool ix86_rtx_costs (rtx, int, int, int *);
894static int min_insn_size (rtx);
895static void k8_avoid_jump_misspredicts (void);
896
897#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
898static void ix86_svr3_asm_out_constructor (rtx, int);
899#endif
900
901 /* Register class used for passing a given 64bit part of the argument.
902 These represent classes as documented by the PS ABI, with the exception
903 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
904 uses SFmode or DFmode moves instead of DImode ones to avoid reformatting penalties.
905
906 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
907 whenever possible (upper half does contain padding).
908 */
909enum x86_64_reg_class
910 {
911 X86_64_NO_CLASS,
912 X86_64_INTEGER_CLASS,
913 X86_64_INTEGERSI_CLASS,
914 X86_64_SSE_CLASS,
915 X86_64_SSESF_CLASS,
916 X86_64_SSEDF_CLASS,
917 X86_64_SSEUP_CLASS,
918 X86_64_X87_CLASS,
919 X86_64_X87UP_CLASS,
920 X86_64_MEMORY_CLASS
921 };
922static const char * const x86_64_reg_class_name[] =
923 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
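/* For illustration (see classify_argument below): a plain 'int' argument
   classifies as INTEGERSI, a 'long long' as INTEGER and a 'double' as SSEDF;
   arguments that cannot be split into these classes are passed in memory.  */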
924
925#define MAX_CLASSES 4
926static int classify_argument (enum machine_mode, tree,
927 enum x86_64_reg_class [MAX_CLASSES], int);
928static int examine_argument (enum machine_mode, tree, int, int *, int *);
929static rtx construct_container (enum machine_mode, tree, int, int, int,
930 const int *, int);
931static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
932 enum x86_64_reg_class);
933
934/* Table of constants used by fldpi, fldln2, etc.... */
935static REAL_VALUE_TYPE ext_80387_constants_table [5];
936static bool ext_80387_constants_init = 0;
937static void init_ext_80387_constants (void);
938
939/* Initialize the GCC target structure. */
940#undef TARGET_ATTRIBUTE_TABLE
941#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
942#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
943# undef TARGET_MERGE_DECL_ATTRIBUTES
944# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
945#endif
946
947#undef TARGET_COMP_TYPE_ATTRIBUTES
948#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
949
950#undef TARGET_INIT_BUILTINS
951#define TARGET_INIT_BUILTINS ix86_init_builtins
952
953#undef TARGET_EXPAND_BUILTIN
954#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
955
956#undef TARGET_ASM_FUNCTION_EPILOGUE
957#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
958
959#undef TARGET_ASM_OPEN_PAREN
960#define TARGET_ASM_OPEN_PAREN ""
961#undef TARGET_ASM_CLOSE_PAREN
962#define TARGET_ASM_CLOSE_PAREN ""
963
964#undef TARGET_ASM_ALIGNED_HI_OP
965#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
966#undef TARGET_ASM_ALIGNED_SI_OP
967#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
968#ifdef ASM_QUAD
969#undef TARGET_ASM_ALIGNED_DI_OP
970#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
971#endif
972
973#undef TARGET_ASM_UNALIGNED_HI_OP
974#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
975#undef TARGET_ASM_UNALIGNED_SI_OP
976#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
977#undef TARGET_ASM_UNALIGNED_DI_OP
978#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
979
980#undef TARGET_SCHED_ADJUST_COST
981#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
982#undef TARGET_SCHED_ISSUE_RATE
983#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
984#undef TARGET_SCHED_VARIABLE_ISSUE
985#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
986#undef TARGET_SCHED_INIT
987#define TARGET_SCHED_INIT ix86_sched_init
988#undef TARGET_SCHED_REORDER
989#define TARGET_SCHED_REORDER ix86_sched_reorder
990#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
991#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
992 ia32_use_dfa_pipeline_interface
993#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
994#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
995 ia32_multipass_dfa_lookahead
996
997#undef TARGET_FUNCTION_OK_FOR_SIBCALL
998#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
999
1000#ifdef HAVE_AS_TLS
1001#undef TARGET_HAVE_TLS
1002#define TARGET_HAVE_TLS true
1003#endif
1004#undef TARGET_CANNOT_FORCE_CONST_MEM
1005#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1006
1007#undef TARGET_DELEGITIMIZE_ADDRESS
1008#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1009
1010#undef TARGET_MS_BITFIELD_LAYOUT_P
1011#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1012
1013#undef TARGET_ASM_OUTPUT_MI_THUNK
1014#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1015#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1016#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1017
1018#undef TARGET_ASM_FILE_START
1019#define TARGET_ASM_FILE_START x86_file_start
1020
1021#undef TARGET_RTX_COSTS
1022#define TARGET_RTX_COSTS ix86_rtx_costs
1023#undef TARGET_ADDRESS_COST
1024#define TARGET_ADDRESS_COST ix86_address_cost
1025
1026#undef TARGET_FIXED_CONDITION_CODE_REGS
1027#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1028#undef TARGET_CC_MODES_COMPATIBLE
1029#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1030
1031#undef TARGET_MACHINE_DEPENDENT_REORG
1032#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1033
1034#undef TARGET_BUILD_BUILTIN_VA_LIST
1035#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1036
1037struct gcc_target targetm = TARGET_INITIALIZER;
1038
1039/* The svr4 ABI for the i386 says that records and unions are returned
1040 in memory. */
1041#ifndef DEFAULT_PCC_STRUCT_RETURN
1042#define DEFAULT_PCC_STRUCT_RETURN 1
1043#endif
1044
1045/* Sometimes certain combinations of command options do not make
1046 sense on a particular target machine. You can define a macro
1047 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1048 defined, is executed once just after all the command options have
1049 been parsed.
1050
1051 Don't use this macro to turn on various extra optimizations for
1052 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1053
1054void
1055override_options (void)
1056{
1057 int i;
1058 /* Comes from final.c -- no real reason to change it. */
1059#define MAX_CODE_ALIGN 16
1060
1061 static struct ptt
1062 {
1063 const struct processor_costs *cost; /* Processor costs */
1064 const int target_enable; /* Target flags to enable. */
1065 const int target_disable; /* Target flags to disable. */
1066 const int align_loop; /* Default alignments. */
1067 const int align_loop_max_skip;
1068 const int align_jump;
1069 const int align_jump_max_skip;
1070 const int align_func;
1071 }
1072 const processor_target_table[PROCESSOR_max] =
1073 {
1074 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1075 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1076 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1077 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1078 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1079 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1080 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1081 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1082 };
1083
1084 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1085 static struct pta
1086 {
1087 const char *const name; /* processor name or nickname. */
1088 const enum processor_type processor;
1089 const enum pta_flags
1090 {
1091 PTA_SSE = 1,
1092 PTA_SSE2 = 2,
1093 PTA_SSE3 = 4,
1094 PTA_MMX = 8,
1095 PTA_PREFETCH_SSE = 16,
1096 PTA_3DNOW = 32,
1097 PTA_3DNOW_A = 64,
1098 PTA_64BIT = 128
1099 } flags;
1100 }
1101 const processor_alias_table[] =
1102 {
1103 {"i386", PROCESSOR_I386, 0},
1104 {"i486", PROCESSOR_I486, 0},
1105 {"i586", PROCESSOR_PENTIUM, 0},
1106 {"pentium", PROCESSOR_PENTIUM, 0},
1107 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1108 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1109 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1110 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1111 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1112 {"i686", PROCESSOR_PENTIUMPRO, 0},
1113 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1114 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1115 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1116 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1117 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1118 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1119 | PTA_MMX | PTA_PREFETCH_SSE},
1120 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1121 | PTA_MMX | PTA_PREFETCH_SSE},
1122 {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
1123 | PTA_MMX | PTA_PREFETCH_SSE},
1124 {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1125 | PTA_MMX | PTA_PREFETCH_SSE},
1126 {"k6", PROCESSOR_K6, PTA_MMX},
1127 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1128 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1129 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1130 | PTA_3DNOW_A},
1131 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1132 | PTA_3DNOW | PTA_3DNOW_A},
1133 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1134 | PTA_3DNOW_A | PTA_SSE},
1135 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1136 | PTA_3DNOW_A | PTA_SSE},
1137 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1138 | PTA_3DNOW_A | PTA_SSE},
1139 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1140 | PTA_SSE | PTA_SSE2 },
1141 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1142 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1143 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1144 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1145 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1146 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1147 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1148 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1149 };
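/* Example (illustrative): -march=athlon-xp selects PROCESSOR_ATHLON and, via
   the flag bits above, enables MMX, 3DNow!, the Athlon 3DNow! extensions and
   SSE in the loop below unless the user set those masks explicitly; SSE2 is
   left untouched.  */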
1150
1151 int const pta_size = ARRAY_SIZE (processor_alias_table);
1152
1153 /* Set the default values for switches whose default depends on TARGET_64BIT
1154 in case they weren't overwritten by command line options. */
1155 if (TARGET_64BIT)
1156 {
1157 if (flag_omit_frame_pointer == 2)
1158 flag_omit_frame_pointer = 1;
1159 if (flag_asynchronous_unwind_tables == 2)
1160 flag_asynchronous_unwind_tables = 1;
1161 if (flag_pcc_struct_return == 2)
1162 flag_pcc_struct_return = 0;
1163 }
1164 else
1165 {
1166 if (flag_omit_frame_pointer == 2)
1167 flag_omit_frame_pointer = 0;
1168 if (flag_asynchronous_unwind_tables == 2)
1169 flag_asynchronous_unwind_tables = 0;
1170 if (flag_pcc_struct_return == 2)
1171 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1172 }
1173
1174#ifdef SUBTARGET_OVERRIDE_OPTIONS
1175 SUBTARGET_OVERRIDE_OPTIONS;
1176#endif
1177
1178 if (!ix86_tune_string && ix86_arch_string)
1179 ix86_tune_string = ix86_arch_string;
1180 if (!ix86_tune_string)
1181 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1182 if (!ix86_arch_string)
1183 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1184
1185 if (ix86_cmodel_string != 0)
1186 {
1187 if (!strcmp (ix86_cmodel_string, "small"))
1188 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1189 else if (flag_pic)
1190 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1191 else if (!strcmp (ix86_cmodel_string, "32"))
1192 ix86_cmodel = CM_32;
1193 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1194 ix86_cmodel = CM_KERNEL;
1195 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1196 ix86_cmodel = CM_MEDIUM;
1197 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1198 ix86_cmodel = CM_LARGE;
1199 else
1200 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1201 }
1202 else
1203 {
1204 ix86_cmodel = CM_32;
1205 if (TARGET_64BIT)
1206 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1207 }
1208 if (ix86_asm_string != 0)
1209 {
1210 if (!strcmp (ix86_asm_string, "intel"))
1211 ix86_asm_dialect = ASM_INTEL;
1212 else if (!strcmp (ix86_asm_string, "att"))
1213 ix86_asm_dialect = ASM_ATT;
1214 else
1215 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1216 }
1217 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1218 error ("code model `%s' not supported in the %s bit mode",
1219 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1220 if (ix86_cmodel == CM_LARGE)
1221 sorry ("code model `large' not supported yet");
1222 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1223 sorry ("%i-bit mode not compiled in",
1224 (target_flags & MASK_64BIT) ? 64 : 32);
1225
1226 for (i = 0; i < pta_size; i++)
1227 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1228 {
1229 ix86_arch = processor_alias_table[i].processor;
1230 /* Default cpu tuning to the architecture. */
1231 ix86_tune = ix86_arch;
1232 if (processor_alias_table[i].flags & PTA_MMX
1233 && !(target_flags_explicit & MASK_MMX))
1234 target_flags |= MASK_MMX;
1235 if (processor_alias_table[i].flags & PTA_3DNOW
1236 && !(target_flags_explicit & MASK_3DNOW))
1237 target_flags |= MASK_3DNOW;
1238 if (processor_alias_table[i].flags & PTA_3DNOW_A
1239 && !(target_flags_explicit & MASK_3DNOW_A))
1240 target_flags |= MASK_3DNOW_A;
1241 if (processor_alias_table[i].flags & PTA_SSE
1242 && !(target_flags_explicit & MASK_SSE))
1243 target_flags |= MASK_SSE;
1244 if (processor_alias_table[i].flags & PTA_SSE2
1245 && !(target_flags_explicit & MASK_SSE2))
1246 target_flags |= MASK_SSE2;
1247 if (processor_alias_table[i].flags & PTA_SSE3
1248 && !(target_flags_explicit & MASK_SSE3))
1249 target_flags |= MASK_SSE3;
1250 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1251 x86_prefetch_sse = true;
1252 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1253 error ("CPU you selected does not support x86-64 instruction set");
1254 break;
1255 }
1256
1257 if (i == pta_size)
1258 error ("bad value (%s) for -march= switch", ix86_arch_string);
1259
1260 for (i = 0; i < pta_size; i++)
1261 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1262 {
1263 ix86_tune = processor_alias_table[i].processor;
1264 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1265 error ("CPU you selected does not support x86-64 instruction set");
1266
1267 /* Intel CPUs have always interpreted SSE prefetch instructions as
1268 NOPs; so, we can enable SSE prefetch instructions even when
1269 -mtune (rather than -march) points us to a processor that has them.
1270 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1271 higher processors. */
1272 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1273 x86_prefetch_sse = true;
1274 break;
1275 }
1276 if (i == pta_size)
1277 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1278
1279 if (optimize_size)
1280 ix86_cost = &size_cost;
1281 else
1282 ix86_cost = processor_target_table[ix86_tune].cost;
1283 target_flags |= processor_target_table[ix86_tune].target_enable;
1284 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1285
1286 /* Arrange to set up i386_stack_locals for all functions. */
1287 init_machine_status = ix86_init_machine_status;
1288
1289 /* Validate -mregparm= value. */
1290 if (ix86_regparm_string)
1291 {
1292 i = atoi (ix86_regparm_string);
1293 if (i < 0 || i > REGPARM_MAX)
1294 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1295 else
1296 ix86_regparm = i;
1297 }
1298 else
1299 if (TARGET_64BIT)
1300 ix86_regparm = REGPARM_MAX;
1301
1302 /* If the user has provided any of the -malign-* options,
1303 warn and use that value only if -falign-* is not set.
1304 Remove this code in GCC 3.2 or later. */
1305 if (ix86_align_loops_string)
1306 {
1307 warning ("-malign-loops is obsolete, use -falign-loops");
1308 if (align_loops == 0)
1309 {
1310 i = atoi (ix86_align_loops_string);
1311 if (i < 0 || i > MAX_CODE_ALIGN)
1312 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1313 else
1314 align_loops = 1 << i;
1315 }
1316 }
1317
1318 if (ix86_align_jumps_string)
1319 {
1320 warning ("-malign-jumps is obsolete, use -falign-jumps");
1321 if (align_jumps == 0)
1322 {
1323 i = atoi (ix86_align_jumps_string);
1324 if (i < 0 || i > MAX_CODE_ALIGN)
1325 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1326 else
1327 align_jumps = 1 << i;
1328 }
1329 }
1330
1331 if (ix86_align_funcs_string)
1332 {
1333 warning ("-malign-functions is obsolete, use -falign-functions");
1334 if (align_functions == 0)
1335 {
1336 i = atoi (ix86_align_funcs_string);
1337 if (i < 0 || i > MAX_CODE_ALIGN)
1338 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1339 else
1340 align_functions = 1 << i;
1341 }
1342 }
1343
1344 /* Default align_* from the processor table. */
1345 if (align_loops == 0)
1346 {
1347 align_loops = processor_target_table[ix86_tune].align_loop;
1348 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1349 }
1350 if (align_jumps == 0)
1351 {
1352 align_jumps = processor_target_table[ix86_tune].align_jump;
1353 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1354 }
1355 if (align_functions == 0)
1356 {
1357 align_functions = processor_target_table[ix86_tune].align_func;
1358 }
1359
1360 /* Validate -mpreferred-stack-boundary= value, or provide default.
1361 The default of 128 bits is for Pentium III's SSE __m128, but we
1362 don't want additional code to keep the stack aligned when
1363 optimizing for code size. */
1364 ix86_preferred_stack_boundary = (optimize_size
1365 ? TARGET_64BIT ? 128 : 32
1366 : 128);
1367 if (ix86_preferred_stack_boundary_string)
1368 {
1369 i = atoi (ix86_preferred_stack_boundary_string);
1370 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1371 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1372 TARGET_64BIT ? 4 : 2);
1373 else
1374 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1375 }
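/* For example, -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
   = 16 * 8 = 128 bits, i.e. the 16-byte alignment that SSE's __m128 wants.  */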
1376
1377 /* Validate -mbranch-cost= value, or provide default. */
1378 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1379 if (ix86_branch_cost_string)
1380 {
1381 i = atoi (ix86_branch_cost_string);
1382 if (i < 0 || i > 5)
1383 error ("-mbranch-cost=%d is not between 0 and 5", i);
1384 else
1385 ix86_branch_cost = i;
1386 }
1387
1388 if (ix86_tls_dialect_string)
1389 {
1390 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1391 ix86_tls_dialect = TLS_DIALECT_GNU;
1392 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1393 ix86_tls_dialect = TLS_DIALECT_SUN;
1394 else
1395 error ("bad value (%s) for -mtls-dialect= switch",
1396 ix86_tls_dialect_string);
1397 }
1398
1399 /* Keep nonleaf frame pointers. */
1400 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1401 flag_omit_frame_pointer = 1;
1402
1403 /* If we're doing fast math, we don't care about comparison order
1404 wrt NaNs. This lets us use a shorter comparison sequence. */
1405 if (flag_unsafe_math_optimizations)
1406 target_flags &= ~MASK_IEEE_FP;
1407
1408 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1409 since the insns won't need emulation. */
1410 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1411 target_flags &= ~MASK_NO_FANCY_MATH_387;
1412
1413 /* Turn on SSE2 builtins for -msse3. */
1414 if (TARGET_SSE3)
1415 target_flags |= MASK_SSE2;
1416
1417 /* Turn on SSE builtins for -msse2. */
1418 if (TARGET_SSE2)
1419 target_flags |= MASK_SSE;
1420
1421 if (TARGET_64BIT)
1422 {
1423 if (TARGET_ALIGN_DOUBLE)
1424 error ("-malign-double makes no sense in the 64bit mode");
1425 if (TARGET_RTD)
1426 error ("-mrtd calling convention not supported in the 64bit mode");
1427 /* Enable by default the SSE and MMX builtins. */
1428 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1429 ix86_fpmath = FPMATH_SSE;
1430 }
1431 else
1432 {
1433 ix86_fpmath = FPMATH_387;
1434 /* i386 ABI does not specify red zone. It still makes sense to use it
1435 when the programmer takes care to keep the stack from being destroyed. */
1436 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1437 target_flags |= MASK_NO_RED_ZONE;
1438 }
1439
1440 if (ix86_fpmath_string != 0)
1441 {
1442 if (! strcmp (ix86_fpmath_string, "387"))
1443 ix86_fpmath = FPMATH_387;
1444 else if (! strcmp (ix86_fpmath_string, "sse"))
1445 {
1446 if (!TARGET_SSE)
1447 {
1448 warning ("SSE instruction set disabled, using 387 arithmetics");
1449 ix86_fpmath = FPMATH_387;
1450 }
1451 else
1452 ix86_fpmath = FPMATH_SSE;
1453 }
1454 else if (! strcmp (ix86_fpmath_string, "387,sse")
1455 || ! strcmp (ix86_fpmath_string, "sse,387"))
1456 {
1457 if (!TARGET_SSE)
1458 {
1459 warning ("SSE instruction set disabled, using 387 arithmetics");
1460 ix86_fpmath = FPMATH_387;
1461 }
1462 else if (!TARGET_80387)
1463 {
1464 warning ("387 instruction set disabled, using SSE arithmetics");
1465 ix86_fpmath = FPMATH_SSE;
1466 }
1467 else
1468 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1469 }
1470 else
1471 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1472 }
1473
1474 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1475 on by -msse. */
1476 if (TARGET_SSE)
1477 {
1478 target_flags |= MASK_MMX;
1479 x86_prefetch_sse = true;
1480 }
1481
1482 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1483 if (TARGET_3DNOW)
1484 {
1485 target_flags |= MASK_MMX;
1486 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1487 extensions it adds. */
1488 if (x86_3dnow_a & (1 << ix86_arch))
1489 target_flags |= MASK_3DNOW_A;
1490 }
1491 if ((x86_accumulate_outgoing_args & TUNEMASK)
1492 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1493 && !optimize_size)
1494 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1495
1496 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1497 {
1498 char *p;
1499 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1500 p = strchr (internal_label_prefix, 'X');
1501 internal_label_prefix_len = p - internal_label_prefix;
1502 *p = '\0';
1503 }
1504}
1505
1506void
1507optimization_options (int level, int size ATTRIBUTE_UNUSED)
1508{
1509 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1510 make the problem with not enough registers even worse. */
1511#ifdef INSN_SCHEDULING
1512 if (level > 1)
1513 flag_schedule_insns = 0;
1514#endif
1515
1516 /* The default values of these switches depend on TARGET_64BIT,
1517 which is not known at this moment. Mark these values with 2 and
1518 let the user override them. If there is no command line option
1519 specifying them, we will set the defaults in override_options. */
1520 if (optimize >= 1)
1521 flag_omit_frame_pointer = 2;
1522 flag_pcc_struct_return = 2;
1523 flag_asynchronous_unwind_tables = 2;
1524}
1525
1526/* Table of valid machine attributes. */
1527const struct attribute_spec ix86_attribute_table[] =
1528{
1529 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1530 /* Stdcall attribute says callee is responsible for popping arguments
1531 if they are not variable. */
1532 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1533 /* Fastcall attribute says callee is responsible for popping arguments
1534 if they are not variable. */
1535 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1536 /* Cdecl attribute says the callee is a normal C declaration */
1537 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1538 /* Regparm attribute specifies how many integer arguments are to be
1539 passed in registers. */
1540 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1541#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1542 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1543 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1544 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1545#endif
1546 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1547 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1548 { NULL, 0, 0, false, false, false, NULL }
1549};
1550
1551/* Decide whether we can make a sibling call to a function. DECL is the
1552 declaration of the function being targeted by the call and EXP is the
1553 CALL_EXPR representing the call. */
1554
1555static bool
1556ix86_function_ok_for_sibcall (tree decl, tree exp)
1557{
1558 /* If we are generating position-independent code, we cannot sibcall
1559 optimize any indirect call, or a direct call to a global function,
1560 as the PLT requires %ebx be live. */
1561 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1562 return false;
1563
1564 /* If we are returning floats on the 80387 register stack, we cannot
1565 make a sibcall from a function that doesn't return a float to a
1566 function that does or, conversely, from a function that does return
1567 a float to a function that doesn't; the necessary stack adjustment
1568 would not be executed. */
1569 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1570 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1571 return false;
1572
1573 /* If this call is indirect, we'll need to be able to use a call-clobbered
1574 register for the address of the target function. Make sure that all
1575 such registers are not used for passing parameters. */
1576 if (!decl && !TARGET_64BIT)
1577 {
1578 tree type;
1579
1580 /* We're looking at the CALL_EXPR, we need the type of the function. */
1581 type = TREE_OPERAND (exp, 0); /* pointer expression */
1582 type = TREE_TYPE (type); /* pointer type */
1583 type = TREE_TYPE (type); /* function type */
1584
1585 if (ix86_function_regparm (type, NULL) >= 3)
1586 {
1587 /* ??? Need to count the actual number of registers to be used,
1588 not the possible number of registers. Fix later. */
1589 return false;
1590 }
1591 }
1592
1593 /* Otherwise okay. That also includes certain types of indirect calls. */
1594 return true;
1595}
1596
1597/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1598 arguments as in struct attribute_spec.handler. */
1599static tree
1600ix86_handle_cdecl_attribute (tree *node, tree name,
1601 tree args ATTRIBUTE_UNUSED,
1602 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1603{
1604 if (TREE_CODE (*node) != FUNCTION_TYPE
1605 && TREE_CODE (*node) != METHOD_TYPE
1606 && TREE_CODE (*node) != FIELD_DECL
1607 && TREE_CODE (*node) != TYPE_DECL)
1608 {
1609 warning ("`%s' attribute only applies to functions",
1610 IDENTIFIER_POINTER (name));
1611 *no_add_attrs = true;
1612 }
1613 else
1614 {
1615 if (is_attribute_p ("fastcall", name))
1616 {
1617 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1618 {
1619 error ("fastcall and stdcall attributes are not compatible");
1620 }
1621 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1622 {
1623 error ("fastcall and regparm attributes are not compatible");
1624 }
1625 }
1626 else if (is_attribute_p ("stdcall", name))
1627 {
1628 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1629 {
1630 error ("fastcall and stdcall attributes are not compatible");
1631 }
1632 }
1633 }
1634
1635 if (TARGET_64BIT)
1636 {
1637 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1638 *no_add_attrs = true;
1639 }
1640
1641 return NULL_TREE;
1642}
1643
1644/* Handle a "regparm" attribute;
1645 arguments as in struct attribute_spec.handler. */
1646static tree
1647ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1648 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1649{
1650 if (TREE_CODE (*node) != FUNCTION_TYPE
1651 && TREE_CODE (*node) != METHOD_TYPE
1652 && TREE_CODE (*node) != FIELD_DECL
1653 && TREE_CODE (*node) != TYPE_DECL)
1654 {
1655 warning ("`%s' attribute only applies to functions",
1656 IDENTIFIER_POINTER (name));
1657 *no_add_attrs = true;
1658 }
1659 else
1660 {
1661 tree cst;
1662
1663 cst = TREE_VALUE (args);
1664 if (TREE_CODE (cst) != INTEGER_CST)
1665 {
1666 warning ("`%s' attribute requires an integer constant argument",
1667 IDENTIFIER_POINTER (name));
1668 *no_add_attrs = true;
1669 }
1670 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1671 {
1672 warning ("argument to `%s' attribute larger than %d",
1673 IDENTIFIER_POINTER (name), REGPARM_MAX);
1674 *no_add_attrs = true;
1675 }
1676
1677 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1678 {
1679 error ("fastcall and regparm attributes are not compatible");
1680 }
1681 }
1682
1683 return NULL_TREE;
1684}
1685
1686/* Return 0 if the attributes for two types are incompatible, 1 if they
1687 are compatible, and 2 if they are nearly compatible (which causes a
1688 warning to be generated). */
1689
1690static int
1691ix86_comp_type_attributes (tree type1, tree type2)
1692{
1693 /* Check for mismatch of non-default calling convention. */
1694 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1695
1696 if (TREE_CODE (type1) != FUNCTION_TYPE)
1697 return 1;
1698
1699 /* Check for mismatched fastcall types */
1700 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1701 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1702 return 0;
1703
1704 /* Check for mismatched return types (cdecl vs stdcall). */
1705 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1706 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1707 return 0;
1708 if (ix86_function_regparm (type1, NULL)
1709 != ix86_function_regparm (type2, NULL))
1710 return 0;
1711 return 1;
1712}
1713
1714 /* Return the regparm value for a function with the indicated TYPE and DECL.
1715 DECL may be NULL when calling the function indirectly
1716 or considering a libcall. */
1717
1718static int
1719ix86_function_regparm (tree type, tree decl)
1720{
1721 tree attr;
1722 int regparm = ix86_regparm;
1723 bool user_convention = false;
1724
1725 if (!TARGET_64BIT)
1726 {
1727 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1728 if (attr)
1729 {
1730 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1731 user_convention = true;
1732 }
1733
1734 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1735 {
1736 regparm = 2;
1737 user_convention = true;
1738 }
1739
1740 /* Use register calling convention for local functions when possible. */
1741 if (!TARGET_64BIT && !user_convention && decl
1742 && flag_unit_at_a_time && !profile_flag)
1743 {
1744 struct cgraph_local_info *i = cgraph_local_info (decl);
1745 if (i && i->local)
1746 {
1747 /* We can't use regparm(3) for nested functions as these use
1748 static chain pointer in third argument. */
1749 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1750 regparm = 2;
1751 else
1752 regparm = 3;
1753 }
1754 }
1755 }
1756 return regparm;
1757}
1758
1759/* Return true if EAX is live at the start of the function. Used by
1760 ix86_expand_prologue to determine if we need special help before
1761 calling allocate_stack_worker. */
1762
1763static bool
1764ix86_eax_live_at_start_p (void)
1765{
1766 /* Cheat. Don't bother working forward from ix86_function_regparm
1767 to the function type to whether an actual argument is located in
1768 eax. Instead just look at cfg info, which is still close enough
1769 to correct at this point. This gives false positives for broken
1770 functions that might use uninitialized data that happens to be
1771 allocated in eax, but who cares? */
1772 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1773}
1774
1775/* Value is the number of bytes of arguments automatically
1776 popped when returning from a subroutine call.
1777 FUNDECL is the declaration node of the function (as a tree),
1778 FUNTYPE is the data type of the function (as a tree),
1779 or for a library call it is an identifier node for the subroutine name.
1780 SIZE is the number of bytes of arguments passed on the stack.
1781
1782 On the 80386, the RTD insn may be used to pop them if the number
1783 of args is fixed, but if the number is variable then the caller
1784 must pop them all. RTD can't be used for library calls now
1785 because the library is compiled with the Unix compiler.
1786 Use of RTD is a selectable option, since it is incompatible with
1787 standard Unix calling sequences. If the option is not selected,
1788 the caller must always pop the args.
1789
1790 The attribute stdcall is equivalent to RTD on a per module basis. */
1791
1792int
1793ix86_return_pops_args (tree fundecl, tree funtype, int size)
1794{
1795 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1796
1797 /* Cdecl functions override -mrtd, and never pop the stack. */
1798 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1799
1800 /* Stdcall and fastcall functions will pop the stack if not
1801 variable args. */
1802 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1803 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1804 rtd = 1;
1805
1806 if (rtd
1807 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1808 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1809 == void_type_node)))
1810 return size;
1811 }
1812
1813 /* Lose any fake structure return argument if it is passed on the stack. */
1814 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1815 && !TARGET_64BIT)
1816 {
1817 int nregs = ix86_function_regparm (funtype, fundecl);
1818
1819 if (!nregs)
1820 return GET_MODE_SIZE (Pmode);
1821 }
1822
1823 return 0;
1824}
1825
1826/* Argument support functions. */
1827
1828/* Return true when register may be used to pass function parameters. */
1829bool
1830ix86_function_arg_regno_p (int regno)
1831{
1832 int i;
1833 if (!TARGET_64BIT)
1834 return (regno < REGPARM_MAX
1835 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1836 if (SSE_REGNO_P (regno) && TARGET_SSE)
1837 return true;
1838 /* RAX is used as hidden argument to va_arg functions. */
1839 if (!regno)
1840 return true;
1841 for (i = 0; i < REGPARM_MAX; i++)
1842 if (regno == x86_64_int_parameter_registers[i])
1843 return true;
1844 return false;
1845}
1846
1847/* Initialize a variable CUM of type CUMULATIVE_ARGS
1848 for a call to a function whose data type is FNTYPE.
1849 For a library call, FNTYPE is 0. */
1850
1851void
1852init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1853 tree fntype, /* tree ptr for function decl */
1854 rtx libname, /* SYMBOL_REF of library name or 0 */
1855 tree fndecl)
1856{
1857 static CUMULATIVE_ARGS zero_cum;
1858 tree param, next_param;
1859
1860 if (TARGET_DEBUG_ARG)
1861 {
1862 fprintf (stderr, "\ninit_cumulative_args (");
1863 if (fntype)
1864 fprintf (stderr, "fntype code = %s, ret code = %s",
1865 tree_code_name[(int) TREE_CODE (fntype)],
1866 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1867 else
1868 fprintf (stderr, "no fntype");
1869
1870 if (libname)
1871 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1872 }
1873
1874 *cum = zero_cum;
1875
1876 /* Set up the number of registers to use for passing arguments. */
1877 if (fntype)
1878 cum->nregs = ix86_function_regparm (fntype, fndecl);
1879 else
1880 cum->nregs = ix86_regparm;
1881 cum->sse_nregs = SSE_REGPARM_MAX;
1882 cum->mmx_nregs = MMX_REGPARM_MAX;
1883 cum->warn_sse = true;
1884 cum->warn_mmx = true;
1885 cum->maybe_vaarg = false;
1886
1887 /* Use ecx and edx registers if function has fastcall attribute */
1888 if (fntype && !TARGET_64BIT)
1889 {
1890 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1891 {
1892 cum->nregs = 2;
1893 cum->fastcall = 1;
1894 }
1895 }
1896
1897
1898   /* Determine if this function has variable arguments.  This is
1899      indicated by the last argument being 'void_type_node' if there
1900      are no variable arguments.  If there are variable arguments, then
1901      we won't pass anything in registers.  */
1902
1903 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1904 {
1905 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1906 param != 0; param = next_param)
1907 {
1908 next_param = TREE_CHAIN (param);
1909 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1910 {
1911 if (!TARGET_64BIT)
1912 {
1913 cum->nregs = 0;
1914 cum->sse_nregs = 0;
1915 cum->mmx_nregs = 0;
1916 cum->warn_sse = 0;
1917 cum->warn_mmx = 0;
1918 cum->fastcall = 0;
1919 }
1920 cum->maybe_vaarg = true;
1921 }
1922 }
1923 }
1924 if ((!fntype && !libname)
1925 || (fntype && !TYPE_ARG_TYPES (fntype)))
1926 cum->maybe_vaarg = 1;
1927
1928 if (TARGET_DEBUG_ARG)
1929 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1930
1931 return;
1932}
1933
1934/* x86-64 register passing implementation.  See the x86-64 ABI for details.
1935   The goal of this code is to classify each 8-byte chunk of the incoming
1936   argument by register class and assign registers accordingly.  */
1937
1938/* Return the union class of CLASS1 and CLASS2.
1939 See the x86-64 PS ABI for details. */
1940
1941static enum x86_64_reg_class
1942merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1943{
1944 /* Rule #1: If both classes are equal, this is the resulting class. */
1945 if (class1 == class2)
1946 return class1;
1947
1948 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1949 the other class. */
1950 if (class1 == X86_64_NO_CLASS)
1951 return class2;
1952 if (class2 == X86_64_NO_CLASS)
1953 return class1;
1954
1955 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1956 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1957 return X86_64_MEMORY_CLASS;
1958
1959 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1960 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1961 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1962 return X86_64_INTEGERSI_CLASS;
1963 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1964 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1965 return X86_64_INTEGER_CLASS;
1966
1967 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1968 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1969 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1970 return X86_64_MEMORY_CLASS;
1971
1972 /* Rule #6: Otherwise class SSE is used. */
1973 return X86_64_SSE_CLASS;
1974}
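
/* Illustrative examples of the rules above: NO_CLASS merged with SSE yields
   SSE (rule #2); INTEGERSI merged with SSESF yields INTEGERSI (rule #4);
   SSE merged with X87 yields MEMORY (rule #5), forcing the value onto the
   stack.  */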
1975
1976/* Classify the argument of type TYPE and mode MODE.
1977 CLASSES will be filled by the register class used to pass each word
1978 of the operand. The number of words is returned. In case the parameter
1979 should be passed in memory, 0 is returned. As a special case for zero
1980 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1981
1982   BIT_OFFSET is used internally for handling records; it is the offset of
1983   the current chunk within the record, in bits modulo 256, to avoid overflow.
1984
1985 See the x86-64 PS ABI for details.
1986*/
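
/* Worked example (a sketch of the rules above): struct { double d; int i; }
   is 16 bytes; its first 8-byte chunk classifies as X86_64_SSEDF_CLASS and
   the second as X86_64_INTEGER_CLASS, so 2 is returned.  A 24-byte aggregate
   such as struct { double a, b, c; } exceeds 16 bytes and yields 0, i.e. it
   is passed in memory.  */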
1987
1988static int
1989classify_argument (enum machine_mode mode, tree type,
1990 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1991{
1992 HOST_WIDE_INT bytes =
1993 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1994 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1995
1996 /* Variable sized entities are always passed/returned in memory. */
1997 if (bytes < 0)
1998 return 0;
1999
2000 if (mode != VOIDmode
2001 && MUST_PASS_IN_STACK (mode, type))
2002 return 0;
2003
2004 if (type && AGGREGATE_TYPE_P (type))
2005 {
2006 int i;
2007 tree field;
2008 enum x86_64_reg_class subclasses[MAX_CLASSES];
2009
2010 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2011 if (bytes > 16)
2012 return 0;
2013
2014 for (i = 0; i < words; i++)
2015 classes[i] = X86_64_NO_CLASS;
2016
2017      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
2018	 signal memory class, so handle it as a special case.  */
2019 if (!words)
2020 {
2021 classes[0] = X86_64_NO_CLASS;
2022 return 1;
2023 }
2024
2025 /* Classify each field of record and merge classes. */
2026 if (TREE_CODE (type) == RECORD_TYPE)
2027 {
2028	  /* For C++ classes, first merge in the fields of the base classes.  */
2029 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2030 {
2031 tree bases = TYPE_BINFO_BASETYPES (type);
2032 int n_bases = TREE_VEC_LENGTH (bases);
2033 int i;
2034
2035 for (i = 0; i < n_bases; ++i)
2036 {
2037 tree binfo = TREE_VEC_ELT (bases, i);
2038 int num;
2039 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2040 tree type = BINFO_TYPE (binfo);
2041
2042 num = classify_argument (TYPE_MODE (type),
2043 type, subclasses,
2044 (offset + bit_offset) % 256);
2045 if (!num)
2046 return 0;
2047 for (i = 0; i < num; i++)
2048 {
2049 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2050 classes[i + pos] =
2051 merge_classes (subclasses[i], classes[i + pos]);
2052 }
2053 }
2054 }
2055	  /* And now merge the fields of the structure.  */
2056 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2057 {
2058 if (TREE_CODE (field) == FIELD_DECL)
2059 {
2060 int num;
2061
2062 /* Bitfields are always classified as integer. Handle them
2063 early, since later code would consider them to be
2064 misaligned integers. */
2065 if (DECL_BIT_FIELD (field))
2066 {
2067 for (i = int_bit_position (field) / 8 / 8;
2068 i < (int_bit_position (field)
2069 + tree_low_cst (DECL_SIZE (field), 0)
2070 + 63) / 8 / 8; i++)
2071 classes[i] =
2072 merge_classes (X86_64_INTEGER_CLASS,
2073 classes[i]);
2074 }
2075 else
2076 {
2077 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2078 TREE_TYPE (field), subclasses,
2079 (int_bit_position (field)
2080 + bit_offset) % 256);
2081 if (!num)
2082 return 0;
2083 for (i = 0; i < num; i++)
2084 {
2085 int pos =
2086 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2087 classes[i + pos] =
2088 merge_classes (subclasses[i], classes[i + pos]);
2089 }
2090 }
2091 }
2092 }
2093 }
2094 /* Arrays are handled as small records. */
2095 else if (TREE_CODE (type) == ARRAY_TYPE)
2096 {
2097 int num;
2098 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2099 TREE_TYPE (type), subclasses, bit_offset);
2100 if (!num)
2101 return 0;
2102
2103 /* The partial classes are now full classes. */
2104 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2105 subclasses[0] = X86_64_SSE_CLASS;
2106 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2107 subclasses[0] = X86_64_INTEGER_CLASS;
2108
2109 for (i = 0; i < words; i++)
2110 classes[i] = subclasses[i % num];
2111 }
2112 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2113 else if (TREE_CODE (type) == UNION_TYPE
2114 || TREE_CODE (type) == QUAL_UNION_TYPE)
2115 {
2116	  /* For C++ classes, first merge in the fields of the base classes.  */
2117 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2118 {
2119 tree bases = TYPE_BINFO_BASETYPES (type);
2120 int n_bases = TREE_VEC_LENGTH (bases);
2121 int i;
2122
2123 for (i = 0; i < n_bases; ++i)
2124 {
2125 tree binfo = TREE_VEC_ELT (bases, i);
2126 int num;
2127 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2128 tree type = BINFO_TYPE (binfo);
2129
2130 num = classify_argument (TYPE_MODE (type),
2131 type, subclasses,
2132 (offset + (bit_offset % 64)) % 256);
2133 if (!num)
2134 return 0;
2135 for (i = 0; i < num; i++)
2136 {
2137 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2138 classes[i + pos] =
2139 merge_classes (subclasses[i], classes[i + pos]);
2140 }
2141 }
2142 }
2143 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2144 {
2145 if (TREE_CODE (field) == FIELD_DECL)
2146 {
2147 int num;
2148 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2149 TREE_TYPE (field), subclasses,
2150 bit_offset);
2151 if (!num)
2152 return 0;
2153 for (i = 0; i < num; i++)
2154 classes[i] = merge_classes (subclasses[i], classes[i]);
2155 }
2156 }
2157 }
2158 else if (TREE_CODE (type) == SET_TYPE)
2159 {
2160 if (bytes <= 4)
2161 {
2162 classes[0] = X86_64_INTEGERSI_CLASS;
2163 return 1;
2164 }
2165 else if (bytes <= 8)
2166 {
2167 classes[0] = X86_64_INTEGER_CLASS;
2168 return 1;
2169 }
2170 else if (bytes <= 12)
2171 {
2172 classes[0] = X86_64_INTEGER_CLASS;
2173 classes[1] = X86_64_INTEGERSI_CLASS;
2174 return 2;
2175 }
2176 else
2177 {
2178 classes[0] = X86_64_INTEGER_CLASS;
2179 classes[1] = X86_64_INTEGER_CLASS;
2180 return 2;
2181 }
2182 }
2183 else
2184 abort ();
2185
2186 /* Final merger cleanup. */
2187 for (i = 0; i < words; i++)
2188 {
2189 /* If one class is MEMORY, everything should be passed in
2190 memory. */
2191 if (classes[i] == X86_64_MEMORY_CLASS)
2192 return 0;
2193
2194 /* The X86_64_SSEUP_CLASS should be always preceded by
2195 X86_64_SSE_CLASS. */
2196 if (classes[i] == X86_64_SSEUP_CLASS
2197 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2198 classes[i] = X86_64_SSE_CLASS;
2199
2200 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2201 if (classes[i] == X86_64_X87UP_CLASS
2202 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2203 classes[i] = X86_64_SSE_CLASS;
2204 }
2205 return words;
2206 }
2207
2208  /* Compute alignment needed.  We align all types to natural boundaries with
2209     the exception of XFmode, which is aligned to 128 bits.  */
2210 if (mode != VOIDmode && mode != BLKmode)
2211 {
2212 int mode_alignment = GET_MODE_BITSIZE (mode);
2213
2214 if (mode == XFmode)
2215 mode_alignment = 128;
2216 else if (mode == XCmode)
2217 mode_alignment = 256;
2218 if (COMPLEX_MODE_P (mode))
2219 mode_alignment /= 2;
2220 /* Misaligned fields are always returned in memory. */
2221 if (bit_offset % mode_alignment)
2222 return 0;
2223 }
2224
2225 /* Classification of atomic types. */
2226 switch (mode)
2227 {
2228 case DImode:
2229 case SImode:
2230 case HImode:
2231 case QImode:
2232 case CSImode:
2233 case CHImode:
2234 case CQImode:
2235 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2236 classes[0] = X86_64_INTEGERSI_CLASS;
2237 else
2238 classes[0] = X86_64_INTEGER_CLASS;
2239 return 1;
2240 case CDImode:
2241 case TImode:
2242 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2243 return 2;
2244 case CTImode:
2245 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2246 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2247 return 4;
2248 case SFmode:
2249 if (!(bit_offset % 64))
2250 classes[0] = X86_64_SSESF_CLASS;
2251 else
2252 classes[0] = X86_64_SSE_CLASS;
2253 return 1;
2254 case DFmode:
2255 classes[0] = X86_64_SSEDF_CLASS;
2256 return 1;
2257 case XFmode:
2258 classes[0] = X86_64_X87_CLASS;
2259 classes[1] = X86_64_X87UP_CLASS;
2260 return 2;
2261 case TFmode:
2262 case TCmode:
2263 return 0;
2264 case XCmode:
2265 classes[0] = X86_64_X87_CLASS;
2266 classes[1] = X86_64_X87UP_CLASS;
2267 classes[2] = X86_64_X87_CLASS;
2268 classes[3] = X86_64_X87UP_CLASS;
2269 return 4;
2270 case DCmode:
2271 classes[0] = X86_64_SSEDF_CLASS;
2272 classes[1] = X86_64_SSEDF_CLASS;
2273 return 2;
2274 case SCmode:
2275 classes[0] = X86_64_SSE_CLASS;
2276 return 1;
2277 case V4SFmode:
2278 case V4SImode:
2279 case V16QImode:
2280 case V8HImode:
2281 case V2DFmode:
2282 case V2DImode:
2283 classes[0] = X86_64_SSE_CLASS;
2284 classes[1] = X86_64_SSEUP_CLASS;
2285 return 2;
2286 case V2SFmode:
2287 case V2SImode:
2288 case V4HImode:
2289 case V8QImode:
2290 return 0;
2291 case BLKmode:
2292 case VOIDmode:
2293 return 0;
2294 default:
2295 abort ();
2296 }
2297}
2298
2299/* Examine the argument and return the number of registers required in each
2300   class.  Return 0 iff the parameter should be passed in memory.  */
2301static int
2302examine_argument (enum machine_mode mode, tree type, int in_return,
2303 int *int_nregs, int *sse_nregs)
2304{
2305 enum x86_64_reg_class class[MAX_CLASSES];
2306 int n = classify_argument (mode, type, class, 0);
2307
2308 *int_nregs = 0;
2309 *sse_nregs = 0;
2310 if (!n)
2311 return 0;
2312 for (n--; n >= 0; n--)
2313 switch (class[n])
2314 {
2315 case X86_64_INTEGER_CLASS:
2316 case X86_64_INTEGERSI_CLASS:
2317 (*int_nregs)++;
2318 break;
2319 case X86_64_SSE_CLASS:
2320 case X86_64_SSESF_CLASS:
2321 case X86_64_SSEDF_CLASS:
2322 (*sse_nregs)++;
2323 break;
2324 case X86_64_NO_CLASS:
2325 case X86_64_SSEUP_CLASS:
2326 break;
2327 case X86_64_X87_CLASS:
2328 case X86_64_X87UP_CLASS:
2329 if (!in_return)
2330 return 0;
2331 break;
2332 case X86_64_MEMORY_CLASS:
2333 abort ();
2334 }
2335 return 1;
2336}
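
/* For example, struct { double d; int i; }, classified as {SSEDF, INTEGER},
   needs one SSE and one integer register, so *sse_nregs = *int_nregs = 1 and
   1 is returned; a long double ({X87, X87UP}) yields 0 as an argument but 1
   as a return value (in_return != 0), matching the switch above.  */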
2337/* Construct container for the argument used by GCC interface. See
2338 FUNCTION_ARG for the detailed description. */
2339static rtx
2340construct_container (enum machine_mode mode, tree type, int in_return,
2341 int nintregs, int nsseregs, const int * intreg,
2342 int sse_regno)
2343{
2344 enum machine_mode tmpmode;
2345 int bytes =
2346 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2347 enum x86_64_reg_class class[MAX_CLASSES];
2348 int n;
2349 int i;
2350 int nexps = 0;
2351 int needed_sseregs, needed_intregs;
2352 rtx exp[MAX_CLASSES];
2353 rtx ret;
2354
2355 n = classify_argument (mode, type, class, 0);
2356 if (TARGET_DEBUG_ARG)
2357 {
2358 if (!n)
2359 fprintf (stderr, "Memory class\n");
2360 else
2361 {
2362 fprintf (stderr, "Classes:");
2363 for (i = 0; i < n; i++)
2364 {
2365 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2366 }
2367 fprintf (stderr, "\n");
2368 }
2369 }
2370 if (!n)
2371 return NULL;
2372 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2373 return NULL;
2374 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2375 return NULL;
2376
2377  /* First construct simple cases.  Avoid SCmode, since we want to use a
2378     single register to pass this type.  */
2379 if (n == 1 && mode != SCmode)
2380 switch (class[0])
2381 {
2382 case X86_64_INTEGER_CLASS:
2383 case X86_64_INTEGERSI_CLASS:
2384 return gen_rtx_REG (mode, intreg[0]);
2385 case X86_64_SSE_CLASS:
2386 case X86_64_SSESF_CLASS:
2387 case X86_64_SSEDF_CLASS:
2388 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2389 case X86_64_X87_CLASS:
2390 return gen_rtx_REG (mode, FIRST_STACK_REG);
2391 case X86_64_NO_CLASS:
2392 /* Zero sized array, struct or class. */
2393 return NULL;
2394 default:
2395 abort ();
2396 }
2397 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2398 && mode != BLKmode)
2399 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2400 if (n == 2
2401 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2402 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2403 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2404 && class[1] == X86_64_INTEGER_CLASS
2405 && (mode == CDImode || mode == TImode || mode == TFmode)
2406 && intreg[0] + 1 == intreg[1])
2407 return gen_rtx_REG (mode, intreg[0]);
2408 if (n == 4
2409 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2410 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2411 && mode != BLKmode)
2412 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2413
2414 /* Otherwise figure out the entries of the PARALLEL. */
2415 for (i = 0; i < n; i++)
2416 {
2417 switch (class[i])
2418 {
2419 case X86_64_NO_CLASS:
2420 break;
2421 case X86_64_INTEGER_CLASS:
2422 case X86_64_INTEGERSI_CLASS:
2423 /* Merge TImodes on aligned occasions here too. */
2424 if (i * 8 + 8 > bytes)
2425 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2426 else if (class[i] == X86_64_INTEGERSI_CLASS)
2427 tmpmode = SImode;
2428 else
2429 tmpmode = DImode;
2430	  /* We've requested 24 bytes we don't have a mode for.  Use DImode.  */
2431 if (tmpmode == BLKmode)
2432 tmpmode = DImode;
2433 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2434 gen_rtx_REG (tmpmode, *intreg),
2435 GEN_INT (i*8));
2436 intreg++;
2437 break;
2438 case X86_64_SSESF_CLASS:
2439 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2440 gen_rtx_REG (SFmode,
2441 SSE_REGNO (sse_regno)),
2442 GEN_INT (i*8));
2443 sse_regno++;
2444 break;
2445 case X86_64_SSEDF_CLASS:
2446 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2447 gen_rtx_REG (DFmode,
2448 SSE_REGNO (sse_regno)),
2449 GEN_INT (i*8));
2450 sse_regno++;
2451 break;
2452 case X86_64_SSE_CLASS:
2453 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2454 tmpmode = TImode;
2455 else
2456 tmpmode = DImode;
2457 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2458 gen_rtx_REG (tmpmode,
2459 SSE_REGNO (sse_regno)),
2460 GEN_INT (i*8));
2461 if (tmpmode == TImode)
2462 i++;
2463 sse_regno++;
2464 break;
2465 default:
2466 abort ();
2467 }
2468 }
2469 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2470 for (i = 0; i < nexps; i++)
2471 XVECEXP (ret, 0, i) = exp [i];
2472 return ret;
2473}
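
/* Example (a sketch assuming this is the first argument of a call, so
   intreg[] starts at %rdi and sse_regno is 0): struct { double d; int i; }
   classifies as {SSEDF, INTEGER} and produces a PARALLEL holding
   (reg:DF xmm0) at offset 0 and (reg:DI rdi) at offset 8.  */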
2474
2475/* Update the data in CUM to advance over an argument
2476 of mode MODE and data type TYPE.
2477 (TYPE is null for libcalls where that information may not be available.) */
2478
2479void
2480function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2481 enum machine_mode mode, /* current arg mode */
2482 tree type, /* type of the argument or 0 if lib support */
2483 int named) /* whether or not the argument was named */
2484{
2485 int bytes =
2486 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2487 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2488
2489 if (TARGET_DEBUG_ARG)
2490 fprintf (stderr,
2491 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2492 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2493 if (TARGET_64BIT)
2494 {
2495 int int_nregs, sse_nregs;
2496 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2497 cum->words += words;
2498 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2499 {
2500 cum->nregs -= int_nregs;
2501 cum->sse_nregs -= sse_nregs;
2502 cum->regno += int_nregs;
2503 cum->sse_regno += sse_nregs;
2504 }
2505 else
2506 cum->words += words;
2507 }
2508 else
2509 {
2510 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2511 && (!type || !AGGREGATE_TYPE_P (type)))
2512 {
2513 cum->sse_words += words;
2514 cum->sse_nregs -= 1;
2515 cum->sse_regno += 1;
2516 if (cum->sse_nregs <= 0)
2517 {
2518 cum->sse_nregs = 0;
2519 cum->sse_regno = 0;
2520 }
2521 }
2522 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2523 && (!type || !AGGREGATE_TYPE_P (type)))
2524 {
2525 cum->mmx_words += words;
2526 cum->mmx_nregs -= 1;
2527 cum->mmx_regno += 1;
2528 if (cum->mmx_nregs <= 0)
2529 {
2530 cum->mmx_nregs = 0;
2531 cum->mmx_regno = 0;
2532 }
2533 }
2534 else
2535 {
2536 cum->words += words;
2537 cum->nregs -= words;
2538 cum->regno += words;
2539
2540 if (cum->nregs <= 0)
2541 {
2542 cum->nregs = 0;
2543 cum->regno = 0;
2544 }
2545 }
2546 }
2547 return;
2548}
2549
2550/* A subroutine of function_arg. We want to pass a parameter whose nominal
2551 type is MODE in REGNO. We try to minimize ABI variation, so MODE may not
2552 actually be valid for REGNO with the current ISA. In this case, ALT_MODE
2553 is used instead. It must be the same size as MODE, and must be known to
2554 be valid for REGNO. Finally, ORIG_MODE is the original mode of the
2555 parameter, as seen by the type system. This may be different from MODE
2556 when we're mucking with things minimizing ABI variations.
2557
2558 Returns a REG or a PARALLEL as appropriate. */
2559
2560static rtx
2561gen_reg_or_parallel (enum machine_mode mode, enum machine_mode alt_mode,
2562 enum machine_mode orig_mode, unsigned int regno)
2563{
2564 rtx tmp;
2565
2566 if (HARD_REGNO_MODE_OK (regno, mode))
2567 tmp = gen_rtx_REG (mode, regno);
2568 else
2569 {
2570 tmp = gen_rtx_REG (alt_mode, regno);
2571 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2572 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2573 }
2574
2575 return tmp;
2576}
2577
2578/* Define where to put the arguments to a function.
2579 Value is zero to push the argument on the stack,
2580 or a hard register in which to store the argument.
2581
2582 MODE is the argument's machine mode.
2583 TYPE is the data type of the argument (as a tree).
2584 This is null for libcalls where that information may
2585 not be available.
2586 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2587 the preceding args and about the function being called.
2588 NAMED is nonzero if this argument is a named parameter
2589 (otherwise it is an extra parameter matching an ellipsis). */
2590
2591rtx
2592function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2593 tree type, int named)
2594{
2595 enum machine_mode mode = orig_mode;
2596 rtx ret = NULL_RTX;
2597 int bytes =
2598 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2599 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2600 static bool warnedsse, warnedmmx;
2601
2602  /* Handle a hidden AL argument containing the number of SSE registers used
2603     by varargs x86-64 functions.  For the i386 ABI, just return constm1_rtx
2604     to avoid any AL settings.  */
2605 if (mode == VOIDmode)
2606 {
2607 if (TARGET_64BIT)
2608 return GEN_INT (cum->maybe_vaarg
2609 ? (cum->sse_nregs < 0
2610 ? SSE_REGPARM_MAX
2611 : cum->sse_regno)
2612 : -1);
2613 else
2614 return constm1_rtx;
2615 }
2616 if (TARGET_64BIT)
2617 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2618 &x86_64_int_parameter_registers [cum->regno],
2619 cum->sse_regno);
2620 else
2621 switch (mode)
2622 {
2623 /* For now, pass fp/complex values on the stack. */
2624 default:
2625 break;
2626
2627 case BLKmode:
2628 if (bytes < 0)
2629 break;
2630 /* FALLTHRU */
2631 case DImode:
2632 case SImode:
2633 case HImode:
2634 case QImode:
2635 if (words <= cum->nregs)
2636 {
2637 int regno = cum->regno;
2638
2639 /* Fastcall allocates the first two DWORD (SImode) or
2640 smaller arguments to ECX and EDX. */
2641 if (cum->fastcall)
2642 {
2643 if (mode == BLKmode || mode == DImode)
2644 break;
2645
2646	      /* ECX, not EAX, is the first allocated register.  */
2647 if (regno == 0)
2648 regno = 2;
2649 }
2650 ret = gen_rtx_REG (mode, regno);
2651 }
2652 break;
2653 case TImode:
2654 case V16QImode:
2655 case V8HImode:
2656 case V4SImode:
2657 case V2DImode:
2658 case V4SFmode:
2659 case V2DFmode:
2660 if (!type || !AGGREGATE_TYPE_P (type))
2661 {
2662	      if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2663 {
2664 warnedsse = true;
2665 warning ("SSE vector argument without SSE enabled "
2666 "changes the ABI");
2667 }
2668 if (cum->sse_nregs)
2669 ret = gen_reg_or_parallel (mode, TImode, orig_mode,
2670 cum->sse_regno + FIRST_SSE_REG);
2671 }
2672 break;
2673 case V8QImode:
2674 case V4HImode:
2675 case V2SImode:
2676 case V2SFmode:
2677 if (!type || !AGGREGATE_TYPE_P (type))
2678 {
2679 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2680 {
2681 warnedmmx = true;
2682 warning ("MMX vector argument without MMX enabled "
2683 "changes the ABI");
2684 }
2685 if (cum->mmx_nregs)
2686 ret = gen_reg_or_parallel (mode, DImode, orig_mode,
2687 cum->mmx_regno + FIRST_MMX_REG);
2688 }
2689 break;
2690 }
2691
2692 if (TARGET_DEBUG_ARG)
2693 {
2694 fprintf (stderr,
2695 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2696 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2697
2698 if (ret)
2699 print_simple_rtl (stderr, ret);
2700 else
2701 fprintf (stderr, ", stack");
2702
2703 fprintf (stderr, " )\n");
2704 }
2705
2706 return ret;
2707}
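
/* Illustration of the 32-bit fastcall path above: for
     __attribute__((fastcall)) void f (int a, int b, int c);
   the first argument lands in %ecx, the second in %edx, and the third is
   pushed on the stack once cum->nregs is exhausted.  */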
2708
2709/* A C expression that indicates when an argument must be passed by
2710 reference. If nonzero for an argument, a copy of that argument is
2711 made in memory and a pointer to the argument is passed instead of
2712 the argument itself. The pointer is passed in whatever way is
2713 appropriate for passing a pointer to that type. */
2714
2715int
2716function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2717 enum machine_mode mode ATTRIBUTE_UNUSED,
2718 tree type, int named ATTRIBUTE_UNUSED)
2719{
2720 if (!TARGET_64BIT)
2721 return 0;
2722
2723 if (type && int_size_in_bytes (type) == -1)
2724 {
2725 if (TARGET_DEBUG_ARG)
2726 fprintf (stderr, "function_arg_pass_by_reference\n");
2727 return 1;
2728 }
2729
2730 return 0;
2731}
2732
2733/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2734   passing ABI.  */
2735static bool
2736contains_128bit_aligned_vector_p (tree type)
2737{
2738 enum machine_mode mode = TYPE_MODE (type);
2739 if (SSE_REG_MODE_P (mode)
2740 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2741 return true;
2742 if (TYPE_ALIGN (type) < 128)
2743 return false;
2744
2745 if (AGGREGATE_TYPE_P (type))
2746 {
2747 /* Walk the aggregates recursively. */
2748 if (TREE_CODE (type) == RECORD_TYPE
2749 || TREE_CODE (type) == UNION_TYPE
2750 || TREE_CODE (type) == QUAL_UNION_TYPE)
2751 {
2752 tree field;
2753
2754 if (TYPE_BINFO (type) != NULL
2755 && TYPE_BINFO_BASETYPES (type) != NULL)
2756 {
2757 tree bases = TYPE_BINFO_BASETYPES (type);
2758 int n_bases = TREE_VEC_LENGTH (bases);
2759 int i;
2760
2761 for (i = 0; i < n_bases; ++i)
2762 {
2763 tree binfo = TREE_VEC_ELT (bases, i);
2764 tree type = BINFO_TYPE (binfo);
2765
2766 if (contains_128bit_aligned_vector_p (type))
2767 return true;
2768 }
2769 }
2770	  /* And now merge the fields of the structure.  */
2771 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2772 {
2773 if (TREE_CODE (field) == FIELD_DECL
2774 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2775 return true;
2776 }
2777 }
2778      /* Just in case some language passes arrays by value.  */
2779 else if (TREE_CODE (type) == ARRAY_TYPE)
2780 {
2781 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2782 return true;
2783 }
2784 else
2785 abort ();
2786 }
2787 return false;
2788}
2789
2790/* Gives the alignment boundary, in bits, of an argument with the
2791 specified mode and type. */
2792
2793int
2794ix86_function_arg_boundary (enum machine_mode mode, tree type)
2795{
2796 int align;
2797 if (type)
2798 align = TYPE_ALIGN (type);
2799 else
2800 align = GET_MODE_ALIGNMENT (mode);
2801 if (align < PARM_BOUNDARY)
2802 align = PARM_BOUNDARY;
2803 if (!TARGET_64BIT)
2804 {
2805 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2806 make an exception for SSE modes since these require 128bit
2807 alignment.
2808
2809 The handling here differs from field_alignment. ICC aligns MMX
2810 arguments to 4 byte boundaries, while structure fields are aligned
2811 to 8 byte boundaries. */
2812 if (!type)
2813 {
2814 if (!SSE_REG_MODE_P (mode))
2815 align = PARM_BOUNDARY;
2816 }
2817 else
2818 {
2819 if (!contains_128bit_aligned_vector_p (type))
2820 align = PARM_BOUNDARY;
2821 }
2822 }
2823 if (align > 128)
2824 align = 128;
2825 return align;
2826}
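
/* Examples for the 32-bit case above: a `double' argument has a natural
   64-bit alignment but is passed on a PARM_BOUNDARY (32-bit) boundary, while
   an `__m128' argument keeps its 128-bit alignment; anything larger is
   capped at 128 bits.  */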
2827
2828/* Return true if REGNO is a possible register number of a function value.  */
2829bool
2830ix86_function_value_regno_p (int regno)
2831{
2832 if (!TARGET_64BIT)
2833 {
2834 return ((regno) == 0
2835 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2836 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2837 }
2838 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2839 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2840 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2841}
2842
2843/* Define how to find the value returned by a function.
2844 VALTYPE is the data type of the value (as a tree).
2845 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2846 otherwise, FUNC is 0. */
2847rtx
2848ix86_function_value (tree valtype)
2849{
2850 if (TARGET_64BIT)
2851 {
2852 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2853 REGPARM_MAX, SSE_REGPARM_MAX,
2854 x86_64_int_return_registers, 0);
2855      /* For zero sized structures, construct_container returns NULL, but we need
2856	 to keep the rest of the compiler happy by returning a meaningful value.  */
2857 if (!ret)
2858 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2859 return ret;
2860 }
2861 else
2862 return gen_rtx_REG (TYPE_MODE (valtype),
2863 ix86_value_regno (TYPE_MODE (valtype)));
2864}
2865
2866/* Return nonzero iff TYPE is returned in memory.  */
2867int
2868ix86_return_in_memory (tree type)
2869{
2870 int needed_intregs, needed_sseregs, size;
2871 enum machine_mode mode = TYPE_MODE (type);
2872
2873 if (TARGET_64BIT)
2874 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2875
2876 if (mode == BLKmode)
2877 return 1;
2878
2879 size = int_size_in_bytes (type);
2880
2881 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2882 return 0;
2883
2884 if (VECTOR_MODE_P (mode) || mode == TImode)
2885 {
2886 /* User-created vectors small enough to fit in EAX. */
2887 if (size < 8)
2888 return 0;
2889
2890 /* MMX/3dNow values are returned on the stack, since we've
2891 got to EMMS/FEMMS before returning. */
2892 if (size == 8)
2893 return 1;
2894
2895 /* SSE values are returned in XMM0. */
2896 /* ??? Except when it doesn't exist? We have a choice of
2897 either (1) being abi incompatible with a -march switch,
2898 or (2) generating an error here. Given no good solution,
2899 I think the safest thing is one warning. The user won't
2900 be able to use -Werror, but.... */
2901 if (size == 16)
2902 {
2903 static bool warned;
2904
2905 if (TARGET_SSE)
2906 return 0;
2907
2908 if (!warned)
2909 {
2910 warned = true;
2911 warning ("SSE vector return without SSE enabled "
2912 "changes the ABI");
2913 }
2914 return 1;
2915 }
2916 }
2917
2918 if (mode == XFmode)
2919 return 0;
2920
2921 if (size > 12)
2922 return 1;
2923 return 0;
2924}
2925
2926/* Define how to find the value returned by a library function
2927 assuming the value has mode MODE. */
2928rtx
2929ix86_libcall_value (enum machine_mode mode)
2930{
2931 if (TARGET_64BIT)
2932 {
2933 switch (mode)
2934 {
2935 case SFmode:
2936 case SCmode:
2937 case DFmode:
2938 case DCmode:
2939 return gen_rtx_REG (mode, FIRST_SSE_REG);
2940 case XFmode:
2941 case XCmode:
2942 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2943 case TFmode:
2944 case TCmode:
2945 return NULL;
2946 default:
2947 return gen_rtx_REG (mode, 0);
2948 }
2949 }
2950 else
2951 return gen_rtx_REG (mode, ix86_value_regno (mode));
2952}
2953
2954/* Given a mode, return the register to use for a return value. */
2955
2956static int
2957ix86_value_regno (enum machine_mode mode)
2958{
2959 /* Floating point return values in %st(0). */
2960 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2961 return FIRST_FLOAT_REG;
2962 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2963 we prevent this case when sse is not available. */
2964 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2965 return FIRST_SSE_REG;
2966 /* Everything else in %eax. */
2967 return 0;
2968}
2969
2970/* Create the va_list data type. */
2971
2972static tree
2973ix86_build_builtin_va_list (void)
2974{
2975 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2976
2977  /* For i386 we use a plain pointer to the argument area.  */
2978 if (!TARGET_64BIT)
2979 return build_pointer_type (char_type_node);
2980
2981 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2982 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2983
2984 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2985 unsigned_type_node);
2986 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2987 unsigned_type_node);
2988 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2989 ptr_type_node);
2990 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2991 ptr_type_node);
2992
2993 DECL_FIELD_CONTEXT (f_gpr) = record;
2994 DECL_FIELD_CONTEXT (f_fpr) = record;
2995 DECL_FIELD_CONTEXT (f_ovf) = record;
2996 DECL_FIELD_CONTEXT (f_sav) = record;
2997
2998 TREE_CHAIN (record) = type_decl;
2999 TYPE_NAME (record) = type_decl;
3000 TYPE_FIELDS (record) = f_gpr;
3001 TREE_CHAIN (f_gpr) = f_fpr;
3002 TREE_CHAIN (f_fpr) = f_ovf;
3003 TREE_CHAIN (f_ovf) = f_sav;
3004
3005 layout_type (record);
3006
3007 /* The correct type is an array type of one element. */
3008 return build_array_type (record, build_index_type (size_zero_node));
3009}
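
/* For reference, the 64-bit record built above corresponds roughly to this
   C declaration from the psABI (a sketch, not compiled here); note that
   va_list itself is an array type of one element:

       typedef struct __va_list_tag {
	 unsigned int gp_offset;
	 unsigned int fp_offset;
	 void *overflow_arg_area;
	 void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];  */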
3010
3011/* Perform any actions needed for a function that is receiving a
3012   variable number of arguments.
3013
3014 CUM is as above.
3015
3016 MODE and TYPE are the mode and type of the current parameter.
3017
3018 PRETEND_SIZE is a variable that should be set to the amount of stack
3019 that must be pushed by the prolog to pretend that our caller pushed
3020 it.
3021
3022 Normally, this macro will push all remaining incoming registers on the
3023 stack and set PRETEND_SIZE to the length of the registers pushed. */
3024
3025void
3026ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3027 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3028 int no_rtl)
3029{
3030 CUMULATIVE_ARGS next_cum;
3031 rtx save_area = NULL_RTX, mem;
3032 rtx label;
3033 rtx label_ref;
3034 rtx tmp_reg;
3035 rtx nsse_reg;
3036 int set;
3037 tree fntype;
3038 int stdarg_p;
3039 int i;
3040
3041 if (!TARGET_64BIT)
3042 return;
3043
3044 /* Indicate to allocate space on the stack for varargs save area. */
3045 ix86_save_varrargs_registers = 1;
3046
3047 cfun->stack_alignment_needed = 128;
3048
3049 fntype = TREE_TYPE (current_function_decl);
3050 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3051 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3052 != void_type_node));
3053
3054 /* For varargs, we do not want to skip the dummy va_dcl argument.
3055 For stdargs, we do want to skip the last named argument. */
3056 next_cum = *cum;
3057 if (stdarg_p)
3058 function_arg_advance (&next_cum, mode, type, 1);
3059
3060 if (!no_rtl)
3061 save_area = frame_pointer_rtx;
3062
3063 set = get_varargs_alias_set ();
3064
3065 for (i = next_cum.regno; i < ix86_regparm; i++)
3066 {
3067 mem = gen_rtx_MEM (Pmode,
3068 plus_constant (save_area, i * UNITS_PER_WORD));
3069 set_mem_alias_set (mem, set);
3070 emit_move_insn (mem, gen_rtx_REG (Pmode,
3071 x86_64_int_parameter_registers[i]));
3072 }
3073
3074 if (next_cum.sse_nregs)
3075 {
3076      /* Now emit code to save the SSE registers.  The AX parameter contains
3077	 the number of SSE parameter registers used to call this function.  We
3078	 use the sse_prologue_save insn template, which produces a computed jump
3079	 across the SSE saves.  We need some preparation work to get this working.  */
3080
3081 label = gen_label_rtx ();
3082 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3083
3084      /* Compute the address to jump to:
3085	 label - eax*4 + nnamed_sse_arguments*4  */
3086 tmp_reg = gen_reg_rtx (Pmode);
3087 nsse_reg = gen_reg_rtx (Pmode);
3088 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3089 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3090 gen_rtx_MULT (Pmode, nsse_reg,
3091 GEN_INT (4))));
3092 if (next_cum.sse_regno)
3093 emit_move_insn
3094 (nsse_reg,
3095 gen_rtx_CONST (DImode,
3096 gen_rtx_PLUS (DImode,
3097 label_ref,
3098 GEN_INT (next_cum.sse_regno * 4))));
3099 else
3100 emit_move_insn (nsse_reg, label_ref);
3101 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3102
3103      /* Compute the address of the memory block we save into.  We always use
3104	 a pointer pointing 127 bytes after the first byte to store - this is
3105	 needed to keep the instruction size limited to 4 bytes.  */
3106 tmp_reg = gen_reg_rtx (Pmode);
3107 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3108 plus_constant (save_area,
3109 8 * REGPARM_MAX + 127)));
3110 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3111 set_mem_alias_set (mem, set);
3112 set_mem_align (mem, BITS_PER_WORD);
3113
3114 /* And finally do the dirty job! */
3115 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3116 GEN_INT (next_cum.sse_regno), label));
3117 }
3118
3119}
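
/* Sketch of the resulting register save area layout (assuming REGPARM_MAX
   is 6 and SSE_REGPARM_MAX is 8 on x86-64): bytes 0..47 hold the six
   integer argument registers at offsets i * 8, and bytes 48..175 are
   expected to hold %xmm0..%xmm7 in 16-byte slots filled by the
   sse_prologue_save pattern.  These offsets match the gp_offset and
   fp_offset values set up in ix86_va_start below.  */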
3120
3121/* Implement va_start. */
3122
3123void
3124ix86_va_start (tree valist, rtx nextarg)
3125{
3126 HOST_WIDE_INT words, n_gpr, n_fpr;
3127 tree f_gpr, f_fpr, f_ovf, f_sav;
3128 tree gpr, fpr, ovf, sav, t;
3129
3130 /* Only 64bit target needs something special. */
3131 if (!TARGET_64BIT)
3132 {
3133 std_expand_builtin_va_start (valist, nextarg);
3134 return;
3135 }
3136
3137 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3138 f_fpr = TREE_CHAIN (f_gpr);
3139 f_ovf = TREE_CHAIN (f_fpr);
3140 f_sav = TREE_CHAIN (f_ovf);
3141
3142 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3143 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3144 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3145 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3146 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3147
3148 /* Count number of gp and fp argument registers used. */
3149 words = current_function_args_info.words;
3150 n_gpr = current_function_args_info.regno;
3151 n_fpr = current_function_args_info.sse_regno;
3152
3153 if (TARGET_DEBUG_ARG)
3154 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3155 (int) words, (int) n_gpr, (int) n_fpr);
3156
3157 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3158 build_int_2 (n_gpr * 8, 0));
3159 TREE_SIDE_EFFECTS (t) = 1;
3160 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3161
3162 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3163 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3164 TREE_SIDE_EFFECTS (t) = 1;
3165 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3166
3167 /* Find the overflow area. */
3168 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3169 if (words != 0)
3170 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3171 build_int_2 (words * UNITS_PER_WORD, 0));
3172 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3173 TREE_SIDE_EFFECTS (t) = 1;
3174 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3175
3176  /* Find the register save area.
3177     The prologue of the function saves it right above the stack frame.  */
3178 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3179 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3180 TREE_SIDE_EFFECTS (t) = 1;
3181 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3182}
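
/* Example of the values set up above (a sketch): for
     void f (int n, ...)
   one integer register is consumed by the named argument, so gp_offset is
   initialized to 8, fp_offset to 8 * REGPARM_MAX = 48 (no named FP args),
   overflow_arg_area points just past any named stack arguments, and
   reg_save_area points at the block saved by the prologue.  */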
3183
3184/* Implement va_arg. */
3185rtx
3186ix86_va_arg (tree valist, tree type)
3187{
3188 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3189 tree f_gpr, f_fpr, f_ovf, f_sav;
3190 tree gpr, fpr, ovf, sav, t;
3191 int size, rsize;
3192 rtx lab_false, lab_over = NULL_RTX;
3193 rtx addr_rtx, r;
3194 rtx container;
3195 int indirect_p = 0;
3196
3197 /* Only 64bit target needs something special. */
3198 if (!TARGET_64BIT)
3199 {
3200 return std_expand_builtin_va_arg (valist, type);
3201 }
3202
3203 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3204 f_fpr = TREE_CHAIN (f_gpr);
3205 f_ovf = TREE_CHAIN (f_fpr);
3206 f_sav = TREE_CHAIN (f_ovf);
3207
3208 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3209 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3210 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3211 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3212 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3213
3214 size = int_size_in_bytes (type);
3215 if (size == -1)
3216 {
3217 /* Passed by reference. */
3218 indirect_p = 1;
3219 type = build_pointer_type (type);
3220 size = int_size_in_bytes (type);
3221 }
3222 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3223
3224 container = construct_container (TYPE_MODE (type), type, 0,
3225 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3226  /* Pull the value out of the saved registers...  */
3229
3230 addr_rtx = gen_reg_rtx (Pmode);
3231
3232 if (container)
3233 {
3234 rtx int_addr_rtx, sse_addr_rtx;
3235 int needed_intregs, needed_sseregs;
3236 int need_temp;
3237
3238 lab_over = gen_label_rtx ();
3239 lab_false = gen_label_rtx ();
3240
3241 examine_argument (TYPE_MODE (type), type, 0,
3242 &needed_intregs, &needed_sseregs);
3243
3244
3245 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3246 || TYPE_ALIGN (type) > 128);
3247
3248      /* If we are passing a structure, verify that it is a consecutive block
3249	 on the register save area.  If not, we need to do moves.  */
3250 if (!need_temp && !REG_P (container))
3251 {
3252 /* Verify that all registers are strictly consecutive */
3253 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3254 {
3255 int i;
3256
3257 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3258 {
3259 rtx slot = XVECEXP (container, 0, i);
3260 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3261 || INTVAL (XEXP (slot, 1)) != i * 16)
3262 need_temp = 1;
3263 }
3264 }
3265 else
3266 {
3267 int i;
3268
3269 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3270 {
3271 rtx slot = XVECEXP (container, 0, i);
3272 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3273 || INTVAL (XEXP (slot, 1)) != i * 8)
3274 need_temp = 1;
3275 }
3276 }
3277 }
3278 if (!need_temp)
3279 {
3280 int_addr_rtx = addr_rtx;
3281 sse_addr_rtx = addr_rtx;
3282 }
3283 else
3284 {
3285 int_addr_rtx = gen_reg_rtx (Pmode);
3286 sse_addr_rtx = gen_reg_rtx (Pmode);
3287 }
3288 /* First ensure that we fit completely in registers. */
3289 if (needed_intregs)
3290 {
3291 emit_cmp_and_jump_insns (expand_expr
3292 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3293 GEN_INT ((REGPARM_MAX - needed_intregs +
3294 1) * 8), GE, const1_rtx, SImode,
3295 1, lab_false);
3296 }
3297 if (needed_sseregs)
3298 {
3299 emit_cmp_and_jump_insns (expand_expr
3300 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3301 GEN_INT ((SSE_REGPARM_MAX -
3302 needed_sseregs + 1) * 16 +
3303 REGPARM_MAX * 8), GE, const1_rtx,
3304 SImode, 1, lab_false);
3305 }
3306
3307 /* Compute index to start of area used for integer regs. */
3308 if (needed_intregs)
3309 {
3310 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3311 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3312 if (r != int_addr_rtx)
3313 emit_move_insn (int_addr_rtx, r);
3314 }
3315 if (needed_sseregs)
3316 {
3317 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3318 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3319 if (r != sse_addr_rtx)
3320 emit_move_insn (sse_addr_rtx, r);
3321 }
3322 if (need_temp)
3323 {
3324 int i;
3325 rtx mem;
3326 rtx x;
3327
3328 /* Never use the memory itself, as it has the alias set. */
3329 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3330 mem = gen_rtx_MEM (BLKmode, x);
3331 force_operand (x, addr_rtx);
3332 set_mem_alias_set (mem, get_varargs_alias_set ());
3333 set_mem_align (mem, BITS_PER_UNIT);
3334
3335 for (i = 0; i < XVECLEN (container, 0); i++)
3336 {
3337 rtx slot = XVECEXP (container, 0, i);
3338 rtx reg = XEXP (slot, 0);
3339 enum machine_mode mode = GET_MODE (reg);
3340 rtx src_addr;
3341 rtx src_mem;
3342 int src_offset;
3343 rtx dest_mem;
3344
3345 if (SSE_REGNO_P (REGNO (reg)))
3346 {
3347 src_addr = sse_addr_rtx;
3348 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3349 }
3350 else
3351 {
3352 src_addr = int_addr_rtx;
3353 src_offset = REGNO (reg) * 8;
3354 }
3355 src_mem = gen_rtx_MEM (mode, src_addr);
3356 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3357 src_mem = adjust_address (src_mem, mode, src_offset);
3358 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3359 emit_move_insn (dest_mem, src_mem);
3360 }
3361 }
3362
3363 if (needed_intregs)
3364 {
3365 t =
3366 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3367 build_int_2 (needed_intregs * 8, 0));
3368 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3369 TREE_SIDE_EFFECTS (t) = 1;
3370 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3371 }
3372 if (needed_sseregs)
3373 {
3374 t =
3375 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3376 build_int_2 (needed_sseregs * 16, 0));
3377 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3378 TREE_SIDE_EFFECTS (t) = 1;
3379 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3380 }
3381
3382 emit_jump_insn (gen_jump (lab_over));
3383 emit_barrier ();
3384 emit_label (lab_false);
3385 }
3386
3387 /* ... otherwise out of the overflow area. */
3388
3389 /* Care for on-stack alignment if needed. */
3390 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3391 t = ovf;
3392 else
3393 {
3394 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3395 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3396 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3397 }
3398 t = save_expr (t);
3399
3400 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3401 if (r != addr_rtx)
3402 emit_move_insn (addr_rtx, r);
3403
3404 t =
3405 build (PLUS_EXPR, TREE_TYPE (t), t,
3406 build_int_2 (rsize * UNITS_PER_WORD, 0));
3407 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3408 TREE_SIDE_EFFECTS (t) = 1;
3409 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3410
3411 if (container)
3412 emit_label (lab_over);
3413
3414 if (indirect_p)
3415 {
3416 r = gen_rtx_MEM (Pmode, addr_rtx);
3417 set_mem_alias_set (r, get_varargs_alias_set ());
3418 emit_move_insn (addr_rtx, r);
3419 }
3420
3421 return addr_rtx;
3422}
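
/* The comparisons emitted above implement the psABI overflow test.  For
   example (assuming REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8): fetching an
   `int' falls back to the overflow area once gp_offset >= 48, and fetching a
   `double' does so once fp_offset >= 48 + 8 * 16 = 176, i.e. when all
   register slots of the needed kind have been consumed.  */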
3423
3424/* Return nonzero if OP is either an i387 or SSE fp register.  */
3425int
3426any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3427{
3428 return ANY_FP_REG_P (op);
3429}
3430
3431/* Return nonzero if OP is an i387 fp register. */
3432int
3433fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3434{
3435 return FP_REG_P (op);
3436}
3437
3438/* Return nonzero if OP is a non-fp register_operand. */
3439int
3440register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3441{
3442 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3443}
3444
3445/* Return nonzero if OP is a register operand other than an
3446 i387 fp register. */
3447int
3448register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3449{
3450 return register_operand (op, mode) && !FP_REG_P (op);
3451}
3452
3453/* Return nonzero if OP is general operand representable on x86_64. */
3454
3455int
3456x86_64_general_operand (rtx op, enum machine_mode mode)
3457{
3458 if (!TARGET_64BIT)
3459 return general_operand (op, mode);
3460 if (nonimmediate_operand (op, mode))
3461 return 1;
3462 return x86_64_sign_extended_value (op);
3463}
3464
3465/* Return nonzero if OP is general operand representable on x86_64
3466 as either sign extended or zero extended constant. */
3467
3468int
3469x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3470{
3471 if (!TARGET_64BIT)
3472 return general_operand (op, mode);
3473 if (nonimmediate_operand (op, mode))
3474 return 1;
3475 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3476}
3477
3478/* Return nonzero if OP is nonmemory operand representable on x86_64. */
3479
3480int
3481x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3482{
3483 if (!TARGET_64BIT)
3484 return nonmemory_operand (op, mode);
3485 if (register_operand (op, mode))
3486 return 1;
3487 return x86_64_sign_extended_value (op);
3488}
3489
3490/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3491
3492int
3493x86_64_movabs_operand (rtx op, enum machine_mode mode)
3494{
3495 if (!TARGET_64BIT || !flag_pic)
3496 return nonmemory_operand (op, mode);
3497 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3498 return 1;
3499 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3500 return 1;
3501 return 0;
3502}
3503
3504/* Return nonzero if OPNUM's MEM should be matched
3505 in movabs* patterns. */
3506
3507int
3508ix86_check_movabs (rtx insn, int opnum)
3509{
3510 rtx set, mem;
3511
3512 set = PATTERN (insn);
3513 if (GET_CODE (set) == PARALLEL)
3514 set = XVECEXP (set, 0, 0);
3515 if (GET_CODE (set) != SET)
3516 abort ();
3517 mem = XEXP (set, opnum);
3518 while (GET_CODE (mem) == SUBREG)
3519 mem = SUBREG_REG (mem);
3520 if (GET_CODE (mem) != MEM)
3521 abort ();
3522 return (volatile_ok || !MEM_VOLATILE_P (mem));
3523}
3524
3525/* Return nonzero if OP is a nonmemory operand representable on x86_64
3526   as either a sign extended or zero extended constant.  */
3526
3527int
3528x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3529{
3530 if (!TARGET_64BIT)
3531 return nonmemory_operand (op, mode);
3532 if (register_operand (op, mode))
3533 return 1;
3534 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3535}
3536
3537/* Return nonzero if OP is immediate operand representable on x86_64. */
3538
3539int
3540x86_64_immediate_operand (rtx op, enum machine_mode mode)
3541{
3542 if (!TARGET_64BIT)
3543 return immediate_operand (op, mode);
3544 return x86_64_sign_extended_value (op);
3545}
3546
3547/* Return nonzero if OP is an immediate operand representable on x86_64
3548   as a zero extended value.  */
3548
3549int
3550x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3551{
3552 return x86_64_zero_extended_value (op);
3553}
3554
3555/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3556 for shift & compare patterns, as shifting by 0 does not change flags),
3557 else return zero. */
3558
3559int
3560const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3561{
3562 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3563}
3564
3565/* Returns 1 if OP is either a symbol reference or a sum of a symbol
3566 reference and a constant. */
3567
3568int
3569symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3570{
3571 switch (GET_CODE (op))
3572 {
3573 case SYMBOL_REF:
3574 case LABEL_REF:
3575 return 1;
3576
3577 case CONST:
3578 op = XEXP (op, 0);
3579 if (GET_CODE (op) == SYMBOL_REF
3580 || GET_CODE (op) == LABEL_REF
3581 || (GET_CODE (op) == UNSPEC
3582 && (XINT (op, 1) == UNSPEC_GOT
3583 || XINT (op, 1) == UNSPEC_GOTOFF
3584 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3585 return 1;
3586 if (GET_CODE (op) != PLUS
3587 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3588 return 0;
3589
3590 op = XEXP (op, 0);
3591 if (GET_CODE (op) == SYMBOL_REF
3592 || GET_CODE (op) == LABEL_REF)
3593 return 1;
3594 /* Only @GOTOFF gets offsets. */
3595 if (GET_CODE (op) != UNSPEC
3596 || XINT (op, 1) != UNSPEC_GOTOFF)
3597 return 0;
3598
3599 op = XVECEXP (op, 0, 0);
3600 if (GET_CODE (op) == SYMBOL_REF
3601 || GET_CODE (op) == LABEL_REF)
3602 return 1;
3603 return 0;
3604
3605 default:
3606 return 0;
3607 }
3608}
3609
3610/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3611
3612int
3613pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3614{
3615 if (GET_CODE (op) != CONST)
3616 return 0;
3617 op = XEXP (op, 0);
3618 if (TARGET_64BIT)
3619 {
3620 if (GET_CODE (op) == UNSPEC
3621 && XINT (op, 1) == UNSPEC_GOTPCREL)
3622 return 1;
3623 if (GET_CODE (op) == PLUS
3624 && GET_CODE (XEXP (op, 0)) == UNSPEC
3625 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3626 return 1;
3627 }
3628 else
3629 {
3630 if (GET_CODE (op) == UNSPEC)
3631 return 1;
3632 if (GET_CODE (op) != PLUS
3633 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3634 return 0;
3635 op = XEXP (op, 0);
3636 if (GET_CODE (op) == UNSPEC)
3637 return 1;
3638 }
3639 return 0;
3640}
3641
3642/* Return true if OP is a symbolic operand that resolves locally. */
3643
3644static int
3645local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3646{
3647 if (GET_CODE (op) == CONST
3648 && GET_CODE (XEXP (op, 0)) == PLUS
3649 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3650 op = XEXP (XEXP (op, 0), 0);
3651
3652 if (GET_CODE (op) == LABEL_REF)
3653 return 1;
3654
3655 if (GET_CODE (op) != SYMBOL_REF)
3656 return 0;
3657
3658 if (SYMBOL_REF_LOCAL_P (op))
3659 return 1;
3660
3661 /* There is, however, a not insubstantial body of code in the rest of
3662 the compiler that assumes it can just stick the results of
3663 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3664 /* ??? This is a hack. Should update the body of the compiler to
3665     always create a DECL and invoke targetm.encode_section_info.  */
3666 if (strncmp (XSTR (op, 0), internal_label_prefix,
3667 internal_label_prefix_len) == 0)
3668 return 1;
3669
3670 return 0;
3671}
3672
3673/* Test for various thread-local symbols. */
3674
3675int
3676tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3677{
3678 if (GET_CODE (op) != SYMBOL_REF)
3679 return 0;
3680 return SYMBOL_REF_TLS_MODEL (op);
3681}
3682
3683static inline int
3684tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3685{
3686 if (GET_CODE (op) != SYMBOL_REF)
3687 return 0;
3688 return SYMBOL_REF_TLS_MODEL (op) == kind;
3689}
3690
3691int
3692global_dynamic_symbolic_operand (rtx op,
3693 enum machine_mode mode ATTRIBUTE_UNUSED)
3694{
3695 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3696}
3697
3698int
3699local_dynamic_symbolic_operand (rtx op,
3700 enum machine_mode mode ATTRIBUTE_UNUSED)
3701{
3702 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3703}
3704
3705int
3706initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3707{
3708 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3709}
3710
3711int
3712local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3713{
3714 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3715}
3716
3717/* Test for a valid operand for a call instruction. Don't allow the
3718 arg pointer register or virtual regs since they may decay into
3719 reg + const, which the patterns can't handle. */
3720
3721int
3722call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3723{
3724 /* Disallow indirect through a virtual register. This leads to
3725 compiler aborts when trying to eliminate them. */
3726 if (GET_CODE (op) == REG
3727 && (op == arg_pointer_rtx
3728 || op == frame_pointer_rtx
3729 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3730 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3731 return 0;
3732
3733 /* Disallow `call 1234'. Due to varying assembler lameness this
3734 gets either rejected or translated to `call .+1234'. */
3735 if (GET_CODE (op) == CONST_INT)
3736 return 0;
3737
3738 /* Explicitly allow SYMBOL_REF even if pic. */
3739 if (GET_CODE (op) == SYMBOL_REF)
3740 return 1;
3741
3742 /* Otherwise we can allow any general_operand in the address. */
3743 return general_operand (op, Pmode);
3744}
3745
3746/* Test for a valid operand for a call instruction. Don't allow the
3747 arg pointer register or virtual regs since they may decay into
3748 reg + const, which the patterns can't handle. */
3749
3750int
3751sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3752{
3753 /* Disallow indirect through a virtual register. This leads to
3754 compiler aborts when trying to eliminate them. */
3755 if (GET_CODE (op) == REG
3756 && (op == arg_pointer_rtx
3757 || op == frame_pointer_rtx
3758 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3759 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3760 return 0;
3761
3762 /* Explicitly allow SYMBOL_REF even if pic. */
3763 if (GET_CODE (op) == SYMBOL_REF)
3764 return 1;
3765
3766 /* Otherwise we can only allow register operands. */
3767 return register_operand (op, Pmode);
3768}
3769
3770int
3771constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3772{
3773 if (GET_CODE (op) == CONST
3774 && GET_CODE (XEXP (op, 0)) == PLUS
3775 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3776 op = XEXP (XEXP (op, 0), 0);
3777 return GET_CODE (op) == SYMBOL_REF;
3778}
3779
3780/* Match exactly zero and one. */
3781
3782int
3783const0_operand (rtx op, enum machine_mode mode)
3784{
3785 return op == CONST0_RTX (mode);
3786}
3787
3788int
3789const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3790{
3791 return op == const1_rtx;
3792}
3793
3794/* Match 2, 4, or 8. Used for leal multiplicands. */
3795
3796int
3797const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3798{
3799 return (GET_CODE (op) == CONST_INT
3800 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3801}
3802
3803int
3804const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3805{
3806 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3807}
3808
3809int
3810const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3811{
3812 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3813}
3814
3815int
3816const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3817{
3818 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3819}
3820
3821int
3822const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3823{
3824 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3825}
3826
3827
3828/* True if this is a constant appropriate for an increment or decrement. */
3829
3830int
3831incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3832{
3833	  /* On Pentium4, the inc and dec operations cause an extra dependency on
3834	     the flag registers, since the carry flag is not set.  */
3835 if (TARGET_PENTIUM4 && !optimize_size)
3836 return 0;
3837 return op == const1_rtx || op == constm1_rtx;
3838}
3839
3840/* Return nonzero if OP is acceptable as operand of DImode shift
3841 expander. */
3842
3843int
3844shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3845{
3846 if (TARGET_64BIT)
3847 return nonimmediate_operand (op, mode);
3848 else
3849 return register_operand (op, mode);
3850}
3851
3852/* Return false if this is the stack pointer, or any other fake
3853 register eliminable to the stack pointer. Otherwise, this is
3854 a register operand.
3855
3856	   This is used to prevent esp from being used as an index reg,
3857	   which would only happen in pathological cases.  */
3858
3859int
3860reg_no_sp_operand (rtx op, enum machine_mode mode)
3861{
3862 rtx t = op;
3863 if (GET_CODE (t) == SUBREG)
3864 t = SUBREG_REG (t);
3865 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3866 return 0;
3867
3868 return register_operand (op, mode);
3869}
3870
3871int
3872mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3873{
3874 return MMX_REG_P (op);
3875}
3876
3877/* Return false if this is any eliminable register. Otherwise
3878 general_operand. */
3879
3880int
3881general_no_elim_operand (rtx op, enum machine_mode mode)
3882{
3883 rtx t = op;
3884 if (GET_CODE (t) == SUBREG)
3885 t = SUBREG_REG (t);
3886 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3887 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3888 || t == virtual_stack_dynamic_rtx)
3889 return 0;
3890 if (REG_P (t)
3891 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3892 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3893 return 0;
3894
3895 return general_operand (op, mode);
3896}
3897
3898/* Return false if this is any eliminable register. Otherwise
3899 register_operand or const_int. */
3900
3901int
3902nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3903{
3904 rtx t = op;
3905 if (GET_CODE (t) == SUBREG)
3906 t = SUBREG_REG (t);
3907 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3908 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3909 || t == virtual_stack_dynamic_rtx)
3910 return 0;
3911
3912 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3913}
3914
3915/* Return false if this is any eliminable register or stack register,
3916 otherwise work like register_operand. */
3917
3918int
3919index_register_operand (rtx op, enum machine_mode mode)
3920{
3921 rtx t = op;
3922 if (GET_CODE (t) == SUBREG)
3923 t = SUBREG_REG (t);
3924 if (!REG_P (t))
3925 return 0;
3926 if (t == arg_pointer_rtx
3927 || t == frame_pointer_rtx
3928 || t == virtual_incoming_args_rtx
3929 || t == virtual_stack_vars_rtx
3930 || t == virtual_stack_dynamic_rtx
3931 || REGNO (t) == STACK_POINTER_REGNUM)
3932 return 0;
3933
3934 return general_operand (op, mode);
3935}
3936
3937/* Return true if op is a Q_REGS class register. */
3938
3939int
3940q_regs_operand (rtx op, enum machine_mode mode)
3941{
3942 if (mode != VOIDmode && GET_MODE (op) != mode)
3943 return 0;
3944 if (GET_CODE (op) == SUBREG)
3945 op = SUBREG_REG (op);
3946 return ANY_QI_REG_P (op);
3947}
3948
3949	/* Return true if op is the flags register.  */
3950
3951int
3952flags_reg_operand (rtx op, enum machine_mode mode)
3953{
3954 if (mode != VOIDmode && GET_MODE (op) != mode)
3955 return 0;
3956 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3957}
3958
3959/* Return true if op is a NON_Q_REGS class register. */
3960
3961int
3962non_q_regs_operand (rtx op, enum machine_mode mode)
3963{
3964 if (mode != VOIDmode && GET_MODE (op) != mode)
3965 return 0;
3966 if (GET_CODE (op) == SUBREG)
3967 op = SUBREG_REG (op);
3968 return NON_QI_REG_P (op);
3969}
3970
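/* Return 1 if OP is a constant-pool load of a CONST_VECTOR whose elements,
   other than the first, are all zero.  */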
3971int
3972zero_extended_scalar_load_operand (rtx op,
3973 enum machine_mode mode ATTRIBUTE_UNUSED)
3974{
3975 unsigned n_elts;
3976 if (GET_CODE (op) != MEM)
3977 return 0;
3978 op = maybe_get_pool_constant (op);
3979 if (!op)
3980 return 0;
3981 if (GET_CODE (op) != CONST_VECTOR)
3982 return 0;
3983 n_elts =
3984 (GET_MODE_SIZE (GET_MODE (op)) /
3985 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3986 for (n_elts--; n_elts > 0; n_elts--)
3987 {
3988 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3989 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3990 return 0;
3991 }
3992 return 1;
3993}
3994
3995	/* Return 1 when OP is an operand acceptable for a standard SSE move.  */
3996int
3997vector_move_operand (rtx op, enum machine_mode mode)
3998{
3999 if (nonimmediate_operand (op, mode))
4000 return 1;
4001 if (GET_MODE (op) != mode && mode != VOIDmode)
4002 return 0;
4003 return (op == CONST0_RTX (GET_MODE (op)));
4004}
4005
4006	/* Return true if op is a valid address and does not contain
4007	   a segment override.  */
4008
4009int
4010no_seg_address_operand (rtx op, enum machine_mode mode)
4011{
4012 struct ix86_address parts;
4013
4014 if (! address_operand (op, mode))
4015 return 0;
4016
4017 if (! ix86_decompose_address (op, &parts))
4018 abort ();
4019
4020 return parts.seg == SEG_DEFAULT;
4021}
4022
4023/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4024 insns. */
4025int
4026sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4027{
4028 enum rtx_code code = GET_CODE (op);
4029 switch (code)
4030 {
4031 /* Operations supported directly. */
4032 case EQ:
4033 case LT:
4034 case LE:
4035 case UNORDERED:
4036 case NE:
4037 case UNGE:
4038 case UNGT:
4039 case ORDERED:
4040 return 1;
4041 /* These are equivalent to ones above in non-IEEE comparisons. */
4042 case UNEQ:
4043 case UNLT:
4044 case UNLE:
4045 case LTGT:
4046 case GE:
4047 case GT:
4048 return !TARGET_IEEE_FP;
4049 default:
4050 return 0;
4051 }
4052}
4053/* Return 1 if OP is a valid comparison operator in valid mode. */
4054int
4055ix86_comparison_operator (rtx op, enum machine_mode mode)
4056{
4057 enum machine_mode inmode;
4058 enum rtx_code code = GET_CODE (op);
4059 if (mode != VOIDmode && GET_MODE (op) != mode)
4060 return 0;
4061 if (GET_RTX_CLASS (code) != '<')
4062 return 0;
4063 inmode = GET_MODE (XEXP (op, 0));
4064
4065 if (inmode == CCFPmode || inmode == CCFPUmode)
4066 {
4067 enum rtx_code second_code, bypass_code;
4068 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4069 return (bypass_code == NIL && second_code == NIL);
4070 }
4071 switch (code)
4072 {
4073 case EQ: case NE:
4074 return 1;
4075 case LT: case GE:
4076 if (inmode == CCmode || inmode == CCGCmode
4077 || inmode == CCGOCmode || inmode == CCNOmode)
4078 return 1;
4079 return 0;
4080 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4081 if (inmode == CCmode)
4082 return 1;
4083 return 0;
4084 case GT: case LE:
4085 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4086 return 1;
4087 return 0;
4088 default:
4089 return 0;
4090 }
4091}
4092
4093	/* Return 1 if OP is a valid comparison operator that tests whether
4094	   the carry flag is set.  */
4095int
4096ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4097{
4098 enum machine_mode inmode;
4099 enum rtx_code code = GET_CODE (op);
4100
4101 if (mode != VOIDmode && GET_MODE (op) != mode)
4102 return 0;
4103 if (GET_RTX_CLASS (code) != '<')
4104 return 0;
4105 inmode = GET_MODE (XEXP (op, 0));
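  /* The comparison must be of the flags register (hard register 17)
     against zero.  */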
4106 if (GET_CODE (XEXP (op, 0)) != REG
4107 || REGNO (XEXP (op, 0)) != 17
4108 || XEXP (op, 1) != const0_rtx)
4109 return 0;
4110
4111 if (inmode == CCFPmode || inmode == CCFPUmode)
4112 {
4113 enum rtx_code second_code, bypass_code;
4114
4115 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4116 if (bypass_code != NIL || second_code != NIL)
4117 return 0;
4118 code = ix86_fp_compare_code_to_integer (code);
4119 }
4120 else if (inmode != CCmode)
4121 return 0;
4122 return code == LTU;
4123}
4124
4125/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4126
4127int
4128fcmov_comparison_operator (rtx op, enum machine_mode mode)
4129{
4130 enum machine_mode inmode;
4131 enum rtx_code code = GET_CODE (op);
4132
4133 if (mode != VOIDmode && GET_MODE (op) != mode)
4134 return 0;
4135 if (GET_RTX_CLASS (code) != '<')
4136 return 0;
4137 inmode = GET_MODE (XEXP (op, 0));
4138 if (inmode == CCFPmode || inmode == CCFPUmode)
4139 {
4140 enum rtx_code second_code, bypass_code;
4141
4142 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4143 if (bypass_code != NIL || second_code != NIL)
4144 return 0;
4145 code = ix86_fp_compare_code_to_integer (code);
4146 }
4147	  /* The i387 supports only a limited set of condition codes.  */
4148 switch (code)
4149 {
4150 case LTU: case GTU: case LEU: case GEU:
4151 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4152 return 1;
4153 return 0;
4154 case ORDERED: case UNORDERED:
4155 case EQ: case NE:
4156 return 1;
4157 default:
4158 return 0;
4159 }
4160}
4161
4162/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4163
4164int
4165promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4166{
4167 switch (GET_CODE (op))
4168 {
4169 case MULT:
4170	      /* Modern CPUs have the same latency for HImode and SImode
4171		 multiplies, but the 386 and 486 do HImode multiplies faster.  */
4172 return ix86_tune > PROCESSOR_I486;
4173 case PLUS:
4174 case AND:
4175 case IOR:
4176 case XOR:
4177 case ASHIFT:
4178 return 1;
4179 default:
4180 return 0;
4181 }
4182}
4183
4184/* Nearly general operand, but accept any const_double, since we wish
4185 to be able to drop them into memory rather than have them get pulled
4186 into registers. */
4187
4188int
4189cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4190{
4191 if (mode != VOIDmode && mode != GET_MODE (op))
4192 return 0;
4193 if (GET_CODE (op) == CONST_DOUBLE)
4194 return 1;
4195 return general_operand (op, mode);
4196}
4197
4198/* Match an SI or HImode register for a zero_extract. */
4199
4200int
4201ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4202{
4203 int regno;
4204 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4205 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4206 return 0;
4207
4208 if (!register_operand (op, VOIDmode))
4209 return 0;
4210
4211 /* Be careful to accept only registers having upper parts. */
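  /* Only hard registers 0..3 (%eax, %edx, %ecx, %ebx) have high-byte parts;
     pseudos (regno beyond LAST_VIRTUAL_REGISTER) are accepted as well since
     they may still be allocated to one of those registers.  */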
4212 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4213 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4214}
4215
4216/* Return 1 if this is a valid binary floating-point operation.
4217 OP is the expression matched, and MODE is its mode. */
4218
4219int
4220binary_fp_operator (rtx op, enum machine_mode mode)
4221{
4222 if (mode != VOIDmode && mode != GET_MODE (op))
4223 return 0;
4224
4225 switch (GET_CODE (op))
4226 {
4227 case PLUS:
4228 case MINUS:
4229 case MULT:
4230 case DIV:
4231 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4232
4233 default:
4234 return 0;
4235 }
4236}
4237
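/* Return 1 if OP is a MULT rtx.  */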
4238int
4239mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4240{
4241 return GET_CODE (op) == MULT;
4242}
4243
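/* Return 1 if OP is a DIV rtx.  */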
4244int
4245div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4246{
4247 return GET_CODE (op) == DIV;
4248}
4249
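/* Return 1 if OP is a binary operator (RTX class 'c' or '2') of the
   given mode.  */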
4250int
4251arith_or_logical_operator (rtx op, enum machine_mode mode)
4252{
4253 return ((mode == VOIDmode || GET_MODE (op) == mode)
4254 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4255 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4256}
4257
4258	/* Returns 1 if OP is a memory operand with a displacement.  */
4259
4260int
4261memory_displacement_operand (rtx op, enum machine_mode mode)
4262{
4263 struct ix86_address parts;
4264
4265 if (! memory_operand (op, mode))
4266 return 0;
4267
4268 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4269 abort ();
4270
4271 return parts.disp != NULL_RTX;
4272}
4273
4274/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4275 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4276
4277 ??? It seems likely that this will only work because cmpsi is an
4278 expander, and no actual insns use this. */
4279
4280int
4281cmpsi_operand (rtx op, enum machine_mode mode)
4282{
4283 if (nonimmediate_operand (op, mode))
4284 return 1;
4285
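  /* Also accept (and:SI (zero_extract:SI X (const_int 8) (const_int 8))
     (const_int N)), the high-byte test form mentioned in the comment above.  */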
4286 if (GET_CODE (op) == AND
4287 && GET_MODE (op) == SImode
4288 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4289 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4290 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4291 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4292 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4293 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4294 return 1;
4295
4296 return 0;
4297}
4298
4299	/* Returns 1 if OP is a memory operand that cannot be represented
4300	   by the modRM array.  */
4301
4302int
4303long_memory_operand (rtx op, enum machine_mode mode)
4304{
4305 if (! memory_operand (op, mode))
4306 return 0;
4307
4308 return memory_address_length (op) != 0;
4309}
4310
4311/* Return nonzero if the rtx is known aligned. */
4312
4313int
4314aligned_operand (rtx op, enum machine_mode mode)
4315{
4316 struct ix86_address parts;
4317
4318 if (!general_operand (op, mode))
4319 return 0;
4320
4321 /* Registers and immediate operands are always "aligned". */
4322 if (GET_CODE (op) != MEM)
4323 return 1;
4324
4325 /* Don't even try to do any aligned optimizations with volatiles. */
4326 if (MEM_VOLATILE_P (op))
4327 return 0;
4328
4329 op = XEXP (op, 0);
4330
4331 /* Pushes and pops are only valid on the stack pointer. */
4332 if (GET_CODE (op) == PRE_DEC
4333 || GET_CODE (op) == POST_INC)
4334 return 1;
4335
4336 /* Decode the address. */
4337 if (! ix86_decompose_address (op, &parts))
4338 abort ();
4339
4340 /* Look for some component that isn't known to be aligned. */
4341 if (parts.index)
4342 {
4343 if (parts.scale < 4
4344 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4345 return 0;
4346 }
4347 if (parts.base)
4348 {
4349 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4350 return 0;
4351 }
4352 if (parts.disp)
4353 {
4354 if (GET_CODE (parts.disp) != CONST_INT
4355 || (INTVAL (parts.disp) & 3) != 0)
4356 return 0;
4357 }
4358
4359 /* Didn't find one -- this must be an aligned address. */
4360 return 1;
4361}
4362
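/* Return 1 if OP is a COMPARE rtx.  */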
4363int
4364compare_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4365{
4366 return GET_CODE (op) == COMPARE;
4367}
4368
4369/* Initialize the table of extra 80387 mathematical constants. */
4370
4371static void
4372init_ext_80387_constants (void)
4373{
4374 static const char * cst[5] =
4375 {
4376 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4377 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4378 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4379 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4380 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4381 };
4382 int i;
4383
4384 for (i = 0; i < 5; i++)
4385 {
4386 real_from_string (&ext_80387_constants_table[i], cst[i]);
4387 /* Ensure each constant is rounded to XFmode precision. */
4388 real_convert (&ext_80387_constants_table[i],
4389 XFmode, &ext_80387_constants_table[i]);
4390 }
4391
4392 ext_80387_constants_init = 1;
4393}
4394
4395	/* Return the code of the special 80387 instruction that can load constant X
4396	   (see standard_80387_constant_opcode), 0 if none, or -1 if X is not a floating-point CONST_DOUBLE.  */
4397
4398int
4399standard_80387_constant_p (rtx x)
4400{
4401 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4402 return -1;
4403
4404 if (x == CONST0_RTX (GET_MODE (x)))
4405 return 1;
4406 if (x == CONST1_RTX (GET_MODE (x)))
4407 return 2;
4408
4409 /* For XFmode constants, try to find a special 80387 instruction on
4410 those CPUs that benefit from them. */
4411 if (GET_MODE (x) == XFmode
4412 && x86_ext_80387_constants & TUNEMASK)
4413 {
4414 REAL_VALUE_TYPE r;
4415 int i;
4416
4417 if (! ext_80387_constants_init)
4418 init_ext_80387_constants ();
4419
4420 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4421 for (i = 0; i < 5; i++)
4422 if (real_identical (&r, &ext_80387_constants_table[i]))
4423 return i + 3;
4424 }
4425
4426 return 0;
4427}
4428
4429/* Return the opcode of the special instruction to be used to load
4430 the constant X. */
4431
4432const char *
4433standard_80387_constant_opcode (rtx x)
4434{
4435 switch (standard_80387_constant_p (x))
4436 {
4437 case 1:
4438 return "fldz";
4439 case 2:
4440 return "fld1";
4441 case 3:
4442 return "fldlg2";
4443 case 4:
4444 return "fldln2";
4445 case 5:
4446 return "fldl2e";
4447 case 6:
4448 return "fldl2t";
4449 case 7:
4450 return "fldpi";
4451 }
4452 abort ();
4453}
4454
4455/* Return the CONST_DOUBLE representing the 80387 constant that is
4456 loaded by the specified special instruction. The argument IDX
4457 matches the return value from standard_80387_constant_p. */
4458
4459rtx
4460standard_80387_constant_rtx (int idx)
4461{
4462 int i;
4463
4464 if (! ext_80387_constants_init)
4465 init_ext_80387_constants ();
4466
4467 switch (idx)
4468 {
4469 case 3:
4470 case 4:
4471 case 5:
4472 case 6:
4473 case 7:
4474 i = idx - 3;
4475 break;
4476
4477 default:
4478 abort ();
4479 }
4480
4481 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4482 XFmode);
4483}
4484
4485	/* Return 1 if X is an FP constant that we can load into an SSE register
4486	   without using memory.  */
4487int
4488standard_sse_constant_p (rtx x)
4489{
4490 if (x == const0_rtx)
4491 return 1;
4492 return (x == CONST0_RTX (GET_MODE (x)));
4493}
4494
4495	/* Returns 1 if OP contains a symbol reference.  */
4496
4497int
4498symbolic_reference_mentioned_p (rtx op)
4499{
4500 const char *fmt;
4501 int i;
4502
4503 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4504 return 1;
4505
4506 fmt = GET_RTX_FORMAT (GET_CODE (op));
4507 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4508 {
4509 if (fmt[i] == 'E')
4510 {
4511 int j;
4512
4513 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4514 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4515 return 1;
4516 }
4517
4518 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4519 return 1;
4520 }
4521
4522 return 0;
4523}
4524
4525/* Return 1 if it is appropriate to emit `ret' instructions in the
4526 body of a function. Do this only if the epilogue is simple, needing a
4527 couple of insns. Prior to reloading, we can't tell how many registers
4528 must be saved, so return 0 then. Return 0 if there is no frame
4529 marker to de-allocate.
4530
4531 If NON_SAVING_SETJMP is defined and true, then it is not possible
4532 for the epilogue to be simple, so return 0. This is a special case
4533 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4534 until final, but jump_optimize may need to know sooner if a
4535 `return' is OK. */
4536
4537int
4538ix86_can_use_return_insn_p (void)
4539{
4540 struct ix86_frame frame;
4541
4542#ifdef NON_SAVING_SETJMP
4543 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4544 return 0;
4545#endif
4546
4547 if (! reload_completed || frame_pointer_needed)
4548 return 0;
4549
4550	  /* Don't allow more than 32k bytes of pops, since that's all we can do
4551	     with one instruction.  */
4552 if (current_function_pops_args
4553 && current_function_args_size >= 32768)
4554 return 0;
4555
4556 ix86_compute_frame_layout (&frame);
4557 return frame.to_allocate == 0 && frame.nregs == 0;
4558}
4559
4560/* Return 1 if VALUE can be stored in the sign extended immediate field. */
4561int
4562x86_64_sign_extended_value (rtx value)
4563{
4564 switch (GET_CODE (value))
4565 {
4566 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4567	       to be at least 32 and thus all acceptable constants are
4568 represented as CONST_INT. */
4569 case CONST_INT:
4570 if (HOST_BITS_PER_WIDE_INT == 32)
4571 return 1;
4572 else
4573 {
4574 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4575 return trunc_int_for_mode (val, SImode) == val;
4576 }
4577 break;
4578
4579	    /* For certain code models, the symbolic references are known to fit;
4580	       in the CM_SMALL_PIC model we know they fit if they are local to the
4581	       shared library.  Don't count TLS SYMBOL_REFs here, since they should
4582	       fit only if inside an UNSPEC handled below.  */
4583 case SYMBOL_REF:
4584 /* TLS symbols are not constant. */
4585 if (tls_symbolic_operand (value, Pmode))
4586 return false;
4587 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4588
4589 /* For certain code models, the code is near as well. */
4590 case LABEL_REF:
4591 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4592 || ix86_cmodel == CM_KERNEL);
4593
4594 /* We also may accept the offsetted memory references in certain special
4595 cases. */
4596 case CONST:
4597 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4598 switch (XINT (XEXP (value, 0), 1))
4599 {
4600 case UNSPEC_GOTPCREL:
4601 case UNSPEC_DTPOFF:
4602 case UNSPEC_GOTNTPOFF:
4603 case UNSPEC_NTPOFF:
4604 return 1;
4605 default:
4606 break;
4607 }
4608 if (GET_CODE (XEXP (value, 0)) == PLUS)
4609 {
4610 rtx op1 = XEXP (XEXP (value, 0), 0);
4611 rtx op2 = XEXP (XEXP (value, 0), 1);
4612 HOST_WIDE_INT offset;
4613
4614 if (ix86_cmodel == CM_LARGE)
4615 return 0;
4616 if (GET_CODE (op2) != CONST_INT)
4617 return 0;
4618 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4619 switch (GET_CODE (op1))
4620 {
4621 case SYMBOL_REF:
4622	      /* For CM_SMALL assume that the last object is at most 16MB below
4623		 the 31-bit boundary.  We may also accept fairly large negative
4624		 constants, knowing that all objects are in the positive half
4625		 of the address space.  */
4626 if (ix86_cmodel == CM_SMALL
4627 && offset < 16*1024*1024
4628 && trunc_int_for_mode (offset, SImode) == offset)
4629 return 1;
4630	      /* For CM_KERNEL we know that all objects reside in the negative
4631		 half of the 32-bit address space.  We must not accept negative
4632		 offsets, since they may fall just outside that range, but we
4633		 may accept fairly large positive ones.  */
4634 if (ix86_cmodel == CM_KERNEL
4635 && offset > 0
4636 && trunc_int_for_mode (offset, SImode) == offset)
4637 return 1;
4638 break;
4639 case LABEL_REF:
4640 /* These conditions are similar to SYMBOL_REF ones, just the
4641 constraints for code models differ. */
4642 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4643 && offset < 16*1024*1024
4644 && trunc_int_for_mode (offset, SImode) == offset)
4645 return 1;
4646 if (ix86_cmodel == CM_KERNEL
4647 && offset > 0
4648 && trunc_int_for_mode (offset, SImode) == offset)
4649 return 1;
4650 break;
4651 case UNSPEC:
4652 switch (XINT (op1, 1))
4653 {
4654 case UNSPEC_DTPOFF:
4655 case UNSPEC_NTPOFF:
4656 if (offset > 0
4657 && trunc_int_for_mode (offset, SImode) == offset)
4658 return 1;
4659 }
4660 break;
4661 default:
4662 return 0;
4663 }
4664 }
4665 return 0;
4666 default:
4667 return 0;
4668 }
4669}
4670
4671/* Return 1 if VALUE can be stored in the zero extended immediate field. */
4672int
4673x86_64_zero_extended_value (rtx value)
4674{
4675 switch (GET_CODE (value))
4676 {
4677 case CONST_DOUBLE:
4678 if (HOST_BITS_PER_WIDE_INT == 32)
4679 return (GET_MODE (value) == VOIDmode
4680 && !CONST_DOUBLE_HIGH (value));
4681 else
4682 return 0;
4683 case CONST_INT:
4684 if (HOST_BITS_PER_WIDE_INT == 32)
4685 return INTVAL (value) >= 0;
4686 else
4687 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4688 break;
4689
4690 /* For certain code models, the symbolic references are known to fit. */
4691 case SYMBOL_REF:
4692 /* TLS symbols are not constant. */
4693 if (tls_symbolic_operand (value, Pmode))
4694 return false;
4695 return ix86_cmodel == CM_SMALL;
4696
4697 /* For certain code models, the code is near as well. */
4698 case LABEL_REF:
4699 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4700
4701 /* We also may accept the offsetted memory references in certain special
4702 cases. */
4703 case CONST:
4704 if (GET_CODE (XEXP (value, 0)) == PLUS)
4705 {
4706 rtx op1 = XEXP (XEXP (value, 0), 0);
4707 rtx op2 = XEXP (XEXP (value, 0), 1);
4708
4709 if (ix86_cmodel == CM_LARGE)
4710 return 0;
4711 switch (GET_CODE (op1))
4712 {
4713 case SYMBOL_REF:
4714 return 0;
4715 /* For small code model we may accept pretty large positive
4716 offsets, since one bit is available for free. Negative
4717 offsets are limited by the size of NULL pointer area
4718 specified by the ABI. */
4719 if (ix86_cmodel == CM_SMALL
4720 && GET_CODE (op2) == CONST_INT
4721 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4722 && (trunc_int_for_mode (INTVAL (op2), SImode)
4723 == INTVAL (op2)))
4724 return 1;
4725 /* ??? For the kernel, we may accept adjustment of
4726 -0x10000000, since we know that it will just convert
4727 negative address space to positive, but perhaps this
4728 is not worthwhile. */
4729 break;
4730 case LABEL_REF:
4731 /* These conditions are similar to SYMBOL_REF ones, just the
4732 constraints for code models differ. */
4733 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4734 && GET_CODE (op2) == CONST_INT
4735 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4736 && (trunc_int_for_mode (INTVAL (op2), SImode)
4737 == INTVAL (op2)))
4738 return 1;
4739 break;
4740 default:
4741 return 0;
4742 }
4743 }
4744 return 0;
4745 default:
4746 return 0;
4747 }
4748}
4749
4750/* Value should be nonzero if functions must have frame pointers.
4751 Zero means the frame pointer need not be set up (and parms may
4752 be accessed via the stack pointer) in functions that seem suitable. */
4753
4754int
4755ix86_frame_pointer_required (void)
4756{
4757 /* If we accessed previous frames, then the generated code expects
4758 to be able to access the saved ebp value in our frame. */
4759 if (cfun->machine->accesses_prev_frame)
4760 return 1;
4761
4762 /* Several x86 os'es need a frame pointer for other reasons,
4763 usually pertaining to setjmp. */
4764 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4765 return 1;
4766
4767 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4768 the frame pointer by default. Turn it back on now if we've not
4769 got a leaf function. */
4770 if (TARGET_OMIT_LEAF_FRAME_POINTER
4771 && (!current_function_is_leaf))
4772 return 1;
4773
4774 if (current_function_profile)
4775 return 1;
4776
4777 return 0;
4778}
4779
4780/* Record that the current function accesses previous call frames. */
4781
4782void
4783ix86_setup_frame_addresses (void)
4784{
4785 cfun->machine->accesses_prev_frame = 1;
4786}
4787
4788#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4789# define USE_HIDDEN_LINKONCE 1
4790#else
4791# define USE_HIDDEN_LINKONCE 0
4792#endif
4793
4794static int pic_labels_used;
4795
4796/* Fills in the label name that should be used for a pc thunk for
4797 the given register. */
4798
4799static void
4800get_pc_thunk_name (char name[32], unsigned int regno)
4801{
4802 if (USE_HIDDEN_LINKONCE)
4803 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4804 else
4805 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4806}
4807
4808
4809	/* At the end of the file, emit the pc thunks needed for -fpic: each thunk
4810	   loads its register with the return address of the caller and then returns.  */
4811
4812void
4813ix86_file_end (void)
4814{
4815 rtx xops[2];
4816 int regno;
4817
4818 for (regno = 0; regno < 8; ++regno)
4819 {
4820 char name[32];
4821
4822 if (! ((pic_labels_used >> regno) & 1))
4823 continue;
4824
4825 get_pc_thunk_name (name, regno);
4826
4827 if (USE_HIDDEN_LINKONCE)
4828 {
4829 tree decl;
4830
4831 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4832 error_mark_node);
4833 TREE_PUBLIC (decl) = 1;
4834 TREE_STATIC (decl) = 1;
4835 DECL_ONE_ONLY (decl) = 1;
4836
4837 (*targetm.asm_out.unique_section) (decl, 0);
4838 named_section (decl, NULL, 0);
4839
4840 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4841 fputs ("\t.hidden\t", asm_out_file);
4842 assemble_name (asm_out_file, name);
4843 fputc ('\n', asm_out_file);
4844 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4845 }
4846 else
4847 {
4848 text_section ();
4849 ASM_OUTPUT_LABEL (asm_out_file, name);
4850 }
4851
4852 xops[0] = gen_rtx_REG (SImode, regno);
4853 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4854 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4855 output_asm_insn ("ret", xops);
4856 }
4857
4858 if (NEED_INDICATE_EXEC_STACK)
4859 file_end_indicate_exec_stack ();
4860}
4861
4862/* Emit code for the SET_GOT patterns. */
4863
4864const char *
4865output_set_got (rtx dest)
4866{
4867 rtx xops[3];
4868
4869 xops[0] = dest;
4870 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4871
4872 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4873 {
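      /* Without the pc thunks, materialize the PC directly: for non-PIC,
	 move the address of a local label into DEST; for PIC, call the
	 immediately following label and pop the return address into DEST.  */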
4874 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4875
4876 if (!flag_pic)
4877 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4878 else
4879 output_asm_insn ("call\t%a2", xops);
4880
4881#if TARGET_MACHO
4882 /* Output the "canonical" label name ("Lxx$pb") here too. This
4883 is what will be referred to by the Mach-O PIC subsystem. */
4884 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4885#endif
4886 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4887 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4888
4889 if (flag_pic)
4890 output_asm_insn ("pop{l}\t%0", xops);
4891 }
4892 else
4893 {
4894 char name[32];
4895 get_pc_thunk_name (name, REGNO (dest));
4896 pic_labels_used |= 1 << REGNO (dest);
4897
4898 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4899 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4900 output_asm_insn ("call\t%X2", xops);
4901 }
4902
4903 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4904 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4905 else if (!TARGET_MACHO)
4906 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4906 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4907
4908 return "";
4909}
4910
4911	/* Generate a "push" pattern for input ARG.  */
4912
4913static rtx
4914gen_push (rtx arg)
4915{
4916 return gen_rtx_SET (VOIDmode,
4917 gen_rtx_MEM (Pmode,
4918 gen_rtx_PRE_DEC (Pmode,
4919 stack_pointer_rtx)),
4920 arg);
4921}
4922
4923	/* Return the number of an unused call-clobbered register available for
4924	   the entire function, or INVALID_REGNUM if there is none.  */
4925
4926static unsigned int
4927ix86_select_alt_pic_regnum (void)
4928{
4929 if (current_function_is_leaf && !current_function_profile)
4930 {
4931 int i;
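      /* Hard registers 0..2 (%eax, %edx, %ecx) are the call-clobbered
	 integer registers; take the highest-numbered one that is unused.  */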
4932 for (i = 2; i >= 0; --i)
4933 if (!regs_ever_live[i])
4934 return i;
4935 }
4936
4937 return INVALID_REGNUM;
4938}
4939
4940/* Return 1 if we need to save REGNO. */
4941static int
4942ix86_save_reg (unsigned int regno, int maybe_eh_return)
4943{
4944 if (pic_offset_table_rtx
4945 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4946 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4947 || current_function_profile
4948 || current_function_calls_eh_return
4949 || current_function_uses_const_pool))
4950 {
4951 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4952 return 0;
4953 return 1;
4954 }
4955
4956 if (current_function_calls_eh_return && maybe_eh_return)
4957 {
4958 unsigned i;
4959 for (i = 0; ; i++)
4960 {
4961 unsigned test = EH_RETURN_DATA_REGNO (i);
4962 if (test == INVALID_REGNUM)
4963 break;
4964 if (test == regno)
4965 return 1;
4966 }
4967 }
4968
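  /* Otherwise save the register if it is used somewhere in the function,
     is call-saved, is not fixed, and is not the hard frame pointer while
     a frame pointer is in use.  */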
4969 return (regs_ever_live[regno]
4970 && !call_used_regs[regno]
4971 && !fixed_regs[regno]
4972 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4973}
4974
4975/* Return number of registers to be saved on the stack. */
4976
4977static int
4978ix86_nsaved_regs (void)
4979{
4980 int nregs = 0;
4981 int regno;
4982
4983 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4984 if (ix86_save_reg (regno, true))
4985 nregs++;
4986 return nregs;
4987}
4988
4989/* Return the offset between two registers, one to be eliminated, and the other
4990 its replacement, at the start of a routine. */
4991
4992HOST_WIDE_INT
4993ix86_initial_elimination_offset (int from, int to)
4994{
4995 struct ix86_frame frame;
4996 ix86_compute_frame_layout (&frame);
4997
4998 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4999 return frame.hard_frame_pointer_offset;
5000 else if (from == FRAME_POINTER_REGNUM
5001 && to == HARD_FRAME_POINTER_REGNUM)
5002 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5003 else
5004 {
5005 if (to != STACK_POINTER_REGNUM)
5006 abort ();
5007 else if (from == ARG_POINTER_REGNUM)
5008 return frame.stack_pointer_offset;
5009 else if (from != FRAME_POINTER_REGNUM)
5010 abort ();
5011 else
5012 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5013 }
5014}
5015
5016	/* Fill the ix86_frame structure with information about the current function's frame.  */
5017
5018static void
5019ix86_compute_frame_layout (struct ix86_frame *frame)
5020{
5021 HOST_WIDE_INT total_size;
5022 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5023 HOST_WIDE_INT offset;
5024 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5025 HOST_WIDE_INT size = get_frame_size ();
5026
5027 frame->nregs = ix86_nsaved_regs ();
5028 total_size = size;
5029
5030	  /* During reload iterations the number of registers saved can change.
5031	     Recompute the value as needed.  Do not recompute when the number of
5032	     registers did not change, as reload makes multiple calls to this
5033	     function and does not expect the decision to change within a single iteration.  */
5034 if (!optimize_size
5035 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5036 {
5037 int count = frame->nregs;
5038
5039 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5040 /* The fast prologue uses move instead of push to save registers. This
5041 is significantly longer, but also executes faster as modern hardware
5042 can execute the moves in parallel, but can't do that for push/pop.
5043
5044	     Be careful about choosing which prologue to emit:  when the function
5045	     takes many instructions to execute, we may use the slow version, and
5046	     likewise when the function is known to be outside a hot spot (known
5047	     only with profile feedback).  Weight the size of the function by the
5048	     number of registers to save, as it is cheap to use one or two push
5049	     instructions but very slow to use many of them.  */
5050 if (count)
5051 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5052 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5053 || (flag_branch_probabilities
5054 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5055 cfun->machine->use_fast_prologue_epilogue = false;
5056 else
5057 cfun->machine->use_fast_prologue_epilogue
5058 = !expensive_function_p (count);
5059 }
5060 if (TARGET_PROLOGUE_USING_MOVE
5061 && cfun->machine->use_fast_prologue_epilogue)
5062 frame->save_regs_using_mov = true;
5063 else
5064 frame->save_regs_using_mov = false;
5065
5066
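  /* The frame layout computed below, from higher to lower addresses, is:
     return address, saved frame pointer (when needed), register save area,
     va-arg save area, padding1, local variables, outgoing argument area,
     padding2.  */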
5067 /* Skip return address and saved base pointer. */
5068 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5069
5070 frame->hard_frame_pointer_offset = offset;
5071
5072 /* Do some sanity checking of stack_alignment_needed and
5073	     preferred_alignment, since the i386 port is the only one using these
5074	     features and they may break easily.  */
5075
5076 if (size && !stack_alignment_needed)
5077 abort ();
5078 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5079 abort ();
5080 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5081 abort ();
5082 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5083 abort ();
5084
5085 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5086 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5087
5088 /* Register save area */
5089 offset += frame->nregs * UNITS_PER_WORD;
5090
5091 /* Va-arg area */
5092 if (ix86_save_varrargs_registers)
5093 {
5094 offset += X86_64_VARARGS_SIZE;
5095 frame->va_arg_size = X86_64_VARARGS_SIZE;
5096 }
5097 else
5098 frame->va_arg_size = 0;
5099
5100 /* Align start of frame for local function. */
5101 frame->padding1 = ((offset + stack_alignment_needed - 1)
5102 & -stack_alignment_needed) - offset;
5103
5104 offset += frame->padding1;
5105
5106 /* Frame pointer points here. */
5107 frame->frame_pointer_offset = offset;
5108
5109 offset += size;
5110
5111	  /* Add the outgoing arguments area.  It can be skipped if we eliminated
5112	     all the function calls as dead code, but not when the function calls
5113	     alloca: the alloca expander assumes that the last
5114	     current_function_outgoing_args_size bytes of the stack frame
5115	     are unused.  */
5116 if (ACCUMULATE_OUTGOING_ARGS
5117 && (!current_function_is_leaf || current_function_calls_alloca))
5118 {
5119 offset += current_function_outgoing_args_size;
5120 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5121 }
5122 else
5123 frame->outgoing_arguments_size = 0;
5124
5125 /* Align stack boundary. Only needed if we're calling another function
5126 or using alloca. */
5127 if (!current_function_is_leaf || current_function_calls_alloca)
5128 frame->padding2 = ((offset + preferred_alignment - 1)
5129 & -preferred_alignment) - offset;
5130 else
5131 frame->padding2 = 0;
5132
5133 offset += frame->padding2;
5134
5135 /* We've reached end of stack frame. */
5136 frame->stack_pointer_offset = offset;
5137
5138 /* Size prologue needs to allocate. */
5139 frame->to_allocate =
5140 (size + frame->padding1 + frame->padding2
5141 + frame->outgoing_arguments_size + frame->va_arg_size);
5142
5143 if ((!frame->to_allocate && frame->nregs <= 1)
5144 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5145 frame->save_regs_using_mov = false;
5146
5147 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5148 && current_function_is_leaf)
5149 {
5150 frame->red_zone_size = frame->to_allocate;
5151 if (frame->save_regs_using_mov)
5152 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5153 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5154 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5155 }
5156 else
5157 frame->red_zone_size = 0;
5158 frame->to_allocate -= frame->red_zone_size;
5159 frame->stack_pointer_offset -= frame->red_zone_size;
5160#if 0
5161 fprintf (stderr, "nregs: %i\n", frame->nregs);
5162 fprintf (stderr, "size: %i\n", size);
5163 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5164 fprintf (stderr, "padding1: %i\n", frame->padding1);
5165 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5166 fprintf (stderr, "padding2: %i\n", frame->padding2);
5167 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5168 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5169 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5170 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5171 frame->hard_frame_pointer_offset);
5172 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5173#endif
5174}
5175
5176/* Emit code to save registers in the prologue. */
5177
5178static void
5179ix86_emit_save_regs (void)
5180{
5181 int regno;
5182 rtx insn;
5183
5184 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5185 if (ix86_save_reg (regno, true))
5186 {
5187 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5188 RTX_FRAME_RELATED_P (insn) = 1;
5189 }
5190}
5191
5192	/* Emit code to save registers using MOV insns.  The first register
5193	   is saved at POINTER + OFFSET.  */
5194static void
5195ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5196{
5197 int regno;
5198 rtx insn;
5199
5200 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5201 if (ix86_save_reg (regno, true))
5202 {
5203 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5204 Pmode, offset),
5205 gen_rtx_REG (Pmode, regno));
5206 RTX_FRAME_RELATED_P (insn) = 1;
5207 offset += UNITS_PER_WORD;
5208 }
5209}
5210
5211/* Expand prologue or epilogue stack adjustment.
5212	   The pattern exists to put a dependency on all ebp-based memory accesses.
5213	   STYLE should be negative if instructions should be marked as frame related,
5214	   zero if the %r11 register is live and cannot be freely used, and positive
5215 otherwise. */
5216
5217static void
5218pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5219{
5220 rtx insn;
5221
5222 if (! TARGET_64BIT)
5223 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5224 else if (x86_64_immediate_operand (offset, DImode))
5225 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5226 else
5227 {
5228 rtx r11;
5229 /* r11 is used by indirect sibcall return as well, set before the
5230 epilogue and used after the epilogue. ATM indirect sibcall
5231 shouldn't be used together with huge frame sizes in one
5232 function because of the frame_size check in sibcall.c. */
5233 if (style == 0)
5234 abort ();
5235 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5236 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5237 if (style < 0)
5238 RTX_FRAME_RELATED_P (insn) = 1;
5239 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5240 offset));
5241 }
5242 if (style < 0)
5243 RTX_FRAME_RELATED_P (insn) = 1;
5244}
5245
5246/* Expand the prologue into a bunch of separate insns. */
5247
5248void
5249ix86_expand_prologue (void)
5250{
5251 rtx insn;
5252 bool pic_reg_used;
5253 struct ix86_frame frame;
5254 HOST_WIDE_INT allocate;
5255
5256 ix86_compute_frame_layout (&frame);
5257
5258 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5259 slower on all targets. Also sdb doesn't like it. */
5260
5261 if (frame_pointer_needed)
5262 {
5263 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5264 RTX_FRAME_RELATED_P (insn) = 1;
5265
5266 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5267 RTX_FRAME_RELATED_P (insn) = 1;
5268 }
5269
5270 allocate = frame.to_allocate;
5271
5272 if (!frame.save_regs_using_mov)
5273 ix86_emit_save_regs ();
5274 else
5275 allocate += frame.nregs * UNITS_PER_WORD;
5276
5277	  /* When using the red zone we may start saving registers before
5278	     allocating the stack frame, saving one cycle of the prologue.  */
5279 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5280 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5281 : stack_pointer_rtx,
5282 -frame.nregs * UNITS_PER_WORD);
5283
5284 if (allocate == 0)
5285 ;
5286 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5287 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5288 GEN_INT (-allocate), -1);
5289 else
5290 {
5291 /* Only valid for Win32. */
5292 rtx eax = gen_rtx_REG (SImode, 0);
5293 bool eax_live = ix86_eax_live_at_start_p ();
5294
5295 if (TARGET_64BIT)
5296 abort ();
5297
5298 if (eax_live)
5299 {
5300 emit_insn (gen_push (eax));
5301 allocate -= 4;
5302 }
5303
5304 insn = emit_move_insn (eax, GEN_INT (allocate));
5305 RTX_FRAME_RELATED_P (insn) = 1;
5306
5307 insn = emit_insn (gen_allocate_stack_worker (eax));
5308 RTX_FRAME_RELATED_P (insn) = 1;
5309
5310 if (eax_live)
5311 {
5312 rtx t = plus_constant (stack_pointer_rtx, allocate);
5313 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5314 }
5315 }
5316
5317 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5318 {
5319 if (!frame_pointer_needed || !frame.to_allocate)
5320 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5321 else
5322 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5323 -frame.nregs * UNITS_PER_WORD);
5324 }
5325
5326 pic_reg_used = false;
5327 if (pic_offset_table_rtx
5328 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5329 || current_function_profile))
5330 {
5331 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5332
5333 if (alt_pic_reg_used != INVALID_REGNUM)
5334 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5335
5336 pic_reg_used = true;
5337 }
5338
5339 if (pic_reg_used)
5340 {
5341 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5342
5343 /* Even with accurate pre-reload life analysis, we can wind up
5344 deleting all references to the pic register after reload.
5345 Consider if cross-jumping unifies two sides of a branch
5346 controlled by a comparison vs the only read from a global.
5347 In which case, allow the set_got to be deleted, though we're
5348 too late to do anything about the ebx save in the prologue. */
5349 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5350 }
5351
5352	  /* Prevent function calls from being scheduled before the call to mcount.
5353	     In the pic_reg_used case, make sure that the GOT load isn't deleted.  */
5354 if (current_function_profile)
5355 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5356}
5357
5358/* Emit code to restore saved registers using MOV insns. First register
5359 is restored from POINTER + OFFSET. */
5360static void
5361ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5362 int maybe_eh_return)
5363{
5364 int regno;
5365 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5366
5367 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5368 if (ix86_save_reg (regno, maybe_eh_return))
5369 {
5370	  /* Ensure that adjust_address won't be forced to produce a pointer
5371	     outside the range allowed by the x86-64 instruction set.  */
5372 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5373 {
5374 rtx r11;
5375
5376 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5377 emit_move_insn (r11, GEN_INT (offset));
5378 emit_insn (gen_adddi3 (r11, r11, pointer));
5379 base_address = gen_rtx_MEM (Pmode, r11);
5380 offset = 0;
5381 }
5382 emit_move_insn (gen_rtx_REG (Pmode, regno),
5383 adjust_address (base_address, Pmode, offset));
5384 offset += UNITS_PER_WORD;
5385 }
5386}
5387
5388/* Restore function stack, frame, and registers. */
5389
5390void
5391ix86_expand_epilogue (int style)
5392{
5393 int regno;
5394 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5395 struct ix86_frame frame;
5396 HOST_WIDE_INT offset;
5397
5398 ix86_compute_frame_layout (&frame);
5399
5400 /* Calculate start of saved registers relative to ebp. Special care
5401 must be taken for the normal return case of a function using
5402 eh_return: the eax and edx registers are marked as saved, but not
5403 restored along this path. */
5404 offset = frame.nregs;
5405 if (current_function_calls_eh_return && style != 2)
5406 offset -= 2;
5407 offset *= -UNITS_PER_WORD;
5408
5409	  /* If we're only restoring one register and sp is not valid, then
5410	     use a move instruction to restore the register, since it's
5411	     less work than reloading sp and popping the register.
5412
5413	     The default code results in a stack adjustment using an add/lea
5414	     instruction, while this code results in a LEAVE instruction (or discrete
5415	     equivalent), so it is profitable in some other cases as well, especially
5416	     when there are no registers to restore.  We also use this code when
5417	     TARGET_USE_LEAVE is set and there is exactly one register to pop.  This
5418	     heuristic may need some tuning in the future.  */
5419 if ((!sp_valid && frame.nregs <= 1)
5420 || (TARGET_EPILOGUE_USING_MOVE
5421 && cfun->machine->use_fast_prologue_epilogue
5422 && (frame.nregs > 1 || frame.to_allocate))
5423 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5424 || (frame_pointer_needed && TARGET_USE_LEAVE
5425 && cfun->machine->use_fast_prologue_epilogue
5426 && frame.nregs == 1)
5427 || current_function_calls_eh_return)
5428 {
5429 /* Restore registers. We can use ebp or esp to address the memory
5430 locations. If both are available, default to ebp, since offsets
5431	 are known to be small.  The only exception is esp pointing directly to
5432	 the end of the block of saved registers, where we may simplify the
5433	 addressing mode.  */
5434
5435 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5436 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5437 frame.to_allocate, style == 2);
5438 else
5439 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5440 offset, style == 2);
5441
5442 /* eh_return epilogues need %ecx added to the stack pointer. */
5443 if (style == 2)
5444 {
5445 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5446
5447 if (frame_pointer_needed)
5448 {
5449 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5450 tmp = plus_constant (tmp, UNITS_PER_WORD);
5451 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5452
5453 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5454 emit_move_insn (hard_frame_pointer_rtx, tmp);
5455
5456 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5457 const0_rtx, style);
5458 }
5459 else
5460 {
5461 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5462 tmp = plus_constant (tmp, (frame.to_allocate
5463 + frame.nregs * UNITS_PER_WORD));
5464 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5465 }
5466 }
5467 else if (!frame_pointer_needed)
5468 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5469 GEN_INT (frame.to_allocate
5470 + frame.nregs * UNITS_PER_WORD),
5471 style);
5472 /* If not an i386, mov & pop is faster than "leave". */
5473 else if (TARGET_USE_LEAVE || optimize_size
5474 || !cfun->machine->use_fast_prologue_epilogue)
5475 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5476 else
5477 {
5478 pro_epilogue_adjust_stack (stack_pointer_rtx,
5479 hard_frame_pointer_rtx,
5480 const0_rtx, style);
5481 if (TARGET_64BIT)
5482 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5483 else
5484 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5485 }
5486 }
5487 else
5488 {
5489 /* First step is to deallocate the stack frame so that we can
5490 pop the registers. */
5491 if (!sp_valid)
5492 {
5493 if (!frame_pointer_needed)
5494 abort ();
5495 pro_epilogue_adjust_stack (stack_pointer_rtx,
5496 hard_frame_pointer_rtx,
5497 GEN_INT (offset), style);
5498 }
5499 else if (frame.to_allocate)
5500 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5501 GEN_INT (frame.to_allocate), style);
5502
5503 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5504 if (ix86_save_reg (regno, false))
5505 {
5506 if (TARGET_64BIT)
5507 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5508 else
5509 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5510 }
5511 if (frame_pointer_needed)
5512 {
5513 /* Leave results in shorter dependency chains on CPUs that are
5514 able to grok it fast. */
5515 if (TARGET_USE_LEAVE)
5516 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5517 else if (TARGET_64BIT)
5518 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5519 else
5520 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5521 }
5522 }
5523
5524 /* Sibcall epilogues don't want a return instruction. */
5525 if (style == 0)
5526 return;
5527
5528 if (current_function_pops_args && current_function_args_size)
5529 {
5530 rtx popc = GEN_INT (current_function_pops_args);
5531
5532 /* i386 can only pop 64K bytes. If asked to pop more, pop
5533 return address, do explicit add, and jump indirectly to the
5534 caller. */
5535
5536 if (current_function_pops_args >= 65536)
5537 {
5538 rtx ecx = gen_rtx_REG (SImode, 2);
5539
5540 /* There is no "pascal" calling convention in 64bit ABI. */
5541 if (TARGET_64BIT)
5542 abort ();
5543
5544 emit_insn (gen_popsi1 (ecx));
5545 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5546 emit_jump_insn (gen_return_indirect_internal (ecx));
5547 }
5548 else
5549 emit_jump_insn (gen_return_pop_internal (popc));
5550 }
5551 else
5552 emit_jump_insn (gen_return_internal ());
5553}
5554
5555	/* Undo the function's potential modifications, i.e. restore the original PIC register number.  */
5556
5557static void
5558ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5559 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5560{
5561 if (pic_offset_table_rtx)
5562 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5563}
5564
5565/* Extract the parts of an RTL expression that is a valid memory address
5566 for an instruction. Return 0 if the structure of the address is
5567 grossly off. Return -1 if the address contains ASHIFT, so it is not
5568	   strictly valid, but still used for computing the length of the lea instruction.  */
5569
5570static int
5571ix86_decompose_address (rtx addr, struct ix86_address *out)
5572{
5573 rtx base = NULL_RTX;
5574 rtx index = NULL_RTX;
5575 rtx disp = NULL_RTX;
5576 HOST_WIDE_INT scale = 1;
5577 rtx scale_rtx = NULL_RTX;
5578 int retval = 1;
5579 enum ix86_address_seg seg = SEG_DEFAULT;
5580
5581 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5582 base = addr;
5583 else if (GET_CODE (addr) == PLUS)
5584 {
5585 rtx addends[4], op;
5586 int n = 0, i;
5587
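      /* Flatten the nested PLUS into at most four addends, collected from
	 the outermost operand inwards, then classify each addend below.  */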
5588 op = addr;
5589 do
5590 {
5591 if (n >= 4)
5592 return 0;
5593 addends[n++] = XEXP (op, 1);
5594 op = XEXP (op, 0);
5595 }
5596 while (GET_CODE (op) == PLUS);
5597 if (n >= 4)
5598 return 0;
5599 addends[n] = op;
5600
5601 for (i = n; i >= 0; --i)
5602 {
5603 op = addends[i];
5604 switch (GET_CODE (op))
5605 {
5606 case MULT:
5607 if (index)
5608 return 0;
5609 index = XEXP (op, 0);
5610 scale_rtx = XEXP (op, 1);
5611 break;
5612
5613 case UNSPEC:
5614 if (XINT (op, 1) == UNSPEC_TP
5615 && TARGET_TLS_DIRECT_SEG_REFS
5616 && seg == SEG_DEFAULT)
5617 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5618 else
5619 return 0;
5620 break;
5621
5622 case REG:
5623 case SUBREG:
5624 if (!base)
5625 base = op;
5626 else if (!index)
5627 index = op;
5628 else
5629 return 0;
5630 break;
5631
5632 case CONST:
5633 case CONST_INT:
5634 case SYMBOL_REF:
5635 case LABEL_REF:
5636 if (disp)
5637 return 0;
5638 disp = op;
5639 break;
5640
5641 default:
5642 return 0;
5643 }
5644 }
5645 }
5646 else if (GET_CODE (addr) == MULT)
5647 {
5648 index = XEXP (addr, 0); /* index*scale */
5649 scale_rtx = XEXP (addr, 1);
5650 }
5651 else if (GET_CODE (addr) == ASHIFT)
5652 {
5653 rtx tmp;
5654
5655 /* We're called for lea too, which implements ashift on occasion. */
5656 index = XEXP (addr, 0);
5657 tmp = XEXP (addr, 1);
5658 if (GET_CODE (tmp) != CONST_INT)
5659 return 0;
5660 scale = INTVAL (tmp);
5661 if ((unsigned HOST_WIDE_INT) scale > 3)
5662 return 0;
5663 scale = 1 << scale;
5664 retval = -1;
5665 }
5666 else
5667 disp = addr; /* displacement */
5668
5669 /* Extract the integral value of scale. */
5670 if (scale_rtx)
5671 {
5672 if (GET_CODE (scale_rtx) != CONST_INT)
5673 return 0;
5674 scale = INTVAL (scale_rtx);
5675 }
5676
5677	  /* Allow the arg pointer and stack pointer as an index if there is no scaling.  */
5678 if (base && index && scale == 1
5679 && (index == arg_pointer_rtx
5680 || index == frame_pointer_rtx
5681 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5682 {
5683 rtx tmp = base;
5684 base = index;
5685 index = tmp;
5686 }
5687
5688 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5689 if ((base == hard_frame_pointer_rtx
5690 || base == frame_pointer_rtx
5691 || base == arg_pointer_rtx) && !disp)
5692 disp = const0_rtx;
5693
5694	  /* Special case: on the K6, [%esi] forces the instruction to be vector
5695	     decoded.  Avoid this by transforming it to [%esi+0].  */
5696 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5697 && base && !index && !disp
5698 && REG_P (base)
5699 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5700 disp = const0_rtx;
5701
5702 /* Special case: encode reg+reg instead of reg*2. */
5703 if (!base && index && scale && scale == 2)
5704 base = index, scale = 1;
5705
5706 /* Special case: scaling cannot be encoded without base or displacement. */
5707 if (!base && !disp && index && scale != 1)
5708 disp = const0_rtx;
5709
5710 out->base = base;
5711 out->index = index;
5712 out->disp = disp;
5713 out->scale = scale;
5714 out->seg = seg;
5715
5716 return retval;
5717}
5718
5719/* Return the cost of the memory address X.
5720   For i386, it is better to use a complex address than let gcc copy
5721   the address into a reg and make a new pseudo.  But not if the address
5722   requires two regs - that would mean more pseudos with longer
5723   lifetimes.  */
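/* As a rough illustration of the weights below: a hard register base with
   a displacement, e.g. 8(%ebx), ends up with cost 0, while an address built
   from two distinct pseudo registers ends up with cost 3 (before the K6
   penalty), so the cheaper form is preferred when both are available.  */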
5724static int
5725ix86_address_cost (rtx x)
5726{
5727 struct ix86_address parts;
5728 int cost = 1;
5729
5730 if (!ix86_decompose_address (x, &parts))
5731 abort ();
5732
5733 /* More complex memory references are better. */
5734 if (parts.disp && parts.disp != const0_rtx)
5735 cost--;
5736 if (parts.seg != SEG_DEFAULT)
5737 cost--;
5738
5739 /* Attempt to minimize number of registers in the address. */
5740 if ((parts.base
5741 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5742 || (parts.index
5743 && (!REG_P (parts.index)
5744 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5745 cost++;
5746
5747 if (parts.base
5748 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5749 && parts.index
5750 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5751 && parts.base != parts.index)
5752 cost++;
5753
5754  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5755     since its predecode logic can't detect the length of such instructions
5756     and decoding degenerates to the vector decoder.  Increase the cost of
5757     such addresses here.  The penalty is at least 2 cycles.  It may be
5758     worthwhile to split such addresses or even to refuse them entirely.
5759
5760     The following addressing modes are affected:
5761      [base+scale*index]
5762      [scale*index+disp]
5763      [base+index]
5764
5765     The first and last cases may be avoidable by explicitly coding the zero
5766     into the memory address, but I don't have an AMD-K6 machine handy to
5767     check this theory.  */
5768
5769 if (TARGET_K6
5770 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5771 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5772 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5773 cost += 10;
5774
5775 return cost;
5776}
5777
5778/* If X is a machine specific address (i.e. a symbol or label being
5779 referenced as a displacement from the GOT implemented using an
5780 UNSPEC), then return the base term. Otherwise return X. */
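/* For example, on x86-64 a GOT-relative reference such as
   (const (unspec [(symbol_ref ("foo"))] UNSPEC_GOTPCREL)) yields the
   underlying (symbol_ref ("foo")); anything that does not match this
   shape is returned unchanged.  */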
5781
5782rtx
5783ix86_find_base_term (rtx x)
5784{
5785 rtx term;
5786
5787 if (TARGET_64BIT)
5788 {
5789 if (GET_CODE (x) != CONST)
5790 return x;
5791 term = XEXP (x, 0);
5792 if (GET_CODE (term) == PLUS
5793 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5794 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5795 term = XEXP (term, 0);
5796 if (GET_CODE (term) != UNSPEC
5797 || XINT (term, 1) != UNSPEC_GOTPCREL)
5798 return x;
5799
5800 term = XVECEXP (term, 0, 0);
5801
5802 if (GET_CODE (term) != SYMBOL_REF
5803 && GET_CODE (term) != LABEL_REF)
5804 return x;
5805
5806 return term;
5807 }
5808
5809 term = ix86_delegitimize_address (x);
5810
5811 if (GET_CODE (term) != SYMBOL_REF
5812 && GET_CODE (term) != LABEL_REF)
5813 return x;
5814
5815 return term;
5816}
5817
5818/* Determine if a given RTX is a valid constant. We already know this
5819 satisfies CONSTANT_P. */
5820
5821bool
5822legitimate_constant_p (rtx x)
5823{
5824 switch (GET_CODE (x))
5825 {
5826 case CONST:
5827 x = XEXP (x, 0);
5828
5829 if (GET_CODE (x) == PLUS)
5830 {
5831 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5832 return false;
5833 x = XEXP (x, 0);
5834 }
5835
5836 /* Only some unspecs are valid as "constants". */
5837 if (GET_CODE (x) == UNSPEC)
5838 switch (XINT (x, 1))
5839 {
5840 case UNSPEC_TPOFF:
5841 case UNSPEC_NTPOFF:
5842 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5843 case UNSPEC_DTPOFF:
5844 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5845 default:
5846 return false;
5847 }
5848
5849 /* We must have drilled down to a symbol. */
5850 if (!symbolic_operand (x, Pmode))
5851 return false;
5852 /* FALLTHRU */
5853
5854 case SYMBOL_REF:
5855 /* TLS symbols are never valid. */
5856 if (tls_symbolic_operand (x, Pmode))
5857 return false;
5858 break;
5859
5860 default:
5861 break;
5862 }
5863
5864 /* Otherwise we handle everything else in the move patterns. */
5865 return true;
5866}
5867
5868/* Determine if it's legal to put X into the constant pool. This
5869 is not possible for the address of thread-local symbols, which
5870 is checked above. */
5871
5872static bool
5873ix86_cannot_force_const_mem (rtx x)
5874{
5875 return !legitimate_constant_p (x);
5876}
5877
5878/* Determine if a given RTX is a valid constant address. */
5879
5880bool
5881constant_address_p (rtx x)
5882{
5883 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5884}
5885
5886/* Nonzero if the constant value X is a legitimate general operand
5887 when generating PIC code. It is given that flag_pic is on and
5888 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5889
5890bool
5891legitimate_pic_operand_p (rtx x)
5892{
5893 rtx inner;
5894
5895 switch (GET_CODE (x))
5896 {
5897 case CONST:
5898 inner = XEXP (x, 0);
5899
5900 /* Only some unspecs are valid as "constants". */
5901 if (GET_CODE (inner) == UNSPEC)
5902 switch (XINT (inner, 1))
5903 {
5904 case UNSPEC_TPOFF:
5905 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5906 default:
5907 return false;
5908 }
5909 /* FALLTHRU */
5910
5911 case SYMBOL_REF:
5912 case LABEL_REF:
5913 return legitimate_pic_address_disp_p (x);
5914
5915 default:
5916 return true;
5917 }
5918}
5919
5920/* Determine if a given CONST RTX is a valid memory displacement
5921 in PIC mode. */
5922
5923int
5924legitimate_pic_address_disp_p (rtx disp)
5925{
5926 bool saw_plus;
5927
5928 /* In 64bit mode we can allow direct addresses of symbols and labels
5929 when they are not dynamic symbols. */
5930 if (TARGET_64BIT)
5931 {
5932 /* TLS references should always be enclosed in UNSPEC. */
5933 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5934 return 0;
5935 if (GET_CODE (disp) == SYMBOL_REF
5936 && ix86_cmodel == CM_SMALL_PIC
5937 && SYMBOL_REF_LOCAL_P (disp))
5938 return 1;
5939 if (GET_CODE (disp) == LABEL_REF)
5940 return 1;
5941 if (GET_CODE (disp) == CONST
5942 && GET_CODE (XEXP (disp, 0)) == PLUS)
5943 {
5944 rtx op0 = XEXP (XEXP (disp, 0), 0);
5945 rtx op1 = XEXP (XEXP (disp, 0), 1);
5946
5947 /* TLS references should always be enclosed in UNSPEC. */
5948 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5949 return 0;
5950 if (((GET_CODE (op0) == SYMBOL_REF
5951 && ix86_cmodel == CM_SMALL_PIC
5952 && SYMBOL_REF_LOCAL_P (op0))
5953 || GET_CODE (op0) == LABEL_REF)
5954 && GET_CODE (op1) == CONST_INT
5955 && INTVAL (op1) < 16*1024*1024
5956 && INTVAL (op1) >= -16*1024*1024)
5957 return 1;
5958 }
5959 }
5960 if (GET_CODE (disp) != CONST)
5961 return 0;
5962 disp = XEXP (disp, 0);
5963
5964 if (TARGET_64BIT)
5965 {
5966      /* It is unsafe to allow PLUS expressions here; that could exceed the
5967	 allowed distance of GOT table references.  We should not need these anyway.  */
5968 if (GET_CODE (disp) != UNSPEC
5969 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5970 return 0;
5971
5972 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5973 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5974 return 0;
5975 return 1;
5976 }
5977
5978 saw_plus = false;
5979 if (GET_CODE (disp) == PLUS)
5980 {
5981 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5982 return 0;
5983 disp = XEXP (disp, 0);
5984 saw_plus = true;
5985 }
5986
5987 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5988 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5989 {
5990 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5991 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5992 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5993 {
5994 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5995 if (! strcmp (sym_name, "<pic base>"))
5996 return 1;
5997 }
5998 }
5999
6000 if (GET_CODE (disp) != UNSPEC)
6001 return 0;
6002
6003 switch (XINT (disp, 1))
6004 {
6005 case UNSPEC_GOT:
6006 if (saw_plus)
6007 return false;
6008 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6009 case UNSPEC_GOTOFF:
6010 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6011 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6012 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6013 return false;
6014 case UNSPEC_GOTTPOFF:
6015 case UNSPEC_GOTNTPOFF:
6016 case UNSPEC_INDNTPOFF:
6017 if (saw_plus)
6018 return false;
6019 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6020 case UNSPEC_NTPOFF:
6021 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6022 case UNSPEC_DTPOFF:
6023 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6024 }
6025
6026 return 0;
6027}
6028
6029/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6030 memory address for an instruction. The MODE argument is the machine mode
6031 for the MEM expression that wants to use this address.
6032
6033   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
6034 convert common non-canonical forms to canonical form so that they will
6035 be recognized. */
6036
6037int
6038legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6039{
6040 struct ix86_address parts;
6041 rtx base, index, disp;
6042 HOST_WIDE_INT scale;
6043 const char *reason = NULL;
6044 rtx reason_rtx = NULL_RTX;
6045
6046 if (TARGET_DEBUG_ADDR)
6047 {
6048 fprintf (stderr,
6049 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6050 GET_MODE_NAME (mode), strict);
6051 debug_rtx (addr);
6052 }
6053
6054 if (ix86_decompose_address (addr, &parts) <= 0)
6055 {
6056 reason = "decomposition failed";
6057 goto report_error;
6058 }
6059
6060 base = parts.base;
6061 index = parts.index;
6062 disp = parts.disp;
6063 scale = parts.scale;
6064
6065 /* Validate base register.
6066
6067   Don't allow SUBREGs here; that can lead to spill failures when the base
6068 is one word out of a two word structure, which is represented internally
6069 as a DImode int. */
6070
6071 if (base)
6072 {
6073 reason_rtx = base;
6074
6075 if (GET_CODE (base) != REG)
6076 {
6077 reason = "base is not a register";
6078 goto report_error;
6079 }
6080
6081 if (GET_MODE (base) != Pmode)
6082 {
6083 reason = "base is not in Pmode";
6084 goto report_error;
6085 }
6086
6087 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6088 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6089 {
6090 reason = "base is not valid";
6091 goto report_error;
6092 }
6093 }
6094
6095 /* Validate index register.
6096
6097   Don't allow SUBREGs here; that can lead to spill failures when the index
6098 is one word out of a two word structure, which is represented internally
6099 as a DImode int. */
6100
6101 if (index)
6102 {
6103 reason_rtx = index;
6104
6105 if (GET_CODE (index) != REG)
6106 {
6107 reason = "index is not a register";
6108 goto report_error;
6109 }
6110
6111 if (GET_MODE (index) != Pmode)
6112 {
6113 reason = "index is not in Pmode";
6114 goto report_error;
6115 }
6116
6117 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6118 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6119 {
6120 reason = "index is not valid";
6121 goto report_error;
6122 }
6123 }
6124
6125 /* Validate scale factor. */
6126 if (scale != 1)
6127 {
6128 reason_rtx = GEN_INT (scale);
6129 if (!index)
6130 {
6131 reason = "scale without index";
6132 goto report_error;
6133 }
6134
6135 if (scale != 2 && scale != 4 && scale != 8)
6136 {
6137 reason = "scale is not a valid multiplier";
6138 goto report_error;
6139 }
6140 }
6141
6142 /* Validate displacement. */
6143 if (disp)
6144 {
6145 reason_rtx = disp;
6146
6147 if (GET_CODE (disp) == CONST
6148 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6149 switch (XINT (XEXP (disp, 0), 1))
6150 {
6151 case UNSPEC_GOT:
6152 case UNSPEC_GOTOFF:
6153 case UNSPEC_GOTPCREL:
6154 if (!flag_pic)
6155 abort ();
6156 goto is_legitimate_pic;
6157
6158 case UNSPEC_GOTTPOFF:
6159 case UNSPEC_GOTNTPOFF:
6160 case UNSPEC_INDNTPOFF:
6161 case UNSPEC_NTPOFF:
6162 case UNSPEC_DTPOFF:
6163 break;
6164
6165 default:
6166 reason = "invalid address unspec";
6167 goto report_error;
6168 }
6169
6170 else if (flag_pic && (SYMBOLIC_CONST (disp)
6171#if TARGET_MACHO
6172 && !machopic_operand_p (disp)
6173#endif
6174 ))
6175 {
6176 is_legitimate_pic:
6177 if (TARGET_64BIT && (index || base))
6178 {
6179 /* foo@dtpoff(%rX) is ok. */
6180 if (GET_CODE (disp) != CONST
6181 || GET_CODE (XEXP (disp, 0)) != PLUS
6182 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6183 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6184 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6185 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6186 {
6187 reason = "non-constant pic memory reference";
6188 goto report_error;
6189 }
6190 }
6191 else if (! legitimate_pic_address_disp_p (disp))
6192 {
6193 reason = "displacement is an invalid pic construct";
6194 goto report_error;
6195 }
6196
6197 /* This code used to verify that a symbolic pic displacement
6198 includes the pic_offset_table_rtx register.
6199
6200	     While this is a good idea, unfortunately these constructs may
6201	     be created by the "adds using lea" optimization for incorrect
6202	     code like:
6203
6204 int a;
6205 int foo(int i)
6206 {
6207 return *(&a+i);
6208 }
6209
6210	     This code is nonsensical, but results in addressing the
6211	     GOT table with a pic_offset_table_rtx base.  We can't
6212	     just refuse it easily, since it gets matched by the
6213	     "addsi3" pattern, which later gets split to lea when the
6214	     output register differs from the input.  While this
6215	     could be handled by a separate addsi pattern for this case
6216	     that never results in lea, disabling this test seems to be
6217	     the easier and correct fix for the crash.  */
6218 }
6219 else if (GET_CODE (disp) != LABEL_REF
6220 && GET_CODE (disp) != CONST_INT
6221 && (GET_CODE (disp) != CONST
6222 || !legitimate_constant_p (disp))
6223 && (GET_CODE (disp) != SYMBOL_REF
6224 || !legitimate_constant_p (disp)))
6225 {
6226 reason = "displacement is not constant";
6227 goto report_error;
6228 }
6229 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6230 {
6231 reason = "displacement is out of range";
6232 goto report_error;
6233 }
6234 }
6235
6236 /* Everything looks valid. */
6237 if (TARGET_DEBUG_ADDR)
6238 fprintf (stderr, "Success.\n");
6239 return TRUE;
6240
6241 report_error:
6242 if (TARGET_DEBUG_ADDR)
6243 {
6244 fprintf (stderr, "Error: %s\n", reason);
6245 debug_rtx (reason_rtx);
6246 }
6247 return FALSE;
6248}
6249
6250/* Return a unique alias set for the GOT.  */
6251
6252static HOST_WIDE_INT
6253ix86_GOT_alias_set (void)
6254{
6255 static HOST_WIDE_INT set = -1;
6256 if (set == -1)
6257 set = new_alias_set ();
6258 return set;
6259}
6260
6261/* Return a legitimate reference for ORIG (an address) using the
6262 register REG. If REG is 0, a new pseudo is generated.
6263
6264 There are two types of references that must be handled:
6265
6266 1. Global data references must load the address from the GOT, via
6267 the PIC reg. An insn is emitted to do this load, and the reg is
6268 returned.
6269
6270 2. Static data references, constant pool addresses, and code labels
6271 compute the address as an offset from the GOT, whose base is in
6272 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6273 differentiate them from global data objects. The returned
6274 address is the PIC reg + an unspec constant.
6275
6276 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6277 reg also appears in the address. */
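/* Illustration only (the exact RTL is constructed below): on 32-bit ELF
   targets a global symbol "foo" is reached through a GOT load, roughly

	movl	foo@GOT(%ebx), %reg

   while a local symbol "bar" is addressed directly as %ebx + bar@GOTOFF,
   assuming %ebx holds the PIC base.  */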
6278
6279rtx
6280legitimize_pic_address (rtx orig, rtx reg)
6281{
6282 rtx addr = orig;
6283 rtx new = orig;
6284 rtx base;
6285
6286#if TARGET_MACHO
6287 if (reg == 0)
6288 reg = gen_reg_rtx (Pmode);
6289 /* Use the generic Mach-O PIC machinery. */
6290 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6291#endif
6292
6293 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6294 new = addr;
6295 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6296 {
6297 /* This symbol may be referenced via a displacement from the PIC
6298 base address (@GOTOFF). */
6299
6300 if (reload_in_progress)
6301 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6302 if (GET_CODE (addr) == CONST)
6303 addr = XEXP (addr, 0);
6304 if (GET_CODE (addr) == PLUS)
6305 {
6306 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6307 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6308 }
6309 else
6310 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6311 new = gen_rtx_CONST (Pmode, new);
6312 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6313
6314 if (reg != 0)
6315 {
6316 emit_move_insn (reg, new);
6317 new = reg;
6318 }
6319 }
6320 else if (GET_CODE (addr) == SYMBOL_REF)
6321 {
6322 if (TARGET_64BIT)
6323 {
6324 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6325 new = gen_rtx_CONST (Pmode, new);
6326 new = gen_rtx_MEM (Pmode, new);
6327 RTX_UNCHANGING_P (new) = 1;
6328 set_mem_alias_set (new, ix86_GOT_alias_set ());
6329
6330 if (reg == 0)
6331 reg = gen_reg_rtx (Pmode);
6332	  /* Use gen_movsi directly; otherwise the address is loaded
6333	     into a register for CSE.  We don't want to CSE these addresses;
6334	     instead we CSE addresses from the GOT table, so skip this.  */
6335 emit_insn (gen_movsi (reg, new));
6336 new = reg;
6337 }
6338 else
6339 {
6340 /* This symbol must be referenced via a load from the
6341 Global Offset Table (@GOT). */
6342
6343 if (reload_in_progress)
6344 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6345 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6346 new = gen_rtx_CONST (Pmode, new);
6347 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6348 new = gen_rtx_MEM (Pmode, new);
6349 RTX_UNCHANGING_P (new) = 1;
6350 set_mem_alias_set (new, ix86_GOT_alias_set ());
6351
6352 if (reg == 0)
6353 reg = gen_reg_rtx (Pmode);
6354 emit_move_insn (reg, new);
6355 new = reg;
6356 }
6357 }
6358 else
6359 {
6360 if (GET_CODE (addr) == CONST)
6361 {
6362 addr = XEXP (addr, 0);
6363
6364 /* We must match stuff we generate before. Assume the only
6365 unspecs that can get here are ours. Not that we could do
6366 anything with them anyway.... */
6367 if (GET_CODE (addr) == UNSPEC
6368 || (GET_CODE (addr) == PLUS
6369 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6370 return orig;
6371 if (GET_CODE (addr) != PLUS)
6372 abort ();
6373 }
6374 if (GET_CODE (addr) == PLUS)
6375 {
6376 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6377
6378 /* Check first to see if this is a constant offset from a @GOTOFF
6379 symbol reference. */
6380 if (local_symbolic_operand (op0, Pmode)
6381 && GET_CODE (op1) == CONST_INT)
6382 {
6383 if (!TARGET_64BIT)
6384 {
6385 if (reload_in_progress)
6386 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6387 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6388 UNSPEC_GOTOFF);
6389 new = gen_rtx_PLUS (Pmode, new, op1);
6390 new = gen_rtx_CONST (Pmode, new);
6391 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6392
6393 if (reg != 0)
6394 {
6395 emit_move_insn (reg, new);
6396 new = reg;
6397 }
6398 }
6399 else
6400 {
6401 if (INTVAL (op1) < -16*1024*1024
6402 || INTVAL (op1) >= 16*1024*1024)
6403 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6404 }
6405 }
6406 else
6407 {
6408 base = legitimize_pic_address (XEXP (addr, 0), reg);
6409 new = legitimize_pic_address (XEXP (addr, 1),
6410 base == reg ? NULL_RTX : reg);
6411
6412 if (GET_CODE (new) == CONST_INT)
6413 new = plus_constant (base, INTVAL (new));
6414 else
6415 {
6416 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6417 {
6418 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6419 new = XEXP (new, 1);
6420 }
6421 new = gen_rtx_PLUS (Pmode, base, new);
6422 }
6423 }
6424 }
6425 }
6426 return new;
6427}
6428
6429/* Load the thread pointer. If TO_REG is true, force it into a register. */
6430
6431static rtx
6432get_thread_pointer (int to_reg)
6433{
6434 rtx tp, reg, insn;
6435
6436 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6437 if (!to_reg)
6438 return tp;
6439
6440 reg = gen_reg_rtx (Pmode);
6441 insn = gen_rtx_SET (VOIDmode, reg, tp);
6442 insn = emit_insn (insn);
6443
6444 return reg;
6445}
6446
6447/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6448 false if we expect this to be used for a memory address and true if
6449 we expect to load the address into a register. */
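/* As a sketch of the models handled below (32-bit, GNU TLS assumed):
   initial-exec loads the symbol's offset from the GOT and adds it to the
   thread pointer, local-exec adds the link-time constant x@ntpoff to the
   thread pointer directly, and the two dynamic models obtain the address
   through the tls_get_addr machinery.  */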
6450
6451static rtx
6452legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6453{
6454 rtx dest, base, off, pic;
6455 int type;
6456
6457 switch (model)
6458 {
6459 case TLS_MODEL_GLOBAL_DYNAMIC:
6460 dest = gen_reg_rtx (Pmode);
6461 if (TARGET_64BIT)
6462 {
6463 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6464
6465 start_sequence ();
6466 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6467 insns = get_insns ();
6468 end_sequence ();
6469
6470 emit_libcall_block (insns, dest, rax, x);
6471 }
6472 else
6473 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6474 break;
6475
6476 case TLS_MODEL_LOCAL_DYNAMIC:
6477 base = gen_reg_rtx (Pmode);
6478 if (TARGET_64BIT)
6479 {
6480 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6481
6482 start_sequence ();
6483 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6484 insns = get_insns ();
6485 end_sequence ();
6486
6487 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6488 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6489 emit_libcall_block (insns, base, rax, note);
6490 }
6491 else
6492 emit_insn (gen_tls_local_dynamic_base_32 (base));
6493
6494 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6495 off = gen_rtx_CONST (Pmode, off);
6496
6497 return gen_rtx_PLUS (Pmode, base, off);
6498
6499 case TLS_MODEL_INITIAL_EXEC:
6500 if (TARGET_64BIT)
6501 {
6502 pic = NULL;
6503 type = UNSPEC_GOTNTPOFF;
6504 }
6505 else if (flag_pic)
6506 {
6507 if (reload_in_progress)
6508 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6509 pic = pic_offset_table_rtx;
6510 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6511 }
6512 else if (!TARGET_GNU_TLS)
6513 {
6514 pic = gen_reg_rtx (Pmode);
6515 emit_insn (gen_set_got (pic));
6516 type = UNSPEC_GOTTPOFF;
6517 }
6518 else
6519 {
6520 pic = NULL;
6521 type = UNSPEC_INDNTPOFF;
6522 }
6523
6524 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6525 off = gen_rtx_CONST (Pmode, off);
6526 if (pic)
6527 off = gen_rtx_PLUS (Pmode, pic, off);
6528 off = gen_rtx_MEM (Pmode, off);
6529 RTX_UNCHANGING_P (off) = 1;
6530 set_mem_alias_set (off, ix86_GOT_alias_set ());
6531
6532 if (TARGET_64BIT || TARGET_GNU_TLS)
6533 {
6534 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6535 off = force_reg (Pmode, off);
6536 return gen_rtx_PLUS (Pmode, base, off);
6537 }
6538 else
6539 {
6540 base = get_thread_pointer (true);
6541 dest = gen_reg_rtx (Pmode);
6542 emit_insn (gen_subsi3 (dest, base, off));
6543 }
6544 break;
6545
6546 case TLS_MODEL_LOCAL_EXEC:
6547 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6548 (TARGET_64BIT || TARGET_GNU_TLS)
6549 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6550 off = gen_rtx_CONST (Pmode, off);
6551
6552 if (TARGET_64BIT || TARGET_GNU_TLS)
6553 {
6554 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6555 return gen_rtx_PLUS (Pmode, base, off);
6556 }
6557 else
6558 {
6559 base = get_thread_pointer (true);
6560 dest = gen_reg_rtx (Pmode);
6561 emit_insn (gen_subsi3 (dest, base, off));
6562 }
6563 break;
6564
6565 default:
6566 abort ();
6567 }
6568
6569 return dest;
6570}
6571
6572/* Try machine-dependent ways of modifying an illegitimate address
6573 to be legitimate. If we find one, return the new, valid address.
6574 This macro is used in only one place: `memory_address' in explow.c.
6575
6576 OLDX is the address as it was before break_out_memory_refs was called.
6577 In some cases it is useful to look at this to decide what needs to be done.
6578
6579 MODE and WIN are passed so that this macro can use
6580 GO_IF_LEGITIMATE_ADDRESS.
6581
6582 It is always safe for this macro to do nothing. It exists to recognize
6583 opportunities to optimize the output.
6584
6585 For the 80386, we handle X+REG by loading X into a register R and
6586 using R+REG. R will go in a general reg and indexing will be used.
6587 However, if REG is a broken-out memory address or multiplication,
6588 nothing needs to be done because REG can certainly go in a general reg.
6589
6590 When -fpic is used, special handling is needed for symbolic references.
6591 See comments by legitimize_pic_address in i386.c for details. */
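/* For example, (plus (ashift (reg) (const_int 2)) (reg)) is rewritten below
   as (plus (mult (reg) (const_int 4)) (reg)), matching the base + index*scale
   addressing that the hardware actually provides.  */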
6592
6593rtx
6594legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6595{
6596 int changed = 0;
6597 unsigned log;
6598
6599 if (TARGET_DEBUG_ADDR)
6600 {
6601 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6602 GET_MODE_NAME (mode));
6603 debug_rtx (x);
6604 }
6605
6606 log = tls_symbolic_operand (x, mode);
6607 if (log)
6608 return legitimize_tls_address (x, log, false);
6609
6610 if (flag_pic && SYMBOLIC_CONST (x))
6611 return legitimize_pic_address (x, 0);
6612
6613  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6614 if (GET_CODE (x) == ASHIFT
6615 && GET_CODE (XEXP (x, 1)) == CONST_INT
6616 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6617 {
6618 changed = 1;
6619 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6620 GEN_INT (1 << log));
6621 }
6622
6623 if (GET_CODE (x) == PLUS)
6624 {
6625 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6626
6627 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6628 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6629 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6630 {
6631 changed = 1;
6632 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6633 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6634 GEN_INT (1 << log));
6635 }
6636
6637 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6638 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6639 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6640 {
6641 changed = 1;
6642 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6643 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6644 GEN_INT (1 << log));
6645 }
6646
6647 /* Put multiply first if it isn't already. */
6648 if (GET_CODE (XEXP (x, 1)) == MULT)
6649 {
6650 rtx tmp = XEXP (x, 0);
6651 XEXP (x, 0) = XEXP (x, 1);
6652 XEXP (x, 1) = tmp;
6653 changed = 1;
6654 }
6655
6656 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6657 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6658 created by virtual register instantiation, register elimination, and
6659 similar optimizations. */
6660 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6661 {
6662 changed = 1;
6663 x = gen_rtx_PLUS (Pmode,
6664 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6665 XEXP (XEXP (x, 1), 0)),
6666 XEXP (XEXP (x, 1), 1));
6667 }
6668
6669 /* Canonicalize
6670 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6671 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6672 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6673 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6674 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6675 && CONSTANT_P (XEXP (x, 1)))
6676 {
6677 rtx constant;
6678 rtx other = NULL_RTX;
6679
6680 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6681 {
6682 constant = XEXP (x, 1);
6683 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6684 }
6685 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6686 {
6687 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6688 other = XEXP (x, 1);
6689 }
6690 else
6691 constant = 0;
6692
6693 if (constant)
6694 {
6695 changed = 1;
6696 x = gen_rtx_PLUS (Pmode,
6697 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6698 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6699 plus_constant (other, INTVAL (constant)));
6700 }
6701 }
6702
6703 if (changed && legitimate_address_p (mode, x, FALSE))
6704 return x;
6705
6706 if (GET_CODE (XEXP (x, 0)) == MULT)
6707 {
6708 changed = 1;
6709 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6710 }
6711
6712 if (GET_CODE (XEXP (x, 1)) == MULT)
6713 {
6714 changed = 1;
6715 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6716 }
6717
6718 if (changed
6719 && GET_CODE (XEXP (x, 1)) == REG
6720 && GET_CODE (XEXP (x, 0)) == REG)
6721 return x;
6722
6723 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6724 {
6725 changed = 1;
6726 x = legitimize_pic_address (x, 0);
6727 }
6728
6729 if (changed && legitimate_address_p (mode, x, FALSE))
6730 return x;
6731
6732 if (GET_CODE (XEXP (x, 0)) == REG)
6733 {
6734 rtx temp = gen_reg_rtx (Pmode);
6735 rtx val = force_operand (XEXP (x, 1), temp);
6736 if (val != temp)
6737 emit_move_insn (temp, val);
6738
6739 XEXP (x, 1) = temp;
6740 return x;
6741 }
6742
6743 else if (GET_CODE (XEXP (x, 1)) == REG)
6744 {
6745 rtx temp = gen_reg_rtx (Pmode);
6746 rtx val = force_operand (XEXP (x, 0), temp);
6747 if (val != temp)
6748 emit_move_insn (temp, val);
6749
6750 XEXP (x, 0) = temp;
6751 return x;
6752 }
6753 }
6754
6755 return x;
6756}
6757
6758/* Print an integer constant expression in assembler syntax. Addition
6759 and subtraction are the only arithmetic that may appear in these
6760 expressions. FILE is the stdio stream to write to, X is the rtx, and
6761 CODE is the operand print code from the output string. */
6762
6763static void
6764output_pic_addr_const (FILE *file, rtx x, int code)
6765{
6766 char buf[256];
6767
6768 switch (GET_CODE (x))
6769 {
6770 case PC:
6771 if (flag_pic)
6772 putc ('.', file);
6773 else
6774 abort ();
6775 break;
6776
6777 case SYMBOL_REF:
6778 assemble_name (file, XSTR (x, 0));
6779 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6780 fputs ("@PLT", file);
6781 break;
6782
6783 case LABEL_REF:
6784 x = XEXP (x, 0);
6785 /* FALLTHRU */
6786 case CODE_LABEL:
6787 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6788 assemble_name (asm_out_file, buf);
6789 break;
6790
6791 case CONST_INT:
6792 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6793 break;
6794
6795 case CONST:
6796 /* This used to output parentheses around the expression,
6797 but that does not work on the 386 (either ATT or BSD assembler). */
6798 output_pic_addr_const (file, XEXP (x, 0), code);
6799 break;
6800
6801 case CONST_DOUBLE:
6802 if (GET_MODE (x) == VOIDmode)
6803 {
6804 /* We can use %d if the number is <32 bits and positive. */
6805 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6806 fprintf (file, "0x%lx%08lx",
6807 (unsigned long) CONST_DOUBLE_HIGH (x),
6808 (unsigned long) CONST_DOUBLE_LOW (x));
6809 else
6810 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6811 }
6812 else
6813 /* We can't handle floating point constants;
6814 PRINT_OPERAND must handle them. */
6815 output_operand_lossage ("floating constant misused");
6816 break;
6817
6818 case PLUS:
6819 /* Some assemblers need integer constants to appear first. */
6820 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6821 {
6822 output_pic_addr_const (file, XEXP (x, 0), code);
6823 putc ('+', file);
6824 output_pic_addr_const (file, XEXP (x, 1), code);
6825 }
6826 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6827 {
6828 output_pic_addr_const (file, XEXP (x, 1), code);
6829 putc ('+', file);
6830 output_pic_addr_const (file, XEXP (x, 0), code);
6831 }
6832 else
6833 abort ();
6834 break;
6835
6836 case MINUS:
6837 if (!TARGET_MACHO)
6838 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6839 output_pic_addr_const (file, XEXP (x, 0), code);
6840 putc ('-', file);
6841 output_pic_addr_const (file, XEXP (x, 1), code);
6842 if (!TARGET_MACHO)
6843 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6844 break;
6845
6846 case UNSPEC:
6847 if (XVECLEN (x, 0) != 1)
6848 abort ();
6849 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6850 switch (XINT (x, 1))
6851 {
6852 case UNSPEC_GOT:
6853 fputs ("@GOT", file);
6854 break;
6855 case UNSPEC_GOTOFF:
6856 fputs ("@GOTOFF", file);
6857 break;
6858 case UNSPEC_GOTPCREL:
6859 fputs ("@GOTPCREL(%rip)", file);
6860 break;
6861 case UNSPEC_GOTTPOFF:
6862 /* FIXME: This might be @TPOFF in Sun ld too. */
6863 fputs ("@GOTTPOFF", file);
6864 break;
6865 case UNSPEC_TPOFF:
6866 fputs ("@TPOFF", file);
6867 break;
6868 case UNSPEC_NTPOFF:
6869 if (TARGET_64BIT)
6870 fputs ("@TPOFF", file);
6871 else
6872 fputs ("@NTPOFF", file);
6873 break;
6874 case UNSPEC_DTPOFF:
6875 fputs ("@DTPOFF", file);
6876 break;
6877 case UNSPEC_GOTNTPOFF:
6878 if (TARGET_64BIT)
6879 fputs ("@GOTTPOFF(%rip)", file);
6880 else
6881 fputs ("@GOTNTPOFF", file);
6882 break;
6883 case UNSPEC_INDNTPOFF:
6884 fputs ("@INDNTPOFF", file);
6885 break;
6886 default:
6887 output_operand_lossage ("invalid UNSPEC as operand");
6888 break;
6889 }
6890 break;
6891
6892 default:
6893 output_operand_lossage ("invalid expression as operand");
6894 }
6895}
6896
6897/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6898 We need to handle our special PIC relocations. */
6899
6900void
6901i386_dwarf_output_addr_const (FILE *file, rtx x)
6902{
6903#ifdef ASM_QUAD
6904 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6905#else
6906 if (TARGET_64BIT)
6907 abort ();
6908 fprintf (file, "%s", ASM_LONG);
6909#endif
6910 if (flag_pic)
6911 output_pic_addr_const (file, x, '\0');
6912 else
6913 output_addr_const (file, x);
6914 fputc ('\n', file);
6915}
6916
6917/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6918 We need to emit DTP-relative relocations. */
6919
6920void
6921i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6922{
6923 fputs (ASM_LONG, file);
6924 output_addr_const (file, x);
6925 fputs ("@DTPOFF", file);
6926 switch (size)
6927 {
6928 case 4:
6929 break;
6930 case 8:
6931 fputs (", 0", file);
6932 break;
6933 default:
6934 abort ();
6935 }
6936}
6937
6938/* In the name of slightly smaller debug output, and to cater to
6939   general assembler lossage, recognize PIC+GOTOFF and turn it back
6940 into a direct symbol reference. */
6941
6942static rtx
6943ix86_delegitimize_address (rtx orig_x)
6944{
6945 rtx x = orig_x, y;
6946
6947 if (GET_CODE (x) == MEM)
6948 x = XEXP (x, 0);
6949
6950 if (TARGET_64BIT)
6951 {
6952 if (GET_CODE (x) != CONST
6953 || GET_CODE (XEXP (x, 0)) != UNSPEC
6954 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6955 || GET_CODE (orig_x) != MEM)
6956 return orig_x;
6957 return XVECEXP (XEXP (x, 0), 0, 0);
6958 }
6959
6960 if (GET_CODE (x) != PLUS
6961 || GET_CODE (XEXP (x, 1)) != CONST)
6962 return orig_x;
6963
6964 if (GET_CODE (XEXP (x, 0)) == REG
6965 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6966 /* %ebx + GOT/GOTOFF */
6967 y = NULL;
6968 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6969 {
6970 /* %ebx + %reg * scale + GOT/GOTOFF */
6971 y = XEXP (x, 0);
6972 if (GET_CODE (XEXP (y, 0)) == REG
6973 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6974 y = XEXP (y, 1);
6975 else if (GET_CODE (XEXP (y, 1)) == REG
6976 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6977 y = XEXP (y, 0);
6978 else
6979 return orig_x;
6980 if (GET_CODE (y) != REG
6981 && GET_CODE (y) != MULT
6982 && GET_CODE (y) != ASHIFT)
6983 return orig_x;
6984 }
6985 else
6986 return orig_x;
6987
6988 x = XEXP (XEXP (x, 1), 0);
6989 if (GET_CODE (x) == UNSPEC
6990 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6991 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6992 {
6993 if (y)
6994 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6995 return XVECEXP (x, 0, 0);
6996 }
6997
6998 if (GET_CODE (x) == PLUS
6999 && GET_CODE (XEXP (x, 0)) == UNSPEC
7000 && GET_CODE (XEXP (x, 1)) == CONST_INT
7001 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7002 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7003 && GET_CODE (orig_x) != MEM)))
7004 {
7005 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7006 if (y)
7007 return gen_rtx_PLUS (Pmode, y, x);
7008 return x;
7009 }
7010
7011 return orig_x;
7012}
7013
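/* Emit to FILE the assembler condition suffix ("e", "ne", "g", "a", ...)
   corresponding to the comparison CODE in mode MODE.  If REVERSE is nonzero,
   emit the suffix for the reversed condition.  FP selects the fcmov-style
   spellings ("nbe", "nb", "u", "nu") where they differ from the set/cmov
   ones.  */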
7014static void
7015put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7016 int fp, FILE *file)
7017{
7018 const char *suffix;
7019
7020 if (mode == CCFPmode || mode == CCFPUmode)
7021 {
7022 enum rtx_code second_code, bypass_code;
7023 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7024 if (bypass_code != NIL || second_code != NIL)
7025 abort ();
7026 code = ix86_fp_compare_code_to_integer (code);
7027 mode = CCmode;
7028 }
7029 if (reverse)
7030 code = reverse_condition (code);
7031
7032 switch (code)
7033 {
7034 case EQ:
7035 suffix = "e";
7036 break;
7037 case NE:
7038 suffix = "ne";
7039 break;
7040 case GT:
7041 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7042 abort ();
7043 suffix = "g";
7044 break;
7045 case GTU:
7046      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7047	 Those same assemblers have the same but opposite lossage on cmov.  */
7048 if (mode != CCmode)
7049 abort ();
7050 suffix = fp ? "nbe" : "a";
7051 break;
7052 case LT:
7053 if (mode == CCNOmode || mode == CCGOCmode)
7054 suffix = "s";
7055 else if (mode == CCmode || mode == CCGCmode)
7056 suffix = "l";
7057 else
7058 abort ();
7059 break;
7060 case LTU:
7061 if (mode != CCmode)
7062 abort ();
7063 suffix = "b";
7064 break;
7065 case GE:
7066 if (mode == CCNOmode || mode == CCGOCmode)
7067 suffix = "ns";
7068 else if (mode == CCmode || mode == CCGCmode)
7069 suffix = "ge";
7070 else
7071 abort ();
7072 break;
7073 case GEU:
7074 /* ??? As above. */
7075 if (mode != CCmode)
7076 abort ();
7077 suffix = fp ? "nb" : "ae";
7078 break;
7079 case LE:
7080 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7081 abort ();
7082 suffix = "le";
7083 break;
7084 case LEU:
7085 if (mode != CCmode)
7086 abort ();
7087 suffix = "be";
7088 break;
7089 case UNORDERED:
7090 suffix = fp ? "u" : "p";
7091 break;
7092 case ORDERED:
7093 suffix = fp ? "nu" : "np";
7094 break;
7095 default:
7096 abort ();
7097 }
7098 fputs (suffix, file);
7099}
7100
7101/* Print the name of register X to FILE based on its machine mode and number.
7102 If CODE is 'w', pretend the mode is HImode.
7103 If CODE is 'b', pretend the mode is QImode.
7104 If CODE is 'k', pretend the mode is SImode.
7105 If CODE is 'q', pretend the mode is DImode.
7106 If CODE is 'h', pretend the reg is the `high' byte register.
7107 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
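/* For example, with operand register %ax: code 'k' prints "%eax", 'q' prints
   "%rax", 'b' prints "%al" and 'h' prints "%ah" (modulo the dialect-dependent
   register prefix).  */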
7108
7109void
7110print_reg (rtx x, int code, FILE *file)
7111{
7112 if (REGNO (x) == ARG_POINTER_REGNUM
7113 || REGNO (x) == FRAME_POINTER_REGNUM
7114 || REGNO (x) == FLAGS_REG
7115 || REGNO (x) == FPSR_REG)
7116 abort ();
7117
7118 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7119 putc ('%', file);
7120
7121 if (code == 'w' || MMX_REG_P (x))
7122 code = 2;
7123 else if (code == 'b')
7124 code = 1;
7125 else if (code == 'k')
7126 code = 4;
7127 else if (code == 'q')
7128 code = 8;
7129 else if (code == 'y')
7130 code = 3;
7131 else if (code == 'h')
7132 code = 0;
7133 else
7134 code = GET_MODE_SIZE (GET_MODE (x));
7135
7136  /* Irritatingly, AMD extended registers use a different naming convention
7137 from the normal registers. */
7138 if (REX_INT_REG_P (x))
7139 {
7140 if (!TARGET_64BIT)
7141 abort ();
7142 switch (code)
7143 {
7144 case 0:
7145 error ("extended registers have no high halves");
7146 break;
7147 case 1:
7148 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7149 break;
7150 case 2:
7151 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7152 break;
7153 case 4:
7154 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7155 break;
7156 case 8:
7157 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7158 break;
7159 default:
7160 error ("unsupported operand size for extended register");
7161 break;
7162 }
7163 return;
7164 }
7165 switch (code)
7166 {
7167 case 3:
7168 if (STACK_TOP_P (x))
7169 {
7170 fputs ("st(0)", file);
7171 break;
7172 }
7173 /* FALLTHRU */
7174 case 8:
7175 case 4:
7176 case 12:
7177 if (! ANY_FP_REG_P (x))
7178 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7179 /* FALLTHRU */
7180 case 16:
7181 case 2:
7182 normal:
7183 fputs (hi_reg_name[REGNO (x)], file);
7184 break;
7185 case 1:
7186 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7187 goto normal;
7188 fputs (qi_reg_name[REGNO (x)], file);
7189 break;
7190 case 0:
7191 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7192 goto normal;
7193 fputs (qi_high_reg_name[REGNO (x)], file);
7194 break;
7195 default:
7196 abort ();
7197 }
7198}
7199
7200/* Locate some local-dynamic symbol still in use by this function
7201 so that we can print its name in some tls_local_dynamic_base
7202 pattern. */
7203
7204static const char *
7205get_some_local_dynamic_name (void)
7206{
7207 rtx insn;
7208
7209 if (cfun->machine->some_ld_name)
7210 return cfun->machine->some_ld_name;
7211
7212 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7213 if (INSN_P (insn)
7214 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7215 return cfun->machine->some_ld_name;
7216
7217 abort ();
7218}
7219
7220static int
7221get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7222{
7223 rtx x = *px;
7224
7225 if (GET_CODE (x) == SYMBOL_REF
7226 && local_dynamic_symbolic_operand (x, Pmode))
7227 {
7228 cfun->machine->some_ld_name = XSTR (x, 0);
7229 return 1;
7230 }
7231
7232 return 0;
7233}
7234
7235/* Meaning of CODE:
7236 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7237 C -- print opcode suffix for set/cmov insn.
7238 c -- like C, but print reversed condition
7239 F,f -- likewise, but for floating-point.
7240 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7241 otherwise nothing
7242 R -- print the prefix for register names.
7243 z -- print the opcode suffix for the size of the current operand.
7244 * -- print a star (in certain assembler syntax)
7245 A -- print an absolute memory reference.
7246 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7247   s -- print a shift double count, followed by the assembler's argument
7248 delimiter.
7249 b -- print the QImode name of the register for the indicated operand.
7250 %b0 would print %al if operands[0] is reg 0.
7251 w -- likewise, print the HImode name of the register.
7252 k -- likewise, print the SImode name of the register.
7253 q -- likewise, print the DImode name of the register.
7254 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7255 y -- print "st(0)" instead of "st" as a register.
7256 D -- print condition for SSE cmp instruction.
7257 P -- if PIC, print an @PLT suffix.
7258 X -- don't print any sort of PIC '@' suffix for a symbol.
7259 & -- print some in-use local-dynamic symbol name.
7260 */
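/* So, for instance, "%k1" in an insn template prints the SImode name of
   operand 1 (e.g. %ecx for register cx), "%z0" adds the 387 size suffix
   appropriate for operand 0, and "%+" (handled below) may emit a ds/cs
   branch hint prefix.  */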
7261
7262void
7263print_operand (FILE *file, rtx x, int code)
7264{
7265 if (code)
7266 {
7267 switch (code)
7268 {
7269 case '*':
7270 if (ASSEMBLER_DIALECT == ASM_ATT)
7271 putc ('*', file);
7272 return;
7273
7274 case '&':
7275 assemble_name (file, get_some_local_dynamic_name ());
7276 return;
7277
7278 case 'A':
7279 if (ASSEMBLER_DIALECT == ASM_ATT)
7280 putc ('*', file);
7281 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7282 {
7283 /* Intel syntax. For absolute addresses, registers should not
7284	       be surrounded by brackets.  */
7285 if (GET_CODE (x) != REG)
7286 {
7287 putc ('[', file);
7288 PRINT_OPERAND (file, x, 0);
7289 putc (']', file);
7290 return;
7291 }
7292 }
7293 else
7294 abort ();
7295
7296 PRINT_OPERAND (file, x, 0);
7297 return;
7298
7299
7300 case 'L':
7301 if (ASSEMBLER_DIALECT == ASM_ATT)
7302 putc ('l', file);
7303 return;
7304
7305 case 'W':
7306 if (ASSEMBLER_DIALECT == ASM_ATT)
7307 putc ('w', file);
7308 return;
7309
7310 case 'B':
7311 if (ASSEMBLER_DIALECT == ASM_ATT)
7312 putc ('b', file);
7313 return;
7314
7315 case 'Q':
7316 if (ASSEMBLER_DIALECT == ASM_ATT)
7317 putc ('l', file);
7318 return;
7319
7320 case 'S':
7321 if (ASSEMBLER_DIALECT == ASM_ATT)
7322 putc ('s', file);
7323 return;
7324
7325 case 'T':
7326 if (ASSEMBLER_DIALECT == ASM_ATT)
7327 putc ('t', file);
7328 return;
7329
7330 case 'z':
7331 /* 387 opcodes don't get size suffixes if the operands are
7332 registers. */
7333 if (STACK_REG_P (x))
7334 return;
7335
7336 /* Likewise if using Intel opcodes. */
7337 if (ASSEMBLER_DIALECT == ASM_INTEL)
7338 return;
7339
7340	  /* Derive the opcode suffix from the size of the operand.  */
7341 switch (GET_MODE_SIZE (GET_MODE (x)))
7342 {
7343 case 2:
7344#ifdef HAVE_GAS_FILDS_FISTS
7345 putc ('s', file);
7346#endif
7347 return;
7348
7349 case 4:
7350 if (GET_MODE (x) == SFmode)
7351 {
7352 putc ('s', file);
7353 return;
7354 }
7355 else
7356 putc ('l', file);
7357 return;
7358
7359 case 12:
7360 case 16:
7361 putc ('t', file);
7362 return;
7363
7364 case 8:
7365 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7366 {
7367#ifdef GAS_MNEMONICS
7368 putc ('q', file);
7369#else
7370 putc ('l', file);
7371 putc ('l', file);
7372#endif
7373 }
7374 else
7375 putc ('l', file);
7376 return;
7377
7378 default:
7379 abort ();
7380 }
7381
7382 case 'b':
7383 case 'w':
7384 case 'k':
7385 case 'q':
7386 case 'h':
7387 case 'y':
7388 case 'X':
7389 case 'P':
7390 break;
7391
7392 case 's':
7393 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7394 {
7395 PRINT_OPERAND (file, x, 0);
7396 putc (',', file);
7397 }
7398 return;
7399
7400 case 'D':
7401 /* Little bit of braindamage here. The SSE compare instructions
7402	     use completely different names for the comparisons than the
7403	     fp conditional moves do.  */
7404 switch (GET_CODE (x))
7405 {
7406 case EQ:
7407 case UNEQ:
7408 fputs ("eq", file);
7409 break;
7410 case LT:
7411 case UNLT:
7412 fputs ("lt", file);
7413 break;
7414 case LE:
7415 case UNLE:
7416 fputs ("le", file);
7417 break;
7418 case UNORDERED:
7419 fputs ("unord", file);
7420 break;
7421 case NE:
7422 case LTGT:
7423 fputs ("neq", file);
7424 break;
7425 case UNGE:
7426 case GE:
7427 fputs ("nlt", file);
7428 break;
7429 case UNGT:
7430 case GT:
7431 fputs ("nle", file);
7432 break;
7433 case ORDERED:
7434 fputs ("ord", file);
7435 break;
7436 default:
7437 abort ();
7438 break;
7439 }
7440 return;
7441 case 'O':
7442#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7443 if (ASSEMBLER_DIALECT == ASM_ATT)
7444 {
7445 switch (GET_MODE (x))
7446 {
7447 case HImode: putc ('w', file); break;
7448 case SImode:
7449 case SFmode: putc ('l', file); break;
7450 case DImode:
7451 case DFmode: putc ('q', file); break;
7452 default: abort ();
7453 }
7454 putc ('.', file);
7455 }
7456#endif
7457 return;
7458 case 'C':
7459 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7460 return;
7461 case 'F':
7462#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7463 if (ASSEMBLER_DIALECT == ASM_ATT)
7464 putc ('.', file);
7465#endif
7466 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7467 return;
7468
7469 /* Like above, but reverse condition */
7470 case 'c':
7471 /* Check to see if argument to %c is really a constant
7472 and not a condition code which needs to be reversed. */
7473 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7474 {
7475 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7476 return;
7477 }
7478 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7479 return;
7480 case 'f':
7481#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7482 if (ASSEMBLER_DIALECT == ASM_ATT)
7483 putc ('.', file);
7484#endif
7485 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7486 return;
7487 case '+':
7488 {
7489 rtx x;
7490
7491 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7492 return;
7493
7494 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7495 if (x)
7496 {
7497 int pred_val = INTVAL (XEXP (x, 0));
7498
7499 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7500 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7501 {
7502 int taken = pred_val > REG_BR_PROB_BASE / 2;
7503 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7504
7505		    /* Emit hints only in the case the default branch prediction
7506		       heuristics would fail.  */
7507 if (taken != cputaken)
7508 {
7509 /* We use 3e (DS) prefix for taken branches and
7510 2e (CS) prefix for not taken branches. */
7511 if (taken)
7512 fputs ("ds ; ", file);
7513 else
7514 fputs ("cs ; ", file);
7515 }
7516 }
7517 }
7518 return;
7519 }
7520 default:
7521 output_operand_lossage ("invalid operand code `%c'", code);
7522 }
7523 }
7524
7525 if (GET_CODE (x) == REG)
7526 print_reg (x, code, file);
7527
7528 else if (GET_CODE (x) == MEM)
7529 {
7530 /* No `byte ptr' prefix for call instructions. */
7531 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7532 {
7533 const char * size;
7534 switch (GET_MODE_SIZE (GET_MODE (x)))
7535 {
7536 case 1: size = "BYTE"; break;
7537 case 2: size = "WORD"; break;
7538 case 4: size = "DWORD"; break;
7539 case 8: size = "QWORD"; break;
7540 case 12: size = "XWORD"; break;
7541 case 16: size = "XMMWORD"; break;
7542 default:
7543 abort ();
7544 }
7545
7546 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7547 if (code == 'b')
7548 size = "BYTE";
7549 else if (code == 'w')
7550 size = "WORD";
7551 else if (code == 'k')
7552 size = "DWORD";
7553
7554 fputs (size, file);
7555 fputs (" PTR ", file);
7556 }
7557
7558 x = XEXP (x, 0);
7559 /* Avoid (%rip) for call operands. */
7560 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7561 && GET_CODE (x) != CONST_INT)
7562 output_addr_const (file, x);
7563 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7564 output_operand_lossage ("invalid constraints for operand");
7565 else
7566 output_address (x);
7567 }
7568
7569 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7570 {
7571 REAL_VALUE_TYPE r;
7572 long l;
7573
7574 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7575 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7576
7577 if (ASSEMBLER_DIALECT == ASM_ATT)
7578 putc ('$', file);
7579 fprintf (file, "0x%08lx", l);
7580 }
7581
7582 /* These float cases don't actually occur as immediate operands. */
7583 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7584 {
7585 char dstr[30];
7586
7587 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7588 fprintf (file, "%s", dstr);
7589 }
7590
7591 else if (GET_CODE (x) == CONST_DOUBLE
7592 && GET_MODE (x) == XFmode)
7593 {
7594 char dstr[30];
7595
7596 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7597 fprintf (file, "%s", dstr);
7598 }
7599
7600 else
7601 {
7602 if (code != 'P')
7603 {
7604 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7605 {
7606 if (ASSEMBLER_DIALECT == ASM_ATT)
7607 putc ('$', file);
7608 }
7609 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7610 || GET_CODE (x) == LABEL_REF)
7611 {
7612 if (ASSEMBLER_DIALECT == ASM_ATT)
7613 putc ('$', file);
7614 else
7615 fputs ("OFFSET FLAT:", file);
7616 }
7617 }
7618 if (GET_CODE (x) == CONST_INT)
7619 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7620 else if (flag_pic)
7621 output_pic_addr_const (file, x, code);
7622 else
7623 output_addr_const (file, x);
7624 }
7625}
7626
7627/* Print a memory operand whose address is ADDR. */
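/* For example, the decomposition base = %ebx, index = %eax, scale = 4,
   disp = 12 is printed as "12(%ebx,%eax,4)" in AT&T syntax and as roughly
   "[ebx+12+eax*4]" in Intel syntax.  */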
7628
7629void
7630print_operand_address (FILE *file, rtx addr)
7631{
7632 struct ix86_address parts;
7633 rtx base, index, disp;
7634 int scale;
7635
7636 if (! ix86_decompose_address (addr, &parts))
7637 abort ();
7638
7639 base = parts.base;
7640 index = parts.index;
7641 disp = parts.disp;
7642 scale = parts.scale;
7643
7644 switch (parts.seg)
7645 {
7646 case SEG_DEFAULT:
7647 break;
7648 case SEG_FS:
7649 case SEG_GS:
7650 if (USER_LABEL_PREFIX[0] == 0)
7651 putc ('%', file);
7652 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7653 break;
7654 default:
7655 abort ();
7656 }
7657
7658 if (!base && !index)
7659 {
7660      /* A displacement-only address requires special attention.  */
7661
7662 if (GET_CODE (disp) == CONST_INT)
7663 {
7664 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7665 {
7666 if (USER_LABEL_PREFIX[0] == 0)
7667 putc ('%', file);
7668 fputs ("ds:", file);
7669 }
7670 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7671 }
7672 else if (flag_pic)
7673 output_pic_addr_const (file, disp, 0);
7674 else
7675 output_addr_const (file, disp);
7676
7677 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7678 if (TARGET_64BIT
7679 && ((GET_CODE (disp) == SYMBOL_REF
7680 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7681 || GET_CODE (disp) == LABEL_REF
7682 || (GET_CODE (disp) == CONST
7683 && GET_CODE (XEXP (disp, 0)) == PLUS
7684 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7685 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7686 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7687 fputs ("(%rip)", file);
7688 }
7689 else
7690 {
7691 if (ASSEMBLER_DIALECT == ASM_ATT)
7692 {
7693 if (disp)
7694 {
7695 if (flag_pic)
7696 output_pic_addr_const (file, disp, 0);
7697 else if (GET_CODE (disp) == LABEL_REF)
7698 output_asm_label (disp);
7699 else
7700 output_addr_const (file, disp);
7701 }
7702
7703 putc ('(', file);
7704 if (base)
7705 print_reg (base, 0, file);
7706 if (index)
7707 {
7708 putc (',', file);
7709 print_reg (index, 0, file);
7710 if (scale != 1)
7711 fprintf (file, ",%d", scale);
7712 }
7713 putc (')', file);
7714 }
7715 else
7716 {
7717 rtx offset = NULL_RTX;
7718
7719 if (disp)
7720 {
7721 /* Pull out the offset of a symbol; print any symbol itself. */
7722 if (GET_CODE (disp) == CONST
7723 && GET_CODE (XEXP (disp, 0)) == PLUS
7724 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7725 {
7726 offset = XEXP (XEXP (disp, 0), 1);
7727 disp = gen_rtx_CONST (VOIDmode,
7728 XEXP (XEXP (disp, 0), 0));
7729 }
7730
7731 if (flag_pic)
7732 output_pic_addr_const (file, disp, 0);
7733 else if (GET_CODE (disp) == LABEL_REF)
7734 output_asm_label (disp);
7735 else if (GET_CODE (disp) == CONST_INT)
7736 offset = disp;
7737 else
7738 output_addr_const (file, disp);
7739 }
7740
7741 putc ('[', file);
7742 if (base)
7743 {
7744 print_reg (base, 0, file);
7745 if (offset)
7746 {
7747 if (INTVAL (offset) >= 0)
7748 putc ('+', file);
7749 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7750 }
7751 }
7752 else if (offset)
7753 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7754 else
7755 putc ('0', file);
7756
7757 if (index)
7758 {
7759 putc ('+', file);
7760 print_reg (index, 0, file);
7761 if (scale != 1)
7762 fprintf (file, "*%d", scale);
7763 }
7764 putc (']', file);
7765 }
7766 }
7767}
7768
7769bool
7770output_addr_const_extra (FILE *file, rtx x)
7771{
7772 rtx op;
7773
7774 if (GET_CODE (x) != UNSPEC)
7775 return false;
7776
7777 op = XVECEXP (x, 0, 0);
7778 switch (XINT (x, 1))
7779 {
7780 case UNSPEC_GOTTPOFF:
7781 output_addr_const (file, op);
7782 /* FIXME: This might be @TPOFF in Sun ld. */
7783 fputs ("@GOTTPOFF", file);
7784 break;
7785 case UNSPEC_TPOFF:
7786 output_addr_const (file, op);
7787 fputs ("@TPOFF", file);
7788 break;
7789 case UNSPEC_NTPOFF:
7790 output_addr_const (file, op);
7791 if (TARGET_64BIT)
7792 fputs ("@TPOFF", file);
7793 else
7794 fputs ("@NTPOFF", file);
7795 break;
7796 case UNSPEC_DTPOFF:
7797 output_addr_const (file, op);
7798 fputs ("@DTPOFF", file);
7799 break;
7800 case UNSPEC_GOTNTPOFF:
7801 output_addr_const (file, op);
7802 if (TARGET_64BIT)
7803 fputs ("@GOTTPOFF(%rip)", file);
7804 else
7805 fputs ("@GOTNTPOFF", file);
7806 break;
7807 case UNSPEC_INDNTPOFF:
7808 output_addr_const (file, op);
7809 fputs ("@INDNTPOFF", file);
7810 break;
7811
7812 default:
7813 return false;
7814 }
7815
7816 return true;
7817}
7818
7819/* Split one or more DImode RTL references into pairs of SImode
7820 references. The RTL can be REG, offsettable MEM, integer constant, or
7821 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7822 split and "num" is its length. lo_half and hi_half are output arrays
7823 that parallel "operands". */
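/* For example, a DImode register operand is split into its SImode halves
   (subreg:SI (reg:DI x) 0) and (subreg:SI (reg:DI x) 4), while a DImode
   memory operand is split into the same address at byte offsets 0 and 4.  */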
7824
7825void
7826split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7827{
7828 while (num--)
7829 {
7830 rtx op = operands[num];
7831
7832      /* simplify_subreg refuses to split volatile memory addresses,
7833	 but we still have to handle them.  */
7834 if (GET_CODE (op) == MEM)
7835 {
7836 lo_half[num] = adjust_address (op, SImode, 0);
7837 hi_half[num] = adjust_address (op, SImode, 4);
7838 }
7839 else
7840 {
7841 lo_half[num] = simplify_gen_subreg (SImode, op,
7842 GET_MODE (op) == VOIDmode
7843 ? DImode : GET_MODE (op), 0);
7844 hi_half[num] = simplify_gen_subreg (SImode, op,
7845 GET_MODE (op) == VOIDmode
7846 ? DImode : GET_MODE (op), 4);
7847 }
7848 }
7849}
7850/* Split one or more TImode RTL references into pairs of DImode
7851 references. The RTL can be REG, offsettable MEM, integer constant, or
7852 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7853 split and "num" is its length. lo_half and hi_half are output arrays
7854 that parallel "operands". */
7855
7856void
7857split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7858{
7859 while (num--)
7860 {
7861 rtx op = operands[num];
7862
7863	      /* simplify_subreg refuses to split volatile memory references, but we
7864	         still have to handle them.  */
7865 if (GET_CODE (op) == MEM)
7866 {
7867 lo_half[num] = adjust_address (op, DImode, 0);
7868 hi_half[num] = adjust_address (op, DImode, 8);
7869 }
7870 else
7871 {
7872 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7873 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7874 }
7875 }
7876}
7877
7878/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7879 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7880 is the expression of the binary operation. The output may either be
7881 emitted here, or returned to the caller, like all output_* functions.
7882
7883 There is no guarantee that the operands are the same mode, as they
7884 might be within FLOAT or FLOAT_EXTEND expressions. */
7885
7886#ifndef SYSV386_COMPAT
7887/* Set to 1 for compatibility with brain-damaged assemblers. No-one
7888 wants to fix the assemblers because that causes incompatibility
7889 with gcc. No-one wants to fix gcc because that causes
7890 incompatibility with assemblers... You can use the option of
7891 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7892#define SYSV386_COMPAT 1
7893#endif
7894
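/* Note on the templates returned below (added for illustration): the
   "{att|intel}" construct in an output template selects between the two
   assembler dialects; output_asm_insn prints the text before '|' for
   AT&T syntax and the text after it for Intel syntax.  For example, the
   SSE case returns something like "addss\t{%2, %0|%0, %2}", which comes
   out as "addss %2, %0" in AT&T mode and "addss %0, %2" in Intel mode.  */
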
7895const char *
7896output_387_binary_op (rtx insn, rtx *operands)
7897{
7898 static char buf[30];
7899 const char *p;
7900 const char *ssep;
7901 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7902
7903#ifdef ENABLE_CHECKING
7904	  /* Even if we do not want to check the inputs, this documents the input
7905	     constraints, which helps in understanding the following code.  */
7906 if (STACK_REG_P (operands[0])
7907 && ((REG_P (operands[1])
7908 && REGNO (operands[0]) == REGNO (operands[1])
7909 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7910 || (REG_P (operands[2])
7911 && REGNO (operands[0]) == REGNO (operands[2])
7912 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7913 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7914 ; /* ok */
7915 else if (!is_sse)
7916 abort ();
7917#endif
7918
7919 switch (GET_CODE (operands[3]))
7920 {
7921 case PLUS:
7922 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7923 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7924 p = "fiadd";
7925 else
7926 p = "fadd";
7927 ssep = "add";
7928 break;
7929
7930 case MINUS:
7931 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7932 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7933 p = "fisub";
7934 else
7935 p = "fsub";
7936 ssep = "sub";
7937 break;
7938
7939 case MULT:
7940 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7941 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7942 p = "fimul";
7943 else
7944 p = "fmul";
7945 ssep = "mul";
7946 break;
7947
7948 case DIV:
7949 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7950 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7951 p = "fidiv";
7952 else
7953 p = "fdiv";
7954 ssep = "div";
7955 break;
7956
7957 default:
7958 abort ();
7959 }
7960
7961 if (is_sse)
7962 {
7963 strcpy (buf, ssep);
7964 if (GET_MODE (operands[0]) == SFmode)
7965 strcat (buf, "ss\t{%2, %0|%0, %2}");
7966 else
7967 strcat (buf, "sd\t{%2, %0|%0, %2}");
7968 return buf;
7969 }
7970 strcpy (buf, p);
7971
7972 switch (GET_CODE (operands[3]))
7973 {
7974 case MULT:
7975 case PLUS:
7976 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7977 {
7978 rtx temp = operands[2];
7979 operands[2] = operands[1];
7980 operands[1] = temp;
7981 }
7982
7983	      /* We now know that operands[0] == operands[1].  */
7984
7985 if (GET_CODE (operands[2]) == MEM)
7986 {
7987 p = "%z2\t%2";
7988 break;
7989 }
7990
7991 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7992 {
7993 if (STACK_TOP_P (operands[0]))
7994 /* How is it that we are storing to a dead operand[2]?
7995 Well, presumably operands[1] is dead too. We can't
7996 store the result to st(0) as st(0) gets popped on this
7997 instruction. Instead store to operands[2] (which I
7998 think has to be st(1)). st(1) will be popped later.
7999 gcc <= 2.8.1 didn't have this check and generated
8000 assembly code that the Unixware assembler rejected. */
8001 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8002 else
8003 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8004 break;
8005 }
8006
8007 if (STACK_TOP_P (operands[0]))
8008 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8009 else
8010 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8011 break;
8012
8013 case MINUS:
8014 case DIV:
8015 if (GET_CODE (operands[1]) == MEM)
8016 {
8017 p = "r%z1\t%1";
8018 break;
8019 }
8020
8021 if (GET_CODE (operands[2]) == MEM)
8022 {
8023 p = "%z2\t%2";
8024 break;
8025 }
8026
8027 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8028 {
8029#if SYSV386_COMPAT
8030 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8031 derived assemblers, confusingly reverse the direction of
8032 the operation for fsub{r} and fdiv{r} when the
8033 destination register is not st(0). The Intel assembler
8034 doesn't have this brain damage. Read !SYSV386_COMPAT to
8035 figure out what the hardware really does. */
8036 if (STACK_TOP_P (operands[0]))
8037 p = "{p\t%0, %2|rp\t%2, %0}";
8038 else
8039 p = "{rp\t%2, %0|p\t%0, %2}";
8040#else
8041 if (STACK_TOP_P (operands[0]))
8042 /* As above for fmul/fadd, we can't store to st(0). */
8043 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8044 else
8045 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8046#endif
8047 break;
8048 }
8049
8050 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8051 {
8052#if SYSV386_COMPAT
8053 if (STACK_TOP_P (operands[0]))
8054 p = "{rp\t%0, %1|p\t%1, %0}";
8055 else
8056 p = "{p\t%1, %0|rp\t%0, %1}";
8057#else
8058 if (STACK_TOP_P (operands[0]))
8059 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8060 else
8061 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8062#endif
8063 break;
8064 }
8065
8066 if (STACK_TOP_P (operands[0]))
8067 {
8068 if (STACK_TOP_P (operands[1]))
8069 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8070 else
8071 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8072 break;
8073 }
8074 else if (STACK_TOP_P (operands[1]))
8075 {
8076#if SYSV386_COMPAT
8077 p = "{\t%1, %0|r\t%0, %1}";
8078#else
8079 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8080#endif
8081 }
8082 else
8083 {
8084#if SYSV386_COMPAT
8085 p = "{r\t%2, %0|\t%0, %2}";
8086#else
8087 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8088#endif
8089 }
8090 break;
8091
8092 default:
8093 abort ();
8094 }
8095
8096 strcat (buf, p);
8097 return buf;
8098}
8099
8100/* Output code to initialize the control word copies used by the
8101   trunc?f?i patterns.  NORMAL is set to the current control word, while
8102   ROUND_DOWN is set to a control word with rounding forced toward zero.  */
8103void
8104emit_i387_cw_initialization (rtx normal, rtx round_down)
8105{
8106 rtx reg = gen_reg_rtx (HImode);
8107
8108 emit_insn (gen_x86_fnstcw_1 (normal));
8109 emit_move_insn (reg, normal);
8110 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8111 && !TARGET_64BIT)
8112 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8113 else
8114 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8115 emit_move_insn (round_down, reg);
8116}
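
/* Illustrative sketch of the code emitted above (operand names are
   placeholders):

       fnstcw  NORMAL            ; save the live control word
       mov     NORMAL, %reg
       or      $0xc00, %reg      ; force the rounding-control field
       mov     %reg, ROUND_DOWN  ;   (bits 10-11) to 0b11

   On targets without partial register stalls the value 0x0c is instead
   inserted into bits 8-15 of the register (gen_movsi_insv_1), which
   likewise sets the rounding-control bits.  */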
8117
8118/* Output code for INSN to convert a float to a signed int. OPERANDS
8119 are the insn operands. The output may be [HSD]Imode and the input
8120 operand may be [SDX]Fmode. */
8121
8122const char *
8123output_fix_trunc (rtx insn, rtx *operands)
8124{
8125 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8126 int dimode_p = GET_MODE (operands[0]) == DImode;
8127
8128 /* Jump through a hoop or two for DImode, since the hardware has no
8129 non-popping instruction. We used to do this a different way, but
8130 that was somewhat fragile and broke with post-reload splitters. */
8131 if (dimode_p && !stack_top_dies)
8132 output_asm_insn ("fld\t%y1", operands);
8133
8134 if (!STACK_TOP_P (operands[1]))
8135 abort ();
8136
8137 if (GET_CODE (operands[0]) != MEM)
8138 abort ();
8139
8140 output_asm_insn ("fldcw\t%3", operands);
8141 if (stack_top_dies || dimode_p)
8142 output_asm_insn ("fistp%z0\t%0", operands);
8143 else
8144 output_asm_insn ("fist%z0\t%0", operands);
8145 output_asm_insn ("fldcw\t%2", operands);
8146
8147 return "";
8148}
8149
8150/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8151 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8152 when fucom should be used. */
8153
8154const char *
8155output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8156{
8157 int stack_top_dies;
8158 rtx cmp_op0 = operands[0];
8159 rtx cmp_op1 = operands[1];
8160 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8161
8162 if (eflags_p == 2)
8163 {
8164 cmp_op0 = cmp_op1;
8165 cmp_op1 = operands[2];
8166 }
8167 if (is_sse)
8168 {
8169 if (GET_MODE (operands[0]) == SFmode)
8170 if (unordered_p)
8171 return "ucomiss\t{%1, %0|%0, %1}";
8172 else
8173 return "comiss\t{%1, %0|%0, %1}";
8174 else
8175 if (unordered_p)
8176 return "ucomisd\t{%1, %0|%0, %1}";
8177 else
8178 return "comisd\t{%1, %0|%0, %1}";
8179 }
8180
8181 if (! STACK_TOP_P (cmp_op0))
8182 abort ();
8183
8184 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8185
8186 if (STACK_REG_P (cmp_op1)
8187 && stack_top_dies
8188 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8189 && REGNO (cmp_op1) != FIRST_STACK_REG)
8190 {
8191	      /* If the top of the 387 stack dies, and the other operand is also
8192	         a stack register that dies, then this must be a `fcompp' float
8193	         compare.  */
8194
8195 if (eflags_p == 1)
8196 {
8197 /* There is no double popping fcomi variant. Fortunately,
8198 eflags is immune from the fstp's cc clobbering. */
8199 if (unordered_p)
8200 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8201 else
8202 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8203 return "fstp\t%y0";
8204 }
8205 else
8206 {
8207 if (eflags_p == 2)
8208 {
8209 if (unordered_p)
8210 return "fucompp\n\tfnstsw\t%0";
8211 else
8212 return "fcompp\n\tfnstsw\t%0";
8213 }
8214 else
8215 {
8216 if (unordered_p)
8217 return "fucompp";
8218 else
8219 return "fcompp";
8220 }
8221 }
8222 }
8223 else
8224 {
8225 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8226
8227 static const char * const alt[24] =
8228 {
8229 "fcom%z1\t%y1",
8230 "fcomp%z1\t%y1",
8231 "fucom%z1\t%y1",
8232 "fucomp%z1\t%y1",
8233
8234 "ficom%z1\t%y1",
8235 "ficomp%z1\t%y1",
8236 NULL,
8237 NULL,
8238
8239 "fcomi\t{%y1, %0|%0, %y1}",
8240 "fcomip\t{%y1, %0|%0, %y1}",
8241 "fucomi\t{%y1, %0|%0, %y1}",
8242 "fucomip\t{%y1, %0|%0, %y1}",
8243
8244 NULL,
8245 NULL,
8246 NULL,
8247 NULL,
8248
8249 "fcom%z2\t%y2\n\tfnstsw\t%0",
8250 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8251 "fucom%z2\t%y2\n\tfnstsw\t%0",
8252 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8253
8254 "ficom%z2\t%y2\n\tfnstsw\t%0",
8255 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8256 NULL,
8257 NULL
8258 };
8259
8260 int mask;
8261 const char *ret;
8262
8263 mask = eflags_p << 3;
8264 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8265 mask |= unordered_p << 1;
8266 mask |= stack_top_dies;
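
      /* Worked example (added for illustration): with eflags_p == 1,
         a floating-point operand, unordered_p set and a dying stack top,
         mask is (1 << 3) | (1 << 1) | 1 == 11, selecting
         "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */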
8267
8268 if (mask >= 24)
8269 abort ();
8270 ret = alt[mask];
8271 if (ret == NULL)
8272 abort ();
8273
8274 return ret;
8275 }
8276}
8277
8278void
8279ix86_output_addr_vec_elt (FILE *file, int value)
8280{
8281 const char *directive = ASM_LONG;
8282
8283 if (TARGET_64BIT)
8284 {
8285#ifdef ASM_QUAD
8286 directive = ASM_QUAD;
8287#else
8288 abort ();
8289#endif
8290 }
8291
8292 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8293}
8294
8295void
8296ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8297{
8298 if (TARGET_64BIT)
8299 fprintf (file, "%s%s%d-%s%d\n",
8300 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8301 else if (HAVE_AS_GOTOFF_IN_DATA)
8302 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8303#if TARGET_MACHO
8304 else if (TARGET_MACHO)
8305 {
8306 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8307 machopic_output_function_base_name (file);
8308 fprintf(file, "\n");
8309 }
8310#endif
8311 else
8312 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8313 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8314}
8315
8316/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8317 for the target. */
8318
8319void
8320ix86_expand_clear (rtx dest)
8321{
8322 rtx tmp;
8323
8324 /* We play register width games, which are only valid after reload. */
8325 if (!reload_completed)
8326 abort ();
8327
8328 /* Avoid HImode and its attendant prefix byte. */
8329 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8330 dest = gen_rtx_REG (SImode, REGNO (dest));
8331
8332 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8333
8334 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8335 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8336 {
8337 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8338 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8339 }
8340
8341 emit_insn (tmp);
8342}
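
/* For illustration (not in the original sources): clearing %eax with
   this routine normally produces "xor %eax, %eax"; the parallel with
   the flags-register clobber is what lets the insn match the
   movsi_xor/movdi_xor_rex64 patterns.  A plain "mov $0, %eax" is kept
   only when TARGET_USE_MOV0 is set and we are not optimizing for
   size.  */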
8343
8344/* X is an unchanging MEM. If it is a constant pool reference, return
8345 the constant pool rtx, else NULL. */
8346
8347static rtx
8348maybe_get_pool_constant (rtx x)
8349{
8350 x = ix86_delegitimize_address (XEXP (x, 0));
8351
8352 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8353 return get_pool_constant (x);
8354
8355 return NULL_RTX;
8356}
8357
8358void
8359ix86_expand_move (enum machine_mode mode, rtx operands[])
8360{
8361 int strict = (reload_in_progress || reload_completed);
8362 rtx op0, op1;
8363 enum tls_model model;
8364
8365 op0 = operands[0];
8366 op1 = operands[1];
8367
8368 model = tls_symbolic_operand (op1, Pmode);
8369 if (model)
8370 {
8371 op1 = legitimize_tls_address (op1, model, true);
8372 op1 = force_operand (op1, op0);
8373 if (op1 == op0)
8374 return;
8375 }
8376
8377 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8378 {
8379#if TARGET_MACHO
8380 if (MACHOPIC_PURE)
8381 {
8382 rtx temp = ((reload_in_progress
8383 || ((op0 && GET_CODE (op0) == REG)
8384 && mode == Pmode))
8385 ? op0 : gen_reg_rtx (Pmode));
8386 op1 = machopic_indirect_data_reference (op1, temp);
8387 op1 = machopic_legitimize_pic_address (op1, mode,
8388 temp == op1 ? 0 : temp);
8389 }
8390 else if (MACHOPIC_INDIRECT)
8391 op1 = machopic_indirect_data_reference (op1, 0);
8392 if (op0 == op1)
8393 return;
8394#else
8395 if (GET_CODE (op0) == MEM)
8396 op1 = force_reg (Pmode, op1);
8397 else
8398 {
8399 rtx temp = op0;
8400 if (GET_CODE (temp) != REG)
8401 temp = gen_reg_rtx (Pmode);
8402 temp = legitimize_pic_address (op1, temp);
8403 if (temp == op0)
8404 return;
8405 op1 = temp;
8406 }
8407#endif /* TARGET_MACHO */
8408 }
8409 else
8410 {
8411 if (GET_CODE (op0) == MEM
8412 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8413 || !push_operand (op0, mode))
8414 && GET_CODE (op1) == MEM)
8415 op1 = force_reg (mode, op1);
8416
8417 if (push_operand (op0, mode)
8418 && ! general_no_elim_operand (op1, mode))
8419 op1 = copy_to_mode_reg (mode, op1);
8420
8421	      /* Force large constants in 64-bit compilation into a register
8422	         to get them CSEed.  */
8423 if (TARGET_64BIT && mode == DImode
8424 && immediate_operand (op1, mode)
8425 && !x86_64_zero_extended_value (op1)
8426 && !register_operand (op0, mode)
8427 && optimize && !reload_completed && !reload_in_progress)
8428 op1 = copy_to_mode_reg (mode, op1);
8429
8430 if (FLOAT_MODE_P (mode))
8431 {
8432 /* If we are loading a floating point constant to a register,
8433 force the value to memory now, since we'll get better code
8434 out the back end. */
8435
8436 if (strict)
8437 ;
8438 else if (GET_CODE (op1) == CONST_DOUBLE)
8439 {
8440 op1 = validize_mem (force_const_mem (mode, op1));
8441 if (!register_operand (op0, mode))
8442 {
8443 rtx temp = gen_reg_rtx (mode);
8444 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8445 emit_move_insn (op0, temp);
8446 return;
8447 }
8448 }
8449 }
8450 }
8451
8452 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8453}
8454
8455void
8456ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8457{
8458 /* Force constants other than zero into memory. We do not know how
8459 the instructions used to build constants modify the upper 64 bits
8460	     of the register; once we have that information we may be able
8461 to handle some of them more efficiently. */
8462 if ((reload_in_progress | reload_completed) == 0
8463 && register_operand (operands[0], mode)
8464 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8465 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8466
8467 /* Make operand1 a register if it isn't already. */
8468 if (!no_new_pseudos
8469 && !register_operand (operands[0], mode)
8470 && !register_operand (operands[1], mode))
8471 {
8472 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8473 emit_move_insn (operands[0], temp);
8474 return;
8475 }
8476
8477 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8478}
8479
8480/* Attempt to expand a binary operator. Make the expansion closer to the
8481   actual machine than just general_operand, which would allow 3 separate
8482 memory references (one output, two input) in a single insn. */
8483
8484void
8485ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8486 rtx operands[])
8487{
8488 int matching_memory;
8489 rtx src1, src2, dst, op, clob;
8490
8491 dst = operands[0];
8492 src1 = operands[1];
8493 src2 = operands[2];
8494
8495 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8496 if (GET_RTX_CLASS (code) == 'c'
8497 && (rtx_equal_p (dst, src2)
8498 || immediate_operand (src1, mode)))
8499 {
8500 rtx temp = src1;
8501 src1 = src2;
8502 src2 = temp;
8503 }
8504
8505 /* If the destination is memory, and we do not have matching source
8506 operands, do things in registers. */
8507 matching_memory = 0;
8508 if (GET_CODE (dst) == MEM)
8509 {
8510 if (rtx_equal_p (dst, src1))
8511 matching_memory = 1;
8512 else if (GET_RTX_CLASS (code) == 'c'
8513 && rtx_equal_p (dst, src2))
8514 matching_memory = 2;
8515 else
8516 dst = gen_reg_rtx (mode);
8517 }
8518
8519 /* Both source operands cannot be in memory. */
8520 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8521 {
8522 if (matching_memory != 2)
8523 src2 = force_reg (mode, src2);
8524 else
8525 src1 = force_reg (mode, src1);
8526 }
8527
8528 /* If the operation is not commutable, source 1 cannot be a constant
8529 or non-matching memory. */
8530 if ((CONSTANT_P (src1)
8531 || (!matching_memory && GET_CODE (src1) == MEM))
8532 && GET_RTX_CLASS (code) != 'c')
8533 src1 = force_reg (mode, src1);
8534
8535 /* If optimizing, copy to regs to improve CSE */
8536 if (optimize && ! no_new_pseudos)
8537 {
8538 if (GET_CODE (dst) == MEM)
8539 dst = gen_reg_rtx (mode);
8540 if (GET_CODE (src1) == MEM)
8541 src1 = force_reg (mode, src1);
8542 if (GET_CODE (src2) == MEM)
8543 src2 = force_reg (mode, src2);
8544 }
8545
8546 /* Emit the instruction. */
8547
8548 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8549 if (reload_in_progress)
8550 {
8551 /* Reload doesn't know about the flags register, and doesn't know that
8552 it doesn't want to clobber it. We can only do this with PLUS. */
8553 if (code != PLUS)
8554 abort ();
8555 emit_insn (op);
8556 }
8557 else
8558 {
8559 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8560 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8561 }
8562
8563 /* Fix up the destination if needed. */
8564 if (dst != operands[0])
8565 emit_move_insn (operands[0], dst);
8566}
8567
8568/* Return TRUE or FALSE depending on whether the binary operator meets the
8569 appropriate constraints. */
8570
8571int
8572ix86_binary_operator_ok (enum rtx_code code,
8573 enum machine_mode mode ATTRIBUTE_UNUSED,
8574 rtx operands[3])
8575{
8576 /* Both source operands cannot be in memory. */
8577 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8578 return 0;
8579 /* If the operation is not commutable, source 1 cannot be a constant. */
8580 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8581 return 0;
8582 /* If the destination is memory, we must have a matching source operand. */
8583 if (GET_CODE (operands[0]) == MEM
8584 && ! (rtx_equal_p (operands[0], operands[1])
8585 || (GET_RTX_CLASS (code) == 'c'
8586 && rtx_equal_p (operands[0], operands[2]))))
8587 return 0;
8588 /* If the operation is not commutable and the source 1 is memory, we must
8589 have a matching destination. */
8590 if (GET_CODE (operands[1]) == MEM
8591 && GET_RTX_CLASS (code) != 'c'
8592 && ! rtx_equal_p (operands[0], operands[1]))
8593 return 0;
8594 return 1;
8595}
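
/* For illustration (not in the original sources): with the checks
   above, "reg = mem1 + mem2" and "mem1 = mem2 + reg" are rejected,
   while "mem = mem + reg" (destination matching the first source) and
   "reg = reg + mem" are accepted.  */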
8596
8597/* Attempt to expand a unary operator. Make the expansion closer to the
8598   actual machine than just general_operand, which would allow 2 separate
8599 memory references (one output, one input) in a single insn. */
8600
8601void
8602ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8603 rtx operands[])
8604{
8605 int matching_memory;
8606 rtx src, dst, op, clob;
8607
8608 dst = operands[0];
8609 src = operands[1];
8610
8611 /* If the destination is memory, and we do not have matching source
8612 operands, do things in registers. */
8613 matching_memory = 0;
8614 if (GET_CODE (dst) == MEM)
8615 {
8616 if (rtx_equal_p (dst, src))
8617 matching_memory = 1;
8618 else
8619 dst = gen_reg_rtx (mode);
8620 }
8621
8622 /* When source operand is memory, destination must match. */
8623 if (!matching_memory && GET_CODE (src) == MEM)
8624 src = force_reg (mode, src);
8625
8626 /* If optimizing, copy to regs to improve CSE */
8627 if (optimize && ! no_new_pseudos)
8628 {
8629 if (GET_CODE (dst) == MEM)
8630 dst = gen_reg_rtx (mode);
8631 if (GET_CODE (src) == MEM)
8632 src = force_reg (mode, src);
8633 }
8634
8635 /* Emit the instruction. */
8636
8637 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8638 if (reload_in_progress || code == NOT)
8639 {
8640 /* Reload doesn't know about the flags register, and doesn't know that
8641 it doesn't want to clobber it. */
8642 if (code != NOT)
8643 abort ();
8644 emit_insn (op);
8645 }
8646 else
8647 {
8648 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8649 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8650 }
8651
8652 /* Fix up the destination if needed. */
8653 if (dst != operands[0])
8654 emit_move_insn (operands[0], dst);
8655}
8656
8657/* Return TRUE or FALSE depending on whether the unary operator meets the
8658 appropriate constraints. */
8659
8660int
8661ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8662 enum machine_mode mode ATTRIBUTE_UNUSED,
8663 rtx operands[2] ATTRIBUTE_UNUSED)
8664{
8665 /* If one of operands is memory, source and destination must match. */
8666 if ((GET_CODE (operands[0]) == MEM
8667 || GET_CODE (operands[1]) == MEM)
8668 && ! rtx_equal_p (operands[0], operands[1]))
8669 return FALSE;
8670 return TRUE;
8671}
8672
8673/* Return TRUE or FALSE depending on whether the first SET in INSN
8674   has a source and destination with matching CC modes, and whether the
8675 CC mode is at least as constrained as REQ_MODE. */
8676
8677int
8678ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8679{
8680 rtx set;
8681 enum machine_mode set_mode;
8682
8683 set = PATTERN (insn);
8684 if (GET_CODE (set) == PARALLEL)
8685 set = XVECEXP (set, 0, 0);
8686 if (GET_CODE (set) != SET)
8687 abort ();
8688 if (GET_CODE (SET_SRC (set)) != COMPARE)
8689 abort ();
8690
8691 set_mode = GET_MODE (SET_DEST (set));
8692 switch (set_mode)
8693 {
8694 case CCNOmode:
8695 if (req_mode != CCNOmode
8696 && (req_mode != CCmode
8697 || XEXP (SET_SRC (set), 1) != const0_rtx))
8698 return 0;
8699 break;
8700 case CCmode:
8701 if (req_mode == CCGCmode)
8702 return 0;
8703 /* FALLTHRU */
8704 case CCGCmode:
8705 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8706 return 0;
8707 /* FALLTHRU */
8708 case CCGOCmode:
8709 if (req_mode == CCZmode)
8710 return 0;
8711 /* FALLTHRU */
8712 case CCZmode:
8713 break;
8714
8715 default:
8716 abort ();
8717 }
8718
8719 return (GET_MODE (SET_SRC (set)) == set_mode);
8720}
8721
8722/* Generate insn patterns to do an integer compare of OPERANDS. */
8723
8724static rtx
8725ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8726{
8727 enum machine_mode cmpmode;
8728 rtx tmp, flags;
8729
8730 cmpmode = SELECT_CC_MODE (code, op0, op1);
8731 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8732
8733 /* This is very simple, but making the interface the same as in the
8734 FP case makes the rest of the code easier. */
8735 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8736 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8737
8738 /* Return the test that should be put into the flags user, i.e.
8739 the bcc, scc, or cmov instruction. */
8740 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8741}
8742
8743/* Figure out whether to use ordered or unordered fp comparisons.
8744 Return the appropriate mode to use. */
8745
8746enum machine_mode
8747ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8748{
8749 /* ??? In order to make all comparisons reversible, we do all comparisons
8750 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8751     trapping from nontrapping forms of comparisons, we can make inequality
8752 comparisons trapping again, since it results in better code when using
8753 FCOM based compares. */
8754 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8755}
8756
8757enum machine_mode
8758ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8759{
8760 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8761 return ix86_fp_compare_mode (code);
8762 switch (code)
8763 {
8764 /* Only zero flag is needed. */
8765 case EQ: /* ZF=0 */
8766 case NE: /* ZF!=0 */
8767 return CCZmode;
8768 /* Codes needing carry flag. */
8769 case GEU: /* CF=0 */
8770 case GTU: /* CF=0 & ZF=0 */
8771 case LTU: /* CF=1 */
8772 case LEU: /* CF=1 | ZF=1 */
8773 return CCmode;
8774 /* Codes possibly doable only with sign flag when
8775 comparing against zero. */
8776 case GE: /* SF=OF or SF=0 */
8777 case LT: /* SF<>OF or SF=1 */
8778 if (op1 == const0_rtx)
8779 return CCGOCmode;
8780 else
8781 /* For other cases Carry flag is not required. */
8782 return CCGCmode;
8783	    /* Codes doable only with the sign flag when comparing
8784	       against zero, but we lack a jump instruction for it,
8785	       so we need to use relational tests against the overflow
8786	       flag, which thus needs to be zero.  */
8787 case GT: /* ZF=0 & SF=OF */
8788 case LE: /* ZF=1 | SF<>OF */
8789 if (op1 == const0_rtx)
8790 return CCNOmode;
8791 else
8792 return CCGCmode;
8793	      /* The strcmp pattern does (use flags), and combine may ask us for the
8794	         proper mode.  */
8795 case USE:
8796 return CCmode;
8797 default:
8798 abort ();
8799 }
8800}
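
/* Examples of the selection above (added for illustration):
   (eq a b) needs only ZF and gets CCZmode; (ltu a b) needs the carry
   flag and gets CCmode; (ge a 0) can be tested with the sign flag
   alone and gets CCGOCmode; (gt a 0) gets CCNOmode; (lt a b) with a
   nonzero B falls back to the fully general CCGCmode.  */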
8801
8802/* Return the fixed registers used for condition codes. */
8803
8804static bool
8805ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8806{
8807 *p1 = FLAGS_REG;
8808 *p2 = FPSR_REG;
8809 return true;
8810}
8811
8812/* If two condition code modes are compatible, return a condition code
8813 mode which is compatible with both. Otherwise, return
8814 VOIDmode. */
8815
8816static enum machine_mode
8817ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8818{
8819 if (m1 == m2)
8820 return m1;
8821
8822 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8823 return VOIDmode;
8824
8825 if ((m1 == CCGCmode && m2 == CCGOCmode)
8826 || (m1 == CCGOCmode && m2 == CCGCmode))
8827 return CCGCmode;
8828
8829 switch (m1)
8830 {
8831 default:
8832 abort ();
8833
8834 case CCmode:
8835 case CCGCmode:
8836 case CCGOCmode:
8837 case CCNOmode:
8838 case CCZmode:
8839 switch (m2)
8840 {
8841 default:
8842 return VOIDmode;
8843
8844 case CCmode:
8845 case CCGCmode:
8846 case CCGOCmode:
8847 case CCNOmode:
8848 case CCZmode:
8849 return CCmode;
8850 }
8851
8852 case CCFPmode:
8853 case CCFPUmode:
8854 /* These are only compatible with themselves, which we already
8855 checked above. */
8856 return VOIDmode;
8857 }
8858}
8859
8860/* Return true if we should use an FCOMI instruction for this fp comparison. */
8861
8862int
8863ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8864{
8865 enum rtx_code swapped_code = swap_condition (code);
8866 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8867 || (ix86_fp_comparison_cost (swapped_code)
8868 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8869}
8870
8871/* Swap, force into registers, or otherwise massage the two operands
8872 to a fp comparison. The operands are updated in place; the new
8873 comparison code is returned. */
8874
8875static enum rtx_code
8876ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8877{
8878 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8879 rtx op0 = *pop0, op1 = *pop1;
8880 enum machine_mode op_mode = GET_MODE (op0);
8881 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8882
8883 /* All of the unordered compare instructions only work on registers.
8884 The same is true of the XFmode compare instructions. The same is
8885 true of the fcomi compare instructions. */
8886
8887 if (!is_sse
8888 && (fpcmp_mode == CCFPUmode
8889 || op_mode == XFmode
8890 || ix86_use_fcomi_compare (code)))
8891 {
8892 op0 = force_reg (op_mode, op0);
8893 op1 = force_reg (op_mode, op1);
8894 }
8895 else
8896 {
8897 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8898 things around if they appear profitable, otherwise force op0
8899 into a register. */
8900
8901 if (standard_80387_constant_p (op0) == 0
8902 || (GET_CODE (op0) == MEM
8903 && ! (standard_80387_constant_p (op1) == 0
8904 || GET_CODE (op1) == MEM)))
8905 {
8906 rtx tmp;
8907 tmp = op0, op0 = op1, op1 = tmp;
8908 code = swap_condition (code);
8909 }
8910
8911 if (GET_CODE (op0) != REG)
8912 op0 = force_reg (op_mode, op0);
8913
8914 if (CONSTANT_P (op1))
8915 {
8916 if (standard_80387_constant_p (op1))
8917 op1 = force_reg (op_mode, op1);
8918 else
8919 op1 = validize_mem (force_const_mem (op_mode, op1));
8920 }
8921 }
8922
8923 /* Try to rearrange the comparison to make it cheaper. */
8924 if (ix86_fp_comparison_cost (code)
8925 > ix86_fp_comparison_cost (swap_condition (code))
8926 && (GET_CODE (op1) == REG || !no_new_pseudos))
8927 {
8928 rtx tmp;
8929 tmp = op0, op0 = op1, op1 = tmp;
8930 code = swap_condition (code);
8931 if (GET_CODE (op0) != REG)
8932 op0 = force_reg (op_mode, op0);
8933 }
8934
8935 *pop0 = op0;
8936 *pop1 = op1;
8937 return code;
8938}
8939
8940/* Convert comparison codes we use to represent FP comparison to integer
8941 code that will result in proper branch. Return UNKNOWN if no such code
8942 is available. */
8943static enum rtx_code
8944ix86_fp_compare_code_to_integer (enum rtx_code code)
8945{
8946 switch (code)
8947 {
8948 case GT:
8949 return GTU;
8950 case GE:
8951 return GEU;
8952 case ORDERED:
8953 case UNORDERED:
8954 return code;
8955 break;
8956 case UNEQ:
8957 return EQ;
8958 break;
8959 case UNLT:
8960 return LTU;
8961 break;
8962 case UNLE:
8963 return LEU;
8964 break;
8965 case LTGT:
8966 return NE;
8967 break;
8968 default:
8969 return UNKNOWN;
8970 }
8971}
8972
8973/* Split comparison code CODE into comparisons we can do using branch
8974   instructions.  BYPASS_CODE is the comparison code for a branch that
8975   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
8976   is not required, its code is set to NIL.
8977 We never require more than two branches. */
8978static void
8979ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8980 enum rtx_code *first_code,
8981 enum rtx_code *second_code)
8982{
8983 *first_code = code;
8984 *bypass_code = NIL;
8985 *second_code = NIL;
8986
8987 /* The fcomi comparison sets flags as follows:
8988
8989 cmp ZF PF CF
8990 > 0 0 0
8991 < 0 0 1
8992 = 1 0 0
8993 un 1 1 1 */
8994
8995 switch (code)
8996 {
8997 case GT: /* GTU - CF=0 & ZF=0 */
8998 case GE: /* GEU - CF=0 */
8999 case ORDERED: /* PF=0 */
9000 case UNORDERED: /* PF=1 */
9001 case UNEQ: /* EQ - ZF=1 */
9002 case UNLT: /* LTU - CF=1 */
9003 case UNLE: /* LEU - CF=1 | ZF=1 */
9004 case LTGT: /* EQ - ZF=0 */
9005 break;
9006 case LT: /* LTU - CF=1 - fails on unordered */
9007 *first_code = UNLT;
9008 *bypass_code = UNORDERED;
9009 break;
9010 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9011 *first_code = UNLE;
9012 *bypass_code = UNORDERED;
9013 break;
9014 case EQ: /* EQ - ZF=1 - fails on unordered */
9015 *first_code = UNEQ;
9016 *bypass_code = UNORDERED;
9017 break;
9018 case NE: /* NE - ZF=0 - fails on unordered */
9019 *first_code = LTGT;
9020 *second_code = UNORDERED;
9021 break;
9022 case UNGE: /* GEU - CF=0 - fails on unordered */
9023 *first_code = GE;
9024 *second_code = UNORDERED;
9025 break;
9026 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9027 *first_code = GT;
9028 *second_code = UNORDERED;
9029 break;
9030 default:
9031 abort ();
9032 }
9033 if (!TARGET_IEEE_FP)
9034 {
9035 *second_code = NIL;
9036 *bypass_code = NIL;
9037 }
9038}
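
/* Worked example (added for illustration): for an IEEE LT comparison
   the table above yields first_code = UNLT and bypass_code = UNORDERED,
   i.e. first branch around the whole test when the operands compare
   unordered (LT must then be false) and only then branch on CF=1.  A
   NE comparison instead needs two taken branches, LTGT plus a second
   branch on UNORDERED, because NE is also true for unordered
   operands.  */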
9039
9040/* Return the cost of a comparison done using fcom + arithmetic operations on AX.
9041   All the following functions use the number of instructions as their cost metric.
9042   In the future this should be tweaked to compute bytes for optimize_size and
9043   to take into account the performance of various instructions on various CPUs.  */
9044static int
9045ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9046{
9047 if (!TARGET_IEEE_FP)
9048 return 4;
9049 /* The cost of code output by ix86_expand_fp_compare. */
9050 switch (code)
9051 {
9052 case UNLE:
9053 case UNLT:
9054 case LTGT:
9055 case GT:
9056 case GE:
9057 case UNORDERED:
9058 case ORDERED:
9059 case UNEQ:
9060 return 4;
9061 break;
9062 case LT:
9063 case NE:
9064 case EQ:
9065 case UNGE:
9066 return 5;
9067 break;
9068 case LE:
9069 case UNGT:
9070 return 6;
9071 break;
9072 default:
9073 abort ();
9074 }
9075}
9076
9077/* Return cost of comparison done using fcomi operation.
9078 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9079static int
9080ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9081{
9082 enum rtx_code bypass_code, first_code, second_code;
9083	  /* Return an arbitrarily high cost when the instruction is not supported -
9084	     this prevents gcc from using it.  */
9085 if (!TARGET_CMOVE)
9086 return 1024;
9087 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9088 return (bypass_code != NIL || second_code != NIL) + 2;
9089}
9090
9091/* Return cost of comparison done using sahf operation.
9092 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9093static int
9094ix86_fp_comparison_sahf_cost (enum rtx_code code)
9095{
9096 enum rtx_code bypass_code, first_code, second_code;
9097	  /* Return an arbitrarily high cost when the instruction is not preferred -
9098	     this keeps gcc from using it.  */
9099 if (!TARGET_USE_SAHF && !optimize_size)
9100 return 1024;
9101 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9102 return (bypass_code != NIL || second_code != NIL) + 3;
9103}
9104
9105/* Compute cost of the comparison done using any method.
9106 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9107static int
9108ix86_fp_comparison_cost (enum rtx_code code)
9109{
9110 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9111 int min;
9112
9113 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9114 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9115
9116 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9117 if (min > sahf_cost)
9118 min = sahf_cost;
9119 if (min > fcomi_cost)
9120 min = fcomi_cost;
9121 return min;
9122}
9123
9124/* Generate insn patterns to do a floating point compare of OPERANDS. */
9125
9126static rtx
9127ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9128 rtx *second_test, rtx *bypass_test)
9129{
9130 enum machine_mode fpcmp_mode, intcmp_mode;
9131 rtx tmp, tmp2;
9132 int cost = ix86_fp_comparison_cost (code);
9133 enum rtx_code bypass_code, first_code, second_code;
9134
9135 fpcmp_mode = ix86_fp_compare_mode (code);
9136 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9137
9138 if (second_test)
9139 *second_test = NULL_RTX;
9140 if (bypass_test)
9141 *bypass_test = NULL_RTX;
9142
9143 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9144
9145 /* Do fcomi/sahf based test when profitable. */
9146 if ((bypass_code == NIL || bypass_test)
9147 && (second_code == NIL || second_test)
9148 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9149 {
9150 if (TARGET_CMOVE)
9151 {
9152 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9153 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9154 tmp);
9155 emit_insn (tmp);
9156 }
9157 else
9158 {
9159 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9160 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9161 if (!scratch)
9162 scratch = gen_reg_rtx (HImode);
9163 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9164 emit_insn (gen_x86_sahf_1 (scratch));
9165 }
9166
9167 /* The FP codes work out to act like unsigned. */
9168 intcmp_mode = fpcmp_mode;
9169 code = first_code;
9170 if (bypass_code != NIL)
9171 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9172 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9173 const0_rtx);
9174 if (second_code != NIL)
9175 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9176 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9177 const0_rtx);
9178 }
9179 else
9180 {
9181 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9182 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9183 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9184 if (!scratch)
9185 scratch = gen_reg_rtx (HImode);
9186 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9187
9188 /* In the unordered case, we have to check C2 for NaN's, which
9189 doesn't happen to work out to anything nice combination-wise.
9190 So do some bit twiddling on the value we've got in AH to come
9191 up with an appropriate set of condition codes. */
9192
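      /* Reminder (added for illustration): after fnstsw the FPU
         condition bits land in AH as C0 = 0x01, C2 = 0x04 and
         C3 = 0x40, so the masks used below (0x01, 0x04, 0x05, 0x40,
         0x44, 0x45) test the combinations of C0/C2/C3 that mirror
         CF/PF/ZF after an fcomi.  */
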
9193 intcmp_mode = CCNOmode;
9194 switch (code)
9195 {
9196 case GT:
9197 case UNGT:
9198 if (code == GT || !TARGET_IEEE_FP)
9199 {
9200 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9201 code = EQ;
9202 }
9203 else
9204 {
9205 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9206 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9207 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9208 intcmp_mode = CCmode;
9209 code = GEU;
9210 }
9211 break;
9212 case LT:
9213 case UNLT:
9214 if (code == LT && TARGET_IEEE_FP)
9215 {
9216 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9217 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9218 intcmp_mode = CCmode;
9219 code = EQ;
9220 }
9221 else
9222 {
9223 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9224 code = NE;
9225 }
9226 break;
9227 case GE:
9228 case UNGE:
9229 if (code == GE || !TARGET_IEEE_FP)
9230 {
9231 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9232 code = EQ;
9233 }
9234 else
9235 {
9236 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9237 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9238 GEN_INT (0x01)));
9239 code = NE;
9240 }
9241 break;
9242 case LE:
9243 case UNLE:
9244 if (code == LE && TARGET_IEEE_FP)
9245 {
9246 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9247 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9248 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9249 intcmp_mode = CCmode;
9250 code = LTU;
9251 }
9252 else
9253 {
9254 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9255 code = NE;
9256 }
9257 break;
9258 case EQ:
9259 case UNEQ:
9260 if (code == EQ && TARGET_IEEE_FP)
9261 {
9262 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9263 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9264 intcmp_mode = CCmode;
9265 code = EQ;
9266 }
9267 else
9268 {
9269 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9270 code = NE;
9271 break;
9272 }
9273 break;
9274 case NE:
9275 case LTGT:
9276 if (code == NE && TARGET_IEEE_FP)
9277 {
9278 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9279 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9280 GEN_INT (0x40)));
9281 code = NE;
9282 }
9283 else
9284 {
9285 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9286 code = EQ;
9287 }
9288 break;
9289
9290 case UNORDERED:
9291 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9292 code = NE;
9293 break;
9294 case ORDERED:
9295 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9296 code = EQ;
9297 break;
9298
9299 default:
9300 abort ();
9301 }
9302 }
9303
9304 /* Return the test that should be put into the flags user, i.e.
9305 the bcc, scc, or cmov instruction. */
9306 return gen_rtx_fmt_ee (code, VOIDmode,
9307 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9308 const0_rtx);
9309}
9310
9311rtx
9312ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9313{
9314 rtx op0, op1, ret;
9315 op0 = ix86_compare_op0;
9316 op1 = ix86_compare_op1;
9317
9318 if (second_test)
9319 *second_test = NULL_RTX;
9320 if (bypass_test)
9321 *bypass_test = NULL_RTX;
9322
9323 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9324 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9325 second_test, bypass_test);
9326 else
9327 ret = ix86_expand_int_compare (code, op0, op1);
9328
9329 return ret;
9330}
9331
9332/* Return true if the CODE will result in nontrivial jump sequence. */
9333bool
9334ix86_fp_jump_nontrivial_p (enum rtx_code code)
9335{
9336 enum rtx_code bypass_code, first_code, second_code;
9337 if (!TARGET_CMOVE)
9338 return true;
9339 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9340 return bypass_code != NIL || second_code != NIL;
9341}
9342
9343void
9344ix86_expand_branch (enum rtx_code code, rtx label)
9345{
9346 rtx tmp;
9347
9348 switch (GET_MODE (ix86_compare_op0))
9349 {
9350 case QImode:
9351 case HImode:
9352 case SImode:
9353 simple:
9354 tmp = ix86_expand_compare (code, NULL, NULL);
9355 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9356 gen_rtx_LABEL_REF (VOIDmode, label),
9357 pc_rtx);
9358 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9359 return;
9360
9361 case SFmode:
9362 case DFmode:
9363 case XFmode:
9364 {
9365 rtvec vec;
9366 int use_fcomi;
9367 enum rtx_code bypass_code, first_code, second_code;
9368
9369 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9370 &ix86_compare_op1);
9371
9372 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9373
9374 /* Check whether we will use the natural sequence with one jump. If
9375	           so, we can expand the jump early.  Otherwise delay expansion by
9376	           creating a compound insn so as not to confuse the optimizers.  */
9377 if (bypass_code == NIL && second_code == NIL
9378 && TARGET_CMOVE)
9379 {
9380 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9381 gen_rtx_LABEL_REF (VOIDmode, label),
9382 pc_rtx, NULL_RTX);
9383 }
9384 else
9385 {
9386 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9387 ix86_compare_op0, ix86_compare_op1);
9388 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9389 gen_rtx_LABEL_REF (VOIDmode, label),
9390 pc_rtx);
9391 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9392
9393 use_fcomi = ix86_use_fcomi_compare (code);
9394 vec = rtvec_alloc (3 + !use_fcomi);
9395 RTVEC_ELT (vec, 0) = tmp;
9396 RTVEC_ELT (vec, 1)
9397 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9398 RTVEC_ELT (vec, 2)
9399 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9400 if (! use_fcomi)
9401 RTVEC_ELT (vec, 3)
9402 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9403
9404 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9405 }
9406 return;
9407 }
9408
9409 case DImode:
9410 if (TARGET_64BIT)
9411 goto simple;
9412 /* Expand DImode branch into multiple compare+branch. */
9413 {
9414 rtx lo[2], hi[2], label2;
9415 enum rtx_code code1, code2, code3;
9416
9417 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9418 {
9419 tmp = ix86_compare_op0;
9420 ix86_compare_op0 = ix86_compare_op1;
9421 ix86_compare_op1 = tmp;
9422 code = swap_condition (code);
9423 }
9424 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9425 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9426
9427 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9428 avoid two branches. This costs one extra insn, so disable when
9429 optimizing for size. */
9430
9431 if ((code == EQ || code == NE)
9432 && (!optimize_size
9433 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9434 {
9435 rtx xor0, xor1;
9436
9437 xor1 = hi[0];
9438 if (hi[1] != const0_rtx)
9439 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9440 NULL_RTX, 0, OPTAB_WIDEN);
9441
9442 xor0 = lo[0];
9443 if (lo[1] != const0_rtx)
9444 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9445 NULL_RTX, 0, OPTAB_WIDEN);
9446
9447 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9448 NULL_RTX, 0, OPTAB_WIDEN);
9449
9450 ix86_compare_op0 = tmp;
9451 ix86_compare_op1 = const0_rtx;
9452 ix86_expand_branch (code, label);
9453 return;
9454 }
9455
9456 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9457 op1 is a constant and the low word is zero, then we can just
9458 examine the high word. */
9459
9460 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9461 switch (code)
9462 {
9463 case LT: case LTU: case GE: case GEU:
9464 ix86_compare_op0 = hi[0];
9465 ix86_compare_op1 = hi[1];
9466 ix86_expand_branch (code, label);
9467 return;
9468 default:
9469 break;
9470 }
9471
9472 /* Otherwise, we need two or three jumps. */
9473
9474 label2 = gen_label_rtx ();
9475
9476 code1 = code;
9477 code2 = swap_condition (code);
9478 code3 = unsigned_condition (code);
9479
9480 switch (code)
9481 {
9482 case LT: case GT: case LTU: case GTU:
9483 break;
9484
9485 case LE: code1 = LT; code2 = GT; break;
9486 case GE: code1 = GT; code2 = LT; break;
9487 case LEU: code1 = LTU; code2 = GTU; break;
9488 case GEU: code1 = GTU; code2 = LTU; break;
9489
9490 case EQ: code1 = NIL; code2 = NE; break;
9491 case NE: code2 = NIL; break;
9492
9493 default:
9494 abort ();
9495 }
9496
9497 /*
9498 * a < b =>
9499 * if (hi(a) < hi(b)) goto true;
9500 * if (hi(a) > hi(b)) goto false;
9501 * if (lo(a) < lo(b)) goto true;
9502 * false:
9503 */
9504
9505 ix86_compare_op0 = hi[0];
9506 ix86_compare_op1 = hi[1];
9507
9508 if (code1 != NIL)
9509 ix86_expand_branch (code1, label);
9510 if (code2 != NIL)
9511 ix86_expand_branch (code2, label2);
9512
9513 ix86_compare_op0 = lo[0];
9514 ix86_compare_op1 = lo[1];
9515 ix86_expand_branch (code3, label);
9516
9517 if (code2 != NIL)
9518 emit_label (label2);
9519 return;
9520 }
9521
9522 default:
9523 abort ();
9524 }
9525}
9526
9527/* Split branch based on floating point condition. */
9528void
9529ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9530 rtx target1, rtx target2, rtx tmp)
9531{
9532 rtx second, bypass;
9533 rtx label = NULL_RTX;
9534 rtx condition;
9535 int bypass_probability = -1, second_probability = -1, probability = -1;
9536 rtx i;
9537
9538 if (target2 != pc_rtx)
9539 {
9540 rtx tmp = target2;
9541 code = reverse_condition_maybe_unordered (code);
9542 target2 = target1;
9543 target1 = tmp;
9544 }
9545
9546 condition = ix86_expand_fp_compare (code, op1, op2,
9547 tmp, &second, &bypass);
9548
9549 if (split_branch_probability >= 0)
9550 {
9551 /* Distribute the probabilities across the jumps.
9552	         Assume that BYPASS and SECOND always test
9553	         for UNORDERED.  */
9554 probability = split_branch_probability;
9555
9556	      /* A value of 1 is low enough that there is no need for the probability
9557	         to be updated.  Later we may run some experiments and see
9558 if unordered values are more frequent in practice. */
9559 if (bypass)
9560 bypass_probability = 1;
9561 if (second)
9562 second_probability = 1;
9563 }
9564 if (bypass != NULL_RTX)
9565 {
9566 label = gen_label_rtx ();
9567 i = emit_jump_insn (gen_rtx_SET
9568 (VOIDmode, pc_rtx,
9569 gen_rtx_IF_THEN_ELSE (VOIDmode,
9570 bypass,
9571 gen_rtx_LABEL_REF (VOIDmode,
9572 label),
9573 pc_rtx)));
9574 if (bypass_probability >= 0)
9575 REG_NOTES (i)
9576 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9577 GEN_INT (bypass_probability),
9578 REG_NOTES (i));
9579 }
9580 i = emit_jump_insn (gen_rtx_SET
9581 (VOIDmode, pc_rtx,
9582 gen_rtx_IF_THEN_ELSE (VOIDmode,
9583 condition, target1, target2)));
9584 if (probability >= 0)
9585 REG_NOTES (i)
9586 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9587 GEN_INT (probability),
9588 REG_NOTES (i));
9589 if (second != NULL_RTX)
9590 {
9591 i = emit_jump_insn (gen_rtx_SET
9592 (VOIDmode, pc_rtx,
9593 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9594 target2)));
9595 if (second_probability >= 0)
9596 REG_NOTES (i)
9597 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9598 GEN_INT (second_probability),
9599 REG_NOTES (i));
9600 }
9601 if (label != NULL_RTX)
9602 emit_label (label);
9603}
9604
9605int
9606ix86_expand_setcc (enum rtx_code code, rtx dest)
9607{
9608 rtx ret, tmp, tmpreg, equiv;
9609 rtx second_test, bypass_test;
9610
9611 if (GET_MODE (ix86_compare_op0) == DImode
9612 && !TARGET_64BIT)
9613 return 0; /* FAIL */
9614
9615 if (GET_MODE (dest) != QImode)
9616 abort ();
9617
9618 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9619 PUT_MODE (ret, QImode);
9620
9621 tmp = dest;
9622 tmpreg = dest;
9623
9624 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9625 if (bypass_test || second_test)
9626 {
9627 rtx test = second_test;
9628 int bypass = 0;
9629 rtx tmp2 = gen_reg_rtx (QImode);
9630 if (bypass_test)
9631 {
9632 if (second_test)
9633 abort ();
9634 test = bypass_test;
9635 bypass = 1;
9636 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9637 }
9638 PUT_MODE (test, QImode);
9639 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9640
9641 if (bypass)
9642 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9643 else
9644 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9645 }
9646
9647 /* Attach a REG_EQUAL note describing the comparison result. */
9648 equiv = simplify_gen_relational (code, QImode,
9649 GET_MODE (ix86_compare_op0),
9650 ix86_compare_op0, ix86_compare_op1);
9651 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9652
9653 return 1; /* DONE */
9654}
9655
9656/* Expand a comparison setting or clearing the carry flag.  Return true
9657   when successful, and set *pop to the comparison operation.  */
9658static bool
9659ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9660{
9661 enum machine_mode mode =
9662 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9663
9664	  /* Do not handle DImode compares that go through the special path.  Also we
9665	     can't deal with FP compares yet.  This is possible to add.  */
9666 if ((mode == DImode && !TARGET_64BIT))
9667 return false;
9668 if (FLOAT_MODE_P (mode))
9669 {
9670 rtx second_test = NULL, bypass_test = NULL;
9671 rtx compare_op, compare_seq;
9672
9673	      /* Shortcut: the following common codes never translate into carry flag compares.  */
9674 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9675 || code == ORDERED || code == UNORDERED)
9676 return false;
9677
9678	      /* These comparisons require the zero flag; swap the operands so they won't.  */
9679 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9680 && !TARGET_IEEE_FP)
9681 {
9682 rtx tmp = op0;
9683 op0 = op1;
9684 op1 = tmp;
9685 code = swap_condition (code);
9686 }
9687
9688	      /* Try to expand the comparison and verify that we end up with a carry
9689	         flag based comparison.  This fails to be true only when we decide to
9690	         expand the comparison using arithmetic, which is not a common scenario.  */
9691 start_sequence ();
9692 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9693 &second_test, &bypass_test);
9694 compare_seq = get_insns ();
9695 end_sequence ();
9696
9697 if (second_test || bypass_test)
9698 return false;
9699 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9700 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9701 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9702 else
9703 code = GET_CODE (compare_op);
9704 if (code != LTU && code != GEU)
9705 return false;
9706 emit_insn (compare_seq);
9707 *pop = compare_op;
9708 return true;
9709 }
9710 if (!INTEGRAL_MODE_P (mode))
9711 return false;
9712 switch (code)
9713 {
9714 case LTU:
9715 case GEU:
9716 break;
9717
9718 /* Convert a==0 into (unsigned)a<1. */
9719 case EQ:
9720 case NE:
9721 if (op1 != const0_rtx)
9722 return false;
9723 op1 = const1_rtx;
9724 code = (code == EQ ? LTU : GEU);
9725 break;
9726
9727	    /* Convert a>b into b<a or a>=b+1.  */
9728 case GTU:
9729 case LEU:
9730 if (GET_CODE (op1) == CONST_INT)
9731 {
9732 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9733	          /* Bail out on overflow.  We could still swap the operands, but
9734	             that would force loading the constant into a register.  */
9735 if (op1 == const0_rtx
9736 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9737 return false;
9738 code = (code == GTU ? GEU : LTU);
9739 }
9740 else
9741 {
9742 rtx tmp = op1;
9743 op1 = op0;
9744 op0 = tmp;
9745 code = (code == GTU ? LTU : GEU);
9746 }
9747 break;
9748
9749 /* Convert a>=0 into (unsigned)a<0x80000000. */
9750 case LT:
9751 case GE:
9752 if (mode == DImode || op1 != const0_rtx)
9753 return false;
9754 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9755 code = (code == LT ? GEU : LTU);
9756 break;
9757 case LE:
9758 case GT:
9759 if (mode == DImode || op1 != constm1_rtx)
9760 return false;
9761 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9762 code = (code == LE ? GEU : LTU);
9763 break;
9764
9765 default:
9766 return false;
9767 }
9768 /* Swapping operands may cause constant to appear as first operand. */
9769 if (!nonimmediate_operand (op0, VOIDmode))
9770 {
9771 if (no_new_pseudos)
9772 return false;
9773 op0 = force_reg (mode, op0);
9774 }
9775 ix86_compare_op0 = op0;
9776 ix86_compare_op1 = op1;
9777 *pop = ix86_expand_compare (code, NULL, NULL);
9778 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9779 abort ();
9780 return true;
9781}
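
/* For illustration (not in the original sources): the EQ/NE rewrite
   above turns "a == 0" into the unsigned test "a < 1"; the resulting
   "cmp $1, a" sets the carry flag exactly when a is zero, which is
   what the sbb-based sequences in ix86_expand_int_movcc rely on.  */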
9782
9783int
9784ix86_expand_int_movcc (rtx operands[])
9785{
9786 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9787 rtx compare_seq, compare_op;
9788 rtx second_test, bypass_test;
9789 enum machine_mode mode = GET_MODE (operands[0]);
9790	  bool sign_bit_compare_p = false;
9791
9792 start_sequence ();
9793 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9794 compare_seq = get_insns ();
9795 end_sequence ();
9796
9797 compare_code = GET_CODE (compare_op);
9798
9799 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9800 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9801 sign_bit_compare_p = true;
9802
9803 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9804 HImode insns, we'd be swallowed in word prefix ops. */
9805
9806 if ((mode != HImode || TARGET_FAST_PREFIX)
9807 && (mode != DImode || TARGET_64BIT)
9808 && GET_CODE (operands[2]) == CONST_INT
9809 && GET_CODE (operands[3]) == CONST_INT)
9810 {
9811 rtx out = operands[0];
9812 HOST_WIDE_INT ct = INTVAL (operands[2]);
9813 HOST_WIDE_INT cf = INTVAL (operands[3]);
9814 HOST_WIDE_INT diff;
9815
9816 diff = ct - cf;
9817	      /* Sign bit compares are better done using shifts than by using
9818	         sbb.  */
9819 if (sign_bit_compare_p
9820 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9821 ix86_compare_op1, &compare_op))
9822 {
9823 /* Detect overlap between destination and compare sources. */
9824 rtx tmp = out;
9825
9826 if (!sign_bit_compare_p)
9827 {
9828 bool fpcmp = false;
9829
9830 compare_code = GET_CODE (compare_op);
9831
9832 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9833 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9834 {
9835 fpcmp = true;
9836 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9837 }
9838
9839 /* To simplify rest of code, restrict to the GEU case. */
9840 if (compare_code == LTU)
9841 {
9842 HOST_WIDE_INT tmp = ct;
9843 ct = cf;
9844 cf = tmp;
9845 compare_code = reverse_condition (compare_code);
9846 code = reverse_condition (code);
9847 }
9848 else
9849 {
9850 if (fpcmp)
9851 PUT_CODE (compare_op,
9852 reverse_condition_maybe_unordered
9853 (GET_CODE (compare_op)));
9854 else
9855 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9856 }
9857 diff = ct - cf;
9858
9859 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9860 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9861 tmp = gen_reg_rtx (mode);
9862
9863 if (mode == DImode)
9864 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9865 else
9866 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9867 }
9868 else
9869 {
9870 if (code == GT || code == GE)
9871 code = reverse_condition (code);
9872 else
9873 {
9874 HOST_WIDE_INT tmp = ct;
9875 ct = cf;
9876 cf = tmp;
9877 diff = ct - cf;
9878 }
9879 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9880 ix86_compare_op1, VOIDmode, 0, -1);
9881 }
9882
9883 if (diff == 1)
9884 {
9885 /*
9886 * cmpl op0,op1
9887 * sbbl dest,dest
9888 * [addl dest, ct]
9889 *
9890 * Size 5 - 8.
9891 */
9892 if (ct)
9893 tmp = expand_simple_binop (mode, PLUS,
9894 tmp, GEN_INT (ct),
9895 copy_rtx (tmp), 1, OPTAB_DIRECT);
9896 }
9897 else if (cf == -1)
9898 {
9899 /*
9900 * cmpl op0,op1
9901 * sbbl dest,dest
9902 * orl $ct, dest
9903 *
9904 * Size 8.
9905 */
9906 tmp = expand_simple_binop (mode, IOR,
9907 tmp, GEN_INT (ct),
9908 copy_rtx (tmp), 1, OPTAB_DIRECT);
9909 }
9910 else if (diff == -1 && ct)
9911 {
9912 /*
9913 * cmpl op0,op1
9914 * sbbl dest,dest
9915 * notl dest
9916 * [addl dest, cf]
9917 *
9918 * Size 8 - 11.
9919 */
9920 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9921 if (cf)
9922 tmp = expand_simple_binop (mode, PLUS,
9923 copy_rtx (tmp), GEN_INT (cf),
9924 copy_rtx (tmp), 1, OPTAB_DIRECT);
9925 }
9926 else
9927 {
9928 /*
9929 * cmpl op0,op1
9930 * sbbl dest,dest
9931 * [notl dest]
9932 * andl cf - ct, dest
9933 * [addl dest, ct]
9934 *
9935 * Size 8 - 11.
9936 */
9937
9938 if (cf == 0)
9939 {
9940 cf = ct;
9941 ct = 0;
9942 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9943 }
9944
9945 tmp = expand_simple_binop (mode, AND,
9946 copy_rtx (tmp),
9947 gen_int_mode (cf - ct, mode),
9948 copy_rtx (tmp), 1, OPTAB_DIRECT);
9949 if (ct)
9950 tmp = expand_simple_binop (mode, PLUS,
9951 copy_rtx (tmp), GEN_INT (ct),
9952 copy_rtx (tmp), 1, OPTAB_DIRECT);
9953 }
9954
9955 if (!rtx_equal_p (tmp, out))
9956 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9957
9958 return 1; /* DONE */
9959 }
9960
9961 if (diff < 0)
9962 {
9963 HOST_WIDE_INT tmp;
9964 tmp = ct, ct = cf, cf = tmp;
9965 diff = -diff;
9966 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9967 {
9968	      /* We may be reversing an unordered compare to a normal compare, which
9969	         is not valid in general (we may convert a non-trapping condition
9970	         into a trapping one); however, on i386 we currently emit all
9971	         comparisons unordered.  */
9972 compare_code = reverse_condition_maybe_unordered (compare_code);
9973 code = reverse_condition_maybe_unordered (code);
9974 }
9975 else
9976 {
9977 compare_code = reverse_condition (compare_code);
9978 code = reverse_condition (code);
9979 }
9980 }
9981
9982 compare_code = NIL;
9983 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9984 && GET_CODE (ix86_compare_op1) == CONST_INT)
9985 {
9986 if (ix86_compare_op1 == const0_rtx
9987 && (code == LT || code == GE))
9988 compare_code = code;
9989 else if (ix86_compare_op1 == constm1_rtx)
9990 {
9991 if (code == LE)
9992 compare_code = LT;
9993 else if (code == GT)
9994 compare_code = GE;
9995 }
9996 }
9997
9998 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9999 if (compare_code != NIL
10000 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10001 && (cf == -1 || ct == -1))
10002 {
10003 /* If lea code below could be used, only optimize
10004 if it results in a 2 insn sequence. */
10005
10006 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10007 || diff == 3 || diff == 5 || diff == 9)
10008 || (compare_code == LT && ct == -1)
10009 || (compare_code == GE && cf == -1))
10010 {
10011 /*
10012 * notl op1 (if necessary)
10013 * sarl $31, op1
10014 * orl cf, op1
10015 */
10016 if (ct != -1)
10017 {
10018 cf = ct;
10019 ct = -1;
10020 code = reverse_condition (code);
10021 }
10022
10023 out = emit_store_flag (out, code, ix86_compare_op0,
10024 ix86_compare_op1, VOIDmode, 0, -1);
10025
10026 out = expand_simple_binop (mode, IOR,
10027 out, GEN_INT (cf),
10028 out, 1, OPTAB_DIRECT);
10029 if (out != operands[0])
10030 emit_move_insn (operands[0], out);
10031
10032 return 1; /* DONE */
10033 }
10034 }
10035
10036
10037 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10038 || diff == 3 || diff == 5 || diff == 9)
10039 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10040 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10041 {
10042 /*
10043 * xorl dest,dest
10044 * cmpl op1,op2
10045 * setcc dest
10046 * lea cf(dest*(ct-cf)),dest
10047 *
10048 * Size 14.
10049 *
10050 * This also catches the degenerate setcc-only case.
10051 */
10052
10053 rtx tmp;
10054 int nops;
10055
10056 out = emit_store_flag (out, code, ix86_compare_op0,
10057 ix86_compare_op1, VOIDmode, 0, 1);
10058
10059 nops = 0;
10060	  /* On x86_64 the lea instruction operates on Pmode, so we need
10061	     to get the arithmetic done in the proper mode to match.  */
10062 if (diff == 1)
10063 tmp = copy_rtx (out);
10064 else
10065 {
10066 rtx out1;
10067 out1 = copy_rtx (out);
10068 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10069 nops++;
10070 if (diff & 1)
10071 {
10072 tmp = gen_rtx_PLUS (mode, tmp, out1);
10073 nops++;
10074 }
10075 }
10076 if (cf != 0)
10077 {
10078 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10079 nops++;
10080 }
10081 if (!rtx_equal_p (tmp, out))
10082 {
10083 if (nops == 1)
10084 out = force_operand (tmp, copy_rtx (out));
10085 else
10086 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10087 }
10088 if (!rtx_equal_p (out, operands[0]))
10089 emit_move_insn (operands[0], copy_rtx (out));
10090
10091 return 1; /* DONE */
10092 }
10093
10094 /*
10095 * General case: Jumpful:
10096 * xorl dest,dest cmpl op1, op2
10097 * cmpl op1, op2 movl ct, dest
10098 * setcc dest jcc 1f
10099 * decl dest movl cf, dest
10100 * andl (cf-ct),dest 1:
10101 * addl ct,dest
10102 *
10103 * Size 20. Size 14.
10104 *
10105 * This is reasonably steep, but branch mispredict costs are
10106 * high on modern cpus, so consider failing only if optimizing
10107 * for space.
10108 */
10109
10110 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10111 && BRANCH_COST >= 2)
10112 {
10113 if (cf == 0)
10114 {
10115 cf = ct;
10116 ct = 0;
10117 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10118	    /* We may be reversing an unordered compare to a normal compare,
10119	       which is not valid in general (we may convert a non-trapping
10120	       condition into a trapping one); however, on i386 we currently
10121	       emit all comparisons unordered.  */
10122 code = reverse_condition_maybe_unordered (code);
10123 else
10124 {
10125 code = reverse_condition (code);
10126 if (compare_code != NIL)
10127 compare_code = reverse_condition (compare_code);
10128 }
10129 }
10130
10131 if (compare_code != NIL)
10132 {
10133 /* notl op1 (if needed)
10134 sarl $31, op1
10135 andl (cf-ct), op1
10136 addl ct, op1
10137
10138 For x < 0 (resp. x <= -1) there will be no notl,
10139 so if possible swap the constants to get rid of the
10140 complement.
10141 True/false will be -1/0 while code below (store flag
10142 followed by decrement) is 0/-1, so the constants need
10143 to be exchanged once more. */
10144
10145 if (compare_code == GE || !cf)
10146 {
10147 code = reverse_condition (code);
10148 compare_code = LT;
10149 }
10150 else
10151 {
10152 HOST_WIDE_INT tmp = cf;
10153 cf = ct;
10154 ct = tmp;
10155 }
10156
10157 out = emit_store_flag (out, code, ix86_compare_op0,
10158 ix86_compare_op1, VOIDmode, 0, -1);
10159 }
10160 else
10161 {
10162 out = emit_store_flag (out, code, ix86_compare_op0,
10163 ix86_compare_op1, VOIDmode, 0, 1);
10164
10165 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10166 copy_rtx (out), 1, OPTAB_DIRECT);
10167 }
10168
10169 out = expand_simple_binop (mode, AND, copy_rtx (out),
10170 gen_int_mode (cf - ct, mode),
10171 copy_rtx (out), 1, OPTAB_DIRECT);
10172 if (ct)
10173 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10174 copy_rtx (out), 1, OPTAB_DIRECT);
10175 if (!rtx_equal_p (out, operands[0]))
10176 emit_move_insn (operands[0], copy_rtx (out));
10177
10178 return 1; /* DONE */
10179 }
10180 }
10181
10182 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10183 {
10184 /* Try a few things more with specific constants and a variable. */
10185
10186 optab op;
10187 rtx var, orig_out, out, tmp;
10188
10189 if (BRANCH_COST <= 2)
10190 return 0; /* FAIL */
10191
10192      /* If one of the two operands is an interesting constant, conditionally
10193         load a 0/-1 constant and mask the other operand in with a logical operation.  */
10194
10195 if (GET_CODE (operands[2]) == CONST_INT)
10196 {
10197 var = operands[3];
10198 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10199 operands[3] = constm1_rtx, op = and_optab;
10200 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10201 operands[3] = const0_rtx, op = ior_optab;
10202 else
10203 return 0; /* FAIL */
10204 }
10205 else if (GET_CODE (operands[3]) == CONST_INT)
10206 {
10207 var = operands[2];
10208 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10209 operands[2] = constm1_rtx, op = and_optab;
10210	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10211 operands[2] = const0_rtx, op = ior_optab;
10212 else
10213 return 0; /* FAIL */
10214 }
10215 else
10216 return 0; /* FAIL */
10217
10218 orig_out = operands[0];
10219 tmp = gen_reg_rtx (mode);
10220 operands[0] = tmp;
10221
10222 /* Recurse to get the constant loaded. */
10223 if (ix86_expand_int_movcc (operands) == 0)
10224 return 0; /* FAIL */
10225
10226 /* Mask in the interesting variable. */
10227 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10228 OPTAB_WIDEN);
10229 if (!rtx_equal_p (out, orig_out))
10230 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10231
10232 return 1; /* DONE */
10233 }
10234
10235 /*
10236 * For comparison with above,
10237 *
10238 * movl cf,dest
10239 * movl ct,tmp
10240 * cmpl op1,op2
10241 * cmovcc tmp,dest
10242 *
10243 * Size 15.
10244 */
10245
10246 if (! nonimmediate_operand (operands[2], mode))
10247 operands[2] = force_reg (mode, operands[2]);
10248 if (! nonimmediate_operand (operands[3], mode))
10249 operands[3] = force_reg (mode, operands[3]);
10250
10251 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10252 {
10253 rtx tmp = gen_reg_rtx (mode);
10254 emit_move_insn (tmp, operands[3]);
10255 operands[3] = tmp;
10256 }
10257 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10258 {
10259 rtx tmp = gen_reg_rtx (mode);
10260 emit_move_insn (tmp, operands[2]);
10261 operands[2] = tmp;
10262 }
10263
10264 if (! register_operand (operands[2], VOIDmode)
10265 && (mode == QImode
10266 || ! register_operand (operands[3], VOIDmode)))
10267 operands[2] = force_reg (mode, operands[2]);
10268
10269 if (mode == QImode
10270 && ! register_operand (operands[3], VOIDmode))
10271 operands[3] = force_reg (mode, operands[3]);
10272
10273 emit_insn (compare_seq);
10274 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10275 gen_rtx_IF_THEN_ELSE (mode,
10276 compare_op, operands[2],
10277 operands[3])));
10278 if (bypass_test)
10279 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10280 gen_rtx_IF_THEN_ELSE (mode,
10281 bypass_test,
10282 copy_rtx (operands[3]),
10283 copy_rtx (operands[0]))));
10284 if (second_test)
10285 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10286 gen_rtx_IF_THEN_ELSE (mode,
10287 second_test,
10288 copy_rtx (operands[2]),
10289 copy_rtx (operands[0]))));
10290
10291 return 1; /* DONE */
10292}
10293
10294int
10295ix86_expand_fp_movcc (rtx operands[])
10296{
10297 enum rtx_code code;
10298 rtx tmp;
10299 rtx compare_op, second_test, bypass_test;
10300
10301  /* For SF/DFmode conditional moves based on comparisons
10302     in the same mode, we may want to use SSE min/max instructions.  */
10303 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10304 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10305 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10306      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
10307 && (!TARGET_IEEE_FP
10308 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10309 /* We may be called from the post-reload splitter. */
10310 && (!REG_P (operands[0])
10311 || SSE_REG_P (operands[0])
10312 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10313 {
10314 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10315 code = GET_CODE (operands[1]);
10316
10317 /* See if we have (cross) match between comparison operands and
10318 conditional move operands. */
10319 if (rtx_equal_p (operands[2], op1))
10320 {
10321 rtx tmp = op0;
10322 op0 = op1;
10323 op1 = tmp;
10324 code = reverse_condition_maybe_unordered (code);
10325 }
10326 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10327 {
10328 /* Check for min operation. */
10329 if (code == LT || code == UNLE)
10330 {
10331 if (code == UNLE)
10332 {
10333 rtx tmp = op0;
10334 op0 = op1;
10335 op1 = tmp;
10336 }
10337 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10338 if (memory_operand (op0, VOIDmode))
10339 op0 = force_reg (GET_MODE (operands[0]), op0);
10340 if (GET_MODE (operands[0]) == SFmode)
10341 emit_insn (gen_minsf3 (operands[0], op0, op1));
10342 else
10343 emit_insn (gen_mindf3 (operands[0], op0, op1));
10344 return 1;
10345 }
10346 /* Check for max operation. */
10347 if (code == GT || code == UNGE)
10348 {
10349 if (code == UNGE)
10350 {
10351 rtx tmp = op0;
10352 op0 = op1;
10353 op1 = tmp;
10354 }
10355 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10356 if (memory_operand (op0, VOIDmode))
10357 op0 = force_reg (GET_MODE (operands[0]), op0);
10358 if (GET_MODE (operands[0]) == SFmode)
10359 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10360 else
10361 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10362 return 1;
10363 }
10364 }
10365	  /* Arrange for the condition to be an sse_comparison_operator.  In case we
10366	     are in non-IEEE mode, try to canonicalize the destination operand
10367	     to be first in the comparison - this helps reload to avoid extra
10368	     moves.  */
10369 if (!sse_comparison_operator (operands[1], VOIDmode)
10370 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10371 {
10372 rtx tmp = ix86_compare_op0;
10373 ix86_compare_op0 = ix86_compare_op1;
10374 ix86_compare_op1 = tmp;
10375 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10376 VOIDmode, ix86_compare_op0,
10377 ix86_compare_op1);
10378 }
10379	      /* Similarly, try to arrange for the result to be the first operand of
10380	         the conditional move.  We also don't support the NE comparison on SSE,
10381	         so try to avoid it.  */
10382 if ((rtx_equal_p (operands[0], operands[3])
10383 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10384 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10385 {
10386 rtx tmp = operands[2];
10387 operands[2] = operands[3];
10388 operands[3] = tmp;
10389 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10390 (GET_CODE (operands[1])),
10391 VOIDmode, ix86_compare_op0,
10392 ix86_compare_op1);
10393 }
10394 if (GET_MODE (operands[0]) == SFmode)
10395 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10396 operands[2], operands[3],
10397 ix86_compare_op0, ix86_compare_op1));
10398 else
10399 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10400 operands[2], operands[3],
10401 ix86_compare_op0, ix86_compare_op1));
10402 return 1;
10403 }
10404
10405 /* The floating point conditional move instructions don't directly
10406 support conditions resulting from a signed integer comparison. */
10407
10408 code = GET_CODE (operands[1]);
10409 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10410
10411 /* The floating point conditional move instructions don't directly
10412 support signed integer comparisons. */
10413
10414 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10415 {
10416 if (second_test != NULL || bypass_test != NULL)
10417 abort ();
10418 tmp = gen_reg_rtx (QImode);
10419 ix86_expand_setcc (code, tmp);
10420 code = NE;
10421 ix86_compare_op0 = tmp;
10422 ix86_compare_op1 = const0_rtx;
10423 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10424 }
10425 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10426 {
10427 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10428 emit_move_insn (tmp, operands[3]);
10429 operands[3] = tmp;
10430 }
10431 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10432 {
10433 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10434 emit_move_insn (tmp, operands[2]);
10435 operands[2] = tmp;
10436 }
10437
10438 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10439 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10440 compare_op,
10441 operands[2],
10442 operands[3])));
10443 if (bypass_test)
10444 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10445 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10446 bypass_test,
10447 operands[3],
10448 operands[0])));
10449 if (second_test)
10450 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10451 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10452 second_test,
10453 operands[2],
10454 operands[0])));
10455
10456 return 1;
10457}
10458
10459/* Expand conditional increment or decrement using adc/sbb instructions.
10460 The default case using setcc followed by the conditional move can be
10461 done by generic code. */
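/* For example, "x += (a <u b)" can be emitted as a compare of a and b
   followed by "adc $0, x"; the decrement case uses sbb instead.  (Illustrative
   only; the exact operand order follows the patterns used below.)  */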
10462int
10463ix86_expand_int_addcc (rtx operands[])
10464{
10465 enum rtx_code code = GET_CODE (operands[1]);
10466 rtx compare_op;
10467 rtx val = const0_rtx;
10468 bool fpcmp = false;
10469 enum machine_mode mode = GET_MODE (operands[0]);
10470
10471 if (operands[3] != const1_rtx
10472 && operands[3] != constm1_rtx)
10473 return 0;
10474 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10475 ix86_compare_op1, &compare_op))
10476 return 0;
10477 code = GET_CODE (compare_op);
10478
10479 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10480 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10481 {
10482 fpcmp = true;
10483 code = ix86_fp_compare_code_to_integer (code);
10484 }
10485
10486 if (code != LTU)
10487 {
10488 val = constm1_rtx;
10489 if (fpcmp)
10490 PUT_CODE (compare_op,
10491 reverse_condition_maybe_unordered
10492 (GET_CODE (compare_op)));
10493 else
10494 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10495 }
10496 PUT_MODE (compare_op, mode);
10497
10498 /* Construct either adc or sbb insn. */
10499 if ((code == LTU) == (operands[3] == constm1_rtx))
10500 {
10501 switch (GET_MODE (operands[0]))
10502 {
10503 case QImode:
10504 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10505 break;
10506 case HImode:
10507 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10508 break;
10509 case SImode:
10510 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10511 break;
10512 case DImode:
10513 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10514 break;
10515 default:
10516 abort ();
10517 }
10518 }
10519 else
10520 {
10521 switch (GET_MODE (operands[0]))
10522 {
10523 case QImode:
10524 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10525 break;
10526 case HImode:
10527 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10528 break;
10529 case SImode:
10530 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10531 break;
10532 case DImode:
10533 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10534 break;
10535 default:
10536 abort ();
10537 }
10538 }
10539 return 1; /* DONE */
10540}
10541
10542
10543/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
10544   works for floating point parameters and non-offsettable memories.
10545   For pushes, it returns just stack offsets; the values will be saved
10546   in the right order.  At most three parts are generated.  */
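/* For example, on IA-32 a DFmode operand is split into two SImode parts and
   an XFmode operand into three, while on x86-64 an XFmode operand is split
   into one DImode part and one SImode part.  */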
10547
10548static int
10549ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10550{
10551 int size;
10552
10553 if (!TARGET_64BIT)
10554 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10555 else
10556 size = (GET_MODE_SIZE (mode) + 4) / 8;
10557
10558 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10559 abort ();
10560 if (size < 2 || size > 3)
10561 abort ();
10562
10563 /* Optimize constant pool reference to immediates. This is used by fp
10564 moves, that force all constants to memory to allow combining. */
10565 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10566 {
10567 rtx tmp = maybe_get_pool_constant (operand);
10568 if (tmp)
10569 operand = tmp;
10570 }
10571
10572 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10573 {
10574      /* The only non-offsettable memories we handle are pushes.  */
10575 if (! push_operand (operand, VOIDmode))
10576 abort ();
10577
10578 operand = copy_rtx (operand);
10579 PUT_MODE (operand, Pmode);
10580 parts[0] = parts[1] = parts[2] = operand;
10581 }
10582 else if (!TARGET_64BIT)
10583 {
10584 if (mode == DImode)
10585 split_di (&operand, 1, &parts[0], &parts[1]);
10586 else
10587 {
10588 if (REG_P (operand))
10589 {
10590 if (!reload_completed)
10591 abort ();
10592 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10593 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10594 if (size == 3)
10595 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10596 }
10597 else if (offsettable_memref_p (operand))
10598 {
10599 operand = adjust_address (operand, SImode, 0);
10600 parts[0] = operand;
10601 parts[1] = adjust_address (operand, SImode, 4);
10602 if (size == 3)
10603 parts[2] = adjust_address (operand, SImode, 8);
10604 }
10605 else if (GET_CODE (operand) == CONST_DOUBLE)
10606 {
10607 REAL_VALUE_TYPE r;
10608 long l[4];
10609
10610 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10611 switch (mode)
10612 {
10613 case XFmode:
10614 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10615 parts[2] = gen_int_mode (l[2], SImode);
10616 break;
10617 case DFmode:
10618 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10619 break;
10620 default:
10621 abort ();
10622 }
10623 parts[1] = gen_int_mode (l[1], SImode);
10624 parts[0] = gen_int_mode (l[0], SImode);
10625 }
10626 else
10627 abort ();
10628 }
10629 }
10630 else
10631 {
10632 if (mode == TImode)
10633 split_ti (&operand, 1, &parts[0], &parts[1]);
10634 if (mode == XFmode || mode == TFmode)
10635 {
10636 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10637 if (REG_P (operand))
10638 {
10639 if (!reload_completed)
10640 abort ();
10641 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10642 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10643 }
10644 else if (offsettable_memref_p (operand))
10645 {
10646 operand = adjust_address (operand, DImode, 0);
10647 parts[0] = operand;
10648 parts[1] = adjust_address (operand, upper_mode, 8);
10649 }
10650 else if (GET_CODE (operand) == CONST_DOUBLE)
10651 {
10652 REAL_VALUE_TYPE r;
10653 long l[4];
10654
10655 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10656 real_to_target (l, &r, mode);
10657
10658 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10659 if (HOST_BITS_PER_WIDE_INT >= 64)
10660 parts[0]
10661 = gen_int_mode
10662 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10663 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10664 DImode);
10665 else
10666 parts[0] = immed_double_const (l[0], l[1], DImode);
10667
10668 if (upper_mode == SImode)
10669 parts[1] = gen_int_mode (l[2], SImode);
10670 else if (HOST_BITS_PER_WIDE_INT >= 64)
10671 parts[1]
10672 = gen_int_mode
10673 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10674 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10675 DImode);
10676 else
10677 parts[1] = immed_double_const (l[2], l[3], DImode);
10678 }
10679 else
10680 abort ();
10681 }
10682 }
10683
10684 return size;
10685}
10686
10687/* Emit insns to perform a move or push of DI, DF, and XF values.
10688   All required insns are emitted here; the caller does not need to
10689   emit any normal moves afterwards.  Operands 2-4 contain the input values
10690   in the correct order; operands 5-7 contain the output values.  */
10691
10692void
10693ix86_split_long_move (rtx operands[])
10694{
10695 rtx part[2][3];
10696 int nparts;
10697 int push = 0;
10698 int collisions = 0;
10699 enum machine_mode mode = GET_MODE (operands[0]);
10700
10701  /* The DFmode expanders may ask us to move a double.
10702     For a 64-bit target this is a single move.  By hiding the fact
10703     here we simplify the i386.md splitters.  */
10704 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10705 {
10706 /* Optimize constant pool reference to immediates. This is used by
10707 fp moves, that force all constants to memory to allow combining. */
10708
10709 if (GET_CODE (operands[1]) == MEM
10710 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10711 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10712 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10713 if (push_operand (operands[0], VOIDmode))
10714 {
10715 operands[0] = copy_rtx (operands[0]);
10716 PUT_MODE (operands[0], Pmode);
10717 }
10718 else
10719 operands[0] = gen_lowpart (DImode, operands[0]);
10720 operands[1] = gen_lowpart (DImode, operands[1]);
10721 emit_move_insn (operands[0], operands[1]);
10722 return;
10723 }
10724
10725 /* The only non-offsettable memory we handle is push. */
10726 if (push_operand (operands[0], VOIDmode))
10727 push = 1;
10728 else if (GET_CODE (operands[0]) == MEM
10729 && ! offsettable_memref_p (operands[0]))
10730 abort ();
10731
10732 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10733 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10734
10735  /* When emitting a push, take care of source operands on the stack.  */
10736 if (push && GET_CODE (operands[1]) == MEM
10737 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10738 {
10739 if (nparts == 3)
10740 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10741 XEXP (part[1][2], 0));
10742 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10743 XEXP (part[1][1], 0));
10744 }
10745
10746  /* We need to do the copy in the right order in case an address register
10747     of the source overlaps the destination.  */
10748 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10749 {
10750 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10751 collisions++;
10752 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10753 collisions++;
10754 if (nparts == 3
10755 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10756 collisions++;
10757
10758 /* Collision in the middle part can be handled by reordering. */
10759 if (collisions == 1 && nparts == 3
10760 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10761 {
10762 rtx tmp;
10763 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10764 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10765 }
10766
10767 /* If there are more collisions, we can't handle it by reordering.
10768 Do an lea to the last part and use only one colliding move. */
10769 else if (collisions > 1)
10770 {
10771 rtx base;
10772
10773 collisions = 1;
10774
10775 base = part[0][nparts - 1];
10776
10777 /* Handle the case when the last part isn't valid for lea.
10778 Happens in 64-bit mode storing the 12-byte XFmode. */
10779 if (GET_MODE (base) != Pmode)
10780 base = gen_rtx_REG (Pmode, REGNO (base));
10781
10782 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10783 part[1][0] = replace_equiv_address (part[1][0], base);
10784 part[1][1] = replace_equiv_address (part[1][1],
10785 plus_constant (base, UNITS_PER_WORD));
10786 if (nparts == 3)
10787 part[1][2] = replace_equiv_address (part[1][2],
10788 plus_constant (base, 8));
10789 }
10790 }
10791
10792 if (push)
10793 {
10794 if (!TARGET_64BIT)
10795 {
10796 if (nparts == 3)
10797 {
10798 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10799 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10800 emit_move_insn (part[0][2], part[1][2]);
10801 }
10802 }
10803 else
10804 {
10805	  /* In 64-bit mode we don't have a 32-bit push available.  If this is a
10806	     register, that is OK - we will just use the larger counterpart.  We also
10807	     retype the memory - this comes from an attempt to avoid a REX prefix
10808	     when moving the second half of a TFmode value.  */
10809 if (GET_MODE (part[1][1]) == SImode)
10810 {
10811 if (GET_CODE (part[1][1]) == MEM)
10812 part[1][1] = adjust_address (part[1][1], DImode, 0);
10813 else if (REG_P (part[1][1]))
10814 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10815 else
10816 abort ();
10817 if (GET_MODE (part[1][0]) == SImode)
10818 part[1][0] = part[1][1];
10819 }
10820 }
10821 emit_move_insn (part[0][1], part[1][1]);
10822 emit_move_insn (part[0][0], part[1][0]);
10823 return;
10824 }
10825
10826  /* Choose the correct order so we do not overwrite the source before it is copied.  */
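  /* For instance, when moving a DImode value whose source high word lives in
     the same register as the destination low word, the parts below are listed
     high part first so that the low-word copy does not clobber the source.  */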
10827 if ((REG_P (part[0][0])
10828 && REG_P (part[1][1])
10829 && (REGNO (part[0][0]) == REGNO (part[1][1])
10830 || (nparts == 3
10831 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10832 || (collisions > 0
10833 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10834 {
10835 if (nparts == 3)
10836 {
10837 operands[2] = part[0][2];
10838 operands[3] = part[0][1];
10839 operands[4] = part[0][0];
10840 operands[5] = part[1][2];
10841 operands[6] = part[1][1];
10842 operands[7] = part[1][0];
10843 }
10844 else
10845 {
10846 operands[2] = part[0][1];
10847 operands[3] = part[0][0];
10848 operands[5] = part[1][1];
10849 operands[6] = part[1][0];
10850 }
10851 }
10852 else
10853 {
10854 if (nparts == 3)
10855 {
10856 operands[2] = part[0][0];
10857 operands[3] = part[0][1];
10858 operands[4] = part[0][2];
10859 operands[5] = part[1][0];
10860 operands[6] = part[1][1];
10861 operands[7] = part[1][2];
10862 }
10863 else
10864 {
10865 operands[2] = part[0][0];
10866 operands[3] = part[0][1];
10867 operands[5] = part[1][0];
10868 operands[6] = part[1][1];
10869 }
10870 }
10871 emit_move_insn (operands[2], operands[5]);
10872 emit_move_insn (operands[3], operands[6]);
10873 if (nparts == 3)
10874 emit_move_insn (operands[4], operands[7]);
10875
10876 return;
10877}
10878
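/* Split a DImode left shift into SImode operations.  For a constant count of
   32 or more, the result's high word is the input's low word shifted left by
   count - 32 and the result's low word is zero; for smaller constants a
   shld/shl pair is used.  For a variable count, shld/shl are emitted and the
   count >= 32 case is fixed up afterwards, with a conditional move when
   available or a branch otherwise.  */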
10879void
10880ix86_split_ashldi (rtx *operands, rtx scratch)
10881{
10882 rtx low[2], high[2];
10883 int count;
10884
10885 if (GET_CODE (operands[2]) == CONST_INT)
10886 {
10887 split_di (operands, 2, low, high);
10888 count = INTVAL (operands[2]) & 63;
10889
10890 if (count >= 32)
10891 {
10892 emit_move_insn (high[0], low[1]);
10893 emit_move_insn (low[0], const0_rtx);
10894
10895 if (count > 32)
10896 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10897 }
10898 else
10899 {
10900 if (!rtx_equal_p (operands[0], operands[1]))
10901 emit_move_insn (operands[0], operands[1]);
10902 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10903 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10904 }
10905 }
10906 else
10907 {
10908 if (!rtx_equal_p (operands[0], operands[1]))
10909 emit_move_insn (operands[0], operands[1]);
10910
10911 split_di (operands, 1, low, high);
10912
10913 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10914 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10915
10916 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10917 {
10918 if (! no_new_pseudos)
10919 scratch = force_reg (SImode, const0_rtx);
10920 else
10921 emit_move_insn (scratch, const0_rtx);
10922
10923 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10924 scratch));
10925 }
10926 else
10927 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10928 }
10929}
10930
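/* Split a DImode arithmetic right shift into SImode operations.  This mirrors
   ix86_split_ashldi, except that for counts of 32 or more the result's high
   word is filled with the sign bit (an arithmetic shift right by 31) and the
   low word receives the input's high word.  */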
10931void
10932ix86_split_ashrdi (rtx *operands, rtx scratch)
10933{
10934 rtx low[2], high[2];
10935 int count;
10936
10937 if (GET_CODE (operands[2]) == CONST_INT)
10938 {
10939 split_di (operands, 2, low, high);
10940 count = INTVAL (operands[2]) & 63;
10941
10942 if (count >= 32)
10943 {
10944 emit_move_insn (low[0], high[1]);
10945
10946 if (! reload_completed)
10947 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10948 else
10949 {
10950 emit_move_insn (high[0], low[0]);
10951 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10952 }
10953
10954 if (count > 32)
10955 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10956 }
10957 else
10958 {
10959 if (!rtx_equal_p (operands[0], operands[1]))
10960 emit_move_insn (operands[0], operands[1]);
10961 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10962 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10963 }
10964 }
10965 else
10966 {
10967 if (!rtx_equal_p (operands[0], operands[1]))
10968 emit_move_insn (operands[0], operands[1]);
10969
10970 split_di (operands, 1, low, high);
10971
10972 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10973 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10974
10975 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10976 {
10977 if (! no_new_pseudos)
10978 scratch = gen_reg_rtx (SImode);
10979 emit_move_insn (scratch, high[0]);
10980 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10981 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10982 scratch));
10983 }
10984 else
10985 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10986 }
10987}
10988
10989void
10990ix86_split_lshrdi (rtx *operands, rtx scratch)
10991{
10992 rtx low[2], high[2];
10993 int count;
10994
10995 if (GET_CODE (operands[2]) == CONST_INT)
10996 {
10997 split_di (operands, 2, low, high);
10998 count = INTVAL (operands[2]) & 63;
10999
11000 if (count >= 32)
11001 {
11002 emit_move_insn (low[0], high[1]);
11003 emit_move_insn (high[0], const0_rtx);
11004
11005 if (count > 32)
11006 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11007 }
11008 else
11009 {
11010 if (!rtx_equal_p (operands[0], operands[1]))
11011 emit_move_insn (operands[0], operands[1]);
11012 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11013 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11014 }
11015 }
11016 else
11017 {
11018 if (!rtx_equal_p (operands[0], operands[1]))
11019 emit_move_insn (operands[0], operands[1]);
11020
11021 split_di (operands, 1, low, high);
11022
11023 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11024 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11025
11026 /* Heh. By reversing the arguments, we can reuse this pattern. */
11027 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11028 {
11029 if (! no_new_pseudos)
11030 scratch = force_reg (SImode, const0_rtx);
11031 else
11032 emit_move_insn (scratch, const0_rtx);
11033
11034 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11035 scratch));
11036 }
11037 else
11038 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11039 }
11040}
11041
11042/* Helper function for the string operations below.  Test whether VARIABLE
11043   has the bits given by VALUE clear (i.e. is aligned); if so, jump to the returned label.  */
11044static rtx
11045ix86_expand_aligntest (rtx variable, int value)
11046{
11047 rtx label = gen_label_rtx ();
11048 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11049 if (GET_MODE (variable) == DImode)
11050 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11051 else
11052 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11053 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11054 1, label);
11055 return label;
11056}
11057
11058/* Decrease COUNTREG by VALUE.  */
11059static void
11060ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11061{
11062 if (GET_MODE (countreg) == DImode)
11063 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11064 else
11065 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11066}
11067
11068/* Zero extend EXP, which is possibly in SImode, into a Pmode register.  */
11069rtx
11070ix86_zero_extend_to_Pmode (rtx exp)
11071{
11072 rtx r;
11073 if (GET_MODE (exp) == VOIDmode)
11074 return force_reg (Pmode, exp);
11075 if (GET_MODE (exp) == Pmode)
11076 return copy_to_mode_reg (Pmode, exp);
11077 r = gen_reg_rtx (Pmode);
11078 emit_insn (gen_zero_extendsidi2 (r, exp));
11079 return r;
11080}
11081
11082/* Expand string move (memcpy) operation. Use i386 string operations when
11083 profitable. expand_clrstr contains similar code. */
11084int
11085ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11086{
11087 rtx srcreg, destreg, countreg, srcexp, destexp;
11088 enum machine_mode counter_mode;
11089 HOST_WIDE_INT align = 0;
11090 unsigned HOST_WIDE_INT count = 0;
11091
11092 if (GET_CODE (align_exp) == CONST_INT)
11093 align = INTVAL (align_exp);
11094
11095 /* Can't use any of this if the user has appropriated esi or edi. */
11096 if (global_regs[4] || global_regs[5])
11097 return 0;
11098
11099 /* This simple hack avoids all inlining code and simplifies code below. */
11100 if (!TARGET_ALIGN_STRINGOPS)
11101 align = 64;
11102
11103 if (GET_CODE (count_exp) == CONST_INT)
11104 {
11105 count = INTVAL (count_exp);
11106 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11107 return 0;
11108 }
11109
11110  /* Figure out the proper mode for the counter.  For 32-bit targets it is
11111     always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
11112     Set count to the number of bytes copied when known at compile time.  */
11113 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11114 || x86_64_zero_extended_value (count_exp))
11115 counter_mode = SImode;
11116 else
11117 counter_mode = DImode;
11118
11119 if (counter_mode != SImode && counter_mode != DImode)
11120 abort ();
11121
11122 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11123 if (destreg != XEXP (dst, 0))
11124 dst = replace_equiv_address_nv (dst, destreg);
11125 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11126 if (srcreg != XEXP (src, 0))
11127 src = replace_equiv_address_nv (src, srcreg);
11128
11129 /* When optimizing for size emit simple rep ; movsb instruction for
11130 counts not divisible by 4. */
11131
11132 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11133 {
11134 emit_insn (gen_cld ());
11135 countreg = ix86_zero_extend_to_Pmode (count_exp);
11136 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11137 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11138 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11139 destexp, srcexp));
11140 }
11141
11142 /* For constant aligned (or small unaligned) copies use rep movsl
11143 followed by code copying the rest. For PentiumPro ensure 8 byte
11144 alignment to allow rep movsl acceleration. */
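  /* For example, a known count of 23 with 4-byte operations is emitted as a
     rep movsl of 5 words followed by one 2-byte and one 1-byte move.  */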
11145
11146 else if (count != 0
11147 && (align >= 8
11148 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11149 || optimize_size || count < (unsigned int) 64))
11150 {
11151 unsigned HOST_WIDE_INT offset = 0;
11152 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11153 rtx srcmem, dstmem;
11154
11155 emit_insn (gen_cld ());
11156 if (count & ~(size - 1))
11157 {
11158 countreg = copy_to_mode_reg (counter_mode,
11159 GEN_INT ((count >> (size == 4 ? 2 : 3))
11160 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11161 countreg = ix86_zero_extend_to_Pmode (countreg);
11162
11163 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11164 GEN_INT (size == 4 ? 2 : 3));
11165 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11166 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11167
11168 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11169 countreg, destexp, srcexp));
11170 offset = count & ~(size - 1);
11171 }
11172 if (size == 8 && (count & 0x04))
11173 {
11174 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11175 offset);
11176 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11177 offset);
11178 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11179 offset += 4;
11180 }
11181 if (count & 0x02)
11182 {
11183 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11184 offset);
11185 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11186 offset);
11187 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11188 offset += 2;
11189 }
11190 if (count & 0x01)
11191 {
11192 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11193 offset);
11194 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11195 offset);
11196 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11197 }
11198 }
11199 /* The generic code based on the glibc implementation:
11200 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11201 allowing accelerated copying there)
11202 - copy the data using rep movsl
11203 - copy the rest. */
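  /* Roughly, the emitted code corresponds to:
       copy single bytes/words until the destination is suitably aligned
         (each step is skipped when the alignment is already known);
       rep movsl (or the 8-byte variant on 64-bit targets) for the bulk;
       copy the remaining tail with individual word/byte moves.  */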
11204 else
11205 {
11206 rtx countreg2;
11207 rtx label = NULL;
11208 rtx srcmem, dstmem;
11209 int desired_alignment = (TARGET_PENTIUMPRO
11210 && (count == 0 || count >= (unsigned int) 260)
11211 ? 8 : UNITS_PER_WORD);
11212 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11213 dst = change_address (dst, BLKmode, destreg);
11214 src = change_address (src, BLKmode, srcreg);
11215
11216      /* In case we don't know anything about the alignment, default to the
11217         library version, since it is usually equally fast and results in
11218         shorter code.
11219
11220         Also emit a call when we know that the count is large and the call
11221         overhead will not be important.  */
11222 if (!TARGET_INLINE_ALL_STRINGOPS
11223 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11224 return 0;
11225
11226 if (TARGET_SINGLE_STRINGOP)
11227 emit_insn (gen_cld ());
11228
11229 countreg2 = gen_reg_rtx (Pmode);
11230 countreg = copy_to_mode_reg (counter_mode, count_exp);
11231
11232 /* We don't use loops to align destination and to copy parts smaller
11233 than 4 bytes, because gcc is able to optimize such code better (in
11234 the case the destination or the count really is aligned, gcc is often
11235 able to predict the branches) and also it is friendlier to the
11236 hardware branch prediction.
11237
11238 Using loops is beneficial for generic case, because we can
11239 handle small counts using the loops. Many CPUs (such as Athlon)
11240 have large REP prefix setup costs.
11241
11242 This is quite costly. Maybe we can revisit this decision later or
11243 add some customizability to this code. */
11244
11245 if (count == 0 && align < desired_alignment)
11246 {
11247 label = gen_label_rtx ();
11248 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11249 LEU, 0, counter_mode, 1, label);
11250 }
11251 if (align <= 1)
11252 {
11253 rtx label = ix86_expand_aligntest (destreg, 1);
11254 srcmem = change_address (src, QImode, srcreg);
11255 dstmem = change_address (dst, QImode, destreg);
11256 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11257 ix86_adjust_counter (countreg, 1);
11258 emit_label (label);
11259 LABEL_NUSES (label) = 1;
11260 }
11261 if (align <= 2)
11262 {
11263 rtx label = ix86_expand_aligntest (destreg, 2);
11264 srcmem = change_address (src, HImode, srcreg);
11265 dstmem = change_address (dst, HImode, destreg);
11266 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11267 ix86_adjust_counter (countreg, 2);
11268 emit_label (label);
11269 LABEL_NUSES (label) = 1;
11270 }
11271 if (align <= 4 && desired_alignment > 4)
11272 {
11273 rtx label = ix86_expand_aligntest (destreg, 4);
11274 srcmem = change_address (src, SImode, srcreg);
11275 dstmem = change_address (dst, SImode, destreg);
11276 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11277 ix86_adjust_counter (countreg, 4);
11278 emit_label (label);
11279 LABEL_NUSES (label) = 1;
11280 }
11281
11282 if (label && desired_alignment > 4 && !TARGET_64BIT)
11283 {
11284 emit_label (label);
11285 LABEL_NUSES (label) = 1;
11286 label = NULL_RTX;
11287 }
11288 if (!TARGET_SINGLE_STRINGOP)
11289 emit_insn (gen_cld ());
11290 if (TARGET_64BIT)
11291 {
11292 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11293 GEN_INT (3)));
11294 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11295 }
11296 else
11297 {
11298 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11299 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11300 }
11301 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11302 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11303 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11304 countreg2, destexp, srcexp));
11305
11306 if (label)
11307 {
11308 emit_label (label);
11309 LABEL_NUSES (label) = 1;
11310 }
11311 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11312 {
11313 srcmem = change_address (src, SImode, srcreg);
11314 dstmem = change_address (dst, SImode, destreg);
11315 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11316 }
11317 if ((align <= 4 || count == 0) && TARGET_64BIT)
11318 {
11319 rtx label = ix86_expand_aligntest (countreg, 4);
11320 srcmem = change_address (src, SImode, srcreg);
11321 dstmem = change_address (dst, SImode, destreg);
11322 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11323 emit_label (label);
11324 LABEL_NUSES (label) = 1;
11325 }
11326 if (align > 2 && count != 0 && (count & 2))
11327 {
11328 srcmem = change_address (src, HImode, srcreg);
11329 dstmem = change_address (dst, HImode, destreg);
11330 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11331 }
11332 if (align <= 2 || count == 0)
11333 {
11334 rtx label = ix86_expand_aligntest (countreg, 2);
11335 srcmem = change_address (src, HImode, srcreg);
11336 dstmem = change_address (dst, HImode, destreg);
11337 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11338 emit_label (label);
11339 LABEL_NUSES (label) = 1;
11340 }
11341 if (align > 1 && count != 0 && (count & 1))
11342 {
11343 srcmem = change_address (src, QImode, srcreg);
11344 dstmem = change_address (dst, QImode, destreg);
11345 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11346 }
11347 if (align <= 1 || count == 0)
11348 {
11349 rtx label = ix86_expand_aligntest (countreg, 1);
11350 srcmem = change_address (src, QImode, srcreg);
11351 dstmem = change_address (dst, QImode, destreg);
11352 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11353 emit_label (label);
11354 LABEL_NUSES (label) = 1;
11355 }
11356 }
11357
11358 return 1;
11359}
11360
11361/* Expand string clear operation (bzero). Use i386 string operations when
11362 profitable. expand_movstr contains similar code. */
11363int
11364ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11365{
11366 rtx destreg, zeroreg, countreg, destexp;
11367 enum machine_mode counter_mode;
11368 HOST_WIDE_INT align = 0;
11369 unsigned HOST_WIDE_INT count = 0;
11370
11371 if (GET_CODE (align_exp) == CONST_INT)
11372 align = INTVAL (align_exp);
11373
11374 /* Can't use any of this if the user has appropriated esi. */
11375 if (global_regs[4])
11376 return 0;
11377
11378 /* This simple hack avoids all inlining code and simplifies code below. */
11379 if (!TARGET_ALIGN_STRINGOPS)
11380 align = 32;
11381
11382 if (GET_CODE (count_exp) == CONST_INT)
11383 {
11384 count = INTVAL (count_exp);
11385 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11386 return 0;
11387 }
11388  /* Figure out the proper mode for the counter.  For 32-bit targets it is
11389     always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
11390     Set count to the number of bytes cleared when known at compile time.  */
11391 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11392 || x86_64_zero_extended_value (count_exp))
11393 counter_mode = SImode;
11394 else
11395 counter_mode = DImode;
11396
11397 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11398 if (destreg != XEXP (dst, 0))
11399 dst = replace_equiv_address_nv (dst, destreg);
11400
11401 emit_insn (gen_cld ());
11402
11403  /* When optimizing for size, emit a simple rep ; stosb instruction for
11404     counts not divisible by 4.  */
11405
11406 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11407 {
11408 countreg = ix86_zero_extend_to_Pmode (count_exp);
11409 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11410 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11411 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11412 }
11413 else if (count != 0
11414 && (align >= 8
11415 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11416 || optimize_size || count < (unsigned int) 64))
11417 {
11418 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11419 unsigned HOST_WIDE_INT offset = 0;
11420
11421 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11422 if (count & ~(size - 1))
11423 {
11424 countreg = copy_to_mode_reg (counter_mode,
11425 GEN_INT ((count >> (size == 4 ? 2 : 3))
11426 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11427 countreg = ix86_zero_extend_to_Pmode (countreg);
11428 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11429 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11430 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11431 offset = count & ~(size - 1);
11432 }
11433 if (size == 8 && (count & 0x04))
11434 {
11435 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11436 offset);
11437 emit_insn (gen_strset (destreg, mem,
11438 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11439 offset += 4;
11440 }
11441 if (count & 0x02)
11442 {
11443 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11444 offset);
11445 emit_insn (gen_strset (destreg, mem,
11446 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11447 offset += 2;
11448 }
11449 if (count & 0x01)
11450 {
11451 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11452 offset);
11453 emit_insn (gen_strset (destreg, mem,
11454 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11455 }
11456 }
11457 else
11458 {
11459 rtx countreg2;
11460 rtx label = NULL;
11461 /* Compute desired alignment of the string operation. */
11462 int desired_alignment = (TARGET_PENTIUMPRO
11463 && (count == 0 || count >= (unsigned int) 260)
11464 ? 8 : UNITS_PER_WORD);
11465
11466      /* In case we don't know anything about the alignment, default to the
11467         library version, since it is usually equally fast and results in
11468         shorter code.
11469
11470         Also emit a call when we know that the count is large and the call
11471         overhead will not be important.  */
11472 if (!TARGET_INLINE_ALL_STRINGOPS
11473 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11474 return 0;
11475
11476 if (TARGET_SINGLE_STRINGOP)
11477 emit_insn (gen_cld ());
11478
11479 countreg2 = gen_reg_rtx (Pmode);
11480 countreg = copy_to_mode_reg (counter_mode, count_exp);
11481 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11482 /* Get rid of MEM_OFFSET, it won't be accurate. */
11483 dst = change_address (dst, BLKmode, destreg);
11484
11485 if (count == 0 && align < desired_alignment)
11486 {
11487 label = gen_label_rtx ();
11488 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11489 LEU, 0, counter_mode, 1, label);
11490 }
11491 if (align <= 1)
11492 {
11493 rtx label = ix86_expand_aligntest (destreg, 1);
11494 emit_insn (gen_strset (destreg, dst,
11495 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11496 ix86_adjust_counter (countreg, 1);
11497 emit_label (label);
11498 LABEL_NUSES (label) = 1;
11499 }
11500 if (align <= 2)
11501 {
11502 rtx label = ix86_expand_aligntest (destreg, 2);
11503 emit_insn (gen_strset (destreg, dst,
11504 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11505 ix86_adjust_counter (countreg, 2);
11506 emit_label (label);
11507 LABEL_NUSES (label) = 1;
11508 }
11509 if (align <= 4 && desired_alignment > 4)
11510 {
11511 rtx label = ix86_expand_aligntest (destreg, 4);
11512 emit_insn (gen_strset (destreg, dst,
11513 (TARGET_64BIT
11514 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11515 : zeroreg)));
11516 ix86_adjust_counter (countreg, 4);
11517 emit_label (label);
11518 LABEL_NUSES (label) = 1;
11519 }
11520
11521 if (label && desired_alignment > 4 && !TARGET_64BIT)
11522 {
11523 emit_label (label);
11524 LABEL_NUSES (label) = 1;
11525 label = NULL_RTX;
11526 }
11527
11528 if (!TARGET_SINGLE_STRINGOP)
11529 emit_insn (gen_cld ());
11530 if (TARGET_64BIT)
11531 {
11532 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11533 GEN_INT (3)));
11534 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11535 }
11536 else
11537 {
11538 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11539 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11540 }
11541 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11542 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11543
11544 if (label)
11545 {
11546 emit_label (label);
11547 LABEL_NUSES (label) = 1;
11548 }
11549
11550 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11551 emit_insn (gen_strset (destreg, dst,
11552 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11553 if (TARGET_64BIT && (align <= 4 || count == 0))
11554 {
11555 rtx label = ix86_expand_aligntest (countreg, 4);
11556 emit_insn (gen_strset (destreg, dst,
11557 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11558 emit_label (label);
11559 LABEL_NUSES (label) = 1;
11560 }
11561 if (align > 2 && count != 0 && (count & 2))
11562 emit_insn (gen_strset (destreg, dst,
11563 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11564 if (align <= 2 || count == 0)
11565 {
11566 rtx label = ix86_expand_aligntest (countreg, 2);
11567 emit_insn (gen_strset (destreg, dst,
11568 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11569 emit_label (label);
11570 LABEL_NUSES (label) = 1;
11571 }
11572 if (align > 1 && count != 0 && (count & 1))
11573 emit_insn (gen_strset (destreg, dst,
11574 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11575 if (align <= 1 || count == 0)
11576 {
11577 rtx label = ix86_expand_aligntest (countreg, 1);
11578 emit_insn (gen_strset (destreg, dst,
11579 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11580 emit_label (label);
11581 LABEL_NUSES (label) = 1;
11582 }
11583 }
11584 return 1;
11585}
11586
11587/* Expand strlen. */
11588int
11589ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11590{
11591 rtx addr, scratch1, scratch2, scratch3, scratch4;
11592
11593  /* The generic case of the strlen expander is long.  Avoid expanding it
11594     unless TARGET_INLINE_ALL_STRINGOPS.  */
11595
11596 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11597 && !TARGET_INLINE_ALL_STRINGOPS
11598 && !optimize_size
11599 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11600 return 0;
11601
11602 addr = force_reg (Pmode, XEXP (src, 0));
11603 scratch1 = gen_reg_rtx (Pmode);
11604
11605 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11606 && !optimize_size)
11607 {
11608      /* Well it seems that some optimizer does not combine a call like
11609         foo(strlen(bar), strlen(bar));
11610         when the move and the subtraction are done here.  It does calculate
11611         the length just once when these instructions are done inside of
11612         output_strlen_unroll().  But I think that since &bar[strlen(bar)] is
11613         often used and I use one fewer register for the lifetime of
11614         output_strlen_unroll(), this is better.  */
11615
11616 emit_move_insn (out, addr);
11617
11618 ix86_expand_strlensi_unroll_1 (out, src, align);
11619
11620 /* strlensi_unroll_1 returns the address of the zero at the end of
11621 the string, like memchr(), so compute the length by subtracting
11622 the start address. */
11623 if (TARGET_64BIT)
11624 emit_insn (gen_subdi3 (out, out, addr));
11625 else
11626 emit_insn (gen_subsi3 (out, out, addr));
11627 }
11628 else
11629 {
11630 rtx unspec;
11631 scratch2 = gen_reg_rtx (Pmode);
11632 scratch3 = gen_reg_rtx (Pmode);
11633 scratch4 = force_reg (Pmode, constm1_rtx);
11634
11635 emit_move_insn (scratch3, addr);
11636 eoschar = force_reg (QImode, eoschar);
11637
11638 emit_insn (gen_cld ());
11639 src = replace_equiv_address_nv (src, scratch3);
11640
11641 /* If .md starts supporting :P, this can be done in .md. */
11642 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11643 scratch4), UNSPEC_SCAS);
11644 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
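      /* After repnz scasb the count register (initialized to -1 via scratch4
         and returned in scratch1) has been decremented once for every byte
         scanned, including the terminating zero, i.e. it holds -(len + 2);
         the one's complement and the add of -1 below therefore yield len.  */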
11645 if (TARGET_64BIT)
11646 {
11647 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11648 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11649 }
11650 else
11651 {
11652 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11653 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11654 }
11655 }
11656 return 1;
11657}
11658
11659/* Expand the appropriate insns for doing strlen if not just doing
11660 repnz; scasb
11661
11662 out = result, initialized with the start address
11663 align_rtx = alignment of the address.
11664    scratch = scratch register, initialized with the start address when
11665 not aligned, otherwise undefined
11666
11667 This is just the body. It needs the initializations mentioned above and
11668 some address computing at the end. These things are done in i386.md. */
11669
11670static void
11671ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11672{
11673 int align;
11674 rtx tmp;
11675 rtx align_2_label = NULL_RTX;
11676 rtx align_3_label = NULL_RTX;
11677 rtx align_4_label = gen_label_rtx ();
11678 rtx end_0_label = gen_label_rtx ();
11679 rtx mem;
11680 rtx tmpreg = gen_reg_rtx (SImode);
11681 rtx scratch = gen_reg_rtx (SImode);
11682 rtx cmp;
11683
11684 align = 0;
11685 if (GET_CODE (align_rtx) == CONST_INT)
11686 align = INTVAL (align_rtx);
11687
11688 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11689
11690 /* Is there a known alignment and is it less than 4? */
11691 if (align < 4)
11692 {
11693 rtx scratch1 = gen_reg_rtx (Pmode);
11694 emit_move_insn (scratch1, out);
11695 /* Is there a known alignment and is it not 2? */
11696 if (align != 2)
11697 {
11698 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11699 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11700
11701	      /* Leave just the two lower bits.  */
11702 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11703 NULL_RTX, 0, OPTAB_WIDEN);
11704
11705 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11706 Pmode, 1, align_4_label);
11707 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11708 Pmode, 1, align_2_label);
11709 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11710 Pmode, 1, align_3_label);
11711 }
11712 else
11713 {
11714	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
11715	     check whether it is aligned to a 4-byte boundary.  */
11716
11717 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11718 NULL_RTX, 0, OPTAB_WIDEN);
11719
11720 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11721 Pmode, 1, align_4_label);
11722 }
11723
11724 mem = change_address (src, QImode, out);
11725
11726 /* Now compare the bytes. */
11727
11728      /* Compare the first 1..3 unaligned bytes one byte at a time.  */
11729 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11730 QImode, 1, end_0_label);
11731
11732 /* Increment the address. */
11733 if (TARGET_64BIT)
11734 emit_insn (gen_adddi3 (out, out, const1_rtx));
11735 else
11736 emit_insn (gen_addsi3 (out, out, const1_rtx));
11737
11738 /* Not needed with an alignment of 2 */
11739 if (align != 2)
11740 {
11741 emit_label (align_2_label);
11742
11743 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11744 end_0_label);
11745
11746 if (TARGET_64BIT)
11747 emit_insn (gen_adddi3 (out, out, const1_rtx));
11748 else
11749 emit_insn (gen_addsi3 (out, out, const1_rtx));
11750
11751 emit_label (align_3_label);
11752 }
11753
11754 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11755 end_0_label);
11756
11757 if (TARGET_64BIT)
11758 emit_insn (gen_adddi3 (out, out, const1_rtx));
11759 else
11760 emit_insn (gen_addsi3 (out, out, const1_rtx));
11761 }
11762
11763  /* Generate a loop to check 4 bytes at a time.  It is not a good idea
11764     to align this loop; that only bloats the code and does not help
11765     speed.  */
11766 emit_label (align_4_label);
11767
11768 mem = change_address (src, SImode, out);
11769 emit_move_insn (scratch, mem);
11770 if (TARGET_64BIT)
11771 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11772 else
11773 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11774
11775  /* This formula yields a nonzero result iff one of the bytes is zero.
11776     This saves three branches inside the loop and many cycles.  */
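  /* Concretely, for the word X just loaded this computes
	(X - 0x01010101) & ~X & 0x80808080,
     which is nonzero iff some byte of X is zero; its least significant set
     bit lands in the sign position of the first (lowest-addressed) zero
     byte.  For example, X = 0x12003456 yields 0x00800000.  */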
11777
11778 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11779 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11780 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11781 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11782 gen_int_mode (0x80808080, SImode)));
11783 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11784 align_4_label);
11785
11786 if (TARGET_CMOVE)
11787 {
11788 rtx reg = gen_reg_rtx (SImode);
11789 rtx reg2 = gen_reg_rtx (Pmode);
11790 emit_move_insn (reg, tmpreg);
11791 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11792
11793 /* If zero is not in the first two bytes, move two bytes forward. */
11794 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11795 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11796 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11797 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11798 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11799 reg,
11800 tmpreg)));
11801 /* Emit lea manually to avoid clobbering of flags. */
11802 emit_insn (gen_rtx_SET (SImode, reg2,
11803 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11804
11805 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11806 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11807 emit_insn (gen_rtx_SET (VOIDmode, out,
11808 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11809 reg2,
11810 out)));
11811
11812 }
11813 else
11814 {
11815 rtx end_2_label = gen_label_rtx ();
11816 /* Is zero in the first two bytes? */
11817
11818 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11819 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11820 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11821 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11822 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11823 pc_rtx);
11824 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11825 JUMP_LABEL (tmp) = end_2_label;
11826
11827 /* Not in the first two. Move two bytes forward. */
11828 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11829 if (TARGET_64BIT)
11830 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11831 else
11832 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11833
11834 emit_label (end_2_label);
11835
11836 }
11837
11838 /* Avoid branch in fixing the byte. */
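  /* At this point the low byte of TMPREG has its sign bit set exactly when
     the zero byte is the first of the two remaining candidate bytes.
     Doubling TMPREG copies that bit into the carry flag, and the
     subtract-with-borrow below then backs OUT up by either 4 or 3 so that
     it points at the terminating zero.  */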
11839 tmpreg = gen_lowpart (QImode, tmpreg);
11840 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11841 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11842 if (TARGET_64BIT)
11843 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11844 else
11845 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11846
11847 emit_label (end_0_label);
11848}
11849
11850void
11851ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11852 rtx callarg2 ATTRIBUTE_UNUSED,
11853 rtx pop, int sibcall)
11854{
11855 rtx use = NULL, call;
11856
11857 if (pop == const0_rtx)
11858 pop = NULL;
11859 if (TARGET_64BIT && pop)
11860 abort ();
11861
11862#if TARGET_MACHO
11863 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11864 fnaddr = machopic_indirect_call_target (fnaddr);
11865#else
11866 /* Static functions and indirect calls don't need the pic register. */
11867 if (! TARGET_64BIT && flag_pic
11868 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11869 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11870 use_reg (&use, pic_offset_table_rtx);
11871
11872 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11873 {
11874 rtx al = gen_rtx_REG (QImode, 0);
11875 emit_move_insn (al, callarg2);
11876 use_reg (&use, al);
11877 }
11878#endif /* TARGET_MACHO */
11879
11880 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11881 {
11882 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11883 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11884 }
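  /* For a 64-bit indirect sibcall the target address must live in a
     register that is neither used for argument passing nor callee-saved
     (callee-saved registers are restored before the sibling jump); R11 is
     the conventional choice.  */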
11885 if (sibcall && TARGET_64BIT
11886 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11887 {
11888 rtx addr;
11889 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11890 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11891 emit_move_insn (fnaddr, addr);
11892 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11893 }
11894
11895 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11896 if (retval)
11897 call = gen_rtx_SET (VOIDmode, retval, call);
11898 if (pop)
11899 {
11900 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11901 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11902 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11903 }
11904
11905 call = emit_call_insn (call);
11906 if (use)
11907 CALL_INSN_FUNCTION_USAGE (call) = use;
11908}
11909
11910
11911/* Clear stack slot assignments remembered from previous functions.
11912 This is called from INIT_EXPANDERS once before RTL is emitted for each
11913 function. */
11914
11915static struct machine_function *
11916ix86_init_machine_status (void)
11917{
11918 struct machine_function *f;
11919
11920 f = ggc_alloc_cleared (sizeof (struct machine_function));
11921 f->use_fast_prologue_epilogue_nregs = -1;
11922
11923 return f;
11924}
11925
11926/* Return a MEM corresponding to a stack slot with mode MODE.
11927 Allocate a new slot if necessary.
11928
11929 The RTL for a function can have several slots available: N is
11930 which slot to use. */
11931
11932rtx
11933assign_386_stack_local (enum machine_mode mode, int n)
11934{
11935 struct stack_local_entry *s;
11936
11937 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11938 abort ();
11939
11940 for (s = ix86_stack_locals; s; s = s->next)
11941 if (s->mode == mode && s->n == n)
11942 return s->rtl;
11943
11944 s = (struct stack_local_entry *)
11945 ggc_alloc (sizeof (struct stack_local_entry));
11946 s->n = n;
11947 s->mode = mode;
11948 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11949
11950 s->next = ix86_stack_locals;
11951 ix86_stack_locals = s;
11952 return s->rtl;
11953}
11954
11955/* Construct the SYMBOL_REF for the tls_get_addr function. */
11956
11957static GTY(()) rtx ix86_tls_symbol;
11958rtx
11959ix86_tls_get_addr (void)
11960{
11961
11962 if (!ix86_tls_symbol)
11963 {
11964 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11965 (TARGET_GNU_TLS && !TARGET_64BIT)
11966 ? "___tls_get_addr"
11967 : "__tls_get_addr");
11968 }
11969
11970 return ix86_tls_symbol;
11971}
11972
11973/* Calculate the length of the memory address in the instruction
11974 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11975
11976static int
11977memory_address_length (rtx addr)
11978{
11979 struct ix86_address parts;
11980 rtx base, index, disp;
11981 int len;
11982
11983 if (GET_CODE (addr) == PRE_DEC
11984 || GET_CODE (addr) == POST_INC
11985 || GET_CODE (addr) == PRE_MODIFY
11986 || GET_CODE (addr) == POST_MODIFY)
11987 return 0;
11988
11989 if (! ix86_decompose_address (addr, &parts))
11990 abort ();
11991
11992 base = parts.base;
11993 index = parts.index;
11994 disp = parts.disp;
11995 len = 0;
11996
11997 /* Rule of thumb:
11998 - esp as the base always wants an index,
11999 - ebp as the base always wants a displacement. */
12000
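  /* A few examples of what this returns (displacement and SIB bytes only,
     never the modrm byte itself):  (%ecx) -> 0;  8(%ebp) -> 1 (disp8);
     (%esp) -> 1 (SIB);  16(%ebx,%esi,4) -> 2 (SIB + disp8);  a direct
     address with no base or index -> 4 (disp32).  */
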
12001 /* Register Indirect. */
12002 if (base && !index && !disp)
12003 {
12004 /* esp (for its index) and ebp (for its displacement) need
12005 the two-byte modrm form. */
12006 if (addr == stack_pointer_rtx
12007 || addr == arg_pointer_rtx
12008 || addr == frame_pointer_rtx
12009 || addr == hard_frame_pointer_rtx)
12010 len = 1;
12011 }
12012
12013 /* Direct Addressing. */
12014 else if (disp && !base && !index)
12015 len = 4;
12016
12017 else
12018 {
12019 /* Find the length of the displacement constant. */
12020 if (disp)
12021 {
12022 if (GET_CODE (disp) == CONST_INT
12023 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12024 && base)
12025 len = 1;
12026 else
12027 len = 4;
12028 }
12029 /* ebp always wants a displacement. */
12030 else if (base == hard_frame_pointer_rtx)
12031 len = 1;
12032
12033 /* An index requires the two-byte modrm form.... */
12034 if (index
12035 /* ...like esp, which always wants an index. */
12036 || base == stack_pointer_rtx
12037 || base == arg_pointer_rtx
12038 || base == frame_pointer_rtx)
12039 len += 1;
12040 }
12041
12042 return len;
12043}
12044
12045 /* Compute the default value for the "length_immediate" attribute.  When
12046    SHORTFORM is set, expect the insn to have an 8-bit immediate alternative.  */
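/* For example, "addl $8, %eax" can use the sign-extended 8-bit immediate
   form, so its immediate contributes 1 byte, whereas "addl $1000, %eax"
   contributes 4.  */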
12047int
12048ix86_attr_length_immediate_default (rtx insn, int shortform)
12049{
12050 int len = 0;
12051 int i;
12052 extract_insn_cached (insn);
12053 for (i = recog_data.n_operands - 1; i >= 0; --i)
12054 if (CONSTANT_P (recog_data.operand[i]))
12055 {
12056 if (len)
12057 abort ();
12058 if (shortform
12059 && GET_CODE (recog_data.operand[i]) == CONST_INT
12060 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12061 len = 1;
12062 else
12063 {
12064 switch (get_attr_mode (insn))
12065 {
12066 case MODE_QI:
12067 len+=1;
12068 break;
12069 case MODE_HI:
12070 len+=2;
12071 break;
12072 case MODE_SI:
12073 len+=4;
12074 break;
12075	    /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
12076 case MODE_DI:
12077 len+=4;
12078 break;
12079 default:
12080 fatal_insn ("unknown insn mode", insn);
12081 }
12082 }
12083 }
12084 return len;
12085}
12086/* Compute default value for "length_address" attribute. */
12087int
12088ix86_attr_length_address_default (rtx insn)
12089{
12090 int i;
12091
12092 if (get_attr_type (insn) == TYPE_LEA)
12093 {
12094 rtx set = PATTERN (insn);
12095 if (GET_CODE (set) == SET)
12096 ;
12097 else if (GET_CODE (set) == PARALLEL
12098 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12099 set = XVECEXP (set, 0, 0);
12100 else
12101 {
12102#ifdef ENABLE_CHECKING
12103 abort ();
12104#endif
12105 return 0;
12106 }
12107
12108 return memory_address_length (SET_SRC (set));
12109 }
12110
12111 extract_insn_cached (insn);
12112 for (i = recog_data.n_operands - 1; i >= 0; --i)
12113 if (GET_CODE (recog_data.operand[i]) == MEM)
12114 {
12115 return memory_address_length (XEXP (recog_data.operand[i], 0));
12117 }
12118 return 0;
12119}
12120
12121/* Return the maximum number of instructions a cpu can issue. */
12122
12123static int
12124ix86_issue_rate (void)
12125{
12126 switch (ix86_tune)
12127 {
12128 case PROCESSOR_PENTIUM:
12129 case PROCESSOR_K6:
12130 return 2;
12131
12132 case PROCESSOR_PENTIUMPRO:
12133 case PROCESSOR_PENTIUM4:
12134 case PROCESSOR_ATHLON:
12135 case PROCESSOR_K8:
12136 return 3;
12137
12138 default:
12139 return 1;
12140 }
12141}
12142
12143 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
12144    set by DEP_INSN and reads nothing else that DEP_INSN sets.  */
12145
12146static int
12147ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12148{
12149 rtx set, set2;
12150
12151 /* Simplify the test for uninteresting insns. */
12152 if (insn_type != TYPE_SETCC
12153 && insn_type != TYPE_ICMOV
12154 && insn_type != TYPE_FCMOV
12155 && insn_type != TYPE_IBR)
12156 return 0;
12157
12158 if ((set = single_set (dep_insn)) != 0)
12159 {
12160 set = SET_DEST (set);
12161 set2 = NULL_RTX;
12162 }
12163 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12164 && XVECLEN (PATTERN (dep_insn), 0) == 2
12165 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12166 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12167 {
12168 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12169      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12170 }
12171 else
12172 return 0;
12173
12174 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12175 return 0;
12176
12177 /* This test is true if the dependent insn reads the flags but
12178 not any other potentially set register. */
12179 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12180 return 0;
12181
12182 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12183 return 0;
12184
12185 return 1;
12186}
12187
12188/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12189 address with operands set by DEP_INSN. */
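/* For example, on the Pentium the sequence
	movl %ebx, %esi
	movl (%esi), %edx
   pays an extra cycle because the load's address register is written by
   the immediately preceding insn.  */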
12190
12191static int
12192ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12193{
12194 rtx addr;
12195
12196 if (insn_type == TYPE_LEA
12197 && TARGET_PENTIUM)
12198 {
12199 addr = PATTERN (insn);
12200 if (GET_CODE (addr) == SET)
12201 ;
12202 else if (GET_CODE (addr) == PARALLEL
12203 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12204 addr = XVECEXP (addr, 0, 0);
12205 else
12206 abort ();
12207 addr = SET_SRC (addr);
12208 }
12209 else
12210 {
12211 int i;
12212 extract_insn_cached (insn);
12213 for (i = recog_data.n_operands - 1; i >= 0; --i)
12214 if (GET_CODE (recog_data.operand[i]) == MEM)
12215 {
12216 addr = XEXP (recog_data.operand[i], 0);
12217 goto found;
12218 }
12219 return 0;
12220 found:;
12221 }
12222
12223 return modified_in_p (addr, dep_insn);
12224}
12225
12226static int
12227ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12228{
12229 enum attr_type insn_type, dep_insn_type;
12230 enum attr_memory memory, dep_memory;
12231 rtx set, set2;
12232 int dep_insn_code_number;
12233
12234 /* Anti and output dependencies have zero cost on all CPUs. */
12235 if (REG_NOTE_KIND (link) != 0)
12236 return 0;
12237
12238 dep_insn_code_number = recog_memoized (dep_insn);
12239
12240 /* If we can't recognize the insns, we can't really do anything. */
12241 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12242 return cost;
12243
12244 insn_type = get_attr_type (insn);
12245 dep_insn_type = get_attr_type (dep_insn);
12246
12247 switch (ix86_tune)
12248 {
12249 case PROCESSOR_PENTIUM:
12250 /* Address Generation Interlock adds a cycle of latency. */
12251 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12252 cost += 1;
12253
12254 /* ??? Compares pair with jump/setcc. */
12255 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12256 cost = 0;
12257
12258 /* Floating point stores require value to be ready one cycle earlier. */
12259 if (insn_type == TYPE_FMOV
12260 && get_attr_memory (insn) == MEMORY_STORE
12261 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12262 cost += 1;
12263 break;
12264
12265 case PROCESSOR_PENTIUMPRO:
12266 memory = get_attr_memory (insn);
12267 dep_memory = get_attr_memory (dep_insn);
12268
12269 /* Since we can't represent delayed latencies of load+operation,
12270 increase the cost here for non-imov insns. */
12271 if (dep_insn_type != TYPE_IMOV
12272 && dep_insn_type != TYPE_FMOV
12273 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12274 cost += 1;
12275
12276 /* INT->FP conversion is expensive. */
12277 if (get_attr_fp_int_src (dep_insn))
12278 cost += 5;
12279
12280 /* There is one cycle extra latency between an FP op and a store. */
12281 if (insn_type == TYPE_FMOV
12282 && (set = single_set (dep_insn)) != NULL_RTX
12283 && (set2 = single_set (insn)) != NULL_RTX
12284 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12285 && GET_CODE (SET_DEST (set2)) == MEM)
12286 cost += 1;
12287
12288      /* Model the ability of the reorder buffer to hide the latency of a
12289	  load by executing it in parallel with the previous instruction, when
12290	  that instruction is not needed to compute the address.  */
12291 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12292 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12293 {
12294	  /* Claim moves take one cycle, as the core can issue one load at a
12295	     time and the next load can start a cycle later.  */
12296 if (dep_insn_type == TYPE_IMOV
12297 || dep_insn_type == TYPE_FMOV)
12298 cost = 1;
12299 else if (cost > 1)
12300 cost--;
12301 }
12302 break;
12303
12304 case PROCESSOR_K6:
12305 memory = get_attr_memory (insn);
12306 dep_memory = get_attr_memory (dep_insn);
12307 /* The esp dependency is resolved before the instruction is really
12308 finished. */
12309 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12310 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12311 return 1;
12312
12313 /* Since we can't represent delayed latencies of load+operation,
12314 increase the cost here for non-imov insns. */
12315 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12316 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12317
12318 /* INT->FP conversion is expensive. */
12319 if (get_attr_fp_int_src (dep_insn))
12320 cost += 5;
12321
12322      /* Model the ability of the reorder buffer to hide the latency of a
12323	  load by executing it in parallel with the previous instruction, when
12324	  that instruction is not needed to compute the address.  */
12325 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12326 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12327 {
12328	  /* Claim moves take one cycle, as the core can issue one load at a
12329	     time and the next load can start a cycle later.  */
12330 if (dep_insn_type == TYPE_IMOV
12331 || dep_insn_type == TYPE_FMOV)
12332 cost = 1;
12333 else if (cost > 2)
12334 cost -= 2;
12335 else
12336 cost = 1;
12337 }
12338 break;
12339
12340 case PROCESSOR_ATHLON:
12341 case PROCESSOR_K8:
12342 memory = get_attr_memory (insn);
12343 dep_memory = get_attr_memory (dep_insn);
12344
12345      /* Model the ability of the reorder buffer to hide the latency of a
12346	  load by executing it in parallel with the previous instruction, when
12347	  that instruction is not needed to compute the address.  */
12348 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12349 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12350 {
12351 enum attr_unit unit = get_attr_unit (insn);
12352 int loadcost = 3;
12353
12354	     /* Because of the difference in length between the integer and
12355		floating-point unit pipeline preparation stages, memory operands
12356		for floating point are cheaper.
12357
12358		??? For Athlon the difference is most probably 2.  */
12359 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12360 loadcost = 3;
12361 else
12362 loadcost = TARGET_ATHLON ? 2 : 0;
12363
12364 if (cost >= loadcost)
12365 cost -= loadcost;
12366 else
12367 cost = 0;
12368 }
12369
12370 default:
12371 break;
12372 }
12373
12374 return cost;
12375}
12376
12377static union
12378{
12379 struct ppro_sched_data
12380 {
12381 rtx decode[3];
12382 int issued_this_cycle;
12383 } ppro;
12384} ix86_sched_data;
12385
12386static enum attr_ppro_uops
12387ix86_safe_ppro_uops (rtx insn)
12388{
12389 if (recog_memoized (insn) >= 0)
12390 return get_attr_ppro_uops (insn);
12391 else
12392 return PPRO_UOPS_MANY;
12393}
12394
12395static void
12396ix86_dump_ppro_packet (FILE *dump)
12397{
12398 if (ix86_sched_data.ppro.decode[0])
12399 {
12400 fprintf (dump, "PPRO packet: %d",
12401 INSN_UID (ix86_sched_data.ppro.decode[0]));
12402 if (ix86_sched_data.ppro.decode[1])
12403 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12404 if (ix86_sched_data.ppro.decode[2])
12405 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12406 fputc ('\n', dump);
12407 }
12408}
12409
12410/* We're beginning a new block. Initialize data structures as necessary. */
12411
12412static void
12413ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12414 int sched_verbose ATTRIBUTE_UNUSED,
12415 int veclen ATTRIBUTE_UNUSED)
12416{
12417 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12418}
12419
12420/* Shift INSN to SLOT, and shift everything else down. */
12421
12422static void
12423ix86_reorder_insn (rtx *insnp, rtx *slot)
12424{
12425 if (insnp != slot)
12426 {
12427 rtx insn = *insnp;
12428 do
12429 insnp[0] = insnp[1];
12430 while (++insnp != slot);
12431 *insnp = insn;
12432 }
12433}
12434
12435static void
12436ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12437{
12438 rtx decode[3];
12439 enum attr_ppro_uops cur_uops;
12440 int issued_this_cycle;
12441 rtx *insnp;
12442 int i;
12443
12444 /* At this point .ppro.decode contains the state of the three
12445 decoders from last "cycle". That is, those insns that were
12446 actually independent. But here we're scheduling for the
12447 decoder, and we may find things that are decodable in the
12448 same cycle. */
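  /* The decode model assumed here is the PPro 4-1-1 template: decoder 0
     accepts insns of up to four uops, while decoders 1 and 2 accept only
     single-uop insns, so anything larger must wait for decoder 0.  */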
12449
12450 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12451 issued_this_cycle = 0;
12452
12453 insnp = e_ready;
12454 cur_uops = ix86_safe_ppro_uops (*insnp);
12455
12456  /* If the decoders are empty, and we have a complex insn at the
12457     head of the priority queue, let it issue without complaint.  */
12458 if (decode[0] == NULL)
12459 {
12460 if (cur_uops == PPRO_UOPS_MANY)
12461 {
12462 decode[0] = *insnp;
12463 goto ppro_done;
12464 }
12465
12466      /* Otherwise, search for a 2-4 uop insn to issue.  */
12467 while (cur_uops != PPRO_UOPS_FEW)
12468 {
12469 if (insnp == ready)
12470 break;
12471 cur_uops = ix86_safe_ppro_uops (*--insnp);
12472 }
12473
12474 /* If so, move it to the head of the line. */
12475 if (cur_uops == PPRO_UOPS_FEW)
12476 ix86_reorder_insn (insnp, e_ready);
12477
12478 /* Issue the head of the queue. */
12479 issued_this_cycle = 1;
12480 decode[0] = *e_ready--;
12481 }
12482
12483 /* Look for simple insns to fill in the other two slots. */
12484 for (i = 1; i < 3; ++i)
12485 if (decode[i] == NULL)
12486 {
12487 if (ready > e_ready)
12488 goto ppro_done;
12489
12490 insnp = e_ready;
12491 cur_uops = ix86_safe_ppro_uops (*insnp);
12492 while (cur_uops != PPRO_UOPS_ONE)
12493 {
12494 if (insnp == ready)
12495 break;
12496 cur_uops = ix86_safe_ppro_uops (*--insnp);
12497 }
12498
12499 /* Found one. Move it to the head of the queue and issue it. */
12500 if (cur_uops == PPRO_UOPS_ONE)
12501 {
12502 ix86_reorder_insn (insnp, e_ready);
12503 decode[i] = *e_ready--;
12504 issued_this_cycle++;
12505 continue;
12506 }
12507
12508 /* ??? Didn't find one. Ideally, here we would do a lazy split
12509 of 2-uop insns, issue one and queue the other. */
12510 }
12511
12512 ppro_done:
12513 if (issued_this_cycle == 0)
12514 issued_this_cycle = 1;
12515 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12516}
12517
12518 /* We are about to begin issuing insns for this clock cycle.
12519    Override the default sort algorithm to better slot instructions.  */
12520static int
12521ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12522 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12523 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12524{
12525 int n_ready = *n_readyp;
12526 rtx *e_ready = ready + n_ready - 1;
12527
12528 /* Make sure to go ahead and initialize key items in
12529 ix86_sched_data if we are not going to bother trying to
12530 reorder the ready queue. */
12531 if (n_ready < 2)
12532 {
12533 ix86_sched_data.ppro.issued_this_cycle = 1;
12534 goto out;
12535 }
12536
12537 switch (ix86_tune)
12538 {
12539 default:
12540 break;
12541
12542 case PROCESSOR_PENTIUMPRO:
12543 ix86_sched_reorder_ppro (ready, e_ready);
12544 break;
12545 }
12546
12547out:
12548 return ix86_issue_rate ();
12549}
12550
12551/* We are about to issue INSN. Return the number of insns left on the
12552 ready queue that can be issued this cycle. */
12553
12554static int
12555ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12556 int can_issue_more)
12557{
12558 int i;
12559 switch (ix86_tune)
12560 {
12561 default:
12562 return can_issue_more - 1;
12563
12564 case PROCESSOR_PENTIUMPRO:
12565 {
12566 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12567
12568 if (uops == PPRO_UOPS_MANY)
12569 {
12570 if (sched_verbose)
12571 ix86_dump_ppro_packet (dump);
12572 ix86_sched_data.ppro.decode[0] = insn;
12573 ix86_sched_data.ppro.decode[1] = NULL;
12574 ix86_sched_data.ppro.decode[2] = NULL;
12575 if (sched_verbose)
12576 ix86_dump_ppro_packet (dump);
12577 ix86_sched_data.ppro.decode[0] = NULL;
12578 }
12579 else if (uops == PPRO_UOPS_FEW)
12580 {
12581 if (sched_verbose)
12582 ix86_dump_ppro_packet (dump);
12583 ix86_sched_data.ppro.decode[0] = insn;
12584 ix86_sched_data.ppro.decode[1] = NULL;
12585 ix86_sched_data.ppro.decode[2] = NULL;
12586 }
12587 else
12588 {
12589 for (i = 0; i < 3; ++i)
12590 if (ix86_sched_data.ppro.decode[i] == NULL)
12591 {
12592 ix86_sched_data.ppro.decode[i] = insn;
12593 break;
12594 }
12595 if (i == 3)
12596 abort ();
12597 if (i == 2)
12598 {
12599 if (sched_verbose)
12600 ix86_dump_ppro_packet (dump);
12601 ix86_sched_data.ppro.decode[0] = NULL;
12602 ix86_sched_data.ppro.decode[1] = NULL;
12603 ix86_sched_data.ppro.decode[2] = NULL;
12604 }
12605 }
12606 }
12607 return --ix86_sched_data.ppro.issued_this_cycle;
12608 }
12609}
12610
12611static int
12612ia32_use_dfa_pipeline_interface (void)
12613{
12614 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12615 return 1;
12616 return 0;
12617}
12618
12619 /* How many alternative schedules to try.  This should be as wide as the
12620    scheduling freedom in the DFA, but no wider.  Making this value too
12621    large results in extra work for the scheduler.  */
12622
12623static int
12624ia32_multipass_dfa_lookahead (void)
12625{
12626 if (ix86_tune == PROCESSOR_PENTIUM)
12627 return 2;
12628 else
12629 return 0;
12630}
12631
12632
12633/* Compute the alignment given to a constant that is being placed in memory.
12634 EXP is the constant and ALIGN is the alignment that the object would
12635 ordinarily have.
12636 The value of this function is used instead of that alignment to align
12637 the object. */
12638
12639int
12640ix86_constant_alignment (tree exp, int align)
12641{
12642 if (TREE_CODE (exp) == REAL_CST)
12643 {
12644 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12645 return 64;
12646 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12647 return 128;
12648 }
12649 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12650 && !TARGET_NO_ALIGN_LONG_STRINGS
12651 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12652 return BITS_PER_WORD;
12653
12654 return align;
12655}
12656
12657/* Compute the alignment for a static variable.
12658 TYPE is the data type, and ALIGN is the alignment that
12659 the object would ordinarily have. The value of this function is used
12660 instead of that alignment to align the object. */
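/* For example, under the first rule below a 40-byte static aggregate
   (320 bits) whose type only demands byte alignment is given 256-bit
   (32-byte) alignment.  */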
12661
12662int
12663ix86_data_alignment (tree type, int align)
12664{
12665 if (AGGREGATE_TYPE_P (type)
12666 && TYPE_SIZE (type)
12667 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12668 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12669 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12670 return 256;
12671
12672  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12673     to a 16-byte boundary.  */
12674 if (TARGET_64BIT)
12675 {
12676 if (AGGREGATE_TYPE_P (type)
12677 && TYPE_SIZE (type)
12678 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12679 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12680 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12681 return 128;
12682 }
12683
12684 if (TREE_CODE (type) == ARRAY_TYPE)
12685 {
12686 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12687 return 64;
12688 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12689 return 128;
12690 }
12691 else if (TREE_CODE (type) == COMPLEX_TYPE)
12692 {
12693
12694 if (TYPE_MODE (type) == DCmode && align < 64)
12695 return 64;
12696 if (TYPE_MODE (type) == XCmode && align < 128)
12697 return 128;
12698 }
12699 else if ((TREE_CODE (type) == RECORD_TYPE
12700 || TREE_CODE (type) == UNION_TYPE
12701 || TREE_CODE (type) == QUAL_UNION_TYPE)
12702 && TYPE_FIELDS (type))
12703 {
12704 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12705 return 64;
12706 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12707 return 128;
12708 }
12709 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12710 || TREE_CODE (type) == INTEGER_TYPE)
12711 {
12712 if (TYPE_MODE (type) == DFmode && align < 64)
12713 return 64;
12714 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12715 return 128;
12716 }
12717
12718 return align;
12719}
12720
12721/* Compute the alignment for a local variable.
12722 TYPE is the data type, and ALIGN is the alignment that
12723 the object would ordinarily have. The value of this macro is used
12724 instead of that alignment to align the object. */
12725
12726int
12727ix86_local_alignment (tree type, int align)
12728{
12729  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12730     to a 16-byte boundary.  */
12731 if (TARGET_64BIT)
12732 {
12733 if (AGGREGATE_TYPE_P (type)
12734 && TYPE_SIZE (type)
12735 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12736 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12737 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12738 return 128;
12739 }
12740 if (TREE_CODE (type) == ARRAY_TYPE)
12741 {
12742 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12743 return 64;
12744 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12745 return 128;
12746 }
12747 else if (TREE_CODE (type) == COMPLEX_TYPE)
12748 {
12749 if (TYPE_MODE (type) == DCmode && align < 64)
12750 return 64;
12751 if (TYPE_MODE (type) == XCmode && align < 128)
12752 return 128;
12753 }
12754 else if ((TREE_CODE (type) == RECORD_TYPE
12755 || TREE_CODE (type) == UNION_TYPE
12756 || TREE_CODE (type) == QUAL_UNION_TYPE)
12757 && TYPE_FIELDS (type))
12758 {
12759 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12760 return 64;
12761 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12762 return 128;
12763 }
12764 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12765 || TREE_CODE (type) == INTEGER_TYPE)
12766 {
12767
12768 if (TYPE_MODE (type) == DFmode && align < 64)
12769 return 64;
12770 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12771 return 128;
12772 }
12773 return align;
12774}
12775
12776/* Emit RTL insns to initialize the variable parts of a trampoline.
12777 FNADDR is an RTX for the address of the function's pure code.
12778 CXT is an RTX for the static chain value for the function. */
12779void
12780x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12781{
12782 if (!TARGET_64BIT)
12783 {
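      /* The generated trampoline is 10 bytes:
	    0:	b9 <cxt>	movl $CXT, %ecx
	    5:	e9 <disp32>	jmp  FNADDR
	 where <disp32> is relative to the end of the jmp, i.e. TRAMP + 10.  */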
12784 /* Compute offset from the end of the jmp to the target function. */
12785 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12786 plus_constant (tramp, 10),
12787 NULL_RTX, 1, OPTAB_DIRECT);
12788 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12789 gen_int_mode (0xb9, QImode));
12790 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12791 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12792 gen_int_mode (0xe9, QImode));
12793 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12794 }
12795 else
12796 {
12797 int offset = 0;
12798      /* Try to load the address using the shorter movl instead of movabs.
12799         We may want to support movq for kernel mode, but the kernel does
12800         not use trampolines at the moment.  */
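      /* Assuming the movabs form is needed, the trampoline built below is:
	    49 bb <imm64>	movabs $FNADDR, %r11
	    49 ba <imm64>	movabs $CXT, %r10
	    49 ff e3		jmpq   *%r11
	 With a zero-extendable address the first insn instead becomes
	 "41 bb <imm32>", movl $FNADDR, %r11d.  */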
12801 if (x86_64_zero_extended_value (fnaddr))
12802 {
12803 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12804 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12805 gen_int_mode (0xbb41, HImode));
12806 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12807 gen_lowpart (SImode, fnaddr));
12808 offset += 6;
12809 }
12810 else
12811 {
12812 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12813 gen_int_mode (0xbb49, HImode));
12814 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12815 fnaddr);
12816 offset += 10;
12817 }
12818 /* Load static chain using movabs to r10. */
12819 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12820 gen_int_mode (0xba49, HImode));
12821 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12822 cxt);
12823 offset += 10;
12824      /* Jump to r11.  */
12825 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12826 gen_int_mode (0xff49, HImode));
12827 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12828 gen_int_mode (0xe3, QImode));
12829 offset += 3;
12830 if (offset > TRAMPOLINE_SIZE)
12831 abort ();
12832 }
12833
12834#ifdef ENABLE_EXECUTE_STACK
12835 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12836 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12837#endif
12838}
12839
12840#define def_builtin(MASK, NAME, TYPE, CODE) \
12841do { \
12842 if ((MASK) & target_flags \
12843 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12844 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12845 NULL, NULL_TREE); \
12846} while (0)
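/* def_builtin is used later in this file roughly as
     def_builtin (MASK_SSE, "__builtin_ia32_addps", ftype, IX86_BUILTIN_ADDPS);
   where FTYPE is a FUNCTION_TYPE tree built elsewhere; the builtin is
   registered only when the corresponding ISA mask bits are enabled, and
   MASK_64BIT entries only on 64-bit targets.  */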
12847
12848struct builtin_description
12849{
12850 const unsigned int mask;
12851 const enum insn_code icode;
12852 const char *const name;
12853 const enum ix86_builtins code;
12854 const enum rtx_code comparison;
12855 const unsigned int flag;
12856};
12857
12858static const struct builtin_description bdesc_comi[] =
12859{
12860 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12861 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12862 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12863 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12864 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12865 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12866 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12867 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12868 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12869 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12870 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12871 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12872 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12873 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12874 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12875 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12876 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12877 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12878 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12879 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12880 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12881 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12882 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12883 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12884};
12885
12886static const struct builtin_description bdesc_2arg[] =
12887{
12888 /* SSE */
12889 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12890 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12891 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12892 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12893 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12894 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12895 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12896 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12897
12898 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12899 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12900 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12901 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12902 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12903 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12904 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12905 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12906 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12907 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12908 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12909 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12910 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12911 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12912 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12913 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12914 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12915 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12916 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12917 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12918
12919 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12920 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12921 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12922 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12923
12924 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12925 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12926 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12927 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12928
12929 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12930 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12931 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12932 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12933 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12934
12935 /* MMX */
12936 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12937 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12938 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12939 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12940 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12941 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12942 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12943 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12944
12945 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12946 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12947 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12948 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12949 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12950 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12951 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12952 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12953
12954 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12955 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12956 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12957
12958 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12959 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12960 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12961 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12962
12963 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12964 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12965
12966 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12967 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12968 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12969 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12970 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12971 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12972
12973 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12974 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12975 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12976 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12977
12978 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12979 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12980 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12981 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12982 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12983 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12984
12985 /* Special. */
12986 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12987 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12988 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12989
12990 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12991 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12992 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12993
12994 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12995 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12996 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12997 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12998 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12999 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
13000
13001 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
13002 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
13003 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
13004 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
13005 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
13006 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
13007
13008 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
13009 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
13010 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
13011 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
13012
13013 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
13014 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
13015
13016 /* SSE2 */
13017 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
13025
13026 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
13027 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
13028 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
13029 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
13030 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
13031 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
13032 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
13033 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
13034 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
13035 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
13036 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
13037 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
13038 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13039 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13040 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13041 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13042 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
13043 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
13044 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
13045 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
13046
13047 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13051
13052 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13056
13057 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13060
13061 /* SSE2 MMX */
13062 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13063 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13065 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13067 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13068 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13069 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13070
13071 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13072 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13073 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13074 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13075 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13076 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13077 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13078 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13079
13080 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13081 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13082 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13083 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13084
13085 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13086 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13087 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13088 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13089
13090 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13091 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13092
13093 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13094 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13095 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13096 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13097 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13098 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13099
13100 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13101 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13102 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13103 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13104
13105 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13106 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13107 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13109 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13111 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13112 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13113
13114 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13115 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13116 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13117
13118 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13119 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13120
13121 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13122 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13124 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13125 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13126 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13127
13128 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13129 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13130 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13131 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13132 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13133 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13134
13135 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13136 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13137 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13138 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13139
13140 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13141
13142 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13143 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13144 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13145 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13146
13147 /* SSE3 */
13148 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13149 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13150 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13151 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13152 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13153 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13154};
13155
13156static const struct builtin_description bdesc_1arg[] =
13157{
13158 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13159 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13160
13161 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13162 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13163 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13164
13165 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13166 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13167 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13168 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13169 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13170 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13171
13172 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13173 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13174 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13175 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13176
13177 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13178
13179 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13180 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13181
13182 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13183 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13184 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13185 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13186 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13187
13188 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13189
13190 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13191 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13192 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13193 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13194
13195 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13196 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13197 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13198
13199 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13200
13201 /* SSE3 */
13202 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13203 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13204 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13205};
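/* Entries in the tables above that carry a builtin name are registered
   mechanically by the table-driven loops in ix86_init_mmx_sse_builtins
   below; entries whose name field is 0 are instead registered there by
   explicit def_builtin calls, which pick a more precise function type
   for the same IX86_BUILTIN_* code.  */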
13206
13207void
13208ix86_init_builtins (void)
13209{
13210 if (TARGET_MMX)
13211 ix86_init_mmx_sse_builtins ();
13212}
13213
13214/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13215 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13216 builtins. */
13217static void
13218ix86_init_mmx_sse_builtins (void)
13219{
13220 const struct builtin_description * d;
13221 size_t i;
13222
13223 tree pchar_type_node = build_pointer_type (char_type_node);
13224 tree pcchar_type_node = build_pointer_type (
13225 build_type_variant (char_type_node, 1, 0));
13226 tree pfloat_type_node = build_pointer_type (float_type_node);
13227 tree pcfloat_type_node = build_pointer_type (
13228 build_type_variant (float_type_node, 1, 0));
13229 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13230 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13231 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13232
13233 /* Comparisons. */
13234 tree int_ftype_v4sf_v4sf
13235 = build_function_type_list (integer_type_node,
13236 V4SF_type_node, V4SF_type_node, NULL_TREE);
13237 tree v4si_ftype_v4sf_v4sf
13238 = build_function_type_list (V4SI_type_node,
13239 V4SF_type_node, V4SF_type_node, NULL_TREE);
13240 /* MMX/SSE/integer conversions. */
13241 tree int_ftype_v4sf
13242 = build_function_type_list (integer_type_node,
13243 V4SF_type_node, NULL_TREE);
13244 tree int64_ftype_v4sf
13245 = build_function_type_list (long_long_integer_type_node,
13246 V4SF_type_node, NULL_TREE);
13247 tree int_ftype_v8qi
13248 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13249 tree v4sf_ftype_v4sf_int
13250 = build_function_type_list (V4SF_type_node,
13251 V4SF_type_node, integer_type_node, NULL_TREE);
13252 tree v4sf_ftype_v4sf_int64
13253 = build_function_type_list (V4SF_type_node,
13254 V4SF_type_node, long_long_integer_type_node,
13255 NULL_TREE);
13256 tree v4sf_ftype_v4sf_v2si
13257 = build_function_type_list (V4SF_type_node,
13258 V4SF_type_node, V2SI_type_node, NULL_TREE);
13259 tree int_ftype_v4hi_int
13260 = build_function_type_list (integer_type_node,
13261 V4HI_type_node, integer_type_node, NULL_TREE);
13262 tree v4hi_ftype_v4hi_int_int
13263 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13264 integer_type_node, integer_type_node,
13265 NULL_TREE);
13266 /* Miscellaneous. */
13267 tree v8qi_ftype_v4hi_v4hi
13268 = build_function_type_list (V8QI_type_node,
13269 V4HI_type_node, V4HI_type_node, NULL_TREE);
13270 tree v4hi_ftype_v2si_v2si
13271 = build_function_type_list (V4HI_type_node,
13272 V2SI_type_node, V2SI_type_node, NULL_TREE);
13273 tree v4sf_ftype_v4sf_v4sf_int
13274 = build_function_type_list (V4SF_type_node,
13275 V4SF_type_node, V4SF_type_node,
13276 integer_type_node, NULL_TREE);
13277 tree v2si_ftype_v4hi_v4hi
13278 = build_function_type_list (V2SI_type_node,
13279 V4HI_type_node, V4HI_type_node, NULL_TREE);
13280 tree v4hi_ftype_v4hi_int
13281 = build_function_type_list (V4HI_type_node,
13282 V4HI_type_node, integer_type_node, NULL_TREE);
13283 tree v4hi_ftype_v4hi_di
13284 = build_function_type_list (V4HI_type_node,
13285 V4HI_type_node, long_long_unsigned_type_node,
13286 NULL_TREE);
13287 tree v2si_ftype_v2si_di
13288 = build_function_type_list (V2SI_type_node,
13289 V2SI_type_node, long_long_unsigned_type_node,
13290 NULL_TREE);
13291 tree void_ftype_void
13292 = build_function_type (void_type_node, void_list_node);
13293 tree void_ftype_unsigned
13294 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13295 tree void_ftype_unsigned_unsigned
13296 = build_function_type_list (void_type_node, unsigned_type_node,
13297 unsigned_type_node, NULL_TREE);
13298 tree void_ftype_pcvoid_unsigned_unsigned
13299 = build_function_type_list (void_type_node, const_ptr_type_node,
13300 unsigned_type_node, unsigned_type_node,
13301 NULL_TREE);
13302 tree unsigned_ftype_void
13303 = build_function_type (unsigned_type_node, void_list_node);
13304 tree di_ftype_void
13305 = build_function_type (long_long_unsigned_type_node, void_list_node);
13306 tree v4sf_ftype_void
13307 = build_function_type (V4SF_type_node, void_list_node);
13308 tree v2si_ftype_v4sf
13309 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13310 /* Loads/stores. */
13311 tree void_ftype_v8qi_v8qi_pchar
13312 = build_function_type_list (void_type_node,
13313 V8QI_type_node, V8QI_type_node,
13314 pchar_type_node, NULL_TREE);
13315 tree v4sf_ftype_pcfloat
13316 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13317 /* @@@ the type is bogus */
13318 tree v4sf_ftype_v4sf_pv2si
13319 = build_function_type_list (V4SF_type_node,
13320 V4SF_type_node, pv2si_type_node, NULL_TREE);
13321 tree void_ftype_pv2si_v4sf
13322 = build_function_type_list (void_type_node,
13323 pv2si_type_node, V4SF_type_node, NULL_TREE);
13324 tree void_ftype_pfloat_v4sf
13325 = build_function_type_list (void_type_node,
13326 pfloat_type_node, V4SF_type_node, NULL_TREE);
13327 tree void_ftype_pdi_di
13328 = build_function_type_list (void_type_node,
13329 pdi_type_node, long_long_unsigned_type_node,
13330 NULL_TREE);
13331 tree void_ftype_pv2di_v2di
13332 = build_function_type_list (void_type_node,
13333 pv2di_type_node, V2DI_type_node, NULL_TREE);
13334 /* Normal vector unops. */
13335 tree v4sf_ftype_v4sf
13336 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13337
13338 /* Normal vector binops. */
13339 tree v4sf_ftype_v4sf_v4sf
13340 = build_function_type_list (V4SF_type_node,
13341 V4SF_type_node, V4SF_type_node, NULL_TREE);
13342 tree v8qi_ftype_v8qi_v8qi
13343 = build_function_type_list (V8QI_type_node,
13344 V8QI_type_node, V8QI_type_node, NULL_TREE);
13345 tree v4hi_ftype_v4hi_v4hi
13346 = build_function_type_list (V4HI_type_node,
13347 V4HI_type_node, V4HI_type_node, NULL_TREE);
13348 tree v2si_ftype_v2si_v2si
13349 = build_function_type_list (V2SI_type_node,
13350 V2SI_type_node, V2SI_type_node, NULL_TREE);
13351 tree di_ftype_di_di
13352 = build_function_type_list (long_long_unsigned_type_node,
13353 long_long_unsigned_type_node,
13354 long_long_unsigned_type_node, NULL_TREE);
13355
13356 tree v2si_ftype_v2sf
13357 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13358 tree v2sf_ftype_v2si
13359 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13360 tree v2si_ftype_v2si
13361 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13362 tree v2sf_ftype_v2sf
13363 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13364 tree v2sf_ftype_v2sf_v2sf
13365 = build_function_type_list (V2SF_type_node,
13366 V2SF_type_node, V2SF_type_node, NULL_TREE);
13367 tree v2si_ftype_v2sf_v2sf
13368 = build_function_type_list (V2SI_type_node,
13369 V2SF_type_node, V2SF_type_node, NULL_TREE);
13370 tree pint_type_node = build_pointer_type (integer_type_node);
13371 tree pcint_type_node = build_pointer_type (
13372 build_type_variant (integer_type_node, 1, 0));
13373 tree pdouble_type_node = build_pointer_type (double_type_node);
13374 tree pcdouble_type_node = build_pointer_type (
13375 build_type_variant (double_type_node, 1, 0));
13376 tree int_ftype_v2df_v2df
13377 = build_function_type_list (integer_type_node,
13378 V2DF_type_node, V2DF_type_node, NULL_TREE);
13379
13380 tree ti_ftype_void
13381 = build_function_type (intTI_type_node, void_list_node);
13382 tree v2di_ftype_void
13383 = build_function_type (V2DI_type_node, void_list_node);
13384 tree ti_ftype_ti_ti
13385 = build_function_type_list (intTI_type_node,
13386 intTI_type_node, intTI_type_node, NULL_TREE);
13387 tree void_ftype_pcvoid
13388 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13389 tree v2di_ftype_di
13390 = build_function_type_list (V2DI_type_node,
13391 long_long_unsigned_type_node, NULL_TREE);
13392 tree di_ftype_v2di
13393 = build_function_type_list (long_long_unsigned_type_node,
13394 V2DI_type_node, NULL_TREE);
13395 tree v4sf_ftype_v4si
13396 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13397 tree v4si_ftype_v4sf
13398 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13399 tree v2df_ftype_v4si
13400 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13401 tree v4si_ftype_v2df
13402 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13403 tree v2si_ftype_v2df
13404 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13405 tree v4sf_ftype_v2df
13406 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13407 tree v2df_ftype_v2si
13408 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13409 tree v2df_ftype_v4sf
13410 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13411 tree int_ftype_v2df
13412 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13413 tree int64_ftype_v2df
13414 = build_function_type_list (long_long_integer_type_node,
13415 V2DF_type_node, NULL_TREE);
13416 tree v2df_ftype_v2df_int
13417 = build_function_type_list (V2DF_type_node,
13418 V2DF_type_node, integer_type_node, NULL_TREE);
13419 tree v2df_ftype_v2df_int64
13420 = build_function_type_list (V2DF_type_node,
13421 V2DF_type_node, long_long_integer_type_node,
13422 NULL_TREE);
13423 tree v4sf_ftype_v4sf_v2df
13424 = build_function_type_list (V4SF_type_node,
13425 V4SF_type_node, V2DF_type_node, NULL_TREE);
13426 tree v2df_ftype_v2df_v4sf
13427 = build_function_type_list (V2DF_type_node,
13428 V2DF_type_node, V4SF_type_node, NULL_TREE);
13429 tree v2df_ftype_v2df_v2df_int
13430 = build_function_type_list (V2DF_type_node,
13431 V2DF_type_node, V2DF_type_node,
13432 integer_type_node,
13433 NULL_TREE);
13434 tree v2df_ftype_v2df_pv2si
13435 = build_function_type_list (V2DF_type_node,
13436 V2DF_type_node, pv2si_type_node, NULL_TREE);
13437 tree void_ftype_pv2si_v2df
13438 = build_function_type_list (void_type_node,
13439 pv2si_type_node, V2DF_type_node, NULL_TREE);
13440 tree void_ftype_pdouble_v2df
13441 = build_function_type_list (void_type_node,
13442 pdouble_type_node, V2DF_type_node, NULL_TREE);
13443 tree void_ftype_pint_int
13444 = build_function_type_list (void_type_node,
13445 pint_type_node, integer_type_node, NULL_TREE);
13446 tree void_ftype_v16qi_v16qi_pchar
13447 = build_function_type_list (void_type_node,
13448 V16QI_type_node, V16QI_type_node,
13449 pchar_type_node, NULL_TREE);
13450 tree v2df_ftype_pcdouble
13451 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13452 tree v2df_ftype_v2df_v2df
13453 = build_function_type_list (V2DF_type_node,
13454 V2DF_type_node, V2DF_type_node, NULL_TREE);
13455 tree v16qi_ftype_v16qi_v16qi
13456 = build_function_type_list (V16QI_type_node,
13457 V16QI_type_node, V16QI_type_node, NULL_TREE);
13458 tree v8hi_ftype_v8hi_v8hi
13459 = build_function_type_list (V8HI_type_node,
13460 V8HI_type_node, V8HI_type_node, NULL_TREE);
13461 tree v4si_ftype_v4si_v4si
13462 = build_function_type_list (V4SI_type_node,
13463 V4SI_type_node, V4SI_type_node, NULL_TREE);
13464 tree v2di_ftype_v2di_v2di
13465 = build_function_type_list (V2DI_type_node,
13466 V2DI_type_node, V2DI_type_node, NULL_TREE);
13467 tree v2di_ftype_v2df_v2df
13468 = build_function_type_list (V2DI_type_node,
13469 V2DF_type_node, V2DF_type_node, NULL_TREE);
13470 tree v2df_ftype_v2df
13471 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13472 tree v2df_ftype_double
13473 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13474 tree v2df_ftype_double_double
13475 = build_function_type_list (V2DF_type_node,
13476 double_type_node, double_type_node, NULL_TREE);
13477 tree int_ftype_v8hi_int
13478 = build_function_type_list (integer_type_node,
13479 V8HI_type_node, integer_type_node, NULL_TREE);
13480 tree v8hi_ftype_v8hi_int_int
13481 = build_function_type_list (V8HI_type_node,
13482 V8HI_type_node, integer_type_node,
13483 integer_type_node, NULL_TREE);
13484 tree v2di_ftype_v2di_int
13485 = build_function_type_list (V2DI_type_node,
13486 V2DI_type_node, integer_type_node, NULL_TREE);
13487 tree v4si_ftype_v4si_int
13488 = build_function_type_list (V4SI_type_node,
13489 V4SI_type_node, integer_type_node, NULL_TREE);
13490 tree v8hi_ftype_v8hi_int
13491 = build_function_type_list (V8HI_type_node,
13492 V8HI_type_node, integer_type_node, NULL_TREE);
13493 tree v8hi_ftype_v8hi_v2di
13494 = build_function_type_list (V8HI_type_node,
13495 V8HI_type_node, V2DI_type_node, NULL_TREE);
13496 tree v4si_ftype_v4si_v2di
13497 = build_function_type_list (V4SI_type_node,
13498 V4SI_type_node, V2DI_type_node, NULL_TREE);
13499 tree v4si_ftype_v8hi_v8hi
13500 = build_function_type_list (V4SI_type_node,
13501 V8HI_type_node, V8HI_type_node, NULL_TREE);
13502 tree di_ftype_v8qi_v8qi
13503 = build_function_type_list (long_long_unsigned_type_node,
13504 V8QI_type_node, V8QI_type_node, NULL_TREE);
13505 tree v2di_ftype_v16qi_v16qi
13506 = build_function_type_list (V2DI_type_node,
13507 V16QI_type_node, V16QI_type_node, NULL_TREE);
13508 tree int_ftype_v16qi
13509 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13510 tree v16qi_ftype_pcchar
13511 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13512 tree void_ftype_pchar_v16qi
13513 = build_function_type_list (void_type_node,
13514 pchar_type_node, V16QI_type_node, NULL_TREE);
13515 tree v4si_ftype_pcint
13516 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13517 tree void_ftype_pcint_v4si
13518 = build_function_type_list (void_type_node,
13519 pcint_type_node, V4SI_type_node, NULL_TREE);
13520 tree v2di_ftype_v2di
13521 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13522
13523 tree float80_type;
13524 tree float128_type;
13525
13526 /* The __float80 type. */
13527 if (TYPE_MODE (long_double_type_node) == XFmode)
13528 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13529 "__float80");
13530 else
13531 {
13532 /* The __float80 type. */
13533 float80_type = make_node (REAL_TYPE);
13534 TYPE_PRECISION (float80_type) = 96;
13535 layout_type (float80_type);
13536 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13537 }
13538
13539 float128_type = make_node (REAL_TYPE);
13540 TYPE_PRECISION (float128_type) = 128;
13541 layout_type (float128_type);
13542 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13543
13544 /* Add all builtins that are more or less simple operations on two
13545 operands. */
13546 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13547 {
13548 /* Use one of the operands; the target can have a different mode for
13549 mask-generating compares. */
13550 enum machine_mode mode;
13551 tree type;
13552
13553 if (d->name == 0)
13554 continue;
13555 mode = insn_data[d->icode].operand[1].mode;
13556
13557 switch (mode)
13558 {
13559 case V16QImode:
13560 type = v16qi_ftype_v16qi_v16qi;
13561 break;
13562 case V8HImode:
13563 type = v8hi_ftype_v8hi_v8hi;
13564 break;
13565 case V4SImode:
13566 type = v4si_ftype_v4si_v4si;
13567 break;
13568 case V2DImode:
13569 type = v2di_ftype_v2di_v2di;
13570 break;
13571 case V2DFmode:
13572 type = v2df_ftype_v2df_v2df;
13573 break;
13574 case TImode:
13575 type = ti_ftype_ti_ti;
13576 break;
13577 case V4SFmode:
13578 type = v4sf_ftype_v4sf_v4sf;
13579 break;
13580 case V8QImode:
13581 type = v8qi_ftype_v8qi_v8qi;
13582 break;
13583 case V4HImode:
13584 type = v4hi_ftype_v4hi_v4hi;
13585 break;
13586 case V2SImode:
13587 type = v2si_ftype_v2si_v2si;
13588 break;
13589 case DImode:
13590 type = di_ftype_di_di;
13591 break;
13592
13593 default:
13594 abort ();
13595 }
13596
13597 /* Override for comparisons. */
13598 if (d->icode == CODE_FOR_maskcmpv4sf3
13599 || d->icode == CODE_FOR_maskncmpv4sf3
13600 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13601 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13602 type = v4si_ftype_v4sf_v4sf;
13603
13604 if (d->icode == CODE_FOR_maskcmpv2df3
13605 || d->icode == CODE_FOR_maskncmpv2df3
13606 || d->icode == CODE_FOR_vmmaskcmpv2df3
13607 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13608 type = v2di_ftype_v2df_v2df;
13609
13610 def_builtin (d->mask, d->name, type, d->code);
13611 }
13612
13613 /* Add the remaining MMX insns with somewhat more complicated types. */
13614 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13615 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13616 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13617 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13618 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13619
13620 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13621 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13622 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13623
13624 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13625 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13626
13627 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13628 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13629
13630 /* comi/ucomi insns. */
13631 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13632 if (d->mask == MASK_SSE2)
13633 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13634 else
13635 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13636
13637 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13638 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13639 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13640
13641 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13642 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13643 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13644 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13645 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13646 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13647 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13648 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13649 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13650 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13651 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13652
13653 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13654 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13655
13656 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13657
13658 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13659 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13660 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13661 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13662 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13663 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13664
13665 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13666 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13667 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13668 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13669
13670 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13671 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13672 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13673 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13674
13675 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13676
13677 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13678
13679 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13680 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13681 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13682 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13683 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13684 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13685
13686 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13687
13688 /* Original 3DNow! */
13689 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13690 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13691 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13692 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13693 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13694 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13695 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13696 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13697 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13698 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13699 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13700 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13701 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13702 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13703 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13704 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13705 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13706 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13707 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13708 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13709
13710 /* 3DNow! extension as used in the Athlon CPU. */
13711 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13712 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13713 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13714 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13715 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13716 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13717
13718 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13719
13720 /* SSE2 */
13721 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13722 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13723
13724 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13725 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13726 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13727
13728 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13729 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13730 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13731 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13732 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13733 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13734
13735 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13736 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13737 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13738 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13739
13740 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13741 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13742 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13743 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13744 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13745
13746 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13747 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13748 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13749 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13750
13751 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13752 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13753
13754 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13755
13756 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13757 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13758
13759 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13760 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13761 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13762 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13763 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13764
13765 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13766
13767 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13768 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13769 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13770 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13771
13772 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13773 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13774 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13775
13776 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13777 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13778 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13779 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13780
13781 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13782 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13783 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13784 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13785 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13786 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13787 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13788
13789 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13790 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13791 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13792
13793 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13794 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13795 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13796 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13797 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13798 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13799 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13800
13801 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13802
13803 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13804 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13805 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13806
13807 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13808 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13809 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13810
13811 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13812 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13813
13814 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13815 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13816 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13817 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13818
13819 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13820 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13821 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13822 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13823
13824 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13825 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13826
13827 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13828
13829 /* Prescott New Instructions. */
13830 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13831 void_ftype_pcvoid_unsigned_unsigned,
13832 IX86_BUILTIN_MONITOR);
13833 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13834 void_ftype_unsigned_unsigned,
13835 IX86_BUILTIN_MWAIT);
13836 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13837 v4sf_ftype_v4sf,
13838 IX86_BUILTIN_MOVSHDUP);
13839 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13840 v4sf_ftype_v4sf,
13841 IX86_BUILTIN_MOVSLDUP);
13842 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13843 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13844 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13845 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13846 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13847 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13848}
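/* A minimal user-level sketch (illustrative, not part of the original
   source) of what the registrations above provide: with -msse2 the
   builtins become directly callable, and the <emmintrin.h> intrinsics
   are essentially thin wrappers around them.

       typedef double __v2df __attribute__ ((vector_size (16)));

       __v2df
       sqrt_unaligned (const double *p)
       {
         __v2df x = __builtin_ia32_loadupd (p);
         return __builtin_ia32_sqrtpd (x);
       }

   __builtin_ia32_loadupd and __builtin_ia32_sqrtpd are registered above
   as IX86_BUILTIN_LOADUPD and IX86_BUILTIN_SQRTPD respectively.  */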
13849
13850/* Errors in the source file can cause expand_expr to return const0_rtx
13851 where we expect a vector. To avoid crashing, use one of the vector
13852 clear instructions. */
13853static rtx
13854safe_vector_operand (rtx x, enum machine_mode mode)
13855{
13856 if (x != const0_rtx)
13857 return x;
13858 x = gen_reg_rtx (mode);
13859
13860 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13861 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13862 : gen_rtx_SUBREG (DImode, x, 0)));
13863 else
13864 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13865 : gen_rtx_SUBREG (V4SFmode, x, 0),
13866 CONST0_RTX (V4SFmode)));
13867 return x;
13868}
13869
13870/* Subroutine of ix86_expand_builtin to take care of binop insns. */
13871
13872static rtx
13873ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13874{
13875 rtx pat;
13876 tree arg0 = TREE_VALUE (arglist);
13877 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13878 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13879 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13880 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13881 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13882 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13883
13884 if (VECTOR_MODE_P (mode0))
13885 op0 = safe_vector_operand (op0, mode0);
13886 if (VECTOR_MODE_P (mode1))
13887 op1 = safe_vector_operand (op1, mode1);
13888
13889 if (! target
13890 || GET_MODE (target) != tmode
13891 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13892 target = gen_reg_rtx (tmode);
13893
13894 if (GET_MODE (op1) == SImode && mode1 == TImode)
13895 {
13896 rtx x = gen_reg_rtx (V4SImode);
13897 emit_insn (gen_sse2_loadd (x, op1));
13898 op1 = gen_lowpart (TImode, x);
13899 }
13900
13901 /* In case the insn wants input operands in modes different from
13902 the result, abort. */
13903 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13904 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13905 abort ();
13906
13907 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13908 op0 = copy_to_mode_reg (mode0, op0);
13909 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13910 op1 = copy_to_mode_reg (mode1, op1);
13911
13912 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13913 yet one of the two must not be a memory. This is normally enforced
13914 by expanders, but we didn't bother to create one here. */
13915 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13916 op0 = copy_to_mode_reg (mode0, op0);
13917
13918 pat = GEN_FCN (icode) (target, op0, op1);
13919 if (! pat)
13920 return 0;
13921 emit_insn (pat);
13922 return target;
13923}
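/* For example, a call to __builtin_ia32_psubw128 (entered in bdesc_2arg
   with CODE_FOR_subv8hi3) is expanded through this helper: both V8HI
   operands are copied into registers where the insn predicates require
   it, a single subv8hi3 insn is emitted, and its destination register is
   returned as the value of the builtin call.  */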
13924
13925/* Subroutine of ix86_expand_builtin to take care of stores. */
13926
13927static rtx
13928ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13929{
13930 rtx pat;
13931 tree arg0 = TREE_VALUE (arglist);
13932 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13933 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13934 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13935 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13936 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13937
13938 if (VECTOR_MODE_P (mode1))
13939 op1 = safe_vector_operand (op1, mode1);
13940
13941 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13942 op1 = copy_to_mode_reg (mode1, op1);
13943
13944 pat = GEN_FCN (icode) (op0, op1);
13945 if (pat)
13946 emit_insn (pat);
13947 return 0;
13948}
13949
13950/* Subroutine of ix86_expand_builtin to take care of unop insns. */
13951
13952static rtx
13953ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13954 rtx target, int do_load)
13955{
13956 rtx pat;
13957 tree arg0 = TREE_VALUE (arglist);
13958 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13959 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13960 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13961
13962 if (! target
13963 || GET_MODE (target) != tmode
4907
4908 return "";
4909}
4910
4911/* Generate a "push" pattern for input ARG. */
4912
4913static rtx
4914gen_push (rtx arg)
4915{
4916 return gen_rtx_SET (VOIDmode,
4917 gen_rtx_MEM (Pmode,
4918 gen_rtx_PRE_DEC (Pmode,
4919 stack_pointer_rtx)),
4920 arg);
4921}
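/* Schematically, the insn constructed above is

     (set (mem:Pmode (pre_dec:Pmode (reg sp))) ARG)

   i.e. an ordinary push of ARG onto the stack.  */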
4922
4923/* Return the number of an unused call-clobbered register available for
4924 the entire function, or INVALID_REGNUM if there is none. */
4925
4926static unsigned int
4927ix86_select_alt_pic_regnum (void)
4928{
4929 if (current_function_is_leaf && !current_function_profile)
4930 {
4931 int i;
4932 for (i = 2; i >= 0; --i)
4933 if (!regs_ever_live[i])
4934 return i;
4935 }
4936
4937 return INVALID_REGNUM;
4938}
4939
4940/* Return 1 if we need to save REGNO. */
4941static int
4942ix86_save_reg (unsigned int regno, int maybe_eh_return)
4943{
4944 if (pic_offset_table_rtx
4945 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4946 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4947 || current_function_profile
4948 || current_function_calls_eh_return
4949 || current_function_uses_const_pool))
4950 {
4951 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4952 return 0;
4953 return 1;
4954 }
4955
4956 if (current_function_calls_eh_return && maybe_eh_return)
4957 {
4958 unsigned i;
4959 for (i = 0; ; i++)
4960 {
4961 unsigned test = EH_RETURN_DATA_REGNO (i);
4962 if (test == INVALID_REGNUM)
4963 break;
4964 if (test == regno)
4965 return 1;
4966 }
4967 }
4968
4969 return (regs_ever_live[regno]
4970 && !call_used_regs[regno]
4971 && !fixed_regs[regno]
4972 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4973}
4974
4975/* Return number of registers to be saved on the stack. */
4976
4977static int
4978ix86_nsaved_regs (void)
4979{
4980 int nregs = 0;
4981 int regno;
4982
4983 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4984 if (ix86_save_reg (regno, true))
4985 nregs++;
4986 return nregs;
4987}
4988
4989/* Return the offset between two registers, one to be eliminated, and the other
4990 its replacement, at the start of a routine. */
4991
4992HOST_WIDE_INT
4993ix86_initial_elimination_offset (int from, int to)
4994{
4995 struct ix86_frame frame;
4996 ix86_compute_frame_layout (&frame);
4997
4998 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4999 return frame.hard_frame_pointer_offset;
5000 else if (from == FRAME_POINTER_REGNUM
5001 && to == HARD_FRAME_POINTER_REGNUM)
5002 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5003 else
5004 {
5005 if (to != STACK_POINTER_REGNUM)
5006 abort ();
5007 else if (from == ARG_POINTER_REGNUM)
5008 return frame.stack_pointer_offset;
5009 else if (from != FRAME_POINTER_REGNUM)
5010 abort ();
5011 else
5012 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5013 }
5014}
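/* Worked example: on ia32 with a frame pointer, eliminating
   ARG_POINTER_REGNUM to HARD_FRAME_POINTER_REGNUM yields
   frame.hard_frame_pointer_offset == 2 * UNITS_PER_WORD == 8,
   i.e. the slots for the return address and the saved %ebp.  */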
5015
5016/* Fill the ix86_frame structure describing the frame of the current function. */
5017
5018static void
5019ix86_compute_frame_layout (struct ix86_frame *frame)
5020{
5021 HOST_WIDE_INT total_size;
5022 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5023 HOST_WIDE_INT offset;
5024 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5025 HOST_WIDE_INT size = get_frame_size ();
5026
5027 frame->nregs = ix86_nsaved_regs ();
5028 total_size = size;
5029
5030 /* During reload iterations the number of saved registers can change.
5031 Recompute the value as needed. Do not recompute when the number of
5032 registers didn't change, as reload makes multiple calls to the function
5033 and does not expect the decision to change within a single iteration. */
5034 if (!optimize_size
5035 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5036 {
5037 int count = frame->nregs;
5038
5039 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5040 /* The fast prologue uses move instead of push to save registers. This
5041 is significantly longer, but also executes faster as modern hardware
5042 can execute the moves in parallel, but can't do that for push/pop.
5043
5044 Be careful about choosing which prologue to emit: when the function
5045 takes many instructions to execute, we may as well use the slow version;
5046 the same holds when the function is known to be outside a hot spot
5047 (known with profile feedback only). Weight the size of the function by
5048 the number of registers to save, as it is cheap to use one or two push
5049 instructions but very slow to use many of them. */
5050 if (count)
5051 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5052 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5053 || (flag_branch_probabilities
5054 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5055 cfun->machine->use_fast_prologue_epilogue = false;
5056 else
5057 cfun->machine->use_fast_prologue_epilogue
5058 = !expensive_function_p (count);
5059 }
5060 if (TARGET_PROLOGUE_USING_MOVE
5061 && cfun->machine->use_fast_prologue_epilogue)
5062 frame->save_regs_using_mov = true;
5063 else
5064 frame->save_regs_using_mov = false;
5065
5066
5067 /* Skip return address and saved base pointer. */
5068 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5069
5070 frame->hard_frame_pointer_offset = offset;
5071
5072 /* Do some sanity checking of stack_alignment_needed and
5073 preferred_alignment, since the i386 port is the only one using these
5074 features, which may break easily. */
5075
5076 if (size && !stack_alignment_needed)
5077 abort ();
5078 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5079 abort ();
5080 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5081 abort ();
5082 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5083 abort ();
5084
5085 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5086 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5087
5088 /* Register save area */
5089 offset += frame->nregs * UNITS_PER_WORD;
5090
5091 /* Va-arg area */
5092 if (ix86_save_varrargs_registers)
5093 {
5094 offset += X86_64_VARARGS_SIZE;
5095 frame->va_arg_size = X86_64_VARARGS_SIZE;
5096 }
5097 else
5098 frame->va_arg_size = 0;
5099
5100 /* Align start of frame for local function. */
5101 frame->padding1 = ((offset + stack_alignment_needed - 1)
5102 & -stack_alignment_needed) - offset;
5103
5104 offset += frame->padding1;
5105
5106 /* Frame pointer points here. */
5107 frame->frame_pointer_offset = offset;
5108
5109 offset += size;
5110
5111 /* Add the outgoing arguments area. It can be skipped if we eliminated
5112 all the function calls as dead code. Skipping is however impossible
5113 when the function calls alloca, because the alloca expander assumes
5114 that the last current_function_outgoing_args_size bytes of the stack
5115 frame are unused. */
5116 if (ACCUMULATE_OUTGOING_ARGS
5117 && (!current_function_is_leaf || current_function_calls_alloca))
5118 {
5119 offset += current_function_outgoing_args_size;
5120 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5121 }
5122 else
5123 frame->outgoing_arguments_size = 0;
5124
5125 /* Align stack boundary. Only needed if we're calling another function
5126 or using alloca. */
5127 if (!current_function_is_leaf || current_function_calls_alloca)
5128 frame->padding2 = ((offset + preferred_alignment - 1)
5129 & -preferred_alignment) - offset;
5130 else
5131 frame->padding2 = 0;
5132
5133 offset += frame->padding2;
5134
5135 /* We've reached end of stack frame. */
5136 frame->stack_pointer_offset = offset;
5137
5138 /* Size the prologue needs to allocate. */
5139 frame->to_allocate =
5140 (size + frame->padding1 + frame->padding2
5141 + frame->outgoing_arguments_size + frame->va_arg_size);
5142
5143 if ((!frame->to_allocate && frame->nregs <= 1)
5144 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5145 frame->save_regs_using_mov = false;
5146
5147 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5148 && current_function_is_leaf)
5149 {
5150 frame->red_zone_size = frame->to_allocate;
5151 if (frame->save_regs_using_mov)
5152 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5153 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5154 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5155 }
5156 else
5157 frame->red_zone_size = 0;
5158 frame->to_allocate -= frame->red_zone_size;
5159 frame->stack_pointer_offset -= frame->red_zone_size;
5160#if 0
5161 fprintf (stderr, "nregs: %i\n", frame->nregs);
5162 fprintf (stderr, "size: %i\n", size);
5163 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5164 fprintf (stderr, "padding1: %i\n", frame->padding1);
5165 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5166 fprintf (stderr, "padding2: %i\n", frame->padding2);
5167 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5168 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5169 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5170 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5171 frame->hard_frame_pointer_offset);
5172 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5173#endif
5174}
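/* A schematic summary of the layout computed above, from the incoming
   return address down to the final stack pointer:

       return address
       saved frame pointer (if frame_pointer_needed)
     hard_frame_pointer_offset ->
       saved registers            (frame->nregs words)
       va-arg save area           (frame->va_arg_size)
       padding1
     frame_pointer_offset ->
       local variables            (get_frame_size ())
       outgoing arguments         (frame->outgoing_arguments_size)
       padding2
     stack_pointer_offset ->

   When the red zone is usable, frame->to_allocate and stack_pointer_offset
   are then reduced by frame->red_zone_size, since that much of the frame
   may live below the stack pointer without being allocated explicitly.  */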
5175
5176/* Emit code to save registers in the prologue. */
5177
5178static void
5179ix86_emit_save_regs (void)
5180{
5181 int regno;
5182 rtx insn;
5183
5184 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5185 if (ix86_save_reg (regno, true))
5186 {
5187 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5188 RTX_FRAME_RELATED_P (insn) = 1;
5189 }
5190}
5191
5192/* Emit code to save registers using MOV insns. The first register
5193 is stored at POINTER + OFFSET. */
5194static void
5195ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5196{
5197 int regno;
5198 rtx insn;
5199
5200 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5201 if (ix86_save_reg (regno, true))
5202 {
5203 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5204 Pmode, offset),
5205 gen_rtx_REG (Pmode, regno));
5206 RTX_FRAME_RELATED_P (insn) = 1;
5207 offset += UNITS_PER_WORD;
5208 }
5209}
5210
5211/* Expand prologue or epilogue stack adjustment.
5212 The pattern exists to put a dependency on all ebp-based memory accesses.
5213 STYLE should be negative if instructions should be marked as frame related,
5214 zero if the %r11 register is live and cannot be freely used, and positive
5215 otherwise. */
5216
5217static void
5218pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5219{
5220 rtx insn;
5221
5222 if (! TARGET_64BIT)
5223 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5224 else if (x86_64_immediate_operand (offset, DImode))
5225 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5226 else
5227 {
5228 rtx r11;
5229 /* r11 is used by the indirect sibcall return as well: it is set
5230 before the epilogue and used after it. At the moment an indirect
5231 sibcall shouldn't be used together with huge frame sizes in one
5232 function because of the frame_size check in sibcall.c. */
5233 if (style == 0)
5234 abort ();
5235 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5236 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5237 if (style < 0)
5238 RTX_FRAME_RELATED_P (insn) = 1;
5239 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5240 offset));
5241 }
5242 if (style < 0)
5243 RTX_FRAME_RELATED_P (insn) = 1;
5244}
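/* In effect: on ia32 this emits a single add of OFFSET to the stack
   pointer through a pattern that also serializes ebp-based memory
   accesses; on x86-64 an offset that does not fit the sign-extended
   32-bit immediate is first loaded into %r11, which is why STYLE == 0
   (r11 live) is rejected above.  */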
5245
5246/* Expand the prologue into a bunch of separate insns. */
5247
5248void
5249ix86_expand_prologue (void)
5250{
5251 rtx insn;
5252 bool pic_reg_used;
5253 struct ix86_frame frame;
5254 HOST_WIDE_INT allocate;
5255
5256 ix86_compute_frame_layout (&frame);
5257
5258 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5259 slower on all targets. Also sdb doesn't like it. */
5260
5261 if (frame_pointer_needed)
5262 {
5263 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5264 RTX_FRAME_RELATED_P (insn) = 1;
5265
5266 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5267 RTX_FRAME_RELATED_P (insn) = 1;
5268 }
5269
5270 allocate = frame.to_allocate;
5271
5272 if (!frame.save_regs_using_mov)
5273 ix86_emit_save_regs ();
5274 else
5275 allocate += frame.nregs * UNITS_PER_WORD;
5276
5277 /* When using the red zone we may start saving registers before
5278 allocating the stack frame, saving one cycle of the prologue. */
5279 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5280 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5281 : stack_pointer_rtx,
5282 -frame.nregs * UNITS_PER_WORD);
5283
5284 if (allocate == 0)
5285 ;
5286 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5287 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5288 GEN_INT (-allocate), -1);
5289 else
5290 {
5291 /* Only valid for Win32. */
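      /* The stack-probe worker expects the allocation size in %eax and
	 touches each page as it lowers %esp, which Windows' guard-page
	 stack growth requires for allocations of CHECK_STACK_LIMIT bytes
	 or more.  */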
5292 rtx eax = gen_rtx_REG (SImode, 0);
5293 bool eax_live = ix86_eax_live_at_start_p ();
5294
5295 if (TARGET_64BIT)
5296 abort ();
5297
5298 if (eax_live)
5299 {
5300 emit_insn (gen_push (eax));
5301 allocate -= 4;
5302 }
5303
5304 insn = emit_move_insn (eax, GEN_INT (allocate));
5305 RTX_FRAME_RELATED_P (insn) = 1;
5306
5307 insn = emit_insn (gen_allocate_stack_worker (eax));
5308 RTX_FRAME_RELATED_P (insn) = 1;
5309
5310 if (eax_live)
5311 {
5312 rtx t = plus_constant (stack_pointer_rtx, allocate);
5313 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5314 }
5315 }
5316
5317 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5318 {
5319 if (!frame_pointer_needed || !frame.to_allocate)
5320 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5321 else
5322 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5323 -frame.nregs * UNITS_PER_WORD);
5324 }
5325
5326 pic_reg_used = false;
5327 if (pic_offset_table_rtx
5328 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5329 || current_function_profile))
5330 {
5331 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5332
5333 if (alt_pic_reg_used != INVALID_REGNUM)
5334 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5335
5336 pic_reg_used = true;
5337 }
5338
5339 if (pic_reg_used)
5340 {
5341 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5342
5343 /* Even with accurate pre-reload life analysis, we can wind up
5344 deleting all references to the pic register after reload.
5345 Consider if cross-jumping unifies two sides of a branch
5346 controlled by a comparison vs the only read from a global.
5347 In which case, allow the set_got to be deleted, though we're
5348 too late to do anything about the ebx save in the prologue. */
5349 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5350 }
5351
5352  /* Prevent function calls from being scheduled before the call to mcount.
5353     In the pic_reg_used case, make sure that the got load isn't deleted. */
5354 if (current_function_profile)
5355 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5356}
5357
5358/* Emit code to restore saved registers using MOV insns. First register
5359 is restored from POINTER + OFFSET. */
5360static void
5361ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5362 int maybe_eh_return)
5363{
5364 int regno;
5365 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5366
5367 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5368 if (ix86_save_reg (regno, maybe_eh_return))
5369 {
5370	/* Ensure that adjust_address won't be forced to produce a pointer
5371	   out of the range allowed by the x86-64 instruction set. */
5372 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5373 {
5374 rtx r11;
5375
5376 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5377 emit_move_insn (r11, GEN_INT (offset));
5378 emit_insn (gen_adddi3 (r11, r11, pointer));
5379 base_address = gen_rtx_MEM (Pmode, r11);
5380 offset = 0;
5381 }
5382 emit_move_insn (gen_rtx_REG (Pmode, regno),
5383 adjust_address (base_address, Pmode, offset));
5384 offset += UNITS_PER_WORD;
5385 }
5386}
5387
5388/* Restore function stack, frame, and registers. */
5389
5390void
5391ix86_expand_epilogue (int style)
5392{
5393 int regno;
5394 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5395 struct ix86_frame frame;
5396 HOST_WIDE_INT offset;
5397
5398 ix86_compute_frame_layout (&frame);
5399
5400 /* Calculate start of saved registers relative to ebp. Special care
5401 must be taken for the normal return case of a function using
5402 eh_return: the eax and edx registers are marked as saved, but not
5403 restored along this path. */
5404 offset = frame.nregs;
5405 if (current_function_calls_eh_return && style != 2)
5406 offset -= 2;
5407 offset *= -UNITS_PER_WORD;
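  /* For example, with three registers to restore on ia32
     (UNITS_PER_WORD == 4), offset is now -12, i.e. the register save
     area starts 12 bytes below the frame pointer.  */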
5408
5409  /* If we're only restoring one register and sp is not valid then
5410     use a move instruction to restore the register, since it's
5411     less work than reloading sp and popping the register.
5412
5413     The default code results in a stack adjustment using an add/lea
5414     instruction, while this code results in a LEAVE instruction (or its
5415     discrete equivalent), so it is profitable in some other cases as well,
5416     especially when there are no registers to restore.  We also use this
5417     code when TARGET_USE_LEAVE is set and there is exactly one register to
5418     pop.  This heuristic may need some tuning in the future. */
5419 if ((!sp_valid && frame.nregs <= 1)
5420 || (TARGET_EPILOGUE_USING_MOVE
5421 && cfun->machine->use_fast_prologue_epilogue
5422 && (frame.nregs > 1 || frame.to_allocate))
5423 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5424 || (frame_pointer_needed && TARGET_USE_LEAVE
5425 && cfun->machine->use_fast_prologue_epilogue
5426 && frame.nregs == 1)
5427 || current_function_calls_eh_return)
5428 {
5429      /* Restore registers.  We can use ebp or esp to address the memory
5430	 locations.  If both are available, default to ebp, since offsets
5431	 are known to be small.  The only exception is esp pointing directly
5432	 to the end of the block of saved registers, where we may simplify
5433	 the addressing mode. */
5434
5435 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5436 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5437 frame.to_allocate, style == 2);
5438 else
5439 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5440 offset, style == 2);
5441
5442 /* eh_return epilogues need %ecx added to the stack pointer. */
5443 if (style == 2)
5444 {
5445 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5446
5447 if (frame_pointer_needed)
5448 {
5449 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5450 tmp = plus_constant (tmp, UNITS_PER_WORD);
5451 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5452
5453 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5454 emit_move_insn (hard_frame_pointer_rtx, tmp);
5455
5456 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5457 const0_rtx, style);
5458 }
5459 else
5460 {
5461 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5462 tmp = plus_constant (tmp, (frame.to_allocate
5463 + frame.nregs * UNITS_PER_WORD));
5464 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5465 }
5466 }
5467 else if (!frame_pointer_needed)
5468 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5469 GEN_INT (frame.to_allocate
5470 + frame.nregs * UNITS_PER_WORD),
5471 style);
5472 /* If not an i386, mov & pop is faster than "leave". */
5473 else if (TARGET_USE_LEAVE || optimize_size
5474 || !cfun->machine->use_fast_prologue_epilogue)
5475 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5476 else
5477 {
5478 pro_epilogue_adjust_stack (stack_pointer_rtx,
5479 hard_frame_pointer_rtx,
5480 const0_rtx, style);
5481 if (TARGET_64BIT)
5482 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5483 else
5484 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5485 }
5486 }
5487 else
5488 {
5489 /* First step is to deallocate the stack frame so that we can
5490 pop the registers. */
5491 if (!sp_valid)
5492 {
5493 if (!frame_pointer_needed)
5494 abort ();
5495 pro_epilogue_adjust_stack (stack_pointer_rtx,
5496 hard_frame_pointer_rtx,
5497 GEN_INT (offset), style);
5498 }
5499 else if (frame.to_allocate)
5500 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5501 GEN_INT (frame.to_allocate), style);
5502
5503 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5504 if (ix86_save_reg (regno, false))
5505 {
5506 if (TARGET_64BIT)
5507 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5508 else
5509 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5510 }
5511 if (frame_pointer_needed)
5512 {
5513 /* Leave results in shorter dependency chains on CPUs that are
5514 able to grok it fast. */
5515 if (TARGET_USE_LEAVE)
5516 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5517 else if (TARGET_64BIT)
5518 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5519 else
5520 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5521 }
5522 }
5523
5524 /* Sibcall epilogues don't want a return instruction. */
5525 if (style == 0)
5526 return;
5527
5528 if (current_function_pops_args && current_function_args_size)
5529 {
5530 rtx popc = GEN_INT (current_function_pops_args);
5531
5532      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
5533	 return address, do an explicit add, and jump indirectly to the
5534	 caller. */
5535
5536 if (current_function_pops_args >= 65536)
5537 {
5538 rtx ecx = gen_rtx_REG (SImode, 2);
5539
5540 /* There is no "pascal" calling convention in 64bit ABI. */
5541 if (TARGET_64BIT)
5542 abort ();
5543
5544 emit_insn (gen_popsi1 (ecx));
5545 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5546 emit_jump_insn (gen_return_indirect_internal (ecx));
5547 }
5548 else
5549 emit_jump_insn (gen_return_pop_internal (popc));
5550 }
5551 else
5552 emit_jump_insn (gen_return_internal ());
5553}
5554
5555/* Reset from the function's potential modifications. */
5556
5557static void
5558ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5559 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5560{
5561 if (pic_offset_table_rtx)
5562 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5563}
5564
5565/* Extract the parts of an RTL expression that is a valid memory address
5566 for an instruction. Return 0 if the structure of the address is
5567   grossly off.  Return -1 if the address contains an ASHIFT, so it is not
5568   strictly valid, but is still used for computing the length of an lea instruction. */
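/* For illustration, an address such as
     (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12))
   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12,
   i.e. the AT&T addressing form 12(%ebx,%eax,4).  */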
5569
5570static int
5571ix86_decompose_address (rtx addr, struct ix86_address *out)
5572{
5573 rtx base = NULL_RTX;
5574 rtx index = NULL_RTX;
5575 rtx disp = NULL_RTX;
5576 HOST_WIDE_INT scale = 1;
5577 rtx scale_rtx = NULL_RTX;
5578 int retval = 1;
5579 enum ix86_address_seg seg = SEG_DEFAULT;
5580
5581 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5582 base = addr;
5583 else if (GET_CODE (addr) == PLUS)
5584 {
5585 rtx addends[4], op;
5586 int n = 0, i;
5587
5588 op = addr;
5589 do
5590 {
5591 if (n >= 4)
5592 return 0;
5593 addends[n++] = XEXP (op, 1);
5594 op = XEXP (op, 0);
5595 }
5596 while (GET_CODE (op) == PLUS);
5597 if (n >= 4)
5598 return 0;
5599 addends[n] = op;
5600
5601 for (i = n; i >= 0; --i)
5602 {
5603 op = addends[i];
5604 switch (GET_CODE (op))
5605 {
5606 case MULT:
5607 if (index)
5608 return 0;
5609 index = XEXP (op, 0);
5610 scale_rtx = XEXP (op, 1);
5611 break;
5612
5613 case UNSPEC:
5614 if (XINT (op, 1) == UNSPEC_TP
5615 && TARGET_TLS_DIRECT_SEG_REFS
5616 && seg == SEG_DEFAULT)
5617 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5618 else
5619 return 0;
5620 break;
5621
5622 case REG:
5623 case SUBREG:
5624 if (!base)
5625 base = op;
5626 else if (!index)
5627 index = op;
5628 else
5629 return 0;
5630 break;
5631
5632 case CONST:
5633 case CONST_INT:
5634 case SYMBOL_REF:
5635 case LABEL_REF:
5636 if (disp)
5637 return 0;
5638 disp = op;
5639 break;
5640
5641 default:
5642 return 0;
5643 }
5644 }
5645 }
5646 else if (GET_CODE (addr) == MULT)
5647 {
5648 index = XEXP (addr, 0); /* index*scale */
5649 scale_rtx = XEXP (addr, 1);
5650 }
5651 else if (GET_CODE (addr) == ASHIFT)
5652 {
5653 rtx tmp;
5654
5655 /* We're called for lea too, which implements ashift on occasion. */
5656 index = XEXP (addr, 0);
5657 tmp = XEXP (addr, 1);
5658 if (GET_CODE (tmp) != CONST_INT)
5659 return 0;
5660 scale = INTVAL (tmp);
5661 if ((unsigned HOST_WIDE_INT) scale > 3)
5662 return 0;
5663 scale = 1 << scale;
5664 retval = -1;
5665 }
5666 else
5667 disp = addr; /* displacement */
5668
5669 /* Extract the integral value of scale. */
5670 if (scale_rtx)
5671 {
5672 if (GET_CODE (scale_rtx) != CONST_INT)
5673 return 0;
5674 scale = INTVAL (scale_rtx);
5675 }
5676
5677  /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
5678 if (base && index && scale == 1
5679 && (index == arg_pointer_rtx
5680 || index == frame_pointer_rtx
5681 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5682 {
5683 rtx tmp = base;
5684 base = index;
5685 index = tmp;
5686 }
5687
5688 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5689 if ((base == hard_frame_pointer_rtx
5690 || base == frame_pointer_rtx
5691 || base == arg_pointer_rtx) && !disp)
5692 disp = const0_rtx;
5693
5694  /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
5695     Avoid this by transforming it to [%esi+0]. */
5696 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5697 && base && !index && !disp
5698 && REG_P (base)
5699 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5700 disp = const0_rtx;
5701
5702 /* Special case: encode reg+reg instead of reg*2. */
5703 if (!base && index && scale && scale == 2)
5704 base = index, scale = 1;
5705
5706 /* Special case: scaling cannot be encoded without base or displacement. */
5707 if (!base && !disp && index && scale != 1)
5708 disp = const0_rtx;
5709
5710 out->base = base;
5711 out->index = index;
5712 out->disp = disp;
5713 out->scale = scale;
5714 out->seg = seg;
5715
5716 return retval;
5717}
5718
5719/* Return the cost of the memory address X.
5720   For i386, it is better to use a complex address than let gcc copy
5721   the address into a reg and make a new pseudo.  But not if the address
5722   requires two regs - that would mean more pseudos with longer
5723   lifetimes. */
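/* For illustration, with hard registers: "(%eax)" costs 1, "4(%eax)"
   costs 0, and an address whose base or index is still a pseudo costs
   one extra, to discourage long-lived address pseudos.  */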
5724static int
5725ix86_address_cost (rtx x)
5726{
5727 struct ix86_address parts;
5728 int cost = 1;
5729
5730 if (!ix86_decompose_address (x, &parts))
5731 abort ();
5732
5733 /* More complex memory references are better. */
5734 if (parts.disp && parts.disp != const0_rtx)
5735 cost--;
5736 if (parts.seg != SEG_DEFAULT)
5737 cost--;
5738
5739 /* Attempt to minimize number of registers in the address. */
5740 if ((parts.base
5741 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5742 || (parts.index
5743 && (!REG_P (parts.index)
5744 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5745 cost++;
5746
5747 if (parts.base
5748 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5749 && parts.index
5750 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5751 && parts.base != parts.index)
5752 cost++;
5753
5754  /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5755     since its predecode logic can't detect the length of such instructions
5756     and decoding degenerates to vector decode.  Increase the cost of such
5757     addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
5758     to split such addresses, or even to refuse them entirely.
5759
5760     The following addressing modes are affected:
5761      [base+scale*index]
5762      [scale*index+disp]
5763      [base+index]
5764
5765     The first and last cases may be avoidable by explicitly coding a zero
5766     displacement into the memory address, but I don't have an AMD K6 machine
5767     handy to check this theory. */
5768
5769 if (TARGET_K6
5770 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5771 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5772 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5773 cost += 10;
5774
5775 return cost;
5776}
5777
5778/* If X is a machine specific address (i.e. a symbol or label being
5779 referenced as a displacement from the GOT implemented using an
5780 UNSPEC), then return the base term. Otherwise return X. */
5781
5782rtx
5783ix86_find_base_term (rtx x)
5784{
5785 rtx term;
5786
5787 if (TARGET_64BIT)
5788 {
5789 if (GET_CODE (x) != CONST)
5790 return x;
5791 term = XEXP (x, 0);
5792 if (GET_CODE (term) == PLUS
5793 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5794 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5795 term = XEXP (term, 0);
5796 if (GET_CODE (term) != UNSPEC
5797 || XINT (term, 1) != UNSPEC_GOTPCREL)
5798 return x;
5799
5800 term = XVECEXP (term, 0, 0);
5801
5802 if (GET_CODE (term) != SYMBOL_REF
5803 && GET_CODE (term) != LABEL_REF)
5804 return x;
5805
5806 return term;
5807 }
5808
5809 term = ix86_delegitimize_address (x);
5810
5811 if (GET_CODE (term) != SYMBOL_REF
5812 && GET_CODE (term) != LABEL_REF)
5813 return x;
5814
5815 return term;
5816}
5817
5818/* Determine if a given RTX is a valid constant. We already know this
5819 satisfies CONSTANT_P. */
5820
5821bool
5822legitimate_constant_p (rtx x)
5823{
5824 switch (GET_CODE (x))
5825 {
5826 case CONST:
5827 x = XEXP (x, 0);
5828
5829 if (GET_CODE (x) == PLUS)
5830 {
5831 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5832 return false;
5833 x = XEXP (x, 0);
5834 }
5835
5836 /* Only some unspecs are valid as "constants". */
5837 if (GET_CODE (x) == UNSPEC)
5838 switch (XINT (x, 1))
5839 {
5840 case UNSPEC_TPOFF:
5841 case UNSPEC_NTPOFF:
5842 return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5843 case UNSPEC_DTPOFF:
5844 return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5845 default:
5846 return false;
5847 }
5848
5849 /* We must have drilled down to a symbol. */
5850 if (!symbolic_operand (x, Pmode))
5851 return false;
5852 /* FALLTHRU */
5853
5854 case SYMBOL_REF:
5855 /* TLS symbols are never valid. */
5856 if (tls_symbolic_operand (x, Pmode))
5857 return false;
5858 break;
5859
5860 default:
5861 break;
5862 }
5863
5864 /* Otherwise we handle everything else in the move patterns. */
5865 return true;
5866}
5867
5868/* Determine if it's legal to put X into the constant pool. This
5869 is not possible for the address of thread-local symbols, which
5870 is checked above. */
5871
5872static bool
5873ix86_cannot_force_const_mem (rtx x)
5874{
5875 return !legitimate_constant_p (x);
5876}
5877
5878/* Determine if a given RTX is a valid constant address. */
5879
5880bool
5881constant_address_p (rtx x)
5882{
5883 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5884}
5885
5886/* Nonzero if the constant value X is a legitimate general operand
5887 when generating PIC code. It is given that flag_pic is on and
5888 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5889
5890bool
5891legitimate_pic_operand_p (rtx x)
5892{
5893 rtx inner;
5894
5895 switch (GET_CODE (x))
5896 {
5897 case CONST:
5898 inner = XEXP (x, 0);
5899
5900 /* Only some unspecs are valid as "constants". */
5901 if (GET_CODE (inner) == UNSPEC)
5902 switch (XINT (inner, 1))
5903 {
5904 case UNSPEC_TPOFF:
5905 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5906 default:
5907 return false;
5908 }
5909 /* FALLTHRU */
5910
5911 case SYMBOL_REF:
5912 case LABEL_REF:
5913 return legitimate_pic_address_disp_p (x);
5914
5915 default:
5916 return true;
5917 }
5918}
5919
5920/* Determine if a given CONST RTX is a valid memory displacement
5921 in PIC mode. */
5922
5923int
5924legitimate_pic_address_disp_p (rtx disp)
5925{
5926 bool saw_plus;
5927
5928 /* In 64bit mode we can allow direct addresses of symbols and labels
5929 when they are not dynamic symbols. */
5930 if (TARGET_64BIT)
5931 {
5932 /* TLS references should always be enclosed in UNSPEC. */
5933 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5934 return 0;
5935 if (GET_CODE (disp) == SYMBOL_REF
5936 && ix86_cmodel == CM_SMALL_PIC
5937 && SYMBOL_REF_LOCAL_P (disp))
5938 return 1;
5939 if (GET_CODE (disp) == LABEL_REF)
5940 return 1;
5941 if (GET_CODE (disp) == CONST
5942 && GET_CODE (XEXP (disp, 0)) == PLUS)
5943 {
5944 rtx op0 = XEXP (XEXP (disp, 0), 0);
5945 rtx op1 = XEXP (XEXP (disp, 0), 1);
5946
5947 /* TLS references should always be enclosed in UNSPEC. */
5948 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5949 return 0;
5950 if (((GET_CODE (op0) == SYMBOL_REF
5951 && ix86_cmodel == CM_SMALL_PIC
5952 && SYMBOL_REF_LOCAL_P (op0))
5953 || GET_CODE (op0) == LABEL_REF)
5954 && GET_CODE (op1) == CONST_INT
5955 && INTVAL (op1) < 16*1024*1024
5956 && INTVAL (op1) >= -16*1024*1024)
5957 return 1;
5958 }
5959 }
5960 if (GET_CODE (disp) != CONST)
5961 return 0;
5962 disp = XEXP (disp, 0);
5963
5964 if (TARGET_64BIT)
5965 {
5966      /* It is not safe to allow PLUS expressions here; that would break the
5967	 limit on the allowed distance of GOT references.  We should not need them anyway. */
5968 if (GET_CODE (disp) != UNSPEC
5969 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5970 return 0;
5971
5972 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5973 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5974 return 0;
5975 return 1;
5976 }
5977
5978 saw_plus = false;
5979 if (GET_CODE (disp) == PLUS)
5980 {
5981 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5982 return 0;
5983 disp = XEXP (disp, 0);
5984 saw_plus = true;
5985 }
5986
5987 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5988 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5989 {
5990 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5991 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5992 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5993 {
5994 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5995 if (! strcmp (sym_name, "<pic base>"))
5996 return 1;
5997 }
5998 }
5999
6000 if (GET_CODE (disp) != UNSPEC)
6001 return 0;
6002
6003 switch (XINT (disp, 1))
6004 {
6005 case UNSPEC_GOT:
6006 if (saw_plus)
6007 return false;
6008 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6009 case UNSPEC_GOTOFF:
6010 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6011 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6012 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6013 return false;
6014 case UNSPEC_GOTTPOFF:
6015 case UNSPEC_GOTNTPOFF:
6016 case UNSPEC_INDNTPOFF:
6017 if (saw_plus)
6018 return false;
6019 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6020 case UNSPEC_NTPOFF:
6021 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6022 case UNSPEC_DTPOFF:
6023 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6024 }
6025
6026 return 0;
6027}
6028
6029/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6030 memory address for an instruction. The MODE argument is the machine mode
6031 for the MEM expression that wants to use this address.
6032
6033   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
6034 convert common non-canonical forms to canonical form so that they will
6035 be recognized. */
6036
6037int
6038legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6039{
6040 struct ix86_address parts;
6041 rtx base, index, disp;
6042 HOST_WIDE_INT scale;
6043 const char *reason = NULL;
6044 rtx reason_rtx = NULL_RTX;
6045
6046 if (TARGET_DEBUG_ADDR)
6047 {
6048 fprintf (stderr,
6049 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6050 GET_MODE_NAME (mode), strict);
6051 debug_rtx (addr);
6052 }
6053
6054 if (ix86_decompose_address (addr, &parts) <= 0)
6055 {
6056 reason = "decomposition failed";
6057 goto report_error;
6058 }
6059
6060 base = parts.base;
6061 index = parts.index;
6062 disp = parts.disp;
6063 scale = parts.scale;
6064
6065 /* Validate base register.
6066
6067 Don't allow SUBREG's here, it can lead to spill failures when the base
6068 is one word out of a two word structure, which is represented internally
6069 as a DImode int. */
6070
6071 if (base)
6072 {
6073 reason_rtx = base;
6074
6075 if (GET_CODE (base) != REG)
6076 {
6077 reason = "base is not a register";
6078 goto report_error;
6079 }
6080
6081 if (GET_MODE (base) != Pmode)
6082 {
6083 reason = "base is not in Pmode";
6084 goto report_error;
6085 }
6086
6087 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6088 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6089 {
6090 reason = "base is not valid";
6091 goto report_error;
6092 }
6093 }
6094
6095 /* Validate index register.
6096
6097 Don't allow SUBREG's here, it can lead to spill failures when the index
6098 is one word out of a two word structure, which is represented internally
6099 as a DImode int. */
6100
6101 if (index)
6102 {
6103 reason_rtx = index;
6104
6105 if (GET_CODE (index) != REG)
6106 {
6107 reason = "index is not a register";
6108 goto report_error;
6109 }
6110
6111 if (GET_MODE (index) != Pmode)
6112 {
6113 reason = "index is not in Pmode";
6114 goto report_error;
6115 }
6116
6117 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6118 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6119 {
6120 reason = "index is not valid";
6121 goto report_error;
6122 }
6123 }
6124
6125 /* Validate scale factor. */
6126 if (scale != 1)
6127 {
6128 reason_rtx = GEN_INT (scale);
6129 if (!index)
6130 {
6131 reason = "scale without index";
6132 goto report_error;
6133 }
6134
6135 if (scale != 2 && scale != 4 && scale != 8)
6136 {
6137 reason = "scale is not a valid multiplier";
6138 goto report_error;
6139 }
6140 }
6141
6142 /* Validate displacement. */
6143 if (disp)
6144 {
6145 reason_rtx = disp;
6146
6147 if (GET_CODE (disp) == CONST
6148 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6149 switch (XINT (XEXP (disp, 0), 1))
6150 {
6151 case UNSPEC_GOT:
6152 case UNSPEC_GOTOFF:
6153 case UNSPEC_GOTPCREL:
6154 if (!flag_pic)
6155 abort ();
6156 goto is_legitimate_pic;
6157
6158 case UNSPEC_GOTTPOFF:
6159 case UNSPEC_GOTNTPOFF:
6160 case UNSPEC_INDNTPOFF:
6161 case UNSPEC_NTPOFF:
6162 case UNSPEC_DTPOFF:
6163 break;
6164
6165 default:
6166 reason = "invalid address unspec";
6167 goto report_error;
6168 }
6169
6170 else if (flag_pic && (SYMBOLIC_CONST (disp)
6171#if TARGET_MACHO
6172 && !machopic_operand_p (disp)
6173#endif
6174 ))
6175 {
6176 is_legitimate_pic:
6177 if (TARGET_64BIT && (index || base))
6178 {
6179 /* foo@dtpoff(%rX) is ok. */
6180 if (GET_CODE (disp) != CONST
6181 || GET_CODE (XEXP (disp, 0)) != PLUS
6182 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6183 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6184 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6185 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6186 {
6187 reason = "non-constant pic memory reference";
6188 goto report_error;
6189 }
6190 }
6191 else if (! legitimate_pic_address_disp_p (disp))
6192 {
6193 reason = "displacement is an invalid pic construct";
6194 goto report_error;
6195 }
6196
6197	  /* This code used to verify that a symbolic pic displacement
6198	     includes the pic_offset_table_rtx register.
6199
6200	     While this is a good idea, unfortunately these constructs may
6201	     be created by the "adds using lea" optimization for incorrect
6202	     code like:
6203
6204	     int a;
6205	     int foo(int i)
6206	       {
6207	         return *(&a+i);
6208	       }
6209
6210	     This code is nonsensical, but results in addressing the
6211	     GOT table with pic_offset_table_rtx as the base.  We can't
6212	     just refuse it easily, since it gets matched by the
6213	     "addsi3" pattern, which later gets split to an lea in case
6214	     the output register differs from the input.  While this
6215	     could be handled by a separate addsi pattern for this case
6216	     that never results in an lea, disabling this test seems to
6217	     be the easier and correct fix for the crash. */
6218 }
6219 else if (GET_CODE (disp) != LABEL_REF
6220 && GET_CODE (disp) != CONST_INT
6221 && (GET_CODE (disp) != CONST
6222 || !legitimate_constant_p (disp))
6223 && (GET_CODE (disp) != SYMBOL_REF
6224 || !legitimate_constant_p (disp)))
6225 {
6226 reason = "displacement is not constant";
6227 goto report_error;
6228 }
6229 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6230 {
6231 reason = "displacement is out of range";
6232 goto report_error;
6233 }
6234 }
6235
6236 /* Everything looks valid. */
6237 if (TARGET_DEBUG_ADDR)
6238 fprintf (stderr, "Success.\n");
6239 return TRUE;
6240
6241 report_error:
6242 if (TARGET_DEBUG_ADDR)
6243 {
6244 fprintf (stderr, "Error: %s\n", reason);
6245 debug_rtx (reason_rtx);
6246 }
6247 return FALSE;
6248}
6249
6250/* Return a unique alias set for the GOT. */
6251
6252static HOST_WIDE_INT
6253ix86_GOT_alias_set (void)
6254{
6255 static HOST_WIDE_INT set = -1;
6256 if (set == -1)
6257 set = new_alias_set ();
6258 return set;
6259}
6260
6261/* Return a legitimate reference for ORIG (an address) using the
6262 register REG. If REG is 0, a new pseudo is generated.
6263
6264 There are two types of references that must be handled:
6265
6266 1. Global data references must load the address from the GOT, via
6267 the PIC reg. An insn is emitted to do this load, and the reg is
6268 returned.
6269
6270 2. Static data references, constant pool addresses, and code labels
6271 compute the address as an offset from the GOT, whose base is in
6272 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6273 differentiate them from global data objects. The returned
6274 address is the PIC reg + an unspec constant.
6275
6276 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6277 reg also appears in the address. */
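/* Concretely, for a local symbol this builds the address
     pic_reg + sym@GOTOFF
   while a global symbol on ia32 is loaded with
     movl sym@GOT(pic_reg), reg
   and on x86-64 via a sym@GOTPCREL(%rip) memory reference.  */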
6278
6279rtx
6280legitimize_pic_address (rtx orig, rtx reg)
6281{
6282 rtx addr = orig;
6283 rtx new = orig;
6284 rtx base;
6285
6286#if TARGET_MACHO
6287 if (reg == 0)
6288 reg = gen_reg_rtx (Pmode);
6289 /* Use the generic Mach-O PIC machinery. */
6290 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6291#endif
6292
6293 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6294 new = addr;
6295 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6296 {
6297 /* This symbol may be referenced via a displacement from the PIC
6298 base address (@GOTOFF). */
6299
6300 if (reload_in_progress)
6301 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6302 if (GET_CODE (addr) == CONST)
6303 addr = XEXP (addr, 0);
6304 if (GET_CODE (addr) == PLUS)
6305 {
6306 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6307 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6308 }
6309 else
6310 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6311 new = gen_rtx_CONST (Pmode, new);
6312 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6313
6314 if (reg != 0)
6315 {
6316 emit_move_insn (reg, new);
6317 new = reg;
6318 }
6319 }
6320 else if (GET_CODE (addr) == SYMBOL_REF)
6321 {
6322 if (TARGET_64BIT)
6323 {
6324 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6325 new = gen_rtx_CONST (Pmode, new);
6326 new = gen_rtx_MEM (Pmode, new);
6327 RTX_UNCHANGING_P (new) = 1;
6328 set_mem_alias_set (new, ix86_GOT_alias_set ());
6329
6330 if (reg == 0)
6331 reg = gen_reg_rtx (Pmode);
6332	  /* Use gen_movsi directly, otherwise the address is loaded
6333	     into a register for CSE.  We don't want to CSE these addresses;
6334	     instead we CSE addresses from the GOT table, so skip this. */
6335 emit_insn (gen_movsi (reg, new));
6336 new = reg;
6337 }
6338 else
6339 {
6340 /* This symbol must be referenced via a load from the
6341 Global Offset Table (@GOT). */
6342
6343 if (reload_in_progress)
6344 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6345 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6346 new = gen_rtx_CONST (Pmode, new);
6347 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6348 new = gen_rtx_MEM (Pmode, new);
6349 RTX_UNCHANGING_P (new) = 1;
6350 set_mem_alias_set (new, ix86_GOT_alias_set ());
6351
6352 if (reg == 0)
6353 reg = gen_reg_rtx (Pmode);
6354 emit_move_insn (reg, new);
6355 new = reg;
6356 }
6357 }
6358 else
6359 {
6360 if (GET_CODE (addr) == CONST)
6361 {
6362 addr = XEXP (addr, 0);
6363
6364 /* We must match stuff we generate before. Assume the only
6365 unspecs that can get here are ours. Not that we could do
6366 anything with them anyway.... */
6367 if (GET_CODE (addr) == UNSPEC
6368 || (GET_CODE (addr) == PLUS
6369 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6370 return orig;
6371 if (GET_CODE (addr) != PLUS)
6372 abort ();
6373 }
6374 if (GET_CODE (addr) == PLUS)
6375 {
6376 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6377
6378 /* Check first to see if this is a constant offset from a @GOTOFF
6379 symbol reference. */
6380 if (local_symbolic_operand (op0, Pmode)
6381 && GET_CODE (op1) == CONST_INT)
6382 {
6383 if (!TARGET_64BIT)
6384 {
6385 if (reload_in_progress)
6386 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6387 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6388 UNSPEC_GOTOFF);
6389 new = gen_rtx_PLUS (Pmode, new, op1);
6390 new = gen_rtx_CONST (Pmode, new);
6391 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6392
6393 if (reg != 0)
6394 {
6395 emit_move_insn (reg, new);
6396 new = reg;
6397 }
6398 }
6399 else
6400 {
6401 if (INTVAL (op1) < -16*1024*1024
6402 || INTVAL (op1) >= 16*1024*1024)
6403 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6404 }
6405 }
6406 else
6407 {
6408 base = legitimize_pic_address (XEXP (addr, 0), reg);
6409 new = legitimize_pic_address (XEXP (addr, 1),
6410 base == reg ? NULL_RTX : reg);
6411
6412 if (GET_CODE (new) == CONST_INT)
6413 new = plus_constant (base, INTVAL (new));
6414 else
6415 {
6416 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6417 {
6418 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6419 new = XEXP (new, 1);
6420 }
6421 new = gen_rtx_PLUS (Pmode, base, new);
6422 }
6423 }
6424 }
6425 }
6426 return new;
6427}
6428
6429/* Load the thread pointer. If TO_REG is true, force it into a register. */
6430
6431static rtx
6432get_thread_pointer (int to_reg)
6433{
6434 rtx tp, reg, insn;
6435
6436 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6437 if (!to_reg)
6438 return tp;
6439
6440 reg = gen_reg_rtx (Pmode);
6441 insn = gen_rtx_SET (VOIDmode, reg, tp);
6442 insn = emit_insn (insn);
6443
6444 return reg;
6445}
6446
6447/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6448 false if we expect this to be used for a memory address and true if
6449 we expect to load the address into a register. */
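/* Roughly, the four TLS models are lowered as follows: global-dynamic
   and local-dynamic call the tls_get_addr helper (through the
   tls_global_dynamic_* and tls_local_dynamic_base_* patterns),
   initial-exec loads the thread-pointer offset from the GOT
   (@GOTTPOFF and friends), and local-exec adds a link-time constant
   offset (@TPOFF / @NTPOFF) to the thread pointer.  */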
6450
6451static rtx
6452legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6453{
6454 rtx dest, base, off, pic;
6455 int type;
6456
6457 switch (model)
6458 {
6459 case TLS_MODEL_GLOBAL_DYNAMIC:
6460 dest = gen_reg_rtx (Pmode);
6461 if (TARGET_64BIT)
6462 {
6463 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6464
6465 start_sequence ();
6466 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6467 insns = get_insns ();
6468 end_sequence ();
6469
6470 emit_libcall_block (insns, dest, rax, x);
6471 }
6472 else
6473 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6474 break;
6475
6476 case TLS_MODEL_LOCAL_DYNAMIC:
6477 base = gen_reg_rtx (Pmode);
6478 if (TARGET_64BIT)
6479 {
6480 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6481
6482 start_sequence ();
6483 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6484 insns = get_insns ();
6485 end_sequence ();
6486
6487 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6488 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6489 emit_libcall_block (insns, base, rax, note);
6490 }
6491 else
6492 emit_insn (gen_tls_local_dynamic_base_32 (base));
6493
6494 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6495 off = gen_rtx_CONST (Pmode, off);
6496
6497 return gen_rtx_PLUS (Pmode, base, off);
6498
6499 case TLS_MODEL_INITIAL_EXEC:
6500 if (TARGET_64BIT)
6501 {
6502 pic = NULL;
6503 type = UNSPEC_GOTNTPOFF;
6504 }
6505 else if (flag_pic)
6506 {
6507 if (reload_in_progress)
6508 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6509 pic = pic_offset_table_rtx;
6510 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6511 }
6512 else if (!TARGET_GNU_TLS)
6513 {
6514 pic = gen_reg_rtx (Pmode);
6515 emit_insn (gen_set_got (pic));
6516 type = UNSPEC_GOTTPOFF;
6517 }
6518 else
6519 {
6520 pic = NULL;
6521 type = UNSPEC_INDNTPOFF;
6522 }
6523
6524 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6525 off = gen_rtx_CONST (Pmode, off);
6526 if (pic)
6527 off = gen_rtx_PLUS (Pmode, pic, off);
6528 off = gen_rtx_MEM (Pmode, off);
6529 RTX_UNCHANGING_P (off) = 1;
6530 set_mem_alias_set (off, ix86_GOT_alias_set ());
6531
6532 if (TARGET_64BIT || TARGET_GNU_TLS)
6533 {
6534 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6535 off = force_reg (Pmode, off);
6536 return gen_rtx_PLUS (Pmode, base, off);
6537 }
6538 else
6539 {
6540 base = get_thread_pointer (true);
6541 dest = gen_reg_rtx (Pmode);
6542 emit_insn (gen_subsi3 (dest, base, off));
6543 }
6544 break;
6545
6546 case TLS_MODEL_LOCAL_EXEC:
6547 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6548 (TARGET_64BIT || TARGET_GNU_TLS)
6549 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6550 off = gen_rtx_CONST (Pmode, off);
6551
6552 if (TARGET_64BIT || TARGET_GNU_TLS)
6553 {
6554 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6555 return gen_rtx_PLUS (Pmode, base, off);
6556 }
6557 else
6558 {
6559 base = get_thread_pointer (true);
6560 dest = gen_reg_rtx (Pmode);
6561 emit_insn (gen_subsi3 (dest, base, off));
6562 }
6563 break;
6564
6565 default:
6566 abort ();
6567 }
6568
6569 return dest;
6570}
6571
6572/* Try machine-dependent ways of modifying an illegitimate address
6573 to be legitimate. If we find one, return the new, valid address.
6574 This macro is used in only one place: `memory_address' in explow.c.
6575
6576 OLDX is the address as it was before break_out_memory_refs was called.
6577 In some cases it is useful to look at this to decide what needs to be done.
6578
6579 MODE and WIN are passed so that this macro can use
6580 GO_IF_LEGITIMATE_ADDRESS.
6581
6582 It is always safe for this macro to do nothing. It exists to recognize
6583 opportunities to optimize the output.
6584
6585 For the 80386, we handle X+REG by loading X into a register R and
6586 using R+REG. R will go in a general reg and indexing will be used.
6587 However, if REG is a broken-out memory address or multiplication,
6588 nothing needs to be done because REG can certainly go in a general reg.
6589
6590 When -fpic is used, special handling is needed for symbolic references.
6591 See comments by legitimize_pic_address in i386.c for details. */
6592
6593rtx
6594legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6595{
6596 int changed = 0;
6597 unsigned log;
6598
6599 if (TARGET_DEBUG_ADDR)
6600 {
6601 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6602 GET_MODE_NAME (mode));
6603 debug_rtx (x);
6604 }
6605
6606 log = tls_symbolic_operand (x, mode);
6607 if (log)
6608 return legitimize_tls_address (x, log, false);
6609
6610 if (flag_pic && SYMBOLIC_CONST (x))
6611 return legitimize_pic_address (x, 0);
6612
6613 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6614 if (GET_CODE (x) == ASHIFT
6615 && GET_CODE (XEXP (x, 1)) == CONST_INT
6616 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6617 {
6618 changed = 1;
6619 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6620 GEN_INT (1 << log));
6621 }
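  /* e.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
     which ix86_decompose_address accepts as an index*scale term.  */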
6622
6623 if (GET_CODE (x) == PLUS)
6624 {
6625 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6626
6627 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6628 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6629 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6630 {
6631 changed = 1;
6632 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6633 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6634 GEN_INT (1 << log));
6635 }
6636
6637 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6638 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6639 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6640 {
6641 changed = 1;
6642 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6643 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6644 GEN_INT (1 << log));
6645 }
6646
6647 /* Put multiply first if it isn't already. */
6648 if (GET_CODE (XEXP (x, 1)) == MULT)
6649 {
6650 rtx tmp = XEXP (x, 0);
6651 XEXP (x, 0) = XEXP (x, 1);
6652 XEXP (x, 1) = tmp;
6653 changed = 1;
6654 }
6655
6656 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6657 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6658 created by virtual register instantiation, register elimination, and
6659 similar optimizations. */
6660 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6661 {
6662 changed = 1;
6663 x = gen_rtx_PLUS (Pmode,
6664 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6665 XEXP (XEXP (x, 1), 0)),
6666 XEXP (XEXP (x, 1), 1));
6667 }
6668
6669 /* Canonicalize
6670 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6671 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6672 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6673 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6674 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6675 && CONSTANT_P (XEXP (x, 1)))
6676 {
6677 rtx constant;
6678 rtx other = NULL_RTX;
6679
6680 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6681 {
6682 constant = XEXP (x, 1);
6683 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6684 }
6685 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6686 {
6687 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6688 other = XEXP (x, 1);
6689 }
6690 else
6691 constant = 0;
6692
6693 if (constant)
6694 {
6695 changed = 1;
6696 x = gen_rtx_PLUS (Pmode,
6697 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6698 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6699 plus_constant (other, INTVAL (constant)));
6700 }
6701 }
6702
6703 if (changed && legitimate_address_p (mode, x, FALSE))
6704 return x;
6705
6706 if (GET_CODE (XEXP (x, 0)) == MULT)
6707 {
6708 changed = 1;
6709 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6710 }
6711
6712 if (GET_CODE (XEXP (x, 1)) == MULT)
6713 {
6714 changed = 1;
6715 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6716 }
6717
6718 if (changed
6719 && GET_CODE (XEXP (x, 1)) == REG
6720 && GET_CODE (XEXP (x, 0)) == REG)
6721 return x;
6722
6723 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6724 {
6725 changed = 1;
6726 x = legitimize_pic_address (x, 0);
6727 }
6728
6729 if (changed && legitimate_address_p (mode, x, FALSE))
6730 return x;
6731
6732 if (GET_CODE (XEXP (x, 0)) == REG)
6733 {
6734 rtx temp = gen_reg_rtx (Pmode);
6735 rtx val = force_operand (XEXP (x, 1), temp);
6736 if (val != temp)
6737 emit_move_insn (temp, val);
6738
6739 XEXP (x, 1) = temp;
6740 return x;
6741 }
6742
6743 else if (GET_CODE (XEXP (x, 1)) == REG)
6744 {
6745 rtx temp = gen_reg_rtx (Pmode);
6746 rtx val = force_operand (XEXP (x, 0), temp);
6747 if (val != temp)
6748 emit_move_insn (temp, val);
6749
6750 XEXP (x, 0) = temp;
6751 return x;
6752 }
6753 }
6754
6755 return x;
6756}
6757
6758/* Print an integer constant expression in assembler syntax. Addition
6759 and subtraction are the only arithmetic that may appear in these
6760 expressions. FILE is the stdio stream to write to, X is the rtx, and
6761 CODE is the operand print code from the output string. */
6762
6763static void
6764output_pic_addr_const (FILE *file, rtx x, int code)
6765{
6766 char buf[256];
6767
6768 switch (GET_CODE (x))
6769 {
6770 case PC:
6771 if (flag_pic)
6772 putc ('.', file);
6773 else
6774 abort ();
6775 break;
6776
6777 case SYMBOL_REF:
6778 assemble_name (file, XSTR (x, 0));
6779 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6780 fputs ("@PLT", file);
6781 break;
6782
6783 case LABEL_REF:
6784 x = XEXP (x, 0);
6785 /* FALLTHRU */
6786 case CODE_LABEL:
6787 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6788 assemble_name (asm_out_file, buf);
6789 break;
6790
6791 case CONST_INT:
6792 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6793 break;
6794
6795 case CONST:
6796 /* This used to output parentheses around the expression,
6797 but that does not work on the 386 (either ATT or BSD assembler). */
6798 output_pic_addr_const (file, XEXP (x, 0), code);
6799 break;
6800
6801 case CONST_DOUBLE:
6802 if (GET_MODE (x) == VOIDmode)
6803 {
6804 /* We can use %d if the number is <32 bits and positive. */
6805 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6806 fprintf (file, "0x%lx%08lx",
6807 (unsigned long) CONST_DOUBLE_HIGH (x),
6808 (unsigned long) CONST_DOUBLE_LOW (x));
6809 else
6810 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6811 }
6812 else
6813 /* We can't handle floating point constants;
6814 PRINT_OPERAND must handle them. */
6815 output_operand_lossage ("floating constant misused");
6816 break;
6817
6818 case PLUS:
6819 /* Some assemblers need integer constants to appear first. */
6820 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6821 {
6822 output_pic_addr_const (file, XEXP (x, 0), code);
6823 putc ('+', file);
6824 output_pic_addr_const (file, XEXP (x, 1), code);
6825 }
6826 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6827 {
6828 output_pic_addr_const (file, XEXP (x, 1), code);
6829 putc ('+', file);
6830 output_pic_addr_const (file, XEXP (x, 0), code);
6831 }
6832 else
6833 abort ();
6834 break;
6835
6836 case MINUS:
6837 if (!TARGET_MACHO)
6838 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6839 output_pic_addr_const (file, XEXP (x, 0), code);
6840 putc ('-', file);
6841 output_pic_addr_const (file, XEXP (x, 1), code);
6842 if (!TARGET_MACHO)
6843 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6844 break;
6845
6846 case UNSPEC:
6847 if (XVECLEN (x, 0) != 1)
6848 abort ();
6849 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6850 switch (XINT (x, 1))
6851 {
6852 case UNSPEC_GOT:
6853 fputs ("@GOT", file);
6854 break;
6855 case UNSPEC_GOTOFF:
6856 fputs ("@GOTOFF", file);
6857 break;
6858 case UNSPEC_GOTPCREL:
6859 fputs ("@GOTPCREL(%rip)", file);
6860 break;
6861 case UNSPEC_GOTTPOFF:
6862 /* FIXME: This might be @TPOFF in Sun ld too. */
6863 fputs ("@GOTTPOFF", file);
6864 break;
6865 case UNSPEC_TPOFF:
6866 fputs ("@TPOFF", file);
6867 break;
6868 case UNSPEC_NTPOFF:
6869 if (TARGET_64BIT)
6870 fputs ("@TPOFF", file);
6871 else
6872 fputs ("@NTPOFF", file);
6873 break;
6874 case UNSPEC_DTPOFF:
6875 fputs ("@DTPOFF", file);
6876 break;
6877 case UNSPEC_GOTNTPOFF:
6878 if (TARGET_64BIT)
6879 fputs ("@GOTTPOFF(%rip)", file);
6880 else
6881 fputs ("@GOTNTPOFF", file);
6882 break;
6883 case UNSPEC_INDNTPOFF:
6884 fputs ("@INDNTPOFF", file);
6885 break;
6886 default:
6887 output_operand_lossage ("invalid UNSPEC as operand");
6888 break;
6889 }
6890 break;
6891
6892 default:
6893 output_operand_lossage ("invalid expression as operand");
6894 }
6895}
6896
6897/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6898 We need to handle our special PIC relocations. */
6899
6900void
6901i386_dwarf_output_addr_const (FILE *file, rtx x)
6902{
6903#ifdef ASM_QUAD
6904 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6905#else
6906 if (TARGET_64BIT)
6907 abort ();
6908 fprintf (file, "%s", ASM_LONG);
6909#endif
6910 if (flag_pic)
6911 output_pic_addr_const (file, x, '\0');
6912 else
6913 output_addr_const (file, x);
6914 fputc ('\n', file);
6915}
6916
6917/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6918 We need to emit DTP-relative relocations. */
6919
6920void
6921i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6922{
6923 fputs (ASM_LONG, file);
6924 output_addr_const (file, x);
6925 fputs ("@DTPOFF", file);
6926 switch (size)
6927 {
6928 case 4:
6929 break;
6930 case 8:
6931 fputs (", 0", file);
6932 break;
6933 default:
6934 abort ();
6935 }
6936}
6937
6938/* In the name of slightly smaller debug output, and to cater to
6939   general assembler lossage, recognize PIC+GOTOFF and turn it back
6940   into a direct symbol reference. */
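/* For example, (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF)))
   is turned back into plain sym.  */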
6941
6942static rtx
6943ix86_delegitimize_address (rtx orig_x)
6944{
6945 rtx x = orig_x, y;
6946
6947 if (GET_CODE (x) == MEM)
6948 x = XEXP (x, 0);
6949
6950 if (TARGET_64BIT)
6951 {
6952 if (GET_CODE (x) != CONST
6953 || GET_CODE (XEXP (x, 0)) != UNSPEC
6954 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6955 || GET_CODE (orig_x) != MEM)
6956 return orig_x;
6957 return XVECEXP (XEXP (x, 0), 0, 0);
6958 }
6959
6960 if (GET_CODE (x) != PLUS
6961 || GET_CODE (XEXP (x, 1)) != CONST)
6962 return orig_x;
6963
6964 if (GET_CODE (XEXP (x, 0)) == REG
6965 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6966 /* %ebx + GOT/GOTOFF */
6967 y = NULL;
6968 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6969 {
6970 /* %ebx + %reg * scale + GOT/GOTOFF */
6971 y = XEXP (x, 0);
6972 if (GET_CODE (XEXP (y, 0)) == REG
6973 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6974 y = XEXP (y, 1);
6975 else if (GET_CODE (XEXP (y, 1)) == REG
6976 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6977 y = XEXP (y, 0);
6978 else
6979 return orig_x;
6980 if (GET_CODE (y) != REG
6981 && GET_CODE (y) != MULT
6982 && GET_CODE (y) != ASHIFT)
6983 return orig_x;
6984 }
6985 else
6986 return orig_x;
6987
6988 x = XEXP (XEXP (x, 1), 0);
6989 if (GET_CODE (x) == UNSPEC
6990 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6991 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6992 {
6993 if (y)
6994 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6995 return XVECEXP (x, 0, 0);
6996 }
6997
6998 if (GET_CODE (x) == PLUS
6999 && GET_CODE (XEXP (x, 0)) == UNSPEC
7000 && GET_CODE (XEXP (x, 1)) == CONST_INT
7001 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7002 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7003 && GET_CODE (orig_x) != MEM)))
7004 {
7005 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7006 if (y)
7007 return gen_rtx_PLUS (Pmode, y, x);
7008 return x;
7009 }
7010
7011 return orig_x;
7012}
7013
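/* Print to FILE the one- or two-letter condition suffix ("e", "ne", "g",
   "a", ...) for CODE in MODE.  REVERSE inverts the condition; FP selects
   the spellings used by fcmov and fp compares.  */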
7014static void
7015put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7016 int fp, FILE *file)
7017{
7018 const char *suffix;
7019
7020 if (mode == CCFPmode || mode == CCFPUmode)
7021 {
7022 enum rtx_code second_code, bypass_code;
7023 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7024 if (bypass_code != NIL || second_code != NIL)
7025 abort ();
7026 code = ix86_fp_compare_code_to_integer (code);
7027 mode = CCmode;
7028 }
7029 if (reverse)
7030 code = reverse_condition (code);
7031
7032 switch (code)
7033 {
7034 case EQ:
7035 suffix = "e";
7036 break;
7037 case NE:
7038 suffix = "ne";
7039 break;
7040 case GT:
7041 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7042 abort ();
7043 suffix = "g";
7044 break;
7045 case GTU:
7046      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7047	 Those same assemblers have the same but opposite lossage on cmov. */
7048 if (mode != CCmode)
7049 abort ();
7050 suffix = fp ? "nbe" : "a";
7051 break;
7052 case LT:
7053 if (mode == CCNOmode || mode == CCGOCmode)
7054 suffix = "s";
7055 else if (mode == CCmode || mode == CCGCmode)
7056 suffix = "l";
7057 else
7058 abort ();
7059 break;
7060 case LTU:
7061 if (mode != CCmode)
7062 abort ();
7063 suffix = "b";
7064 break;
7065 case GE:
7066 if (mode == CCNOmode || mode == CCGOCmode)
7067 suffix = "ns";
7068 else if (mode == CCmode || mode == CCGCmode)
7069 suffix = "ge";
7070 else
7071 abort ();
7072 break;
7073 case GEU:
7074 /* ??? As above. */
7075 if (mode != CCmode)
7076 abort ();
7077 suffix = fp ? "nb" : "ae";
7078 break;
7079 case LE:
7080 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7081 abort ();
7082 suffix = "le";
7083 break;
7084 case LEU:
7085 if (mode != CCmode)
7086 abort ();
7087 suffix = "be";
7088 break;
7089 case UNORDERED:
7090 suffix = fp ? "u" : "p";
7091 break;
7092 case ORDERED:
7093 suffix = fp ? "nu" : "np";
7094 break;
7095 default:
7096 abort ();
7097 }
7098 fputs (suffix, file);
7099}
7100
7101/* Print the name of register X to FILE based on its machine mode and number.
7102 If CODE is 'w', pretend the mode is HImode.
7103 If CODE is 'b', pretend the mode is QImode.
7104 If CODE is 'k', pretend the mode is SImode.
7105 If CODE is 'q', pretend the mode is DImode.
7106 If CODE is 'h', pretend the reg is the `high' byte register.
7107 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
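/* For example, in AT&T syntax, for hard register 0 (the a-register)
   code 'b' prints %al, 'w' prints %ax, 'k' prints %eax, 'q' prints %rax
   and 'h' prints %ah.  */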
7108
7109void
7110print_reg (rtx x, int code, FILE *file)
7111{
7112 if (REGNO (x) == ARG_POINTER_REGNUM
7113 || REGNO (x) == FRAME_POINTER_REGNUM
7114 || REGNO (x) == FLAGS_REG
7115 || REGNO (x) == FPSR_REG)
7116 abort ();
7117
7118 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7119 putc ('%', file);
7120
7121 if (code == 'w' || MMX_REG_P (x))
7122 code = 2;
7123 else if (code == 'b')
7124 code = 1;
7125 else if (code == 'k')
7126 code = 4;
7127 else if (code == 'q')
7128 code = 8;
7129 else if (code == 'y')
7130 code = 3;
7131 else if (code == 'h')
7132 code = 0;
7133 else
7134 code = GET_MODE_SIZE (GET_MODE (x));
7135
7136  /* Irritatingly, the AMD extended registers use a different naming
7137     convention from the normal registers. */
7138 if (REX_INT_REG_P (x))
7139 {
7140 if (!TARGET_64BIT)
7141 abort ();
7142 switch (code)
7143 {
7144 case 0:
7145 error ("extended registers have no high halves");
7146 break;
7147 case 1:
7148 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7149 break;
7150 case 2:
7151 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7152 break;
7153 case 4:
7154 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7155 break;
7156 case 8:
7157 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7158 break;
7159 default:
7160 error ("unsupported operand size for extended register");
7161 break;
7162 }
7163 return;
7164 }
7165 switch (code)
7166 {
7167 case 3:
7168 if (STACK_TOP_P (x))
7169 {
7170 fputs ("st(0)", file);
7171 break;
7172 }
7173 /* FALLTHRU */
7174 case 8:
7175 case 4:
7176 case 12:
7177 if (! ANY_FP_REG_P (x))
7178 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7179 /* FALLTHRU */
7180 case 16:
7181 case 2:
7182 normal:
7183 fputs (hi_reg_name[REGNO (x)], file);
7184 break;
7185 case 1:
7186 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7187 goto normal;
7188 fputs (qi_reg_name[REGNO (x)], file);
7189 break;
7190 case 0:
7191 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7192 goto normal;
7193 fputs (qi_high_reg_name[REGNO (x)], file);
7194 break;
7195 default:
7196 abort ();
7197 }
7198}
7199
7200/* Locate some local-dynamic symbol still in use by this function
7201 so that we can print its name in some tls_local_dynamic_base
7202 pattern. */
7203
7204static const char *
7205get_some_local_dynamic_name (void)
7206{
7207 rtx insn;
7208
7209 if (cfun->machine->some_ld_name)
7210 return cfun->machine->some_ld_name;
7211
7212 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7213 if (INSN_P (insn)
7214 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7215 return cfun->machine->some_ld_name;
7216
7217 abort ();
7218}
7219
7220static int
7221get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7222{
7223 rtx x = *px;
7224
7225 if (GET_CODE (x) == SYMBOL_REF
7226 && local_dynamic_symbolic_operand (x, Pmode))
7227 {
7228 cfun->machine->some_ld_name = XSTR (x, 0);
7229 return 1;
7230 }
7231
7232 return 0;
7233}
7234
7235/* Meaning of CODE:
7236 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7237 C -- print opcode suffix for set/cmov insn.
7238 c -- like C, but print reversed condition
7239 F,f -- likewise, but for floating-point.
7240 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7241 otherwise nothing
7242 R -- print the prefix for register names.
7243 z -- print the opcode suffix for the size of the current operand.
7244 * -- print a star (in certain assembler syntax)
7245 A -- print an absolute memory reference.
7246 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7247   s -- print a shift double count, followed by the assembler's argument
7248 delimiter.
7249 b -- print the QImode name of the register for the indicated operand.
7250 %b0 would print %al if operands[0] is reg 0.
7251 w -- likewise, print the HImode name of the register.
7252 k -- likewise, print the SImode name of the register.
7253 q -- likewise, print the DImode name of the register.
7254 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7255 y -- print "st(0)" instead of "st" as a register.
7256 D -- print condition for SSE cmp instruction.
7257 P -- if PIC, print an @PLT suffix.
7258 X -- don't print any sort of PIC '@' suffix for a symbol.
7259 & -- print some in-use local-dynamic symbol name.
7260 */
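/* For example, with operands[0] = (reg:SI 0), "%k0" prints %eax,
   "%b0" prints %al, and "%z0" emits the operand-size suffix for the
   mode of operands[0].  */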
7261
7262void
7263print_operand (FILE *file, rtx x, int code)
7264{
7265 if (code)
7266 {
7267 switch (code)
7268 {
7269 case '*':
7270 if (ASSEMBLER_DIALECT == ASM_ATT)
7271 putc ('*', file);
7272 return;
7273
7274 case '&':
7275 assemble_name (file, get_some_local_dynamic_name ());
7276 return;
7277
7278 case 'A':
7279 if (ASSEMBLER_DIALECT == ASM_ATT)
7280 putc ('*', file);
7281 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7282 {
7283	  /* Intel syntax.  For absolute addresses, registers should not
7284	     be surrounded by brackets. */
7285 if (GET_CODE (x) != REG)
7286 {
7287 putc ('[', file);
7288 PRINT_OPERAND (file, x, 0);
7289 putc (']', file);
7290 return;
7291 }
7292 }
7293 else
7294 abort ();
7295
7296 PRINT_OPERAND (file, x, 0);
7297 return;
7298
7299
7300 case 'L':
7301 if (ASSEMBLER_DIALECT == ASM_ATT)
7302 putc ('l', file);
7303 return;
7304
7305 case 'W':
7306 if (ASSEMBLER_DIALECT == ASM_ATT)
7307 putc ('w', file);
7308 return;
7309
7310 case 'B':
7311 if (ASSEMBLER_DIALECT == ASM_ATT)
7312 putc ('b', file);
7313 return;
7314
7315 case 'Q':
7316 if (ASSEMBLER_DIALECT == ASM_ATT)
7317 putc ('l', file);
7318 return;
7319
7320 case 'S':
7321 if (ASSEMBLER_DIALECT == ASM_ATT)
7322 putc ('s', file);
7323 return;
7324
7325 case 'T':
7326 if (ASSEMBLER_DIALECT == ASM_ATT)
7327 putc ('t', file);
7328 return;
7329
7330 case 'z':
7331 /* 387 opcodes don't get size suffixes if the operands are
7332 registers. */
7333 if (STACK_REG_P (x))
7334 return;
7335
7336 /* Likewise if using Intel opcodes. */
7337 if (ASSEMBLER_DIALECT == ASM_INTEL)
7338 return;
7339
7340	  /* Derive the opcode suffix from the size of the operand.  */
7341 switch (GET_MODE_SIZE (GET_MODE (x)))
7342 {
7343 case 2:
7344#ifdef HAVE_GAS_FILDS_FISTS
7345 putc ('s', file);
7346#endif
7347 return;
7348
7349 case 4:
7350 if (GET_MODE (x) == SFmode)
7351 {
7352 putc ('s', file);
7353 return;
7354 }
7355 else
7356 putc ('l', file);
7357 return;
7358
7359 case 12:
7360 case 16:
7361 putc ('t', file);
7362 return;
7363
7364 case 8:
7365 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7366 {
7367#ifdef GAS_MNEMONICS
7368 putc ('q', file);
7369#else
7370 putc ('l', file);
7371 putc ('l', file);
7372#endif
7373 }
7374 else
7375 putc ('l', file);
7376 return;
7377
7378 default:
7379 abort ();
7380 }
7381
7382 case 'b':
7383 case 'w':
7384 case 'k':
7385 case 'q':
7386 case 'h':
7387 case 'y':
7388 case 'X':
7389 case 'P':
7390 break;
7391
7392 case 's':
7393 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7394 {
7395 PRINT_OPERAND (file, x, 0);
7396 putc (',', file);
7397 }
7398 return;
7399
7400 case 'D':
7401	      /* Little bit of braindamage here.  The SSE compare instructions
7402	         use completely different names for the comparisons than the
7403	         fp conditional moves do.  */
7404 switch (GET_CODE (x))
7405 {
7406 case EQ:
7407 case UNEQ:
7408 fputs ("eq", file);
7409 break;
7410 case LT:
7411 case UNLT:
7412 fputs ("lt", file);
7413 break;
7414 case LE:
7415 case UNLE:
7416 fputs ("le", file);
7417 break;
7418 case UNORDERED:
7419 fputs ("unord", file);
7420 break;
7421 case NE:
7422 case LTGT:
7423 fputs ("neq", file);
7424 break;
7425 case UNGE:
7426 case GE:
7427 fputs ("nlt", file);
7428 break;
7429 case UNGT:
7430 case GT:
7431 fputs ("nle", file);
7432 break;
7433 case ORDERED:
7434 fputs ("ord", file);
7435 break;
7436 default:
7437 abort ();
7438 break;
7439 }
7440 return;
7441 case 'O':
7442#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7443 if (ASSEMBLER_DIALECT == ASM_ATT)
7444 {
7445 switch (GET_MODE (x))
7446 {
7447 case HImode: putc ('w', file); break;
7448 case SImode:
7449 case SFmode: putc ('l', file); break;
7450 case DImode:
7451 case DFmode: putc ('q', file); break;
7452 default: abort ();
7453 }
7454 putc ('.', file);
7455 }
7456#endif
7457 return;
7458 case 'C':
7459 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7460 return;
7461 case 'F':
7462#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7463 if (ASSEMBLER_DIALECT == ASM_ATT)
7464 putc ('.', file);
7465#endif
7466 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7467 return;
7468
7469 /* Like above, but reverse condition */
7470 case 'c':
7471 /* Check to see if argument to %c is really a constant
7472 and not a condition code which needs to be reversed. */
7473 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7474 {
7475 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7476 return;
7477 }
7478 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7479 return;
7480 case 'f':
7481#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7482 if (ASSEMBLER_DIALECT == ASM_ATT)
7483 putc ('.', file);
7484#endif
7485 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7486 return;
7487 case '+':
7488 {
7489 rtx x;
7490
7491 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7492 return;
7493
7494 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7495 if (x)
7496 {
7497 int pred_val = INTVAL (XEXP (x, 0));
7498
7499 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7500 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7501 {
7502 int taken = pred_val > REG_BR_PROB_BASE / 2;
7503 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7504
7505		      /* Emit hints only when the default branch prediction
7506		         heuristics would fail.  */
7507 if (taken != cputaken)
7508 {
7509 /* We use 3e (DS) prefix for taken branches and
7510 2e (CS) prefix for not taken branches. */
7511 if (taken)
7512 fputs ("ds ; ", file);
7513 else
7514 fputs ("cs ; ", file);
7515 }
7516 }
7517 }
7518 return;
7519 }
7520 default:
7521 output_operand_lossage ("invalid operand code `%c'", code);
7522 }
7523 }
7524
7525 if (GET_CODE (x) == REG)
7526 print_reg (x, code, file);
7527
7528 else if (GET_CODE (x) == MEM)
7529 {
7530 /* No `byte ptr' prefix for call instructions. */
7531 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7532 {
7533 const char * size;
7534 switch (GET_MODE_SIZE (GET_MODE (x)))
7535 {
7536 case 1: size = "BYTE"; break;
7537 case 2: size = "WORD"; break;
7538 case 4: size = "DWORD"; break;
7539 case 8: size = "QWORD"; break;
7540 case 12: size = "XWORD"; break;
7541 case 16: size = "XMMWORD"; break;
7542 default:
7543 abort ();
7544 }
7545
7546 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7547 if (code == 'b')
7548 size = "BYTE";
7549 else if (code == 'w')
7550 size = "WORD";
7551 else if (code == 'k')
7552 size = "DWORD";
7553
7554 fputs (size, file);
7555 fputs (" PTR ", file);
7556 }
7557
7558 x = XEXP (x, 0);
7559 /* Avoid (%rip) for call operands. */
7560 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7561 && GET_CODE (x) != CONST_INT)
7562 output_addr_const (file, x);
7563 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7564 output_operand_lossage ("invalid constraints for operand");
7565 else
7566 output_address (x);
7567 }
7568
7569 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7570 {
7571 REAL_VALUE_TYPE r;
7572 long l;
7573
7574 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7575 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7576
7577 if (ASSEMBLER_DIALECT == ASM_ATT)
7578 putc ('$', file);
7579 fprintf (file, "0x%08lx", l);
7580 }
7581
7582 /* These float cases don't actually occur as immediate operands. */
7583 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7584 {
7585 char dstr[30];
7586
7587 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7588 fprintf (file, "%s", dstr);
7589 }
7590
7591 else if (GET_CODE (x) == CONST_DOUBLE
7592 && GET_MODE (x) == XFmode)
7593 {
7594 char dstr[30];
7595
7596 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7597 fprintf (file, "%s", dstr);
7598 }
7599
7600 else
7601 {
7602 if (code != 'P')
7603 {
7604 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7605 {
7606 if (ASSEMBLER_DIALECT == ASM_ATT)
7607 putc ('$', file);
7608 }
7609 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7610 || GET_CODE (x) == LABEL_REF)
7611 {
7612 if (ASSEMBLER_DIALECT == ASM_ATT)
7613 putc ('$', file);
7614 else
7615 fputs ("OFFSET FLAT:", file);
7616 }
7617 }
7618 if (GET_CODE (x) == CONST_INT)
7619 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7620 else if (flag_pic)
7621 output_pic_addr_const (file, x, code);
7622 else
7623 output_addr_const (file, x);
7624 }
7625}
7626
7627/* Print a memory operand whose address is ADDR. */
7628
7629void
7630print_operand_address (FILE *file, rtx addr)
7631{
7632 struct ix86_address parts;
7633 rtx base, index, disp;
7634 int scale;
7635
7636 if (! ix86_decompose_address (addr, &parts))
7637 abort ();
7638
7639 base = parts.base;
7640 index = parts.index;
7641 disp = parts.disp;
7642 scale = parts.scale;
7643
7644 switch (parts.seg)
7645 {
7646 case SEG_DEFAULT:
7647 break;
7648 case SEG_FS:
7649 case SEG_GS:
7650 if (USER_LABEL_PREFIX[0] == 0)
7651 putc ('%', file);
7652 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7653 break;
7654 default:
7655 abort ();
7656 }
7657
7658 if (!base && !index)
7659 {
7660	      /* A displacement-only address requires special attention.  */
7661
7662 if (GET_CODE (disp) == CONST_INT)
7663 {
7664 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7665 {
7666 if (USER_LABEL_PREFIX[0] == 0)
7667 putc ('%', file);
7668 fputs ("ds:", file);
7669 }
7670 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7671 }
7672 else if (flag_pic)
7673 output_pic_addr_const (file, disp, 0);
7674 else
7675 output_addr_const (file, disp);
7676
7677 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7678 if (TARGET_64BIT
7679 && ((GET_CODE (disp) == SYMBOL_REF
7680 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7681 || GET_CODE (disp) == LABEL_REF
7682 || (GET_CODE (disp) == CONST
7683 && GET_CODE (XEXP (disp, 0)) == PLUS
7684 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7685 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7686 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7687 fputs ("(%rip)", file);
7688 }
7689 else
7690 {
7691 if (ASSEMBLER_DIALECT == ASM_ATT)
7692 {
7693 if (disp)
7694 {
7695 if (flag_pic)
7696 output_pic_addr_const (file, disp, 0);
7697 else if (GET_CODE (disp) == LABEL_REF)
7698 output_asm_label (disp);
7699 else
7700 output_addr_const (file, disp);
7701 }
7702
7703 putc ('(', file);
7704 if (base)
7705 print_reg (base, 0, file);
7706 if (index)
7707 {
7708 putc (',', file);
7709 print_reg (index, 0, file);
7710 if (scale != 1)
7711 fprintf (file, ",%d", scale);
7712 }
7713 putc (')', file);
7714 }
7715 else
7716 {
7717 rtx offset = NULL_RTX;
7718
7719 if (disp)
7720 {
7721 /* Pull out the offset of a symbol; print any symbol itself. */
7722 if (GET_CODE (disp) == CONST
7723 && GET_CODE (XEXP (disp, 0)) == PLUS
7724 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7725 {
7726 offset = XEXP (XEXP (disp, 0), 1);
7727 disp = gen_rtx_CONST (VOIDmode,
7728 XEXP (XEXP (disp, 0), 0));
7729 }
7730
7731 if (flag_pic)
7732 output_pic_addr_const (file, disp, 0);
7733 else if (GET_CODE (disp) == LABEL_REF)
7734 output_asm_label (disp);
7735 else if (GET_CODE (disp) == CONST_INT)
7736 offset = disp;
7737 else
7738 output_addr_const (file, disp);
7739 }
7740
7741 putc ('[', file);
7742 if (base)
7743 {
7744 print_reg (base, 0, file);
7745 if (offset)
7746 {
7747 if (INTVAL (offset) >= 0)
7748 putc ('+', file);
7749 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7750 }
7751 }
7752 else if (offset)
7753 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7754 else
7755 putc ('0', file);
7756
7757 if (index)
7758 {
7759 putc ('+', file);
7760 print_reg (index, 0, file);
7761 if (scale != 1)
7762 fprintf (file, "*%d", scale);
7763 }
7764 putc (']', file);
7765 }
7766 }
7767}
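/* Added illustrative note: for a base+index*scale+displacement address
   such as ebx + esi*4 + 8, the AT&T branch above prints "8(%ebx,%esi,4)",
   while the Intel branch prints a bracketed form along the lines of
   "[ebx+8+esi*4]" (register prefixing depends on USER_LABEL_PREFIX).  */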
7768
7769bool
7770output_addr_const_extra (FILE *file, rtx x)
7771{
7772 rtx op;
7773
7774 if (GET_CODE (x) != UNSPEC)
7775 return false;
7776
7777 op = XVECEXP (x, 0, 0);
7778 switch (XINT (x, 1))
7779 {
7780 case UNSPEC_GOTTPOFF:
7781 output_addr_const (file, op);
7782 /* FIXME: This might be @TPOFF in Sun ld. */
7783 fputs ("@GOTTPOFF", file);
7784 break;
7785 case UNSPEC_TPOFF:
7786 output_addr_const (file, op);
7787 fputs ("@TPOFF", file);
7788 break;
7789 case UNSPEC_NTPOFF:
7790 output_addr_const (file, op);
7791 if (TARGET_64BIT)
7792 fputs ("@TPOFF", file);
7793 else
7794 fputs ("@NTPOFF", file);
7795 break;
7796 case UNSPEC_DTPOFF:
7797 output_addr_const (file, op);
7798 fputs ("@DTPOFF", file);
7799 break;
7800 case UNSPEC_GOTNTPOFF:
7801 output_addr_const (file, op);
7802 if (TARGET_64BIT)
7803 fputs ("@GOTTPOFF(%rip)", file);
7804 else
7805 fputs ("@GOTNTPOFF", file);
7806 break;
7807 case UNSPEC_INDNTPOFF:
7808 output_addr_const (file, op);
7809 fputs ("@INDNTPOFF", file);
7810 break;
7811
7812 default:
7813 return false;
7814 }
7815
7816 return true;
7817}
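/* Added illustrative note: an (unspec [(symbol_ref "foo")] UNSPEC_NTPOFF)
   operand is printed as "foo@NTPOFF" for 32-bit targets and as "foo@TPOFF"
   for 64-bit targets, matching the TLS relocations the assembler expects.  */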
7818
7819/* Split one or more DImode RTL references into pairs of SImode
7820 references. The RTL can be REG, offsettable MEM, integer constant, or
7821 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7822 split and "num" is its length. lo_half and hi_half are output arrays
7823 that parallel "operands". */
7824
7825void
7826split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7827{
7828 while (num--)
7829 {
7830 rtx op = operands[num];
7831
7832	      /* simplify_subreg refuses to split volatile memory references,
7833	         but we still have to handle them.  */
7834 if (GET_CODE (op) == MEM)
7835 {
7836 lo_half[num] = adjust_address (op, SImode, 0);
7837 hi_half[num] = adjust_address (op, SImode, 4);
7838 }
7839 else
7840 {
7841 lo_half[num] = simplify_gen_subreg (SImode, op,
7842 GET_MODE (op) == VOIDmode
7843 ? DImode : GET_MODE (op), 0);
7844 hi_half[num] = simplify_gen_subreg (SImode, op,
7845 GET_MODE (op) == VOIDmode
7846 ? DImode : GET_MODE (op), 4);
7847 }
7848 }
7849}
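/* Added illustrative note: for a single DImode pseudo register operand,
   split_di sets lo_half[0] = (subreg:SI (reg:DI N) 0) and
   hi_half[0] = (subreg:SI (reg:DI N) 4); for an offsettable MEM the two
   halves are the same MEM adjusted by 0 and 4 bytes.  */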
7850/* Split one or more TImode RTL references into pairs of DImode
7851   references.  The RTL can be REG, offsettable MEM, integer constant, or
7852   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
7853 split and "num" is its length. lo_half and hi_half are output arrays
7854 that parallel "operands". */
7855
7856void
7857split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7858{
7859 while (num--)
7860 {
7861 rtx op = operands[num];
7862
7863	      /* simplify_subreg refuses to split volatile memory references, but we
7864	         still have to handle them.  */
7865 if (GET_CODE (op) == MEM)
7866 {
7867 lo_half[num] = adjust_address (op, DImode, 0);
7868 hi_half[num] = adjust_address (op, DImode, 8);
7869 }
7870 else
7871 {
7872 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7873 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7874 }
7875 }
7876}
7877
7878/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7879 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7880 is the expression of the binary operation. The output may either be
7881 emitted here, or returned to the caller, like all output_* functions.
7882
7883 There is no guarantee that the operands are the same mode, as they
7884 might be within FLOAT or FLOAT_EXTEND expressions. */
7885
7886#ifndef SYSV386_COMPAT
7887/* Set to 1 for compatibility with brain-damaged assemblers. No-one
7888 wants to fix the assemblers because that causes incompatibility
7889 with gcc. No-one wants to fix gcc because that causes
7890 incompatibility with assemblers... You can use the option of
7891 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7892#define SYSV386_COMPAT 1
7893#endif
7894
7895const char *
7896output_387_binary_op (rtx insn, rtx *operands)
7897{
7898 static char buf[30];
7899 const char *p;
7900 const char *ssep;
7901 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7902
7903#ifdef ENABLE_CHECKING
7904 /* Even if we do not want to check the inputs, this documents input
7905 constraints. Which helps in understanding the following code. */
7906 if (STACK_REG_P (operands[0])
7907 && ((REG_P (operands[1])
7908 && REGNO (operands[0]) == REGNO (operands[1])
7909 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7910 || (REG_P (operands[2])
7911 && REGNO (operands[0]) == REGNO (operands[2])
7912 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7913 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7914 ; /* ok */
7915 else if (!is_sse)
7916 abort ();
7917#endif
7918
7919 switch (GET_CODE (operands[3]))
7920 {
7921 case PLUS:
7922 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7923 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7924 p = "fiadd";
7925 else
7926 p = "fadd";
7927 ssep = "add";
7928 break;
7929
7930 case MINUS:
7931 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7932 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7933 p = "fisub";
7934 else
7935 p = "fsub";
7936 ssep = "sub";
7937 break;
7938
7939 case MULT:
7940 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7941 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7942 p = "fimul";
7943 else
7944 p = "fmul";
7945 ssep = "mul";
7946 break;
7947
7948 case DIV:
7949 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7950 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7951 p = "fidiv";
7952 else
7953 p = "fdiv";
7954 ssep = "div";
7955 break;
7956
7957 default:
7958 abort ();
7959 }
7960
7961 if (is_sse)
7962 {
7963 strcpy (buf, ssep);
7964 if (GET_MODE (operands[0]) == SFmode)
7965 strcat (buf, "ss\t{%2, %0|%0, %2}");
7966 else
7967 strcat (buf, "sd\t{%2, %0|%0, %2}");
7968 return buf;
7969 }
7970 strcpy (buf, p);
7971
7972 switch (GET_CODE (operands[3]))
7973 {
7974 case MULT:
7975 case PLUS:
7976 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7977 {
7978 rtx temp = operands[2];
7979 operands[2] = operands[1];
7980 operands[1] = temp;
7981 }
7982
7983	      /* We now know operands[0] == operands[1].  */
7984
7985 if (GET_CODE (operands[2]) == MEM)
7986 {
7987 p = "%z2\t%2";
7988 break;
7989 }
7990
7991 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7992 {
7993 if (STACK_TOP_P (operands[0]))
7994 /* How is it that we are storing to a dead operand[2]?
7995 Well, presumably operands[1] is dead too. We can't
7996 store the result to st(0) as st(0) gets popped on this
7997 instruction. Instead store to operands[2] (which I
7998 think has to be st(1)). st(1) will be popped later.
7999 gcc <= 2.8.1 didn't have this check and generated
8000 assembly code that the Unixware assembler rejected. */
8001 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8002 else
8003 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8004 break;
8005 }
8006
8007 if (STACK_TOP_P (operands[0]))
8008 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8009 else
8010 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8011 break;
8012
8013 case MINUS:
8014 case DIV:
8015 if (GET_CODE (operands[1]) == MEM)
8016 {
8017 p = "r%z1\t%1";
8018 break;
8019 }
8020
8021 if (GET_CODE (operands[2]) == MEM)
8022 {
8023 p = "%z2\t%2";
8024 break;
8025 }
8026
8027 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8028 {
8029#if SYSV386_COMPAT
8030 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8031 derived assemblers, confusingly reverse the direction of
8032 the operation for fsub{r} and fdiv{r} when the
8033 destination register is not st(0). The Intel assembler
8034 doesn't have this brain damage. Read !SYSV386_COMPAT to
8035 figure out what the hardware really does. */
8036 if (STACK_TOP_P (operands[0]))
8037 p = "{p\t%0, %2|rp\t%2, %0}";
8038 else
8039 p = "{rp\t%2, %0|p\t%0, %2}";
8040#else
8041 if (STACK_TOP_P (operands[0]))
8042 /* As above for fmul/fadd, we can't store to st(0). */
8043 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8044 else
8045 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8046#endif
8047 break;
8048 }
8049
8050 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8051 {
8052#if SYSV386_COMPAT
8053 if (STACK_TOP_P (operands[0]))
8054 p = "{rp\t%0, %1|p\t%1, %0}";
8055 else
8056 p = "{p\t%1, %0|rp\t%0, %1}";
8057#else
8058 if (STACK_TOP_P (operands[0]))
8059 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8060 else
8061 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8062#endif
8063 break;
8064 }
8065
8066 if (STACK_TOP_P (operands[0]))
8067 {
8068 if (STACK_TOP_P (operands[1]))
8069 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8070 else
8071 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8072 break;
8073 }
8074 else if (STACK_TOP_P (operands[1]))
8075 {
8076#if SYSV386_COMPAT
8077 p = "{\t%1, %0|r\t%0, %1}";
8078#else
8079 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8080#endif
8081 }
8082 else
8083 {
8084#if SYSV386_COMPAT
8085 p = "{r\t%2, %0|\t%0, %2}";
8086#else
8087 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8088#endif
8089 }
8090 break;
8091
8092 default:
8093 abort ();
8094 }
8095
8096 strcat (buf, p);
8097 return buf;
8098}
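/* Added illustrative note: for an SSE SFmode add, (set (reg:SF xmm0)
   (plus:SF (reg:SF xmm0) (reg:SF xmm1))), the routine above returns
   "addss\t{%2, %0|%0, %2}"; for the 387 case where operands[2] is a
   dying stack register and operands[0] is st(0), it returns the popping
   form "faddp\t{%0, %2|%2, %0}".  */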
8099
8100/* Output code to initialize the control word copies used by the
8101   trunc?f?i patterns.  NORMAL is set to the current control word, while
8102   ROUND_DOWN is set to a control word that rounds toward zero.  */
8103void
8104emit_i387_cw_initialization (rtx normal, rtx round_down)
8105{
8106 rtx reg = gen_reg_rtx (HImode);
8107
8108 emit_insn (gen_x86_fnstcw_1 (normal));
8109 emit_move_insn (reg, normal);
8110 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8111 && !TARGET_64BIT)
8112 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8113 else
8114 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8115 emit_move_insn (round_down, reg);
8116}
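/* Added note: the 0xc00 mask (and the 0xc insv variant) above sets the
   x87 rounding-control field, bits 10-11 of the control word, to 11b,
   i.e. round toward zero, which is the rounding the trunc?f?i patterns
   rely on; the unmodified copy saved in NORMAL is used to restore the
   original mode afterwards.  */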
8117
8118/* Output code for INSN to convert a float to a signed int. OPERANDS
8119 are the insn operands. The output may be [HSD]Imode and the input
8120 operand may be [SDX]Fmode. */
8121
8122const char *
8123output_fix_trunc (rtx insn, rtx *operands)
8124{
8125 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8126 int dimode_p = GET_MODE (operands[0]) == DImode;
8127
8128 /* Jump through a hoop or two for DImode, since the hardware has no
8129 non-popping instruction. We used to do this a different way, but
8130 that was somewhat fragile and broke with post-reload splitters. */
8131 if (dimode_p && !stack_top_dies)
8132 output_asm_insn ("fld\t%y1", operands);
8133
8134 if (!STACK_TOP_P (operands[1]))
8135 abort ();
8136
8137 if (GET_CODE (operands[0]) != MEM)
8138 abort ();
8139
8140 output_asm_insn ("fldcw\t%3", operands);
8141 if (stack_top_dies || dimode_p)
8142 output_asm_insn ("fistp%z0\t%0", operands);
8143 else
8144 output_asm_insn ("fist%z0\t%0", operands);
8145 output_asm_insn ("fldcw\t%2", operands);
8146
8147 return "";
8148}
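/* Added illustrative note: for an SImode memory destination whose source
   stack top dies, the sequence emitted above is
       fldcw   %3
       fistpl  %0
       fldcw   %2
   i.e. switch to the truncating control word, store-and-pop, and restore
   the caller's control word (the "l" suffix comes from "%z0").  */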
8149
8150/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8151 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8152 when fucom should be used. */
8153
8154const char *
8155output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8156{
8157 int stack_top_dies;
8158 rtx cmp_op0 = operands[0];
8159 rtx cmp_op1 = operands[1];
8160 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8161
8162 if (eflags_p == 2)
8163 {
8164 cmp_op0 = cmp_op1;
8165 cmp_op1 = operands[2];
8166 }
8167 if (is_sse)
8168 {
8169 if (GET_MODE (operands[0]) == SFmode)
8170 if (unordered_p)
8171 return "ucomiss\t{%1, %0|%0, %1}";
8172 else
8173 return "comiss\t{%1, %0|%0, %1}";
8174 else
8175 if (unordered_p)
8176 return "ucomisd\t{%1, %0|%0, %1}";
8177 else
8178 return "comisd\t{%1, %0|%0, %1}";
8179 }
8180
8181 if (! STACK_TOP_P (cmp_op0))
8182 abort ();
8183
8184 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8185
8186 if (STACK_REG_P (cmp_op1)
8187 && stack_top_dies
8188 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8189 && REGNO (cmp_op1) != FIRST_STACK_REG)
8190 {
8191      /* If the top of the 387 stack dies, and the other operand
8192	 is also a stack register that dies, then this must be a
8193	 `fcompp' float compare.  */
8194
8195 if (eflags_p == 1)
8196 {
8197 /* There is no double popping fcomi variant. Fortunately,
8198 eflags is immune from the fstp's cc clobbering. */
8199 if (unordered_p)
8200 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8201 else
8202 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8203 return "fstp\t%y0";
8204 }
8205 else
8206 {
8207 if (eflags_p == 2)
8208 {
8209 if (unordered_p)
8210 return "fucompp\n\tfnstsw\t%0";
8211 else
8212 return "fcompp\n\tfnstsw\t%0";
8213 }
8214 else
8215 {
8216 if (unordered_p)
8217 return "fucompp";
8218 else
8219 return "fcompp";
8220 }
8221 }
8222 }
8223 else
8224 {
8225 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8226
8227 static const char * const alt[24] =
8228 {
8229 "fcom%z1\t%y1",
8230 "fcomp%z1\t%y1",
8231 "fucom%z1\t%y1",
8232 "fucomp%z1\t%y1",
8233
8234 "ficom%z1\t%y1",
8235 "ficomp%z1\t%y1",
8236 NULL,
8237 NULL,
8238
8239 "fcomi\t{%y1, %0|%0, %y1}",
8240 "fcomip\t{%y1, %0|%0, %y1}",
8241 "fucomi\t{%y1, %0|%0, %y1}",
8242 "fucomip\t{%y1, %0|%0, %y1}",
8243
8244 NULL,
8245 NULL,
8246 NULL,
8247 NULL,
8248
8249 "fcom%z2\t%y2\n\tfnstsw\t%0",
8250 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8251 "fucom%z2\t%y2\n\tfnstsw\t%0",
8252 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8253
8254 "ficom%z2\t%y2\n\tfnstsw\t%0",
8255 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8256 NULL,
8257 NULL
8258 };
8259
8260 int mask;
8261 const char *ret;
8262
8263 mask = eflags_p << 3;
8264 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8265 mask |= unordered_p << 1;
8266 mask |= stack_top_dies;
8267
8268 if (mask >= 24)
8269 abort ();
8270 ret = alt[mask];
8271 if (ret == NULL)
8272 abort ();
8273
8274 return ret;
8275 }
8276}
8277
8278void
8279ix86_output_addr_vec_elt (FILE *file, int value)
8280{
8281 const char *directive = ASM_LONG;
8282
8283 if (TARGET_64BIT)
8284 {
8285#ifdef ASM_QUAD
8286 directive = ASM_QUAD;
8287#else
8288 abort ();
8289#endif
8290 }
8291
8292 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8293}
8294
8295void
8296ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8297{
8298 if (TARGET_64BIT)
8299 fprintf (file, "%s%s%d-%s%d\n",
8300 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8301 else if (HAVE_AS_GOTOFF_IN_DATA)
8302 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8303#if TARGET_MACHO
8304 else if (TARGET_MACHO)
8305 {
8306 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8307 machopic_output_function_base_name (file);
8308 fprintf(file, "\n");
8309 }
8310#endif
8311 else
8312 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8313 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8314}
8315
8316/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8317 for the target. */
8318
8319void
8320ix86_expand_clear (rtx dest)
8321{
8322 rtx tmp;
8323
8324 /* We play register width games, which are only valid after reload. */
8325 if (!reload_completed)
8326 abort ();
8327
8328 /* Avoid HImode and its attendant prefix byte. */
8329 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8330 dest = gen_rtx_REG (SImode, REGNO (dest));
8331
8332 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8333
8334 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8335 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8336 {
8337 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8338 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8339 }
8340
8341 emit_insn (tmp);
8342}
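/* Added illustrative note: clearing %eax normally matches the movsi_xor
   pattern and assembles to "xorl %eax, %eax" (with the flags clobber
   recorded above), while targets with TARGET_USE_MOV0 that are not
   optimizing for size emit "movl $0, %eax" instead; destinations narrower
   than SImode are first widened to SImode to avoid the operand-size
   prefix.  */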
8343
8344/* X is an unchanging MEM. If it is a constant pool reference, return
8345 the constant pool rtx, else NULL. */
8346
8347static rtx
8348maybe_get_pool_constant (rtx x)
8349{
8350 x = ix86_delegitimize_address (XEXP (x, 0));
8351
8352 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8353 return get_pool_constant (x);
8354
8355 return NULL_RTX;
8356}
8357
8358void
8359ix86_expand_move (enum machine_mode mode, rtx operands[])
8360{
8361 int strict = (reload_in_progress || reload_completed);
8362 rtx op0, op1;
8363 enum tls_model model;
8364
8365 op0 = operands[0];
8366 op1 = operands[1];
8367
8368 model = tls_symbolic_operand (op1, Pmode);
8369 if (model)
8370 {
8371 op1 = legitimize_tls_address (op1, model, true);
8372 op1 = force_operand (op1, op0);
8373 if (op1 == op0)
8374 return;
8375 }
8376
8377 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8378 {
8379#if TARGET_MACHO
8380 if (MACHOPIC_PURE)
8381 {
8382 rtx temp = ((reload_in_progress
8383 || ((op0 && GET_CODE (op0) == REG)
8384 && mode == Pmode))
8385 ? op0 : gen_reg_rtx (Pmode));
8386 op1 = machopic_indirect_data_reference (op1, temp);
8387 op1 = machopic_legitimize_pic_address (op1, mode,
8388 temp == op1 ? 0 : temp);
8389 }
8390 else if (MACHOPIC_INDIRECT)
8391 op1 = machopic_indirect_data_reference (op1, 0);
8392 if (op0 == op1)
8393 return;
8394#else
8395 if (GET_CODE (op0) == MEM)
8396 op1 = force_reg (Pmode, op1);
8397 else
8398 {
8399 rtx temp = op0;
8400 if (GET_CODE (temp) != REG)
8401 temp = gen_reg_rtx (Pmode);
8402 temp = legitimize_pic_address (op1, temp);
8403 if (temp == op0)
8404 return;
8405 op1 = temp;
8406 }
8407#endif /* TARGET_MACHO */
8408 }
8409 else
8410 {
8411 if (GET_CODE (op0) == MEM
8412 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8413 || !push_operand (op0, mode))
8414 && GET_CODE (op1) == MEM)
8415 op1 = force_reg (mode, op1);
8416
8417 if (push_operand (op0, mode)
8418 && ! general_no_elim_operand (op1, mode))
8419 op1 = copy_to_mode_reg (mode, op1);
8420
8421      /* Force large constants in 64-bit compilation into a register
8422	 to get them CSEed.  */
8423 if (TARGET_64BIT && mode == DImode
8424 && immediate_operand (op1, mode)
8425 && !x86_64_zero_extended_value (op1)
8426 && !register_operand (op0, mode)
8427 && optimize && !reload_completed && !reload_in_progress)
8428 op1 = copy_to_mode_reg (mode, op1);
8429
8430 if (FLOAT_MODE_P (mode))
8431 {
8432 /* If we are loading a floating point constant to a register,
8433 force the value to memory now, since we'll get better code
8434	     out of the back end.  */
8435
8436 if (strict)
8437 ;
8438 else if (GET_CODE (op1) == CONST_DOUBLE)
8439 {
8440 op1 = validize_mem (force_const_mem (mode, op1));
8441 if (!register_operand (op0, mode))
8442 {
8443 rtx temp = gen_reg_rtx (mode);
8444 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8445 emit_move_insn (op0, temp);
8446 return;
8447 }
8448 }
8449 }
8450 }
8451
8452 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8453}
8454
8455void
8456ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8457{
8458 /* Force constants other than zero into memory. We do not know how
8459 the instructions used to build constants modify the upper 64 bits
8460     of the register; once we have that information we may be able
8461     to handle some of them more efficiently.  */
8462 if ((reload_in_progress | reload_completed) == 0
8463 && register_operand (operands[0], mode)
8464 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8465 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8466
8467 /* Make operand1 a register if it isn't already. */
8468 if (!no_new_pseudos
8469 && !register_operand (operands[0], mode)
8470 && !register_operand (operands[1], mode))
8471 {
8472 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8473 emit_move_insn (operands[0], temp);
8474 return;
8475 }
8476
8477 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8478}
8479
8480/* Attempt to expand a binary operator. Make the expansion closer to the
8481   actual machine, than just general_operand, which will allow 3 separate
8482 memory references (one output, two input) in a single insn. */
8483
8484void
8485ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8486 rtx operands[])
8487{
8488 int matching_memory;
8489 rtx src1, src2, dst, op, clob;
8490
8491 dst = operands[0];
8492 src1 = operands[1];
8493 src2 = operands[2];
8494
8495 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8496 if (GET_RTX_CLASS (code) == 'c'
8497 && (rtx_equal_p (dst, src2)
8498 || immediate_operand (src1, mode)))
8499 {
8500 rtx temp = src1;
8501 src1 = src2;
8502 src2 = temp;
8503 }
8504
8505 /* If the destination is memory, and we do not have matching source
8506 operands, do things in registers. */
8507 matching_memory = 0;
8508 if (GET_CODE (dst) == MEM)
8509 {
8510 if (rtx_equal_p (dst, src1))
8511 matching_memory = 1;
8512 else if (GET_RTX_CLASS (code) == 'c'
8513 && rtx_equal_p (dst, src2))
8514 matching_memory = 2;
8515 else
8516 dst = gen_reg_rtx (mode);
8517 }
8518
8519 /* Both source operands cannot be in memory. */
8520 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8521 {
8522 if (matching_memory != 2)
8523 src2 = force_reg (mode, src2);
8524 else
8525 src1 = force_reg (mode, src1);
8526 }
8527
8528 /* If the operation is not commutable, source 1 cannot be a constant
8529 or non-matching memory. */
8530 if ((CONSTANT_P (src1)
8531 || (!matching_memory && GET_CODE (src1) == MEM))
8532 && GET_RTX_CLASS (code) != 'c')
8533 src1 = force_reg (mode, src1);
8534
8535 /* If optimizing, copy to regs to improve CSE */
8536 if (optimize && ! no_new_pseudos)
8537 {
8538 if (GET_CODE (dst) == MEM)
8539 dst = gen_reg_rtx (mode);
8540 if (GET_CODE (src1) == MEM)
8541 src1 = force_reg (mode, src1);
8542 if (GET_CODE (src2) == MEM)
8543 src2 = force_reg (mode, src2);
8544 }
8545
8546 /* Emit the instruction. */
8547
8548 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8549 if (reload_in_progress)
8550 {
8551 /* Reload doesn't know about the flags register, and doesn't know that
8552 it doesn't want to clobber it. We can only do this with PLUS. */
8553 if (code != PLUS)
8554 abort ();
8555 emit_insn (op);
8556 }
8557 else
8558 {
8559 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8560 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8561 }
8562
8563 /* Fix up the destination if needed. */
8564 if (dst != operands[0])
8565 emit_move_insn (operands[0], dst);
8566}
8567
8568/* Return TRUE or FALSE depending on whether the binary operator meets the
8569 appropriate constraints. */
8570
8571int
8572ix86_binary_operator_ok (enum rtx_code code,
8573 enum machine_mode mode ATTRIBUTE_UNUSED,
8574 rtx operands[3])
8575{
8576 /* Both source operands cannot be in memory. */
8577 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8578 return 0;
8579 /* If the operation is not commutable, source 1 cannot be a constant. */
8580 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8581 return 0;
8582 /* If the destination is memory, we must have a matching source operand. */
8583 if (GET_CODE (operands[0]) == MEM
8584 && ! (rtx_equal_p (operands[0], operands[1])
8585 || (GET_RTX_CLASS (code) == 'c'
8586 && rtx_equal_p (operands[0], operands[2]))))
8587 return 0;
8588 /* If the operation is not commutable and the source 1 is memory, we must
8589 have a matching destination. */
8590 if (GET_CODE (operands[1]) == MEM
8591 && GET_RTX_CLASS (code) != 'c'
8592 && ! rtx_equal_p (operands[0], operands[1]))
8593 return 0;
8594 return 1;
8595}
8596
8597/* Attempt to expand a unary operator. Make the expansion closer to the
8598   actual machine, than just general_operand, which will allow 2 separate
8599 memory references (one output, one input) in a single insn. */
8600
8601void
8602ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8603 rtx operands[])
8604{
8605 int matching_memory;
8606 rtx src, dst, op, clob;
8607
8608 dst = operands[0];
8609 src = operands[1];
8610
8611 /* If the destination is memory, and we do not have matching source
8612 operands, do things in registers. */
8613 matching_memory = 0;
8614 if (GET_CODE (dst) == MEM)
8615 {
8616 if (rtx_equal_p (dst, src))
8617 matching_memory = 1;
8618 else
8619 dst = gen_reg_rtx (mode);
8620 }
8621
8622 /* When source operand is memory, destination must match. */
8623 if (!matching_memory && GET_CODE (src) == MEM)
8624 src = force_reg (mode, src);
8625
8626 /* If optimizing, copy to regs to improve CSE */
8627 if (optimize && ! no_new_pseudos)
8628 {
8629 if (GET_CODE (dst) == MEM)
8630 dst = gen_reg_rtx (mode);
8631 if (GET_CODE (src) == MEM)
8632 src = force_reg (mode, src);
8633 }
8634
8635 /* Emit the instruction. */
8636
8637 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8638 if (reload_in_progress || code == NOT)
8639 {
8640 /* Reload doesn't know about the flags register, and doesn't know that
8641 it doesn't want to clobber it. */
8642 if (code != NOT)
8643 abort ();
8644 emit_insn (op);
8645 }
8646 else
8647 {
8648 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8649 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8650 }
8651
8652 /* Fix up the destination if needed. */
8653 if (dst != operands[0])
8654 emit_move_insn (operands[0], dst);
8655}
8656
8657/* Return TRUE or FALSE depending on whether the unary operator meets the
8658 appropriate constraints. */
8659
8660int
8661ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8662 enum machine_mode mode ATTRIBUTE_UNUSED,
8663 rtx operands[2] ATTRIBUTE_UNUSED)
8664{
8665 /* If one of operands is memory, source and destination must match. */
8666 if ((GET_CODE (operands[0]) == MEM
8667 || GET_CODE (operands[1]) == MEM)
8668 && ! rtx_equal_p (operands[0], operands[1]))
8669 return FALSE;
8670 return TRUE;
8671}
8672
8673/* Return TRUE or FALSE depending on whether the first SET in INSN
8674   has source and destination with matching CC modes, and whether that
8675   CC mode is at least as constrained as REQ_MODE.  */
8676
8677int
8678ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8679{
8680 rtx set;
8681 enum machine_mode set_mode;
8682
8683 set = PATTERN (insn);
8684 if (GET_CODE (set) == PARALLEL)
8685 set = XVECEXP (set, 0, 0);
8686 if (GET_CODE (set) != SET)
8687 abort ();
8688 if (GET_CODE (SET_SRC (set)) != COMPARE)
8689 abort ();
8690
8691 set_mode = GET_MODE (SET_DEST (set));
8692 switch (set_mode)
8693 {
8694 case CCNOmode:
8695 if (req_mode != CCNOmode
8696 && (req_mode != CCmode
8697 || XEXP (SET_SRC (set), 1) != const0_rtx))
8698 return 0;
8699 break;
8700 case CCmode:
8701 if (req_mode == CCGCmode)
8702 return 0;
8703 /* FALLTHRU */
8704 case CCGCmode:
8705 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8706 return 0;
8707 /* FALLTHRU */
8708 case CCGOCmode:
8709 if (req_mode == CCZmode)
8710 return 0;
8711 /* FALLTHRU */
8712 case CCZmode:
8713 break;
8714
8715 default:
8716 abort ();
8717 }
8718
8719 return (GET_MODE (SET_SRC (set)) == set_mode);
8720}
8721
8722/* Generate insn patterns to do an integer compare of OPERANDS. */
8723
8724static rtx
8725ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8726{
8727 enum machine_mode cmpmode;
8728 rtx tmp, flags;
8729
8730 cmpmode = SELECT_CC_MODE (code, op0, op1);
8731 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8732
8733 /* This is very simple, but making the interface the same as in the
8734 FP case makes the rest of the code easier. */
8735 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8736 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8737
8738 /* Return the test that should be put into the flags user, i.e.
8739 the bcc, scc, or cmov instruction. */
8740 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8741}
8742
8743/* Figure out whether to use ordered or unordered fp comparisons.
8744 Return the appropriate mode to use. */
8745
8746enum machine_mode
8747ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8748{
8749 /* ??? In order to make all comparisons reversible, we do all comparisons
8750 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8751     all forms of trapping and nontrapping comparisons, we can make inequality
8752 comparisons trapping again, since it results in better code when using
8753 FCOM based compares. */
8754 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8755}
8756
8757enum machine_mode
8758ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8759{
8760 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8761 return ix86_fp_compare_mode (code);
8762 switch (code)
8763 {
8764 /* Only zero flag is needed. */
8765 case EQ: /* ZF=0 */
8766 case NE: /* ZF!=0 */
8767 return CCZmode;
8768 /* Codes needing carry flag. */
8769 case GEU: /* CF=0 */
8770 case GTU: /* CF=0 & ZF=0 */
8771 case LTU: /* CF=1 */
8772 case LEU: /* CF=1 | ZF=1 */
8773 return CCmode;
8774 /* Codes possibly doable only with sign flag when
8775 comparing against zero. */
8776 case GE: /* SF=OF or SF=0 */
8777 case LT: /* SF<>OF or SF=1 */
8778 if (op1 == const0_rtx)
8779 return CCGOCmode;
8780 else
8781 /* For other cases Carry flag is not required. */
8782 return CCGCmode;
8783      /* Codes doable only with the sign flag when comparing
8784	 against zero, but we have no jump instruction for them,
8785	 so we need to use relational tests against overflow,
8786	 which thus needs to be zero.  */
8787 case GT: /* ZF=0 & SF=OF */
8788 case LE: /* ZF=1 | SF<>OF */
8789 if (op1 == const0_rtx)
8790 return CCNOmode;
8791 else
8792 return CCGCmode;
8793      /* The strcmp pattern does (use flags), and combine may ask us for a
8794	 proper mode.  */
8795 case USE:
8796 return CCmode;
8797 default:
8798 abort ();
8799 }
8800}
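/* Added illustrative note: a (compare (reg:SI x) (const_int 0)) consumed
   by GE selects CCGOCmode, the same compare consumed by GT selects
   CCNOmode, and an unsigned LTU comparison always selects the full CCmode
   since it needs the carry flag.  */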
8801
8802/* Return the fixed registers used for condition codes. */
8803
8804static bool
8805ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8806{
8807 *p1 = FLAGS_REG;
8808 *p2 = FPSR_REG;
8809 return true;
8810}
8811
8812/* If two condition code modes are compatible, return a condition code
8813 mode which is compatible with both. Otherwise, return
8814 VOIDmode. */
8815
8816static enum machine_mode
8817ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8818{
8819 if (m1 == m2)
8820 return m1;
8821
8822 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8823 return VOIDmode;
8824
8825 if ((m1 == CCGCmode && m2 == CCGOCmode)
8826 || (m1 == CCGOCmode && m2 == CCGCmode))
8827 return CCGCmode;
8828
8829 switch (m1)
8830 {
8831 default:
8832 abort ();
8833
8834 case CCmode:
8835 case CCGCmode:
8836 case CCGOCmode:
8837 case CCNOmode:
8838 case CCZmode:
8839 switch (m2)
8840 {
8841 default:
8842 return VOIDmode;
8843
8844 case CCmode:
8845 case CCGCmode:
8846 case CCGOCmode:
8847 case CCNOmode:
8848 case CCZmode:
8849 return CCmode;
8850 }
8851
8852 case CCFPmode:
8853 case CCFPUmode:
8854 /* These are only compatible with themselves, which we already
8855 checked above. */
8856 return VOIDmode;
8857 }
8858}
8859
8860/* Return true if we should use an FCOMI instruction for this fp comparison. */
8861
8862int
8863ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8864{
8865 enum rtx_code swapped_code = swap_condition (code);
8866 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8867 || (ix86_fp_comparison_cost (swapped_code)
8868 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8869}
8870
8871/* Swap, force into registers, or otherwise massage the two operands
8872 to a fp comparison. The operands are updated in place; the new
8873 comparison code is returned. */
8874
8875static enum rtx_code
8876ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8877{
8878 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8879 rtx op0 = *pop0, op1 = *pop1;
8880 enum machine_mode op_mode = GET_MODE (op0);
8881 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8882
8883 /* All of the unordered compare instructions only work on registers.
8884 The same is true of the XFmode compare instructions. The same is
8885 true of the fcomi compare instructions. */
8886
8887 if (!is_sse
8888 && (fpcmp_mode == CCFPUmode
8889 || op_mode == XFmode
8890 || ix86_use_fcomi_compare (code)))
8891 {
8892 op0 = force_reg (op_mode, op0);
8893 op1 = force_reg (op_mode, op1);
8894 }
8895 else
8896 {
8897 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8898 things around if they appear profitable, otherwise force op0
8899 into a register. */
8900
8901 if (standard_80387_constant_p (op0) == 0
8902 || (GET_CODE (op0) == MEM
8903 && ! (standard_80387_constant_p (op1) == 0
8904 || GET_CODE (op1) == MEM)))
8905 {
8906 rtx tmp;
8907 tmp = op0, op0 = op1, op1 = tmp;
8908 code = swap_condition (code);
8909 }
8910
8911 if (GET_CODE (op0) != REG)
8912 op0 = force_reg (op_mode, op0);
8913
8914 if (CONSTANT_P (op1))
8915 {
8916 if (standard_80387_constant_p (op1))
8917 op1 = force_reg (op_mode, op1);
8918 else
8919 op1 = validize_mem (force_const_mem (op_mode, op1));
8920 }
8921 }
8922
8923 /* Try to rearrange the comparison to make it cheaper. */
8924 if (ix86_fp_comparison_cost (code)
8925 > ix86_fp_comparison_cost (swap_condition (code))
8926 && (GET_CODE (op1) == REG || !no_new_pseudos))
8927 {
8928 rtx tmp;
8929 tmp = op0, op0 = op1, op1 = tmp;
8930 code = swap_condition (code);
8931 if (GET_CODE (op0) != REG)
8932 op0 = force_reg (op_mode, op0);
8933 }
8934
8935 *pop0 = op0;
8936 *pop1 = op1;
8937 return code;
8938}
8939
8940/* Convert the comparison codes we use to represent FP comparisons to the
8941   integer code that will result in a proper branch.  Return UNKNOWN if no such code
8942 is available. */
8943static enum rtx_code
8944ix86_fp_compare_code_to_integer (enum rtx_code code)
8945{
8946 switch (code)
8947 {
8948 case GT:
8949 return GTU;
8950 case GE:
8951 return GEU;
8952 case ORDERED:
8953 case UNORDERED:
8954 return code;
8955 break;
8956 case UNEQ:
8957 return EQ;
8958 break;
8959 case UNLT:
8960 return LTU;
8961 break;
8962 case UNLE:
8963 return LEU;
8964 break;
8965 case LTGT:
8966 return NE;
8967 break;
8968 default:
8969 return UNKNOWN;
8970 }
8971}
8972
8973/* Split comparison code CODE into comparisons we can do using branch
8974   instructions.  BYPASS_CODE is the comparison code for the branch that will
8975   branch around FIRST_CODE and SECOND_CODE.  If one of the branches
8976   is not required, its value is set to NIL.
8977 We never require more than two branches. */
8978static void
8979ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8980 enum rtx_code *first_code,
8981 enum rtx_code *second_code)
8982{
8983 *first_code = code;
8984 *bypass_code = NIL;
8985 *second_code = NIL;
8986
8987 /* The fcomi comparison sets flags as follows:
8988
8989 cmp ZF PF CF
8990 > 0 0 0
8991 < 0 0 1
8992 = 1 0 0
8993 un 1 1 1 */
8994
8995 switch (code)
8996 {
8997 case GT: /* GTU - CF=0 & ZF=0 */
8998 case GE: /* GEU - CF=0 */
8999 case ORDERED: /* PF=0 */
9000 case UNORDERED: /* PF=1 */
9001 case UNEQ: /* EQ - ZF=1 */
9002 case UNLT: /* LTU - CF=1 */
9003 case UNLE: /* LEU - CF=1 | ZF=1 */
9004 case LTGT: /* EQ - ZF=0 */
9005 break;
9006 case LT: /* LTU - CF=1 - fails on unordered */
9007 *first_code = UNLT;
9008 *bypass_code = UNORDERED;
9009 break;
9010 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9011 *first_code = UNLE;
9012 *bypass_code = UNORDERED;
9013 break;
9014 case EQ: /* EQ - ZF=1 - fails on unordered */
9015 *first_code = UNEQ;
9016 *bypass_code = UNORDERED;
9017 break;
9018 case NE: /* NE - ZF=0 - fails on unordered */
9019 *first_code = LTGT;
9020 *second_code = UNORDERED;
9021 break;
9022 case UNGE: /* GEU - CF=0 - fails on unordered */
9023 *first_code = GE;
9024 *second_code = UNORDERED;
9025 break;
9026 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9027 *first_code = GT;
9028 *second_code = UNORDERED;
9029 break;
9030 default:
9031 abort ();
9032 }
9033 if (!TARGET_IEEE_FP)
9034 {
9035 *second_code = NIL;
9036 *bypass_code = NIL;
9037 }
9038}
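/* Added illustrative note: with TARGET_IEEE_FP, LT is split into
   first_code = UNLT with bypass_code = UNORDERED (branch around the test
   when the operands are unordered), and NE into first_code = LTGT with
   second_code = UNORDERED (also take the branch when unordered); without
   TARGET_IEEE_FP both extra codes are reset to NIL.  */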
9039
9040/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
9041   All of the following functions use the number of instructions as the cost metric.
9042   In the future this should be tweaked to compute bytes for optimize_size and
9043 take into account performance of various instructions on various CPUs. */
9044static int
9045ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9046{
9047 if (!TARGET_IEEE_FP)
9048 return 4;
9049 /* The cost of code output by ix86_expand_fp_compare. */
9050 switch (code)
9051 {
9052 case UNLE:
9053 case UNLT:
9054 case LTGT:
9055 case GT:
9056 case GE:
9057 case UNORDERED:
9058 case ORDERED:
9059 case UNEQ:
9060 return 4;
9061 break;
9062 case LT:
9063 case NE:
9064 case EQ:
9065 case UNGE:
9066 return 5;
9067 break;
9068 case LE:
9069 case UNGT:
9070 return 6;
9071 break;
9072 default:
9073 abort ();
9074 }
9075}
9076
9077/* Return cost of comparison done using fcomi operation.
9078 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9079static int
9080ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9081{
9082 enum rtx_code bypass_code, first_code, second_code;
9083 /* Return arbitrarily high cost when instruction is not supported - this
9084 prevents gcc from using it. */
9085 if (!TARGET_CMOVE)
9086 return 1024;
9087 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9088 return (bypass_code != NIL || second_code != NIL) + 2;
9089}
9090
9091/* Return cost of comparison done using sahf operation.
9092 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9093static int
9094ix86_fp_comparison_sahf_cost (enum rtx_code code)
9095{
9096 enum rtx_code bypass_code, first_code, second_code;
9097 /* Return arbitrarily high cost when instruction is not preferred - this
9098   prevents gcc from using it.  */
9099 if (!TARGET_USE_SAHF && !optimize_size)
9100 return 1024;
9101 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9102 return (bypass_code != NIL || second_code != NIL) + 3;
9103}
9104
9105/* Compute cost of the comparison done using any method.
9106 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9107static int
9108ix86_fp_comparison_cost (enum rtx_code code)
9109{
9110 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9111 int min;
9112
9113 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9114 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9115
9116 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9117 if (min > sahf_cost)
9118 min = sahf_cost;
9119 if (min > fcomi_cost)
9120 min = fcomi_cost;
9121 return min;
9122}
9123
9124/* Generate insn patterns to do a floating point compare of OPERANDS. */
9125
9126static rtx
9127ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9128 rtx *second_test, rtx *bypass_test)
9129{
9130 enum machine_mode fpcmp_mode, intcmp_mode;
9131 rtx tmp, tmp2;
9132 int cost = ix86_fp_comparison_cost (code);
9133 enum rtx_code bypass_code, first_code, second_code;
9134
9135 fpcmp_mode = ix86_fp_compare_mode (code);
9136 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9137
9138 if (second_test)
9139 *second_test = NULL_RTX;
9140 if (bypass_test)
9141 *bypass_test = NULL_RTX;
9142
9143 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9144
9145 /* Do fcomi/sahf based test when profitable. */
9146 if ((bypass_code == NIL || bypass_test)
9147 && (second_code == NIL || second_test)
9148 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9149 {
9150 if (TARGET_CMOVE)
9151 {
9152 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9153 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9154 tmp);
9155 emit_insn (tmp);
9156 }
9157 else
9158 {
9159 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9160 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9161 if (!scratch)
9162 scratch = gen_reg_rtx (HImode);
9163 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9164 emit_insn (gen_x86_sahf_1 (scratch));
9165 }
9166
9167 /* The FP codes work out to act like unsigned. */
9168 intcmp_mode = fpcmp_mode;
9169 code = first_code;
9170 if (bypass_code != NIL)
9171 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9172 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9173 const0_rtx);
9174 if (second_code != NIL)
9175 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9176 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9177 const0_rtx);
9178 }
9179 else
9180 {
9181 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9182 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9183 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9184 if (!scratch)
9185 scratch = gen_reg_rtx (HImode);
9186 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9187
9188 /* In the unordered case, we have to check C2 for NaN's, which
9189 doesn't happen to work out to anything nice combination-wise.
9190 So do some bit twiddling on the value we've got in AH to come
9191 up with an appropriate set of condition codes. */
9192
9193 intcmp_mode = CCNOmode;
9194 switch (code)
9195 {
9196 case GT:
9197 case UNGT:
9198 if (code == GT || !TARGET_IEEE_FP)
9199 {
9200 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9201 code = EQ;
9202 }
9203 else
9204 {
9205 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9206 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9207 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9208 intcmp_mode = CCmode;
9209 code = GEU;
9210 }
9211 break;
9212 case LT:
9213 case UNLT:
9214 if (code == LT && TARGET_IEEE_FP)
9215 {
9216 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9217 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9218 intcmp_mode = CCmode;
9219 code = EQ;
9220 }
9221 else
9222 {
9223 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9224 code = NE;
9225 }
9226 break;
9227 case GE:
9228 case UNGE:
9229 if (code == GE || !TARGET_IEEE_FP)
9230 {
9231 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9232 code = EQ;
9233 }
9234 else
9235 {
9236 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9237 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9238 GEN_INT (0x01)));
9239 code = NE;
9240 }
9241 break;
9242 case LE:
9243 case UNLE:
9244 if (code == LE && TARGET_IEEE_FP)
9245 {
9246 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9247 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9248 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9249 intcmp_mode = CCmode;
9250 code = LTU;
9251 }
9252 else
9253 {
9254 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9255 code = NE;
9256 }
9257 break;
9258 case EQ:
9259 case UNEQ:
9260 if (code == EQ && TARGET_IEEE_FP)
9261 {
9262 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9263 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9264 intcmp_mode = CCmode;
9265 code = EQ;
9266 }
9267 else
9268 {
9269 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9270 code = NE;
9271 break;
9272 }
9273 break;
9274 case NE:
9275 case LTGT:
9276 if (code == NE && TARGET_IEEE_FP)
9277 {
9278 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9279 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9280 GEN_INT (0x40)));
9281 code = NE;
9282 }
9283 else
9284 {
9285 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9286 code = EQ;
9287 }
9288 break;
9289
9290 case UNORDERED:
9291 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9292 code = NE;
9293 break;
9294 case ORDERED:
9295 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9296 code = EQ;
9297 break;
9298
9299 default:
9300 abort ();
9301 }
9302 }
9303
9304 /* Return the test that should be put into the flags user, i.e.
9305 the bcc, scc, or cmov instruction. */
9306 return gen_rtx_fmt_ee (code, VOIDmode,
9307 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9308 const0_rtx);
9309}
9310
9311rtx
9312ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9313{
9314 rtx op0, op1, ret;
9315 op0 = ix86_compare_op0;
9316 op1 = ix86_compare_op1;
9317
9318 if (second_test)
9319 *second_test = NULL_RTX;
9320 if (bypass_test)
9321 *bypass_test = NULL_RTX;
9322
9323 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9324 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9325 second_test, bypass_test);
9326 else
9327 ret = ix86_expand_int_compare (code, op0, op1);
9328
9329 return ret;
9330}
9331
9332/* Return true if the CODE will result in a nontrivial jump sequence.  */
9333bool
9334ix86_fp_jump_nontrivial_p (enum rtx_code code)
9335{
9336 enum rtx_code bypass_code, first_code, second_code;
9337 if (!TARGET_CMOVE)
9338 return true;
9339 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9340 return bypass_code != NIL || second_code != NIL;
9341}
9342
9343void
9344ix86_expand_branch (enum rtx_code code, rtx label)
9345{
9346 rtx tmp;
9347
9348 switch (GET_MODE (ix86_compare_op0))
9349 {
9350 case QImode:
9351 case HImode:
9352 case SImode:
9353 simple:
9354 tmp = ix86_expand_compare (code, NULL, NULL);
9355 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9356 gen_rtx_LABEL_REF (VOIDmode, label),
9357 pc_rtx);
9358 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9359 return;
9360
9361 case SFmode:
9362 case DFmode:
9363 case XFmode:
9364 {
9365 rtvec vec;
9366 int use_fcomi;
9367 enum rtx_code bypass_code, first_code, second_code;
9368
9369 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9370 &ix86_compare_op1);
9371
9372 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9373
9374 /* Check whether we will use the natural sequence with one jump. If
9375	   so, we can expand the jump early.  Otherwise delay expansion by
9376	   creating a compound insn so as not to confuse the optimizers.  */
9377 if (bypass_code == NIL && second_code == NIL
9378 && TARGET_CMOVE)
9379 {
9380 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9381 gen_rtx_LABEL_REF (VOIDmode, label),
9382 pc_rtx, NULL_RTX);
9383 }
9384 else
9385 {
9386 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9387 ix86_compare_op0, ix86_compare_op1);
9388 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9389 gen_rtx_LABEL_REF (VOIDmode, label),
9390 pc_rtx);
9391 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9392
9393 use_fcomi = ix86_use_fcomi_compare (code);
9394 vec = rtvec_alloc (3 + !use_fcomi);
9395 RTVEC_ELT (vec, 0) = tmp;
9396 RTVEC_ELT (vec, 1)
9397 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9398 RTVEC_ELT (vec, 2)
9399 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9400 if (! use_fcomi)
9401 RTVEC_ELT (vec, 3)
9402 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9403
9404 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9405 }
9406 return;
9407 }
9408
9409 case DImode:
9410 if (TARGET_64BIT)
9411 goto simple;
9412 /* Expand DImode branch into multiple compare+branch. */
9413 {
9414 rtx lo[2], hi[2], label2;
9415 enum rtx_code code1, code2, code3;
9416
9417 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9418 {
9419 tmp = ix86_compare_op0;
9420 ix86_compare_op0 = ix86_compare_op1;
9421 ix86_compare_op1 = tmp;
9422 code = swap_condition (code);
9423 }
9424 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9425 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9426
9427 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9428 avoid two branches. This costs one extra insn, so disable when
9429 optimizing for size. */
9430
9431 if ((code == EQ || code == NE)
9432 && (!optimize_size
9433 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9434 {
9435 rtx xor0, xor1;
9436
9437 xor1 = hi[0];
9438 if (hi[1] != const0_rtx)
9439 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9440 NULL_RTX, 0, OPTAB_WIDEN);
9441
9442 xor0 = lo[0];
9443 if (lo[1] != const0_rtx)
9444 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9445 NULL_RTX, 0, OPTAB_WIDEN);
9446
9447 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9448 NULL_RTX, 0, OPTAB_WIDEN);
9449
9450 ix86_compare_op0 = tmp;
9451 ix86_compare_op1 = const0_rtx;
9452 ix86_expand_branch (code, label);
9453 return;
9454 }
9455
9456	/* Otherwise, if we are doing a less-than or greater-or-equal comparison,
9457	   op1 is a constant and the low word is zero, then we can just
9458	   examine the high word. */
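	/* For example, for an unsigned a < C where C has a zero low word
	   (C = hi * 2^32), a < C holds exactly when hi(a) < hi, so a single
	   comparison of the high words is enough.  */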
9459
9460 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9461 switch (code)
9462 {
9463 case LT: case LTU: case GE: case GEU:
9464 ix86_compare_op0 = hi[0];
9465 ix86_compare_op1 = hi[1];
9466 ix86_expand_branch (code, label);
9467 return;
9468 default:
9469 break;
9470 }
9471
9472 /* Otherwise, we need two or three jumps. */
9473
9474 label2 = gen_label_rtx ();
9475
9476 code1 = code;
9477 code2 = swap_condition (code);
9478 code3 = unsigned_condition (code);
9479
9480 switch (code)
9481 {
9482 case LT: case GT: case LTU: case GTU:
9483 break;
9484
9485 case LE: code1 = LT; code2 = GT; break;
9486 case GE: code1 = GT; code2 = LT; break;
9487 case LEU: code1 = LTU; code2 = GTU; break;
9488 case GEU: code1 = GTU; code2 = LTU; break;
9489
9490 case EQ: code1 = NIL; code2 = NE; break;
9491 case NE: code2 = NIL; break;
9492
9493 default:
9494 abort ();
9495 }
9496
9497 /*
9498 * a < b =>
9499 * if (hi(a) < hi(b)) goto true;
9500 * if (hi(a) > hi(b)) goto false;
9501 * if (lo(a) < lo(b)) goto true;
9502 * false:
9503 */
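	/* As a concrete instance, a signed LE becomes code1 = LT, code2 = GT
	   and code3 = LEU below: branch to the target when hi(a) < hi(b),
	   branch to label2 (the false path) when hi(a) > hi(b), and otherwise
	   decide on the low words with an unsigned <=.  */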
9504
9505 ix86_compare_op0 = hi[0];
9506 ix86_compare_op1 = hi[1];
9507
9508 if (code1 != NIL)
9509 ix86_expand_branch (code1, label);
9510 if (code2 != NIL)
9511 ix86_expand_branch (code2, label2);
9512
9513 ix86_compare_op0 = lo[0];
9514 ix86_compare_op1 = lo[1];
9515 ix86_expand_branch (code3, label);
9516
9517 if (code2 != NIL)
9518 emit_label (label2);
9519 return;
9520 }
9521
9522 default:
9523 abort ();
9524 }
9525}
9526
9527/* Split branch based on floating point condition. */
9528void
9529ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9530 rtx target1, rtx target2, rtx tmp)
9531{
9532 rtx second, bypass;
9533 rtx label = NULL_RTX;
9534 rtx condition;
9535 int bypass_probability = -1, second_probability = -1, probability = -1;
9536 rtx i;
9537
9538 if (target2 != pc_rtx)
9539 {
9540 rtx tmp = target2;
9541 code = reverse_condition_maybe_unordered (code);
9542 target2 = target1;
9543 target1 = tmp;
9544 }
9545
9546 condition = ix86_expand_fp_compare (code, op1, op2,
9547 tmp, &second, &bypass);
9548
9549 if (split_branch_probability >= 0)
9550 {
9551      /* Distribute the probabilities across the jumps.
9552	 Assume that BYPASS and SECOND always test
9553	 for UNORDERED. */
9554 probability = split_branch_probability;
9555
9556      /* A value of 1 is low enough that there is no need to update the
9557	 probability. Later we may run some experiments and see
9558	 whether unordered values are more frequent in practice. */
9559 if (bypass)
9560 bypass_probability = 1;
9561 if (second)
9562 second_probability = 1;
9563 }
9564 if (bypass != NULL_RTX)
9565 {
9566 label = gen_label_rtx ();
9567 i = emit_jump_insn (gen_rtx_SET
9568 (VOIDmode, pc_rtx,
9569 gen_rtx_IF_THEN_ELSE (VOIDmode,
9570 bypass,
9571 gen_rtx_LABEL_REF (VOIDmode,
9572 label),
9573 pc_rtx)));
9574 if (bypass_probability >= 0)
9575 REG_NOTES (i)
9576 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9577 GEN_INT (bypass_probability),
9578 REG_NOTES (i));
9579 }
9580 i = emit_jump_insn (gen_rtx_SET
9581 (VOIDmode, pc_rtx,
9582 gen_rtx_IF_THEN_ELSE (VOIDmode,
9583 condition, target1, target2)));
9584 if (probability >= 0)
9585 REG_NOTES (i)
9586 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9587 GEN_INT (probability),
9588 REG_NOTES (i));
9589 if (second != NULL_RTX)
9590 {
9591 i = emit_jump_insn (gen_rtx_SET
9592 (VOIDmode, pc_rtx,
9593 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9594 target2)));
9595 if (second_probability >= 0)
9596 REG_NOTES (i)
9597 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9598 GEN_INT (second_probability),
9599 REG_NOTES (i));
9600 }
9601 if (label != NULL_RTX)
9602 emit_label (label);
9603}
9604
9605int
9606ix86_expand_setcc (enum rtx_code code, rtx dest)
9607{
9608 rtx ret, tmp, tmpreg, equiv;
9609 rtx second_test, bypass_test;
9610
9611 if (GET_MODE (ix86_compare_op0) == DImode
9612 && !TARGET_64BIT)
9613 return 0; /* FAIL */
9614
9615 if (GET_MODE (dest) != QImode)
9616 abort ();
9617
9618 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9619 PUT_MODE (ret, QImode);
9620
9621 tmp = dest;
9622 tmpreg = dest;
9623
9624 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9625 if (bypass_test || second_test)
9626 {
9627 rtx test = second_test;
9628 int bypass = 0;
9629 rtx tmp2 = gen_reg_rtx (QImode);
9630 if (bypass_test)
9631 {
9632 if (second_test)
9633 abort ();
9634 test = bypass_test;
9635 bypass = 1;
9636 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9637 }
9638 PUT_MODE (test, QImode);
9639 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9640
9641 if (bypass)
9642 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9643 else
9644 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9645 }
9646
9647 /* Attach a REG_EQUAL note describing the comparison result. */
9648 equiv = simplify_gen_relational (code, QImode,
9649 GET_MODE (ix86_compare_op0),
9650 ix86_compare_op0, ix86_compare_op1);
9651 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9652
9653 return 1; /* DONE */
9654}
9655
9656/* Expand a comparison setting or clearing the carry flag. Return true when
9657   successful and set *POP to the comparison operation. */
9658static bool
9659ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9660{
9661 enum machine_mode mode =
9662 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9663
9664  /* Do not handle DImode compares that go through a special path. Also we can't
9665     deal with FP compares yet, though this would be possible to add. */
9666 if ((mode == DImode && !TARGET_64BIT))
9667 return false;
9668 if (FLOAT_MODE_P (mode))
9669 {
9670 rtx second_test = NULL, bypass_test = NULL;
9671 rtx compare_op, compare_seq;
9672
9673      /* Shortcut: the following common codes never translate into carry-flag compares. */
9674 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9675 || code == ORDERED || code == UNORDERED)
9676 return false;
9677
9678      /* These comparisons require the zero flag; swap the operands so they don't. */
9679 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9680 && !TARGET_IEEE_FP)
9681 {
9682 rtx tmp = op0;
9683 op0 = op1;
9684 op1 = tmp;
9685 code = swap_condition (code);
9686 }
9687
9688      /* Try to expand the comparison and verify that we end up with a carry-flag
9689	 based comparison. This fails to be true only when we decide to expand the
9690	 comparison using arithmetic, which is not a common scenario. */
9691 start_sequence ();
9692 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9693 &second_test, &bypass_test);
9694 compare_seq = get_insns ();
9695 end_sequence ();
9696
9697 if (second_test || bypass_test)
9698 return false;
9699 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9700 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9701 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9702 else
9703 code = GET_CODE (compare_op);
9704 if (code != LTU && code != GEU)
9705 return false;
9706 emit_insn (compare_seq);
9707 *pop = compare_op;
9708 return true;
9709 }
9710 if (!INTEGRAL_MODE_P (mode))
9711 return false;
9712 switch (code)
9713 {
9714 case LTU:
9715 case GEU:
9716 break;
9717
9718 /* Convert a==0 into (unsigned)a<1. */
9719 case EQ:
9720 case NE:
9721 if (op1 != const0_rtx)
9722 return false;
9723 op1 = const1_rtx;
9724 code = (code == EQ ? LTU : GEU);
9725 break;
9726
9727      /* Convert a>b into b<a or a>=b+1. */
9728 case GTU:
9729 case LEU:
9730 if (GET_CODE (op1) == CONST_INT)
9731 {
9732 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9733	  /* Bail out on overflow. We could still swap the operands, but that
9734	     would force loading the constant into a register. */
9735 if (op1 == const0_rtx
9736 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9737 return false;
9738 code = (code == GTU ? GEU : LTU);
9739 }
9740 else
9741 {
9742 rtx tmp = op1;
9743 op1 = op0;
9744 op0 = tmp;
9745 code = (code == GTU ? LTU : GEU);
9746 }
9747 break;
9748
9749 /* Convert a>=0 into (unsigned)a<0x80000000. */
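      /* Correspondingly, a<0 becomes the unsigned test a>=0x80000000 (for
	 SImode), so LT maps to GEU and GE maps to LTU below.  */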
9750 case LT:
9751 case GE:
9752 if (mode == DImode || op1 != const0_rtx)
9753 return false;
9754 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9755 code = (code == LT ? GEU : LTU);
9756 break;
9757 case LE:
9758 case GT:
9759 if (mode == DImode || op1 != constm1_rtx)
9760 return false;
9761 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9762 code = (code == LE ? GEU : LTU);
9763 break;
9764
9765 default:
9766 return false;
9767 }
9768  /* Swapping operands may cause a constant to appear as the first operand. */
9769 if (!nonimmediate_operand (op0, VOIDmode))
9770 {
9771 if (no_new_pseudos)
9772 return false;
9773 op0 = force_reg (mode, op0);
9774 }
9775 ix86_compare_op0 = op0;
9776 ix86_compare_op1 = op1;
9777 *pop = ix86_expand_compare (code, NULL, NULL);
9778 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9779 abort ();
9780 return true;
9781}
9782
9783int
9784ix86_expand_int_movcc (rtx operands[])
9785{
9786 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9787 rtx compare_seq, compare_op;
9788 rtx second_test, bypass_test;
9789 enum machine_mode mode = GET_MODE (operands[0]);
9790  bool sign_bit_compare_p = false;
9791
9792 start_sequence ();
9793 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9794 compare_seq = get_insns ();
9795 end_sequence ();
9796
9797 compare_code = GET_CODE (compare_op);
9798
9799 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9800 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9801 sign_bit_compare_p = true;
9802
9803 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9804 HImode insns, we'd be swallowed in word prefix ops. */
9805
9806 if ((mode != HImode || TARGET_FAST_PREFIX)
9807 && (mode != DImode || TARGET_64BIT)
9808 && GET_CODE (operands[2]) == CONST_INT
9809 && GET_CODE (operands[3]) == CONST_INT)
9810 {
9811 rtx out = operands[0];
9812 HOST_WIDE_INT ct = INTVAL (operands[2]);
9813 HOST_WIDE_INT cf = INTVAL (operands[3]);
9814 HOST_WIDE_INT diff;
9815
9816 diff = ct - cf;
9817      /* Sign bit compares are better done using shifts than by using
9818	 sbb. */
9819 if (sign_bit_compare_p
9820 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9821 ix86_compare_op1, &compare_op))
9822 {
9823 /* Detect overlap between destination and compare sources. */
9824 rtx tmp = out;
9825
9826 if (!sign_bit_compare_p)
9827 {
9828 bool fpcmp = false;
9829
9830 compare_code = GET_CODE (compare_op);
9831
9832 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9833 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9834 {
9835 fpcmp = true;
9836 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9837 }
9838
9839	      /* To simplify the rest of the code, restrict to the GEU case. */
9840 if (compare_code == LTU)
9841 {
9842 HOST_WIDE_INT tmp = ct;
9843 ct = cf;
9844 cf = tmp;
9845 compare_code = reverse_condition (compare_code);
9846 code = reverse_condition (code);
9847 }
9848 else
9849 {
9850 if (fpcmp)
9851 PUT_CODE (compare_op,
9852 reverse_condition_maybe_unordered
9853 (GET_CODE (compare_op)));
9854 else
9855 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9856 }
9857 diff = ct - cf;
9858
9859 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9860 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9861 tmp = gen_reg_rtx (mode);
9862
9863 if (mode == DImode)
9864 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9865 else
9866 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9867 }
9868 else
9869 {
9870 if (code == GT || code == GE)
9871 code = reverse_condition (code);
9872 else
9873 {
9874 HOST_WIDE_INT tmp = ct;
9875 ct = cf;
9876 cf = tmp;
9877 diff = ct - cf;
9878 }
9879 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9880 ix86_compare_op1, VOIDmode, 0, -1);
9881 }
9882
9883 if (diff == 1)
9884 {
9885 /*
9886 * cmpl op0,op1
9887 * sbbl dest,dest
9888 * [addl dest, ct]
9889 *
9890 * Size 5 - 8.
9891 */
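	      /* A rough worked example: with cf = 4 and ct = 5 (diff == 1),
		 sbb leaves dest equal to -1 or 0, and the optional addl $5
		 turns that into 4 or 5.  */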
9892 if (ct)
9893 tmp = expand_simple_binop (mode, PLUS,
9894 tmp, GEN_INT (ct),
9895 copy_rtx (tmp), 1, OPTAB_DIRECT);
9896 }
9897 else if (cf == -1)
9898 {
9899 /*
9900 * cmpl op0,op1
9901 * sbbl dest,dest
9902 * orl $ct, dest
9903 *
9904 * Size 8.
9905 */
9906 tmp = expand_simple_binop (mode, IOR,
9907 tmp, GEN_INT (ct),
9908 copy_rtx (tmp), 1, OPTAB_DIRECT);
9909 }
9910 else if (diff == -1 && ct)
9911 {
9912 /*
9913 * cmpl op0,op1
9914 * sbbl dest,dest
9915 * notl dest
9916 * [addl dest, cf]
9917 *
9918 * Size 8 - 11.
9919 */
9920 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9921 if (cf)
9922 tmp = expand_simple_binop (mode, PLUS,
9923 copy_rtx (tmp), GEN_INT (cf),
9924 copy_rtx (tmp), 1, OPTAB_DIRECT);
9925 }
9926 else
9927 {
9928 /*
9929 * cmpl op0,op1
9930 * sbbl dest,dest
9931 * [notl dest]
9932 * andl cf - ct, dest
9933 * [addl dest, ct]
9934 *
9935 * Size 8 - 11.
9936 */
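	      /* For instance, with ct = 7 and cf = 3 the sbb result (-1 or 0)
		 is masked with cf - ct = -4, giving -4 or 0, and adding ct = 7
		 then yields 3 (cf) or 7 (ct).  */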
9937
9938 if (cf == 0)
9939 {
9940 cf = ct;
9941 ct = 0;
9942 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9943 }
9944
9945 tmp = expand_simple_binop (mode, AND,
9946 copy_rtx (tmp),
9947 gen_int_mode (cf - ct, mode),
9948 copy_rtx (tmp), 1, OPTAB_DIRECT);
9949 if (ct)
9950 tmp = expand_simple_binop (mode, PLUS,
9951 copy_rtx (tmp), GEN_INT (ct),
9952 copy_rtx (tmp), 1, OPTAB_DIRECT);
9953 }
9954
9955 if (!rtx_equal_p (tmp, out))
9956 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9957
9958 return 1; /* DONE */
9959 }
9960
9961 if (diff < 0)
9962 {
9963 HOST_WIDE_INT tmp;
9964 tmp = ct, ct = cf, cf = tmp;
9965 diff = -diff;
9966 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9967 {
9968	      /* We may be reversing an unordered compare to a normal compare, which
9969		 is not valid in general (we may convert a non-trapping condition
9970		 to a trapping one); however, on i386 we currently emit all
9971		 comparisons unordered. */
9972 compare_code = reverse_condition_maybe_unordered (compare_code);
9973 code = reverse_condition_maybe_unordered (code);
9974 }
9975 else
9976 {
9977 compare_code = reverse_condition (compare_code);
9978 code = reverse_condition (code);
9979 }
9980 }
9981
9982 compare_code = NIL;
9983 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9984 && GET_CODE (ix86_compare_op1) == CONST_INT)
9985 {
9986 if (ix86_compare_op1 == const0_rtx
9987 && (code == LT || code == GE))
9988 compare_code = code;
9989 else if (ix86_compare_op1 == constm1_rtx)
9990 {
9991 if (code == LE)
9992 compare_code = LT;
9993 else if (code == GT)
9994 compare_code = GE;
9995 }
9996 }
9997
9998 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9999 if (compare_code != NIL
10000 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10001 && (cf == -1 || ct == -1))
10002 {
10003	  /* If the lea code below could be used, only optimize
10004	     if it results in a 2-insn sequence. */
10005
10006 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10007 || diff == 3 || diff == 5 || diff == 9)
10008 || (compare_code == LT && ct == -1)
10009 || (compare_code == GE && cf == -1))
10010 {
10011 /*
10012 * notl op1 (if necessary)
10013 * sarl $31, op1
10014 * orl cf, op1
10015 */
10016 if (ct != -1)
10017 {
10018 cf = ct;
10019 ct = -1;
10020 code = reverse_condition (code);
10021 }
10022
10023 out = emit_store_flag (out, code, ix86_compare_op0,
10024 ix86_compare_op1, VOIDmode, 0, -1);
10025
10026 out = expand_simple_binop (mode, IOR,
10027 out, GEN_INT (cf),
10028 out, 1, OPTAB_DIRECT);
10029 if (out != operands[0])
10030 emit_move_insn (operands[0], out);
10031
10032 return 1; /* DONE */
10033 }
10034 }
10035
10036
10037 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10038 || diff == 3 || diff == 5 || diff == 9)
10039 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10040 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10041 {
10042 /*
10043 * xorl dest,dest
10044 * cmpl op1,op2
10045 * setcc dest
10046 * lea cf(dest*(ct-cf)),dest
10047 *
10048 * Size 14.
10049 *
10050 * This also catches the degenerate setcc-only case.
10051 */
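	  /* For example, with cf = 2 and ct = 5 (diff == 3) the setcc result
	     0 or 1 is scaled by diff and offset by cf, i.e. lea 2(out,out,2),
	     giving 2 or 5.  */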
10052
10053 rtx tmp;
10054 int nops;
10055
10056 out = emit_store_flag (out, code, ix86_compare_op0,
10057 ix86_compare_op1, VOIDmode, 0, 1);
10058
10059 nops = 0;
10060	  /* On x86_64 the lea instruction operates on Pmode, so we need
10061	     the arithmetic done in the proper mode to match. */
10062 if (diff == 1)
10063 tmp = copy_rtx (out);
10064 else
10065 {
10066 rtx out1;
10067 out1 = copy_rtx (out);
10068 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10069 nops++;
10070 if (diff & 1)
10071 {
10072 tmp = gen_rtx_PLUS (mode, tmp, out1);
10073 nops++;
10074 }
10075 }
10076 if (cf != 0)
10077 {
10078 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10079 nops++;
10080 }
10081 if (!rtx_equal_p (tmp, out))
10082 {
10083 if (nops == 1)
10084 out = force_operand (tmp, copy_rtx (out));
10085 else
10086 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10087 }
10088 if (!rtx_equal_p (out, operands[0]))
10089 emit_move_insn (operands[0], copy_rtx (out));
10090
10091 return 1; /* DONE */
10092 }
10093
10094 /*
10095 * General case: Jumpful:
10096 * xorl dest,dest cmpl op1, op2
10097 * cmpl op1, op2 movl ct, dest
10098 * setcc dest jcc 1f
10099 * decl dest movl cf, dest
10100 * andl (cf-ct),dest 1:
10101 * addl ct,dest
10102 *
10103 * Size 20. Size 14.
10104 *
10105 * This is reasonably steep, but branch mispredict costs are
10106 * high on modern cpus, so consider failing only if optimizing
10107 * for space.
10108 */
10109
10110 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10111 && BRANCH_COST >= 2)
10112 {
10113 if (cf == 0)
10114 {
10115 cf = ct;
10116 ct = 0;
10117 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10118	      /* We may be reversing an unordered compare to a normal compare,
10119		 which is not valid in general (we may convert a non-trapping
10120		 condition to a trapping one); however, on i386 we currently
10121		 emit all comparisons unordered. */
10122 code = reverse_condition_maybe_unordered (code);
10123 else
10124 {
10125 code = reverse_condition (code);
10126 if (compare_code != NIL)
10127 compare_code = reverse_condition (compare_code);
10128 }
10129 }
10130
10131 if (compare_code != NIL)
10132 {
10133 /* notl op1 (if needed)
10134 sarl $31, op1
10135 andl (cf-ct), op1
10136 addl ct, op1
10137
10138 For x < 0 (resp. x <= -1) there will be no notl,
10139 so if possible swap the constants to get rid of the
10140 complement.
10141 True/false will be -1/0 while code below (store flag
10142 followed by decrement) is 0/-1, so the constants need
10143 to be exchanged once more. */
10144
10145 if (compare_code == GE || !cf)
10146 {
10147 code = reverse_condition (code);
10148 compare_code = LT;
10149 }
10150 else
10151 {
10152 HOST_WIDE_INT tmp = cf;
10153 cf = ct;
10154 ct = tmp;
10155 }
10156
10157 out = emit_store_flag (out, code, ix86_compare_op0,
10158 ix86_compare_op1, VOIDmode, 0, -1);
10159 }
10160 else
10161 {
10162 out = emit_store_flag (out, code, ix86_compare_op0,
10163 ix86_compare_op1, VOIDmode, 0, 1);
10164
10165 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10166 copy_rtx (out), 1, OPTAB_DIRECT);
10167 }
10168
10169 out = expand_simple_binop (mode, AND, copy_rtx (out),
10170 gen_int_mode (cf - ct, mode),
10171 copy_rtx (out), 1, OPTAB_DIRECT);
10172 if (ct)
10173 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10174 copy_rtx (out), 1, OPTAB_DIRECT);
10175 if (!rtx_equal_p (out, operands[0]))
10176 emit_move_insn (operands[0], copy_rtx (out));
10177
10178 return 1; /* DONE */
10179 }
10180 }
10181
10182 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10183 {
10184 /* Try a few things more with specific constants and a variable. */
10185
10186 optab op;
10187 rtx var, orig_out, out, tmp;
10188
10189 if (BRANCH_COST <= 2)
10190 return 0; /* FAIL */
10191
10192 /* If one of the two operands is an interesting constant, load a
10193 constant with the above and mask it in with a logical operation. */
10194
10195 if (GET_CODE (operands[2]) == CONST_INT)
10196 {
10197 var = operands[3];
10198 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10199 operands[3] = constm1_rtx, op = and_optab;
10200 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10201 operands[3] = const0_rtx, op = ior_optab;
10202 else
10203 return 0; /* FAIL */
10204 }
10205 else if (GET_CODE (operands[3]) == CONST_INT)
10206 {
10207 var = operands[2];
10208 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10209 operands[2] = constm1_rtx, op = and_optab;
10210 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10211 operands[2] = const0_rtx, op = ior_optab;
10212 else
10213 return 0; /* FAIL */
10214 }
10215 else
10216 return 0; /* FAIL */
10217
10218 orig_out = operands[0];
10219 tmp = gen_reg_rtx (mode);
10220 operands[0] = tmp;
10221
10222 /* Recurse to get the constant loaded. */
10223 if (ix86_expand_int_movcc (operands) == 0)
10224 return 0; /* FAIL */
10225
10226 /* Mask in the interesting variable. */
10227 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10228 OPTAB_WIDEN);
10229 if (!rtx_equal_p (out, orig_out))
10230 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10231
10232 return 1; /* DONE */
10233 }
10234
10235 /*
10236 * For comparison with above,
10237 *
10238 * movl cf,dest
10239 * movl ct,tmp
10240 * cmpl op1,op2
10241 * cmovcc tmp,dest
10242 *
10243 * Size 15.
10244 */
10245
10246 if (! nonimmediate_operand (operands[2], mode))
10247 operands[2] = force_reg (mode, operands[2]);
10248 if (! nonimmediate_operand (operands[3], mode))
10249 operands[3] = force_reg (mode, operands[3]);
10250
10251 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10252 {
10253 rtx tmp = gen_reg_rtx (mode);
10254 emit_move_insn (tmp, operands[3]);
10255 operands[3] = tmp;
10256 }
10257 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10258 {
10259 rtx tmp = gen_reg_rtx (mode);
10260 emit_move_insn (tmp, operands[2]);
10261 operands[2] = tmp;
10262 }
10263
10264 if (! register_operand (operands[2], VOIDmode)
10265 && (mode == QImode
10266 || ! register_operand (operands[3], VOIDmode)))
10267 operands[2] = force_reg (mode, operands[2]);
10268
10269 if (mode == QImode
10270 && ! register_operand (operands[3], VOIDmode))
10271 operands[3] = force_reg (mode, operands[3]);
10272
10273 emit_insn (compare_seq);
10274 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10275 gen_rtx_IF_THEN_ELSE (mode,
10276 compare_op, operands[2],
10277 operands[3])));
10278 if (bypass_test)
10279 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10280 gen_rtx_IF_THEN_ELSE (mode,
10281 bypass_test,
10282 copy_rtx (operands[3]),
10283 copy_rtx (operands[0]))));
10284 if (second_test)
10285 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10286 gen_rtx_IF_THEN_ELSE (mode,
10287 second_test,
10288 copy_rtx (operands[2]),
10289 copy_rtx (operands[0]))));
10290
10291 return 1; /* DONE */
10292}
10293
10294int
10295ix86_expand_fp_movcc (rtx operands[])
10296{
10297 enum rtx_code code;
10298 rtx tmp;
10299 rtx compare_op, second_test, bypass_test;
10300
10301 /* For SF/DFmode conditional moves based on comparisons
10302 in same mode, we may want to use SSE min/max instructions. */
10303 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10304 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10305 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10306      /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10307 && (!TARGET_IEEE_FP
10308 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10309 /* We may be called from the post-reload splitter. */
10310 && (!REG_P (operands[0])
10311 || SSE_REG_P (operands[0])
10312 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10313 {
10314 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10315 code = GET_CODE (operands[1]);
10316
10317      /* See if we have a (cross) match between the comparison operands and
10318	 the conditional move operands. */
10319 if (rtx_equal_p (operands[2], op1))
10320 {
10321 rtx tmp = op0;
10322 op0 = op1;
10323 op1 = tmp;
10324 code = reverse_condition_maybe_unordered (code);
10325 }
10326 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10327 {
10328 /* Check for min operation. */
10329 if (code == LT || code == UNLE)
10330 {
10331 if (code == UNLE)
10332 {
10333 rtx tmp = op0;
10334 op0 = op1;
10335 op1 = tmp;
10336 }
10337 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10338 if (memory_operand (op0, VOIDmode))
10339 op0 = force_reg (GET_MODE (operands[0]), op0);
10340 if (GET_MODE (operands[0]) == SFmode)
10341 emit_insn (gen_minsf3 (operands[0], op0, op1));
10342 else
10343 emit_insn (gen_mindf3 (operands[0], op0, op1));
10344 return 1;
10345 }
10346 /* Check for max operation. */
10347 if (code == GT || code == UNGE)
10348 {
10349 if (code == UNGE)
10350 {
10351 rtx tmp = op0;
10352 op0 = op1;
10353 op1 = tmp;
10354 }
10355 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10356 if (memory_operand (op0, VOIDmode))
10357 op0 = force_reg (GET_MODE (operands[0]), op0);
10358 if (GET_MODE (operands[0]) == SFmode)
10359 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10360 else
10361 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10362 return 1;
10363 }
10364 }
10365      /* Massage the condition into an sse_comparison_operator. In non-IEEE
10366	 mode, try to canonicalize the destination operand
10367	 to be first in the comparison - this helps reload avoid extra
10368	 moves. */
10369 if (!sse_comparison_operator (operands[1], VOIDmode)
10370 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10371 {
10372 rtx tmp = ix86_compare_op0;
10373 ix86_compare_op0 = ix86_compare_op1;
10374 ix86_compare_op1 = tmp;
10375 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10376 VOIDmode, ix86_compare_op0,
10377 ix86_compare_op1);
10378 }
10379      /* Similarly, try to make the result the first operand of the conditional
10380	 move. We also don't support the NE comparison on SSE, so try to
10381	 avoid it. */
10382 if ((rtx_equal_p (operands[0], operands[3])
10383 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10384 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10385 {
10386 rtx tmp = operands[2];
10387 operands[2] = operands[3];
10388 operands[3] = tmp;
10389 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10390 (GET_CODE (operands[1])),
10391 VOIDmode, ix86_compare_op0,
10392 ix86_compare_op1);
10393 }
10394 if (GET_MODE (operands[0]) == SFmode)
10395 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10396 operands[2], operands[3],
10397 ix86_compare_op0, ix86_compare_op1));
10398 else
10399 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10400 operands[2], operands[3],
10401 ix86_compare_op0, ix86_compare_op1));
10402 return 1;
10403 }
10404
10405 /* The floating point conditional move instructions don't directly
10406 support conditions resulting from a signed integer comparison. */
10407
10408 code = GET_CODE (operands[1]);
10409 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10410
10411 /* The floating point conditional move instructions don't directly
10412 support signed integer comparisons. */
10413
10414 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10415 {
10416 if (second_test != NULL || bypass_test != NULL)
10417 abort ();
10418 tmp = gen_reg_rtx (QImode);
10419 ix86_expand_setcc (code, tmp);
10420 code = NE;
10421 ix86_compare_op0 = tmp;
10422 ix86_compare_op1 = const0_rtx;
10423 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10424 }
10425 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10426 {
10427 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10428 emit_move_insn (tmp, operands[3]);
10429 operands[3] = tmp;
10430 }
10431 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10432 {
10433 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10434 emit_move_insn (tmp, operands[2]);
10435 operands[2] = tmp;
10436 }
10437
10438 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10439 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10440 compare_op,
10441 operands[2],
10442 operands[3])));
10443 if (bypass_test)
10444 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10445 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10446 bypass_test,
10447 operands[3],
10448 operands[0])));
10449 if (second_test)
10450 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10451 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10452 second_test,
10453 operands[2],
10454 operands[0])));
10455
10456 return 1;
10457}
10458
10459/* Expand conditional increment or decrement using adc/sbb instructions.
10460   The default case using setcc followed by the conditional move can be
10461   done by generic code. */
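/* A minimal sketch of the intent, assuming a plain unsigned comparison:
   x = y + (a < b) can become a compare that sets the carry flag followed by
   an add-with-carry of a zero immediate; the decrement case uses sbb
   instead.  */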
10462int
10463ix86_expand_int_addcc (rtx operands[])
10464{
10465 enum rtx_code code = GET_CODE (operands[1]);
10466 rtx compare_op;
10467 rtx val = const0_rtx;
10468 bool fpcmp = false;
10469 enum machine_mode mode = GET_MODE (operands[0]);
10470
10471 if (operands[3] != const1_rtx
10472 && operands[3] != constm1_rtx)
10473 return 0;
10474 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10475 ix86_compare_op1, &compare_op))
10476 return 0;
10477 code = GET_CODE (compare_op);
10478
10479 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10480 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10481 {
10482 fpcmp = true;
10483 code = ix86_fp_compare_code_to_integer (code);
10484 }
10485
10486 if (code != LTU)
10487 {
10488 val = constm1_rtx;
10489 if (fpcmp)
10490 PUT_CODE (compare_op,
10491 reverse_condition_maybe_unordered
10492 (GET_CODE (compare_op)));
10493 else
10494 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10495 }
10496 PUT_MODE (compare_op, mode);
10497
10498 /* Construct either adc or sbb insn. */
10499 if ((code == LTU) == (operands[3] == constm1_rtx))
10500 {
10501 switch (GET_MODE (operands[0]))
10502 {
10503 case QImode:
10504 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10505 break;
10506 case HImode:
10507 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10508 break;
10509 case SImode:
10510 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10511 break;
10512 case DImode:
10513 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10514 break;
10515 default:
10516 abort ();
10517 }
10518 }
10519 else
10520 {
10521 switch (GET_MODE (operands[0]))
10522 {
10523 case QImode:
10524 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10525 break;
10526 case HImode:
10527 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10528 break;
10529 case SImode:
10530 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10531 break;
10532 case DImode:
10533 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10534 break;
10535 default:
10536 abort ();
10537 }
10538 }
10539 return 1; /* DONE */
10540}
10541
10542
10543/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10544   works for floating point parameters and non-offsettable memories.
10545   For pushes, it returns just stack offsets; the values will be saved
10546   in the right order. At most three parts are generated. */
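/* For instance, on a 32-bit target a DImode value is split into two SImode
   parts, while an XFmode value is split into three.  */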
10547
10548static int
10549ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10550{
10551 int size;
10552
10553 if (!TARGET_64BIT)
10554 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10555 else
10556 size = (GET_MODE_SIZE (mode) + 4) / 8;
10557
10558 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10559 abort ();
10560 if (size < 2 || size > 3)
10561 abort ();
10562
10563  /* Optimize constant pool references to immediates. This is used by fp
10564     moves, which force all constants to memory to allow combining. */
10565 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10566 {
10567 rtx tmp = maybe_get_pool_constant (operand);
10568 if (tmp)
10569 operand = tmp;
10570 }
10571
10572 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10573 {
10574      /* The only non-offsettable memories we handle are pushes. */
10575 if (! push_operand (operand, VOIDmode))
10576 abort ();
10577
10578 operand = copy_rtx (operand);
10579 PUT_MODE (operand, Pmode);
10580 parts[0] = parts[1] = parts[2] = operand;
10581 }
10582 else if (!TARGET_64BIT)
10583 {
10584 if (mode == DImode)
10585 split_di (&operand, 1, &parts[0], &parts[1]);
10586 else
10587 {
10588 if (REG_P (operand))
10589 {
10590 if (!reload_completed)
10591 abort ();
10592 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10593 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10594 if (size == 3)
10595 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10596 }
10597 else if (offsettable_memref_p (operand))
10598 {
10599 operand = adjust_address (operand, SImode, 0);
10600 parts[0] = operand;
10601 parts[1] = adjust_address (operand, SImode, 4);
10602 if (size == 3)
10603 parts[2] = adjust_address (operand, SImode, 8);
10604 }
10605 else if (GET_CODE (operand) == CONST_DOUBLE)
10606 {
10607 REAL_VALUE_TYPE r;
10608 long l[4];
10609
10610 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10611 switch (mode)
10612 {
10613 case XFmode:
10614 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10615 parts[2] = gen_int_mode (l[2], SImode);
10616 break;
10617 case DFmode:
10618 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10619 break;
10620 default:
10621 abort ();
10622 }
10623 parts[1] = gen_int_mode (l[1], SImode);
10624 parts[0] = gen_int_mode (l[0], SImode);
10625 }
10626 else
10627 abort ();
10628 }
10629 }
10630 else
10631 {
10632 if (mode == TImode)
10633 split_ti (&operand, 1, &parts[0], &parts[1]);
10634 if (mode == XFmode || mode == TFmode)
10635 {
10636 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10637 if (REG_P (operand))
10638 {
10639 if (!reload_completed)
10640 abort ();
10641 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10642 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10643 }
10644 else if (offsettable_memref_p (operand))
10645 {
10646 operand = adjust_address (operand, DImode, 0);
10647 parts[0] = operand;
10648 parts[1] = adjust_address (operand, upper_mode, 8);
10649 }
10650 else if (GET_CODE (operand) == CONST_DOUBLE)
10651 {
10652 REAL_VALUE_TYPE r;
10653 long l[4];
10654
10655 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10656 real_to_target (l, &r, mode);
10657
10658 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10659 if (HOST_BITS_PER_WIDE_INT >= 64)
10660 parts[0]
10661 = gen_int_mode
10662 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10663 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10664 DImode);
10665 else
10666 parts[0] = immed_double_const (l[0], l[1], DImode);
10667
10668 if (upper_mode == SImode)
10669 parts[1] = gen_int_mode (l[2], SImode);
10670 else if (HOST_BITS_PER_WIDE_INT >= 64)
10671 parts[1]
10672 = gen_int_mode
10673 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10674 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10675 DImode);
10676 else
10677 parts[1] = immed_double_const (l[2], l[3], DImode);
10678 }
10679 else
10680 abort ();
10681 }
10682 }
10683
10684 return size;
10685}
10686
10687/* Emit insns to perform a move or push of DI, DF, and XF values.
10688 Return false when normal moves are needed; true when all required
10689 insns have been emitted. Operands 2-4 contain the input values
10690   in the correct order; operands 5-7 contain the output values. */
10691
10692void
10693ix86_split_long_move (rtx operands[])
10694{
10695 rtx part[2][3];
10696 int nparts;
10697 int push = 0;
10698 int collisions = 0;
10699 enum machine_mode mode = GET_MODE (operands[0]);
10700
10701  /* The DFmode expanders may ask us to move a double.
10702     For a 64-bit target this is a single move. By hiding that fact
10703     here we simplify the i386.md splitters. */
10704 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10705 {
10706 /* Optimize constant pool reference to immediates. This is used by
10707 fp moves, that force all constants to memory to allow combining. */
10708
10709 if (GET_CODE (operands[1]) == MEM
10710 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10711 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10712 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10713 if (push_operand (operands[0], VOIDmode))
10714 {
10715 operands[0] = copy_rtx (operands[0]);
10716 PUT_MODE (operands[0], Pmode);
10717 }
10718 else
10719 operands[0] = gen_lowpart (DImode, operands[0]);
10720 operands[1] = gen_lowpart (DImode, operands[1]);
10721 emit_move_insn (operands[0], operands[1]);
10722 return;
10723 }
10724
10725 /* The only non-offsettable memory we handle is push. */
10726 if (push_operand (operands[0], VOIDmode))
10727 push = 1;
10728 else if (GET_CODE (operands[0]) == MEM
10729 && ! offsettable_memref_p (operands[0]))
10730 abort ();
10731
10732 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10733 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10734
10735  /* When emitting a push, be careful with source operands on the stack. */
10736 if (push && GET_CODE (operands[1]) == MEM
10737 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10738 {
10739 if (nparts == 3)
10740 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10741 XEXP (part[1][2], 0));
10742 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10743 XEXP (part[1][1], 0));
10744 }
10745
10746  /* We need to do the copy in the right order in case an address register
10747     of the source overlaps the destination. */
10748 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10749 {
10750 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10751 collisions++;
10752 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10753 collisions++;
10754 if (nparts == 3
10755 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10756 collisions++;
10757
10758 /* Collision in the middle part can be handled by reordering. */
10759 if (collisions == 1 && nparts == 3
10760 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10761 {
10762 rtx tmp;
10763 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10764 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10765 }
10766
10767 /* If there are more collisions, we can't handle it by reordering.
10768 Do an lea to the last part and use only one colliding move. */
10769 else if (collisions > 1)
10770 {
10771 rtx base;
10772
10773 collisions = 1;
10774
10775 base = part[0][nparts - 1];
10776
10777 /* Handle the case when the last part isn't valid for lea.
10778 Happens in 64-bit mode storing the 12-byte XFmode. */
10779 if (GET_MODE (base) != Pmode)
10780 base = gen_rtx_REG (Pmode, REGNO (base));
10781
10782 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10783 part[1][0] = replace_equiv_address (part[1][0], base);
10784 part[1][1] = replace_equiv_address (part[1][1],
10785 plus_constant (base, UNITS_PER_WORD));
10786 if (nparts == 3)
10787 part[1][2] = replace_equiv_address (part[1][2],
10788 plus_constant (base, 8));
10789 }
10790 }
10791
10792 if (push)
10793 {
10794 if (!TARGET_64BIT)
10795 {
10796 if (nparts == 3)
10797 {
10798 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10799 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10800 emit_move_insn (part[0][2], part[1][2]);
10801 }
10802 }
10803 else
10804 {
10805	  /* In 64-bit mode we don't have a 32-bit push available. In case this is
10806	     a register, it is OK - we will just use the larger counterpart. We also
10807	     retype memory - this comes from an attempt to avoid the REX prefix on
10808	     the move of the second half of a TFmode value. */
10809 if (GET_MODE (part[1][1]) == SImode)
10810 {
10811 if (GET_CODE (part[1][1]) == MEM)
10812 part[1][1] = adjust_address (part[1][1], DImode, 0);
10813 else if (REG_P (part[1][1]))
10814 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10815 else
10816 abort ();
10817 if (GET_MODE (part[1][0]) == SImode)
10818 part[1][0] = part[1][1];
10819 }
10820 }
10821 emit_move_insn (part[0][1], part[1][1]);
10822 emit_move_insn (part[0][0], part[1][0]);
10823 return;
10824 }
10825
10826  /* Choose the correct order so as not to overwrite the source before it is copied. */
10827 if ((REG_P (part[0][0])
10828 && REG_P (part[1][1])
10829 && (REGNO (part[0][0]) == REGNO (part[1][1])
10830 || (nparts == 3
10831 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10832 || (collisions > 0
10833 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10834 {
10835 if (nparts == 3)
10836 {
10837 operands[2] = part[0][2];
10838 operands[3] = part[0][1];
10839 operands[4] = part[0][0];
10840 operands[5] = part[1][2];
10841 operands[6] = part[1][1];
10842 operands[7] = part[1][0];
10843 }
10844 else
10845 {
10846 operands[2] = part[0][1];
10847 operands[3] = part[0][0];
10848 operands[5] = part[1][1];
10849 operands[6] = part[1][0];
10850 }
10851 }
10852 else
10853 {
10854 if (nparts == 3)
10855 {
10856 operands[2] = part[0][0];
10857 operands[3] = part[0][1];
10858 operands[4] = part[0][2];
10859 operands[5] = part[1][0];
10860 operands[6] = part[1][1];
10861 operands[7] = part[1][2];
10862 }
10863 else
10864 {
10865 operands[2] = part[0][0];
10866 operands[3] = part[0][1];
10867 operands[5] = part[1][0];
10868 operands[6] = part[1][1];
10869 }
10870 }
10871 emit_move_insn (operands[2], operands[5]);
10872 emit_move_insn (operands[3], operands[6]);
10873 if (nparts == 3)
10874 emit_move_insn (operands[4], operands[7]);
10875
10876 return;
10877}
10878
10879void
10880ix86_split_ashldi (rtx *operands, rtx scratch)
10881{
10882 rtx low[2], high[2];
10883 int count;
10884
10885 if (GET_CODE (operands[2]) == CONST_INT)
10886 {
10887 split_di (operands, 2, low, high);
10888 count = INTVAL (operands[2]) & 63;
10889
10890 if (count >= 32)
10891 {
10892 emit_move_insn (high[0], low[1]);
10893 emit_move_insn (low[0], const0_rtx);
10894
10895 if (count > 32)
10896 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10897 }
10898 else
10899 {
10900 if (!rtx_equal_p (operands[0], operands[1]))
10901 emit_move_insn (operands[0], operands[1]);
10902 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10903 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10904 }
10905 }
10906 else
10907 {
10908 if (!rtx_equal_p (operands[0], operands[1]))
10909 emit_move_insn (operands[0], operands[1]);
10910
10911 split_di (operands, 1, low, high);
10912
10913 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10914 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10915
10916 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10917 {
10918 if (! no_new_pseudos)
10919 scratch = force_reg (SImode, const0_rtx);
10920 else
10921 emit_move_insn (scratch, const0_rtx);
10922
10923 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10924 scratch));
10925 }
10926 else
10927 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10928 }
10929}
10930
10931void
10932ix86_split_ashrdi (rtx *operands, rtx scratch)
10933{
10934 rtx low[2], high[2];
10935 int count;
10936
10937 if (GET_CODE (operands[2]) == CONST_INT)
10938 {
10939 split_di (operands, 2, low, high);
10940 count = INTVAL (operands[2]) & 63;
10941
10942 if (count >= 32)
10943 {
10944 emit_move_insn (low[0], high[1]);
10945
10946 if (! reload_completed)
10947 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10948 else
10949 {
10950 emit_move_insn (high[0], low[0]);
10951 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10952 }
10953
10954 if (count > 32)
10955 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10956 }
10957 else
10958 {
10959 if (!rtx_equal_p (operands[0], operands[1]))
10960 emit_move_insn (operands[0], operands[1]);
10961 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10962 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10963 }
10964 }
10965 else
10966 {
10967 if (!rtx_equal_p (operands[0], operands[1]))
10968 emit_move_insn (operands[0], operands[1]);
10969
10970 split_di (operands, 1, low, high);
10971
10972 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10973 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10974
10975 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10976 {
10977 if (! no_new_pseudos)
10978 scratch = gen_reg_rtx (SImode);
10979 emit_move_insn (scratch, high[0]);
10980 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10981 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10982 scratch));
10983 }
10984 else
10985 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10986 }
10987}
10988
10989void
10990ix86_split_lshrdi (rtx *operands, rtx scratch)
10991{
10992 rtx low[2], high[2];
10993 int count;
10994
10995 if (GET_CODE (operands[2]) == CONST_INT)
10996 {
10997 split_di (operands, 2, low, high);
10998 count = INTVAL (operands[2]) & 63;
10999
11000 if (count >= 32)
11001 {
11002 emit_move_insn (low[0], high[1]);
11003 emit_move_insn (high[0], const0_rtx);
11004
11005 if (count > 32)
11006 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11007 }
11008 else
11009 {
11010 if (!rtx_equal_p (operands[0], operands[1]))
11011 emit_move_insn (operands[0], operands[1]);
11012 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11013 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11014 }
11015 }
11016 else
11017 {
11018 if (!rtx_equal_p (operands[0], operands[1]))
11019 emit_move_insn (operands[0], operands[1]);
11020
11021 split_di (operands, 1, low, high);
11022
11023 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11024 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11025
11026 /* Heh. By reversing the arguments, we can reuse this pattern. */
11027 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11028 {
11029 if (! no_new_pseudos)
11030 scratch = force_reg (SImode, const0_rtx);
11031 else
11032 emit_move_insn (scratch, const0_rtx);
11033
11034 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11035 scratch));
11036 }
11037 else
11038 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11039 }
11040}
11041
11042/* Helper function for the string operations below. Test VARIABLE against
11043   the alignment mask VALUE; if those bits are clear, jump to the returned label. */
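/* For example, ix86_expand_aligntest (destreg, 1) emits a test of the low
   bit of DESTREG and a jump to the returned label when that bit is clear,
   i.e. when the address is already 2-byte aligned.  */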
11044static rtx
11045ix86_expand_aligntest (rtx variable, int value)
11046{
11047 rtx label = gen_label_rtx ();
11048 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11049 if (GET_MODE (variable) == DImode)
11050 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11051 else
11052 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11053 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11054 1, label);
11055 return label;
11056}
11057
11058/* Decrease COUNTREG by VALUE. */
11059static void
11060ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11061{
11062 if (GET_MODE (countreg) == DImode)
11063 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11064 else
11065 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11066}
11067
11068/* Zero extend the possibly-SImode EXP into a Pmode register. */
11069rtx
11070ix86_zero_extend_to_Pmode (rtx exp)
11071{
11072 rtx r;
11073 if (GET_MODE (exp) == VOIDmode)
11074 return force_reg (Pmode, exp);
11075 if (GET_MODE (exp) == Pmode)
11076 return copy_to_mode_reg (Pmode, exp);
11077 r = gen_reg_rtx (Pmode);
11078 emit_insn (gen_zero_extendsidi2 (r, exp));
11079 return r;
11080}
11081
11082/* Expand string move (memcpy) operation. Use i386 string operations when
11083 profitable. expand_clrstr contains similar code. */
11084int
11085ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11086{
11087 rtx srcreg, destreg, countreg, srcexp, destexp;
11088 enum machine_mode counter_mode;
11089 HOST_WIDE_INT align = 0;
11090 unsigned HOST_WIDE_INT count = 0;
11091
11092 if (GET_CODE (align_exp) == CONST_INT)
11093 align = INTVAL (align_exp);
11094
11095 /* Can't use any of this if the user has appropriated esi or edi. */
11096 if (global_regs[4] || global_regs[5])
11097 return 0;
11098
11099 /* This simple hack avoids all inlining code and simplifies code below. */
11100 if (!TARGET_ALIGN_STRINGOPS)
11101 align = 64;
11102
11103 if (GET_CODE (count_exp) == CONST_INT)
11104 {
11105 count = INTVAL (count_exp);
11106 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11107 return 0;
11108 }
11109
11110  /* Figure out the proper mode for the counter. For 32 bits it is always SImode,
11111     for 64 bits use SImode when possible, otherwise DImode.
11112     Set count to the number of bytes copied when known at compile time. */
11113 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11114 || x86_64_zero_extended_value (count_exp))
11115 counter_mode = SImode;
11116 else
11117 counter_mode = DImode;
11118
11119 if (counter_mode != SImode && counter_mode != DImode)
11120 abort ();
11121
11122 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11123 if (destreg != XEXP (dst, 0))
11124 dst = replace_equiv_address_nv (dst, destreg);
11125 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11126 if (srcreg != XEXP (src, 0))
11127 src = replace_equiv_address_nv (src, srcreg);
11128
11129 /* When optimizing for size emit simple rep ; movsb instruction for
11130 counts not divisible by 4. */
11131
11132 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11133 {
11134 emit_insn (gen_cld ());
11135 countreg = ix86_zero_extend_to_Pmode (count_exp);
11136 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11137 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11138 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11139 destexp, srcexp));
11140 }
11141
11142 /* For constant aligned (or small unaligned) copies use rep movsl
11143 followed by code copying the rest. For PentiumPro ensure 8 byte
11144 alignment to allow rep movsl acceleration. */
11145
11146 else if (count != 0
11147 && (align >= 8
11148 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11149 || optimize_size || count < (unsigned int) 64))
11150 {
11151 unsigned HOST_WIDE_INT offset = 0;
11152 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11153 rtx srcmem, dstmem;
11154
11155 emit_insn (gen_cld ());
11156 if (count & ~(size - 1))
11157 {
11158 countreg = copy_to_mode_reg (counter_mode,
11159 GEN_INT ((count >> (size == 4 ? 2 : 3))
11160 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11161 countreg = ix86_zero_extend_to_Pmode (countreg);
11162
11163 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11164 GEN_INT (size == 4 ? 2 : 3));
11165 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11166 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11167
11168 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11169 countreg, destexp, srcexp));
11170 offset = count & ~(size - 1);
11171 }
11172 if (size == 8 && (count & 0x04))
11173 {
11174 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11175 offset);
11176 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11177 offset);
11178 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11179 offset += 4;
11180 }
11181 if (count & 0x02)
11182 {
11183 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11184 offset);
11185 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11186 offset);
11187 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11188 offset += 2;
11189 }
11190 if (count & 0x01)
11191 {
11192 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11193 offset);
11194 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11195 offset);
11196 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11197 }
11198 }
11199 /* The generic code based on the glibc implementation:
11200 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11201 allowing accelerated copying there)
11202 - copy the data using rep movsl
11203 - copy the rest. */
11204 else
11205 {
11206 rtx countreg2;
11207 rtx label = NULL;
11208 rtx srcmem, dstmem;
11209 int desired_alignment = (TARGET_PENTIUMPRO
11210 && (count == 0 || count >= (unsigned int) 260)
11211 ? 8 : UNITS_PER_WORD);
11212 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11213 dst = change_address (dst, BLKmode, destreg);
11214 src = change_address (src, BLKmode, srcreg);
11215
11216      /* In case we don't know anything about the alignment, default to the
11217	 library version, since it is usually equally fast and results in
11218	 shorter code.
11219
11220	 Also emit a call when we know that the count is large and the call
11221	 overhead will not be important. */
11222 if (!TARGET_INLINE_ALL_STRINGOPS
11223 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11224 return 0;
11225
11226 if (TARGET_SINGLE_STRINGOP)
11227 emit_insn (gen_cld ());
11228
11229 countreg2 = gen_reg_rtx (Pmode);
11230 countreg = copy_to_mode_reg (counter_mode, count_exp);
11231
11232 /* We don't use loops to align destination and to copy parts smaller
11233 than 4 bytes, because gcc is able to optimize such code better (in
11234 the case the destination or the count really is aligned, gcc is often
11235 able to predict the branches) and also it is friendlier to the
11236 hardware branch prediction.
11237
11238 Using loops is beneficial for generic case, because we can
11239 handle small counts using the loops. Many CPUs (such as Athlon)
11240 have large REP prefix setup costs.
11241
11242 This is quite costly. Maybe we can revisit this decision later or
11243 add some customizability to this code. */
11244
11245 if (count == 0 && align < desired_alignment)
11246 {
11247 label = gen_label_rtx ();
11248 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11249 LEU, 0, counter_mode, 1, label);
11250 }
11251 if (align <= 1)
11252 {
11253 rtx label = ix86_expand_aligntest (destreg, 1);
11254 srcmem = change_address (src, QImode, srcreg);
11255 dstmem = change_address (dst, QImode, destreg);
11256 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11257 ix86_adjust_counter (countreg, 1);
11258 emit_label (label);
11259 LABEL_NUSES (label) = 1;
11260 }
11261 if (align <= 2)
11262 {
11263 rtx label = ix86_expand_aligntest (destreg, 2);
11264 srcmem = change_address (src, HImode, srcreg);
11265 dstmem = change_address (dst, HImode, destreg);
11266 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11267 ix86_adjust_counter (countreg, 2);
11268 emit_label (label);
11269 LABEL_NUSES (label) = 1;
11270 }
11271 if (align <= 4 && desired_alignment > 4)
11272 {
11273 rtx label = ix86_expand_aligntest (destreg, 4);
11274 srcmem = change_address (src, SImode, srcreg);
11275 dstmem = change_address (dst, SImode, destreg);
11276 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11277 ix86_adjust_counter (countreg, 4);
11278 emit_label (label);
11279 LABEL_NUSES (label) = 1;
11280 }
11281
11282 if (label && desired_alignment > 4 && !TARGET_64BIT)
11283 {
11284 emit_label (label);
11285 LABEL_NUSES (label) = 1;
11286 label = NULL_RTX;
11287 }
11288 if (!TARGET_SINGLE_STRINGOP)
11289 emit_insn (gen_cld ());
11290 if (TARGET_64BIT)
11291 {
11292 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11293 GEN_INT (3)));
11294 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11295 }
11296 else
11297 {
11298 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11299 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11300 }
11301 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11302 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11303 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11304 countreg2, destexp, srcexp));
11305
11306 if (label)
11307 {
11308 emit_label (label);
11309 LABEL_NUSES (label) = 1;
11310 }
11311 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11312 {
11313 srcmem = change_address (src, SImode, srcreg);
11314 dstmem = change_address (dst, SImode, destreg);
11315 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11316 }
11317 if ((align <= 4 || count == 0) && TARGET_64BIT)
11318 {
11319 rtx label = ix86_expand_aligntest (countreg, 4);
11320 srcmem = change_address (src, SImode, srcreg);
11321 dstmem = change_address (dst, SImode, destreg);
11322 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11323 emit_label (label);
11324 LABEL_NUSES (label) = 1;
11325 }
11326 if (align > 2 && count != 0 && (count & 2))
11327 {
11328 srcmem = change_address (src, HImode, srcreg);
11329 dstmem = change_address (dst, HImode, destreg);
11330 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11331 }
11332 if (align <= 2 || count == 0)
11333 {
11334 rtx label = ix86_expand_aligntest (countreg, 2);
11335 srcmem = change_address (src, HImode, srcreg);
11336 dstmem = change_address (dst, HImode, destreg);
11337 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11338 emit_label (label);
11339 LABEL_NUSES (label) = 1;
11340 }
11341 if (align > 1 && count != 0 && (count & 1))
11342 {
11343 srcmem = change_address (src, QImode, srcreg);
11344 dstmem = change_address (dst, QImode, destreg);
11345 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11346 }
11347 if (align <= 1 || count == 0)
11348 {
11349 rtx label = ix86_expand_aligntest (countreg, 1);
11350 srcmem = change_address (src, QImode, srcreg);
11351 dstmem = change_address (dst, QImode, destreg);
11352 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11353 emit_label (label);
11354 LABEL_NUSES (label) = 1;
11355 }
11356 }
11357
11358 return 1;
11359}
11360
11361/* Expand string clear operation (bzero). Use i386 string operations when
11362 profitable. expand_movstr contains similar code. */
11363int
11364ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11365{
11366 rtx destreg, zeroreg, countreg, destexp;
11367 enum machine_mode counter_mode;
11368 HOST_WIDE_INT align = 0;
11369 unsigned HOST_WIDE_INT count = 0;
11370
11371 if (GET_CODE (align_exp) == CONST_INT)
11372 align = INTVAL (align_exp);
11373
11374 /* Can't use any of this if the user has appropriated esi. */
11375 if (global_regs[4])
11376 return 0;
11377
11378 /* This simple hack avoids all inlining code and simplifies code below. */
11379 if (!TARGET_ALIGN_STRINGOPS)
11380 align = 32;
11381
11382 if (GET_CODE (count_exp) == CONST_INT)
11383 {
11384 count = INTVAL (count_exp);
11385 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11386 return 0;
11387 }
11388   /* Figure out the proper mode for the counter.  For 32 bits it is always
11389      SImode; for 64 bits use SImode when possible, otherwise DImode.
11390      Set count to the number of bytes cleared when known at compile time. */
11391 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11392 || x86_64_zero_extended_value (count_exp))
11393 counter_mode = SImode;
11394 else
11395 counter_mode = DImode;
11396
11397 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11398 if (destreg != XEXP (dst, 0))
11399 dst = replace_equiv_address_nv (dst, destreg);
11400
11401 emit_insn (gen_cld ());
11402
11403   /* When optimizing for size, emit a simple rep ; stosb instruction for
11404      counts not divisible by 4. */
11405
11406 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11407 {
11408 countreg = ix86_zero_extend_to_Pmode (count_exp);
11409 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11410 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11411 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11412 }
11413 else if (count != 0
11414 && (align >= 8
11415 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11416 || optimize_size || count < (unsigned int) 64))
11417 {
11418 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11419 unsigned HOST_WIDE_INT offset = 0;
11420
11421 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11422 if (count & ~(size - 1))
11423 {
11424 countreg = copy_to_mode_reg (counter_mode,
11425 GEN_INT ((count >> (size == 4 ? 2 : 3))
11426 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11427 countreg = ix86_zero_extend_to_Pmode (countreg);
11428 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11429 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11430 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11431 offset = count & ~(size - 1);
11432 }
11433 if (size == 8 && (count & 0x04))
11434 {
11435 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11436 offset);
11437 emit_insn (gen_strset (destreg, mem,
11438 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11439 offset += 4;
11440 }
11441 if (count & 0x02)
11442 {
11443 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11444 offset);
11445 emit_insn (gen_strset (destreg, mem,
11446 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11447 offset += 2;
11448 }
11449 if (count & 0x01)
11450 {
11451 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11452 offset);
11453 emit_insn (gen_strset (destreg, mem,
11454 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11455 }
11456 }
11457 else
11458 {
11459 rtx countreg2;
11460 rtx label = NULL;
11461 /* Compute desired alignment of the string operation. */
11462 int desired_alignment = (TARGET_PENTIUMPRO
11463 && (count == 0 || count >= (unsigned int) 260)
11464 ? 8 : UNITS_PER_WORD);
11465
11466    /* In case we don't know anything about the alignment, default to
11467       the library version, since it is usually equally fast and results
11468       in shorter code.
11469
11470       Also emit a call when we know that the count is large and the call
11471       overhead will not be important. */
11472 if (!TARGET_INLINE_ALL_STRINGOPS
11473 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11474 return 0;
11475
11476 if (TARGET_SINGLE_STRINGOP)
11477 emit_insn (gen_cld ());
11478
11479 countreg2 = gen_reg_rtx (Pmode);
11480 countreg = copy_to_mode_reg (counter_mode, count_exp);
11481 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11482 /* Get rid of MEM_OFFSET, it won't be accurate. */
11483 dst = change_address (dst, BLKmode, destreg);
11484
11485 if (count == 0 && align < desired_alignment)
11486 {
11487 label = gen_label_rtx ();
11488 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11489 LEU, 0, counter_mode, 1, label);
11490 }
11491 if (align <= 1)
11492 {
11493 rtx label = ix86_expand_aligntest (destreg, 1);
11494 emit_insn (gen_strset (destreg, dst,
11495 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11496 ix86_adjust_counter (countreg, 1);
11497 emit_label (label);
11498 LABEL_NUSES (label) = 1;
11499 }
11500 if (align <= 2)
11501 {
11502 rtx label = ix86_expand_aligntest (destreg, 2);
11503 emit_insn (gen_strset (destreg, dst,
11504 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11505 ix86_adjust_counter (countreg, 2);
11506 emit_label (label);
11507 LABEL_NUSES (label) = 1;
11508 }
11509 if (align <= 4 && desired_alignment > 4)
11510 {
11511 rtx label = ix86_expand_aligntest (destreg, 4);
11512 emit_insn (gen_strset (destreg, dst,
11513 (TARGET_64BIT
11514 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11515 : zeroreg)));
11516 ix86_adjust_counter (countreg, 4);
11517 emit_label (label);
11518 LABEL_NUSES (label) = 1;
11519 }
11520
11521 if (label && desired_alignment > 4 && !TARGET_64BIT)
11522 {
11523 emit_label (label);
11524 LABEL_NUSES (label) = 1;
11525 label = NULL_RTX;
11526 }
11527
11528 if (!TARGET_SINGLE_STRINGOP)
11529 emit_insn (gen_cld ());
11530 if (TARGET_64BIT)
11531 {
11532 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11533 GEN_INT (3)));
11534 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11535 }
11536 else
11537 {
11538 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11539 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11540 }
11541 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11542 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11543
11544 if (label)
11545 {
11546 emit_label (label);
11547 LABEL_NUSES (label) = 1;
11548 }
11549
11550 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11551 emit_insn (gen_strset (destreg, dst,
11552 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11553 if (TARGET_64BIT && (align <= 4 || count == 0))
11554 {
11555 rtx label = ix86_expand_aligntest (countreg, 4);
11556 emit_insn (gen_strset (destreg, dst,
11557 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11558 emit_label (label);
11559 LABEL_NUSES (label) = 1;
11560 }
11561 if (align > 2 && count != 0 && (count & 2))
11562 emit_insn (gen_strset (destreg, dst,
11563 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11564 if (align <= 2 || count == 0)
11565 {
11566 rtx label = ix86_expand_aligntest (countreg, 2);
11567 emit_insn (gen_strset (destreg, dst,
11568 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11569 emit_label (label);
11570 LABEL_NUSES (label) = 1;
11571 }
11572 if (align > 1 && count != 0 && (count & 1))
11573 emit_insn (gen_strset (destreg, dst,
11574 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11575 if (align <= 1 || count == 0)
11576 {
11577 rtx label = ix86_expand_aligntest (countreg, 1);
11578 emit_insn (gen_strset (destreg, dst,
11579 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11580 emit_label (label);
11581 LABEL_NUSES (label) = 1;
11582 }
11583 }
11584 return 1;
11585}
11586
11587/* Expand strlen. */
11588int
11589ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11590{
11591 rtx addr, scratch1, scratch2, scratch3, scratch4;
11592
11593   /* The generic case of the strlen expander is long.  Avoid expanding
11594      it unless TARGET_INLINE_ALL_STRINGOPS. */
11595
11596 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11597 && !TARGET_INLINE_ALL_STRINGOPS
11598 && !optimize_size
11599 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11600 return 0;
11601
11602 addr = force_reg (Pmode, XEXP (src, 0));
11603 scratch1 = gen_reg_rtx (Pmode);
11604
11605 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11606 && !optimize_size)
11607 {
11608       /* Well, it seems that some optimizer does not combine a call like
11609          foo(strlen(bar), strlen(bar));
11610          when the move and the subtraction are done here.  It does calculate
11611          the length just once when these instructions are done inside
11612          output_strlen_unroll().  But since &bar[strlen(bar)] is often used
11613          and this uses one fewer register for the lifetime of
11614          output_strlen_unroll(), I think this is better. */
11615
11616 emit_move_insn (out, addr);
11617
11618 ix86_expand_strlensi_unroll_1 (out, src, align);
11619
11620 /* strlensi_unroll_1 returns the address of the zero at the end of
11621 the string, like memchr(), so compute the length by subtracting
11622 the start address. */
11623 if (TARGET_64BIT)
11624 emit_insn (gen_subdi3 (out, out, addr));
11625 else
11626 emit_insn (gen_subsi3 (out, out, addr));
11627 }
11628 else
11629 {
11630 rtx unspec;
11631 scratch2 = gen_reg_rtx (Pmode);
11632 scratch3 = gen_reg_rtx (Pmode);
11633 scratch4 = force_reg (Pmode, constm1_rtx);
11634
11635 emit_move_insn (scratch3, addr);
11636 eoschar = force_reg (QImode, eoschar);
11637
11638 emit_insn (gen_cld ());
11639 src = replace_equiv_address_nv (src, scratch3);
11640
11641 /* If .md starts supporting :P, this can be done in .md. */
11642 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11643 scratch4), UNSPEC_SCAS);
11644 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
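      /* For clarity (not in the original sources): repnz scasb decrements the
	 count register once per byte examined, including the terminating
	 zero, starting from -1; afterwards count == -(strlen + 2), so
	 strlen == ~count - 1, which the complement and add below compute
	 without a branch.  */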
11645 if (TARGET_64BIT)
11646 {
11647 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11648 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11649 }
11650 else
11651 {
11652 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11653 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11654 }
11655 }
11656 return 1;
11657}
11658
11659/* Expand the appropriate insns for doing strlen if not just doing
11660 repnz; scasb
11661
11662 out = result, initialized with the start address
11663 align_rtx = alignment of the address.
11664    scratch = scratch register, initialized with the start address when
11665 not aligned, otherwise undefined
11666
11667 This is just the body. It needs the initializations mentioned above and
11668 some address computing at the end. These things are done in i386.md. */
11669
11670static void
11671ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11672{
11673 int align;
11674 rtx tmp;
11675 rtx align_2_label = NULL_RTX;
11676 rtx align_3_label = NULL_RTX;
11677 rtx align_4_label = gen_label_rtx ();
11678 rtx end_0_label = gen_label_rtx ();
11679 rtx mem;
11680 rtx tmpreg = gen_reg_rtx (SImode);
11681 rtx scratch = gen_reg_rtx (SImode);
11682 rtx cmp;
11683
11684 align = 0;
11685 if (GET_CODE (align_rtx) == CONST_INT)
11686 align = INTVAL (align_rtx);
11687
11688 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11689
11690 /* Is there a known alignment and is it less than 4? */
11691 if (align < 4)
11692 {
11693 rtx scratch1 = gen_reg_rtx (Pmode);
11694 emit_move_insn (scratch1, out);
11695 /* Is there a known alignment and is it not 2? */
11696 if (align != 2)
11697 {
11698 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11699 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11700
11701 /* Leave just the 3 lower bits. */
11702 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11703 NULL_RTX, 0, OPTAB_WIDEN);
11704
11705 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11706 Pmode, 1, align_4_label);
11707 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11708 Pmode, 1, align_2_label);
11709 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11710 Pmode, 1, align_3_label);
11711 }
11712 else
11713 {
11714 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11715 	     check whether it is aligned to a 4-byte boundary. */
11716
11717 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11718 NULL_RTX, 0, OPTAB_WIDEN);
11719
11720 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11721 Pmode, 1, align_4_label);
11722 }
11723
11724 mem = change_address (src, QImode, out);
11725
11726 /* Now compare the bytes. */
11727
11728       /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11729 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11730 QImode, 1, end_0_label);
11731
11732 /* Increment the address. */
11733 if (TARGET_64BIT)
11734 emit_insn (gen_adddi3 (out, out, const1_rtx));
11735 else
11736 emit_insn (gen_addsi3 (out, out, const1_rtx));
11737
11738 /* Not needed with an alignment of 2 */
11739 if (align != 2)
11740 {
11741 emit_label (align_2_label);
11742
11743 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11744 end_0_label);
11745
11746 if (TARGET_64BIT)
11747 emit_insn (gen_adddi3 (out, out, const1_rtx));
11748 else
11749 emit_insn (gen_addsi3 (out, out, const1_rtx));
11750
11751 emit_label (align_3_label);
11752 }
11753
11754 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11755 end_0_label);
11756
11757 if (TARGET_64BIT)
11758 emit_insn (gen_adddi3 (out, out, const1_rtx));
11759 else
11760 emit_insn (gen_addsi3 (out, out, const1_rtx));
11761 }
11762
11763   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
11764      align this loop; that only makes programs larger and does not help
11765      speed. */
11766 emit_label (align_4_label);
11767
11768 mem = change_address (src, SImode, out);
11769 emit_move_insn (scratch, mem);
11770 if (TARGET_64BIT)
11771 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11772 else
11773 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11774
11775 /* This formula yields a nonzero result iff one of the bytes is zero.
11776      This saves three branches inside the loop and many cycles. */
11777
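  /* A worked illustration of the formula (x - 0x01010101) & ~x & 0x80808080:
     for scratch == 0x12003456, one of whose bytes is zero,

	scratch - 0x01010101   = 0x10ff3355
	~scratch               = 0xedffcba9
	AND of the two         = 0x00ff0301
	 ... & 0x80808080      = 0x00800000	(nonzero => a zero byte exists)

     whereas 0x11223344 (no zero byte) gives 0x00010003 & 0x80808080 == 0.  */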
11778 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11779 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11780 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11781 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11782 gen_int_mode (0x80808080, SImode)));
11783 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11784 align_4_label);
11785
11786 if (TARGET_CMOVE)
11787 {
11788 rtx reg = gen_reg_rtx (SImode);
11789 rtx reg2 = gen_reg_rtx (Pmode);
11790 emit_move_insn (reg, tmpreg);
11791 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11792
11793 /* If zero is not in the first two bytes, move two bytes forward. */
11794 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11795 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11796 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11797 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11798 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11799 reg,
11800 tmpreg)));
11801 /* Emit lea manually to avoid clobbering of flags. */
11802 emit_insn (gen_rtx_SET (SImode, reg2,
11803 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11804
11805 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11806 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11807 emit_insn (gen_rtx_SET (VOIDmode, out,
11808 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11809 reg2,
11810 out)));
11811
11812 }
11813 else
11814 {
11815 rtx end_2_label = gen_label_rtx ();
11816 /* Is zero in the first two bytes? */
11817
11818 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11819 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11820 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11821 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11822 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11823 pc_rtx);
11824 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11825 JUMP_LABEL (tmp) = end_2_label;
11826
11827 /* Not in the first two. Move two bytes forward. */
11828 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11829 if (TARGET_64BIT)
11830 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11831 else
11832 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11833
11834 emit_label (end_2_label);
11835
11836 }
11837
11838 /* Avoid branch in fixing the byte. */
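  /* Sketch of the idea (added commentary): at this point OUT is 4 past the
     word that contained the zero, plus 2 more if the zero was not in the low
     half.  The low byte of TMPREG has bit 7 set iff the first byte of the
     remaining pair was the zero.  Adding the register to itself copies that
     bit into the carry flag, so the subtract-with-borrow below backs OUT up
     by either 4 or 3 as appropriate, without a conditional jump.  */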
11839 tmpreg = gen_lowpart (QImode, tmpreg);
11840 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11841 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11842 if (TARGET_64BIT)
11843 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11844 else
11845 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11846
11847 emit_label (end_0_label);
11848}
11849
11850void
11851ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11852 rtx callarg2 ATTRIBUTE_UNUSED,
11853 rtx pop, int sibcall)
11854{
11855 rtx use = NULL, call;
11856
11857 if (pop == const0_rtx)
11858 pop = NULL;
11859 if (TARGET_64BIT && pop)
11860 abort ();
11861
11862#if TARGET_MACHO
11863 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11864 fnaddr = machopic_indirect_call_target (fnaddr);
11865#else
11866 /* Static functions and indirect calls don't need the pic register. */
11867 if (! TARGET_64BIT && flag_pic
11868 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11869 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11870 use_reg (&use, pic_offset_table_rtx);
11871
11872 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11873 {
11874 rtx al = gen_rtx_REG (QImode, 0);
11875 emit_move_insn (al, callarg2);
11876 use_reg (&use, al);
11877 }
11878#endif /* TARGET_MACHO */
11879
11880 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11881 {
11882 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11883 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11884 }
11885 if (sibcall && TARGET_64BIT
11886 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11887 {
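      /* Note (added commentary): R11 is chosen because a sibling call is
	 made after the epilogue, so the target address must live in a
	 register that is neither callee-saved nor used for passing
	 arguments; on x86-64, R11 satisfies both requirements.  */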
11888 rtx addr;
11889 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11890 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11891 emit_move_insn (fnaddr, addr);
11892 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11893 }
11894
11895 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11896 if (retval)
11897 call = gen_rtx_SET (VOIDmode, retval, call);
11898 if (pop)
11899 {
11900 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11901 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11902 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11903 }
11904
11905 call = emit_call_insn (call);
11906 if (use)
11907 CALL_INSN_FUNCTION_USAGE (call) = use;
11908}
11909
11910
11911/* Clear stack slot assignments remembered from previous functions.
11912 This is called from INIT_EXPANDERS once before RTL is emitted for each
11913 function. */
11914
11915static struct machine_function *
11916ix86_init_machine_status (void)
11917{
11918 struct machine_function *f;
11919
11920 f = ggc_alloc_cleared (sizeof (struct machine_function));
11921 f->use_fast_prologue_epilogue_nregs = -1;
11922
11923 return f;
11924}
11925
11926/* Return a MEM corresponding to a stack slot with mode MODE.
11927 Allocate a new slot if necessary.
11928
11929 The RTL for a function can have several slots available: N is
11930 which slot to use. */
11931
11932rtx
11933assign_386_stack_local (enum machine_mode mode, int n)
11934{
11935 struct stack_local_entry *s;
11936
11937 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11938 abort ();
11939
11940 for (s = ix86_stack_locals; s; s = s->next)
11941 if (s->mode == mode && s->n == n)
11942 return s->rtl;
11943
11944 s = (struct stack_local_entry *)
11945 ggc_alloc (sizeof (struct stack_local_entry));
11946 s->n = n;
11947 s->mode = mode;
11948 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11949
11950 s->next = ix86_stack_locals;
11951 ix86_stack_locals = s;
11952 return s->rtl;
11953}
11954
11955/* Construct the SYMBOL_REF for the tls_get_addr function. */
11956
11957static GTY(()) rtx ix86_tls_symbol;
11958rtx
11959ix86_tls_get_addr (void)
11960{
11961
11962 if (!ix86_tls_symbol)
11963 {
11964 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11965 (TARGET_GNU_TLS && !TARGET_64BIT)
11966 ? "___tls_get_addr"
11967 : "__tls_get_addr");
11968 }
11969
11970 return ix86_tls_symbol;
11971}
11972
11973/* Calculate the length of the memory address in the instruction
11974 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11975
11976static int
11977memory_address_length (rtx addr)
11978{
11979 struct ix86_address parts;
11980 rtx base, index, disp;
11981 int len;
11982
11983 if (GET_CODE (addr) == PRE_DEC
11984 || GET_CODE (addr) == POST_INC
11985 || GET_CODE (addr) == PRE_MODIFY
11986 || GET_CODE (addr) == POST_MODIFY)
11987 return 0;
11988
11989 if (! ix86_decompose_address (addr, &parts))
11990 abort ();
11991
11992 base = parts.base;
11993 index = parts.index;
11994 disp = parts.disp;
11995 len = 0;
11996
11997 /* Rule of thumb:
11998 - esp as the base always wants an index,
11999 - ebp as the base always wants a displacement. */
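  /* For illustration (standard IA-32 encodings, not specific to this file):
	movl (%ebp), %eax  ->  8b 45 00   (mod=01 forces a zero disp8)
	movl (%esp), %eax  ->  8b 04 24   (a SIB byte is required)
     so both forms pay one byte over a plain register-indirect access.  */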
12000
12001 /* Register Indirect. */
12002 if (base && !index && !disp)
12003 {
12004 /* esp (for its index) and ebp (for its displacement) need
12005 the two-byte modrm form. */
12006 if (addr == stack_pointer_rtx
12007 || addr == arg_pointer_rtx
12008 || addr == frame_pointer_rtx
12009 || addr == hard_frame_pointer_rtx)
12010 len = 1;
12011 }
12012
12013 /* Direct Addressing. */
12014 else if (disp && !base && !index)
12015 len = 4;
12016
12017 else
12018 {
12019 /* Find the length of the displacement constant. */
12020 if (disp)
12021 {
12022 if (GET_CODE (disp) == CONST_INT
12023 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12024 && base)
12025 len = 1;
12026 else
12027 len = 4;
12028 }
12029 /* ebp always wants a displacement. */
12030 else if (base == hard_frame_pointer_rtx)
12031 len = 1;
12032
12033 /* An index requires the two-byte modrm form.... */
12034 if (index
12035 /* ...like esp, which always wants an index. */
12036 || base == stack_pointer_rtx
12037 || base == arg_pointer_rtx
12038 || base == frame_pointer_rtx)
12039 len += 1;
12040 }
12041
12042 return len;
12043}
12044
12045 /* Compute the default value for the "length_immediate" attribute.  When
12046    SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
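/* For example (illustrative): "addl $1, %ebx" can use the sign-extended 8-bit
   immediate form (opcode 83 /0 ib) and needs a single immediate byte, while
   "addl $1000, %ebx" must use the 32-bit immediate form (81 /0 id).  */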
12047int
12048ix86_attr_length_immediate_default (rtx insn, int shortform)
12049{
12050 int len = 0;
12051 int i;
12052 extract_insn_cached (insn);
12053 for (i = recog_data.n_operands - 1; i >= 0; --i)
12054 if (CONSTANT_P (recog_data.operand[i]))
12055 {
12056 if (len)
12057 abort ();
12058 if (shortform
12059 && GET_CODE (recog_data.operand[i]) == CONST_INT
12060 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12061 len = 1;
12062 else
12063 {
12064 switch (get_attr_mode (insn))
12065 {
12066 case MODE_QI:
12067 len+=1;
12068 break;
12069 case MODE_HI:
12070 len+=2;
12071 break;
12072 case MODE_SI:
12073 len+=4;
12074 break;
12075 		  /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12076 case MODE_DI:
12077 len+=4;
12078 break;
12079 default:
12080 fatal_insn ("unknown insn mode", insn);
12081 }
12082 }
12083 }
12084 return len;
12085}
12086/* Compute default value for "length_address" attribute. */
12087int
12088ix86_attr_length_address_default (rtx insn)
12089{
12090 int i;
12091
12092 if (get_attr_type (insn) == TYPE_LEA)
12093 {
12094 rtx set = PATTERN (insn);
12095 if (GET_CODE (set) == SET)
12096 ;
12097 else if (GET_CODE (set) == PARALLEL
12098 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12099 set = XVECEXP (set, 0, 0);
12100 else
12101 {
12102#ifdef ENABLE_CHECKING
12103 abort ();
12104#endif
12105 return 0;
12106 }
12107
12108 return memory_address_length (SET_SRC (set));
12109 }
12110
12111 extract_insn_cached (insn);
12112 for (i = recog_data.n_operands - 1; i >= 0; --i)
12113 if (GET_CODE (recog_data.operand[i]) == MEM)
12114 {
12115 return memory_address_length (XEXP (recog_data.operand[i], 0));
12117 }
12118 return 0;
12119}
12120
12121/* Return the maximum number of instructions a cpu can issue. */
12122
12123static int
12124ix86_issue_rate (void)
12125{
12126 switch (ix86_tune)
12127 {
12128 case PROCESSOR_PENTIUM:
12129 case PROCESSOR_K6:
12130 return 2;
12131
12132 case PROCESSOR_PENTIUMPRO:
12133 case PROCESSOR_PENTIUM4:
12134 case PROCESSOR_ATHLON:
12135 case PROCESSOR_K8:
12136 return 3;
12137
12138 default:
12139 return 1;
12140 }
12141}
12142
12143/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12144    by DEP_INSN and nothing else set by DEP_INSN. */
12145
12146static int
12147ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12148{
12149 rtx set, set2;
12150
12151 /* Simplify the test for uninteresting insns. */
12152 if (insn_type != TYPE_SETCC
12153 && insn_type != TYPE_ICMOV
12154 && insn_type != TYPE_FCMOV
12155 && insn_type != TYPE_IBR)
12156 return 0;
12157
12158 if ((set = single_set (dep_insn)) != 0)
12159 {
12160 set = SET_DEST (set);
12161 set2 = NULL_RTX;
12162 }
12163 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12164 && XVECLEN (PATTERN (dep_insn), 0) == 2
12165 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12166 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12167 {
12168 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12169       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12170 }
12171 else
12172 return 0;
12173
12174 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12175 return 0;
12176
12177 /* This test is true if the dependent insn reads the flags but
12178 not any other potentially set register. */
12179 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12180 return 0;
12181
12182 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12183 return 0;
12184
12185 return 1;
12186}
12187
12188/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12189 address with operands set by DEP_INSN. */
12190
12191static int
12192ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12193{
12194 rtx addr;
12195
12196 if (insn_type == TYPE_LEA
12197 && TARGET_PENTIUM)
12198 {
12199 addr = PATTERN (insn);
12200 if (GET_CODE (addr) == SET)
12201 ;
12202 else if (GET_CODE (addr) == PARALLEL
12203 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12204 addr = XVECEXP (addr, 0, 0);
12205 else
12206 abort ();
12207 addr = SET_SRC (addr);
12208 }
12209 else
12210 {
12211 int i;
12212 extract_insn_cached (insn);
12213 for (i = recog_data.n_operands - 1; i >= 0; --i)
12214 if (GET_CODE (recog_data.operand[i]) == MEM)
12215 {
12216 addr = XEXP (recog_data.operand[i], 0);
12217 goto found;
12218 }
12219 return 0;
12220 found:;
12221 }
12222
12223 return modified_in_p (addr, dep_insn);
12224}
12225
12226static int
12227ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12228{
12229 enum attr_type insn_type, dep_insn_type;
12230 enum attr_memory memory, dep_memory;
12231 rtx set, set2;
12232 int dep_insn_code_number;
12233
12234 /* Anti and output dependencies have zero cost on all CPUs. */
12235 if (REG_NOTE_KIND (link) != 0)
12236 return 0;
12237
12238 dep_insn_code_number = recog_memoized (dep_insn);
12239
12240 /* If we can't recognize the insns, we can't really do anything. */
12241 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12242 return cost;
12243
12244 insn_type = get_attr_type (insn);
12245 dep_insn_type = get_attr_type (dep_insn);
12246
12247 switch (ix86_tune)
12248 {
12249 case PROCESSOR_PENTIUM:
12250 /* Address Generation Interlock adds a cycle of latency. */
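      /* Illustrative example (added): on the in-order Pentium the sequence
		addl	$4, %eax
		movl	(%eax), %ebx
	 pays one extra cycle because the load's address is produced by the
	 immediately preceding instruction.  */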
12251 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12252 cost += 1;
12253
12254 /* ??? Compares pair with jump/setcc. */
12255 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12256 cost = 0;
12257
12258 /* Floating point stores require value to be ready one cycle earlier. */
12259 if (insn_type == TYPE_FMOV
12260 && get_attr_memory (insn) == MEMORY_STORE
12261 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12262 cost += 1;
12263 break;
12264
12265 case PROCESSOR_PENTIUMPRO:
12266 memory = get_attr_memory (insn);
12267 dep_memory = get_attr_memory (dep_insn);
12268
12269 /* Since we can't represent delayed latencies of load+operation,
12270 increase the cost here for non-imov insns. */
12271 if (dep_insn_type != TYPE_IMOV
12272 && dep_insn_type != TYPE_FMOV
12273 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12274 cost += 1;
12275
12276 /* INT->FP conversion is expensive. */
12277 if (get_attr_fp_int_src (dep_insn))
12278 cost += 5;
12279
12280 /* There is one cycle extra latency between an FP op and a store. */
12281 if (insn_type == TYPE_FMOV
12282 && (set = single_set (dep_insn)) != NULL_RTX
12283 && (set2 = single_set (insn)) != NULL_RTX
12284 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12285 && GET_CODE (SET_DEST (set2)) == MEM)
12286 cost += 1;
12287
12288 /* Show ability of reorder buffer to hide latency of load by executing
12289 in parallel with previous instruction in case
12290 previous instruction is not needed to compute the address. */
12291 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12292 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12293 {
12294 	  /* Claim moves to take one cycle, as the core can issue one load
12295 	     at a time and the next load can start a cycle later. */
12296 if (dep_insn_type == TYPE_IMOV
12297 || dep_insn_type == TYPE_FMOV)
12298 cost = 1;
12299 else if (cost > 1)
12300 cost--;
12301 }
12302 break;
12303
12304 case PROCESSOR_K6:
12305 memory = get_attr_memory (insn);
12306 dep_memory = get_attr_memory (dep_insn);
12307 /* The esp dependency is resolved before the instruction is really
12308 finished. */
12309 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12310 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12311 return 1;
12312
12313 /* Since we can't represent delayed latencies of load+operation,
12314 increase the cost here for non-imov insns. */
12315 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12316 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12317
12318 /* INT->FP conversion is expensive. */
12319 if (get_attr_fp_int_src (dep_insn))
12320 cost += 5;
12321
12322 /* Show ability of reorder buffer to hide latency of load by executing
12323 in parallel with previous instruction in case
12324 previous instruction is not needed to compute the address. */
12325 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12326 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12327 {
12328 	  /* Claim moves to take one cycle, as the core can issue one load
12329 	     at a time and the next load can start a cycle later. */
12330 if (dep_insn_type == TYPE_IMOV
12331 || dep_insn_type == TYPE_FMOV)
12332 cost = 1;
12333 else if (cost > 2)
12334 cost -= 2;
12335 else
12336 cost = 1;
12337 }
12338 break;
12339
12340 case PROCESSOR_ATHLON:
12341 case PROCESSOR_K8:
12342 memory = get_attr_memory (insn);
12343 dep_memory = get_attr_memory (dep_insn);
12344
12345 /* Show ability of reorder buffer to hide latency of load by executing
12346 in parallel with previous instruction in case
12347 previous instruction is not needed to compute the address. */
12348 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12349 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12350 {
12351 enum attr_unit unit = get_attr_unit (insn);
12352 int loadcost = 3;
12353
12354 /* Because of the difference between the length of integer and
12355 floating unit pipeline preparation stages, the memory operands
12356 for floating point are cheaper.
12357
12358 	 ??? For Athlon the difference is most probably 2. */
12359 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12360 loadcost = 3;
12361 else
12362 loadcost = TARGET_ATHLON ? 2 : 0;
12363
12364 if (cost >= loadcost)
12365 cost -= loadcost;
12366 else
12367 cost = 0;
12368 }
12369
12370 default:
12371 break;
12372 }
12373
12374 return cost;
12375}
12376
12377static union
12378{
12379 struct ppro_sched_data
12380 {
12381 rtx decode[3];
12382 int issued_this_cycle;
12383 } ppro;
12384} ix86_sched_data;
12385
12386static enum attr_ppro_uops
12387ix86_safe_ppro_uops (rtx insn)
12388{
12389 if (recog_memoized (insn) >= 0)
12390 return get_attr_ppro_uops (insn);
12391 else
12392 return PPRO_UOPS_MANY;
12393}
12394
12395static void
12396ix86_dump_ppro_packet (FILE *dump)
12397{
12398 if (ix86_sched_data.ppro.decode[0])
12399 {
12400 fprintf (dump, "PPRO packet: %d",
12401 INSN_UID (ix86_sched_data.ppro.decode[0]));
12402 if (ix86_sched_data.ppro.decode[1])
12403 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12404 if (ix86_sched_data.ppro.decode[2])
12405 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12406 fputc ('\n', dump);
12407 }
12408}
12409
12410/* We're beginning a new block. Initialize data structures as necessary. */
12411
12412static void
12413ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12414 int sched_verbose ATTRIBUTE_UNUSED,
12415 int veclen ATTRIBUTE_UNUSED)
12416{
12417 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12418}
12419
12420/* Shift INSN to SLOT, and shift everything else down. */
12421
12422static void
12423ix86_reorder_insn (rtx *insnp, rtx *slot)
12424{
12425 if (insnp != slot)
12426 {
12427 rtx insn = *insnp;
12428 do
12429 insnp[0] = insnp[1];
12430 while (++insnp != slot);
12431 *insnp = insn;
12432 }
12433}
12434
12435static void
12436ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12437{
12438 rtx decode[3];
12439 enum attr_ppro_uops cur_uops;
12440 int issued_this_cycle;
12441 rtx *insnp;
12442 int i;
12443
12444 /* At this point .ppro.decode contains the state of the three
12445 decoders from last "cycle". That is, those insns that were
12446 actually independent. But here we're scheduling for the
12447 decoder, and we may find things that are decodable in the
12448 same cycle. */
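  /* Background for this heuristic (a simplification): the PentiumPro front
     end decodes up to three insns per cycle following a 4-1-1 template -- the
     first decoder handles insns of up to four uops, the other two accept only
     single-uop insns -- hence one multi-uop insn is slotted first and simple
     insns are used to fill the remaining slots.  */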
12449
12450 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12451 issued_this_cycle = 0;
12452
12453 insnp = e_ready;
12454 cur_uops = ix86_safe_ppro_uops (*insnp);
12455
12456 /* If the decoders are empty, and we've a complex insn at the
12457 head of the priority queue, let it issue without complaint. */
12458 if (decode[0] == NULL)
12459 {
12460 if (cur_uops == PPRO_UOPS_MANY)
12461 {
12462 decode[0] = *insnp;
12463 goto ppro_done;
12464 }
12465
12466 	  /* Otherwise, search for a 2-4 uop insn to issue. */
12467 while (cur_uops != PPRO_UOPS_FEW)
12468 {
12469 if (insnp == ready)
12470 break;
12471 cur_uops = ix86_safe_ppro_uops (*--insnp);
12472 }
12473
12474 /* If so, move it to the head of the line. */
12475 if (cur_uops == PPRO_UOPS_FEW)
12476 ix86_reorder_insn (insnp, e_ready);
12477
12478 /* Issue the head of the queue. */
12479 issued_this_cycle = 1;
12480 decode[0] = *e_ready--;
12481 }
12482
12483 /* Look for simple insns to fill in the other two slots. */
12484 for (i = 1; i < 3; ++i)
12485 if (decode[i] == NULL)
12486 {
12487 if (ready > e_ready)
12488 goto ppro_done;
12489
12490 insnp = e_ready;
12491 cur_uops = ix86_safe_ppro_uops (*insnp);
12492 while (cur_uops != PPRO_UOPS_ONE)
12493 {
12494 if (insnp == ready)
12495 break;
12496 cur_uops = ix86_safe_ppro_uops (*--insnp);
12497 }
12498
12499 /* Found one. Move it to the head of the queue and issue it. */
12500 if (cur_uops == PPRO_UOPS_ONE)
12501 {
12502 ix86_reorder_insn (insnp, e_ready);
12503 decode[i] = *e_ready--;
12504 issued_this_cycle++;
12505 continue;
12506 }
12507
12508 /* ??? Didn't find one. Ideally, here we would do a lazy split
12509 of 2-uop insns, issue one and queue the other. */
12510 }
12511
12512 ppro_done:
12513 if (issued_this_cycle == 0)
12514 issued_this_cycle = 1;
12515 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12516}
12517
12518 /* We are about to begin issuing insns for this clock cycle.
12519 Override the default sort algorithm to better slot instructions. */
12520static int
12521ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12522 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12523 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12524{
12525 int n_ready = *n_readyp;
12526 rtx *e_ready = ready + n_ready - 1;
12527
12528 /* Make sure to go ahead and initialize key items in
12529 ix86_sched_data if we are not going to bother trying to
12530 reorder the ready queue. */
12531 if (n_ready < 2)
12532 {
12533 ix86_sched_data.ppro.issued_this_cycle = 1;
12534 goto out;
12535 }
12536
12537 switch (ix86_tune)
12538 {
12539 default:
12540 break;
12541
12542 case PROCESSOR_PENTIUMPRO:
12543 ix86_sched_reorder_ppro (ready, e_ready);
12544 break;
12545 }
12546
12547out:
12548 return ix86_issue_rate ();
12549}
12550
12551/* We are about to issue INSN. Return the number of insns left on the
12552 ready queue that can be issued this cycle. */
12553
12554static int
12555ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12556 int can_issue_more)
12557{
12558 int i;
12559 switch (ix86_tune)
12560 {
12561 default:
12562 return can_issue_more - 1;
12563
12564 case PROCESSOR_PENTIUMPRO:
12565 {
12566 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12567
12568 if (uops == PPRO_UOPS_MANY)
12569 {
12570 if (sched_verbose)
12571 ix86_dump_ppro_packet (dump);
12572 ix86_sched_data.ppro.decode[0] = insn;
12573 ix86_sched_data.ppro.decode[1] = NULL;
12574 ix86_sched_data.ppro.decode[2] = NULL;
12575 if (sched_verbose)
12576 ix86_dump_ppro_packet (dump);
12577 ix86_sched_data.ppro.decode[0] = NULL;
12578 }
12579 else if (uops == PPRO_UOPS_FEW)
12580 {
12581 if (sched_verbose)
12582 ix86_dump_ppro_packet (dump);
12583 ix86_sched_data.ppro.decode[0] = insn;
12584 ix86_sched_data.ppro.decode[1] = NULL;
12585 ix86_sched_data.ppro.decode[2] = NULL;
12586 }
12587 else
12588 {
12589 for (i = 0; i < 3; ++i)
12590 if (ix86_sched_data.ppro.decode[i] == NULL)
12591 {
12592 ix86_sched_data.ppro.decode[i] = insn;
12593 break;
12594 }
12595 if (i == 3)
12596 abort ();
12597 if (i == 2)
12598 {
12599 if (sched_verbose)
12600 ix86_dump_ppro_packet (dump);
12601 ix86_sched_data.ppro.decode[0] = NULL;
12602 ix86_sched_data.ppro.decode[1] = NULL;
12603 ix86_sched_data.ppro.decode[2] = NULL;
12604 }
12605 }
12606 }
12607 return --ix86_sched_data.ppro.issued_this_cycle;
12608 }
12609}
12610
12611static int
12612ia32_use_dfa_pipeline_interface (void)
12613{
12614 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12615 return 1;
12616 return 0;
12617}
12618
12619/* How many alternative schedules to try. This should be as wide as the
12620    scheduling freedom in the DFA, but no wider.  Making this value too
12621    large results in extra work for the scheduler. */
12622
12623static int
12624ia32_multipass_dfa_lookahead (void)
12625{
12626 if (ix86_tune == PROCESSOR_PENTIUM)
12627 return 2;
12628 else
12629 return 0;
12630}
12631
12632
12633/* Compute the alignment given to a constant that is being placed in memory.
12634 EXP is the constant and ALIGN is the alignment that the object would
12635 ordinarily have.
12636 The value of this function is used instead of that alignment to align
12637 the object. */
12638
12639int
12640ix86_constant_alignment (tree exp, int align)
12641{
12642 if (TREE_CODE (exp) == REAL_CST)
12643 {
12644 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12645 return 64;
12646 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12647 return 128;
12648 }
12649 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12650 && !TARGET_NO_ALIGN_LONG_STRINGS
12651 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12652 return BITS_PER_WORD;
12653
12654 return align;
12655}
12656
12657/* Compute the alignment for a static variable.
12658 TYPE is the data type, and ALIGN is the alignment that
12659 the object would ordinarily have. The value of this function is used
12660 instead of that alignment to align the object. */
12661
12662int
12663ix86_data_alignment (tree type, int align)
12664{
12665 if (AGGREGATE_TYPE_P (type)
12666 && TYPE_SIZE (type)
12667 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12668 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12669 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12670 return 256;
12671
12672 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12673      to a 16-byte boundary. */
12674 if (TARGET_64BIT)
12675 {
12676 if (AGGREGATE_TYPE_P (type)
12677 && TYPE_SIZE (type)
12678 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12679 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12680 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12681 return 128;
12682 }
12683
12684 if (TREE_CODE (type) == ARRAY_TYPE)
12685 {
12686 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12687 return 64;
12688 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12689 return 128;
12690 }
12691 else if (TREE_CODE (type) == COMPLEX_TYPE)
12692 {
12693
12694 if (TYPE_MODE (type) == DCmode && align < 64)
12695 return 64;
12696 if (TYPE_MODE (type) == XCmode && align < 128)
12697 return 128;
12698 }
12699 else if ((TREE_CODE (type) == RECORD_TYPE
12700 || TREE_CODE (type) == UNION_TYPE
12701 || TREE_CODE (type) == QUAL_UNION_TYPE)
12702 && TYPE_FIELDS (type))
12703 {
12704 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12705 return 64;
12706 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12707 return 128;
12708 }
12709 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12710 || TREE_CODE (type) == INTEGER_TYPE)
12711 {
12712 if (TYPE_MODE (type) == DFmode && align < 64)
12713 return 64;
12714 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12715 return 128;
12716 }
12717
12718 return align;
12719}
12720
12721/* Compute the alignment for a local variable.
12722 TYPE is the data type, and ALIGN is the alignment that
12723 the object would ordinarily have. The value of this macro is used
12724 instead of that alignment to align the object. */
12725
12726int
12727ix86_local_alignment (tree type, int align)
12728{
12729 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12730      to a 16-byte boundary. */
12731 if (TARGET_64BIT)
12732 {
12733 if (AGGREGATE_TYPE_P (type)
12734 && TYPE_SIZE (type)
12735 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12736 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12737 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12738 return 128;
12739 }
12740 if (TREE_CODE (type) == ARRAY_TYPE)
12741 {
12742 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12743 return 64;
12744 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12745 return 128;
12746 }
12747 else if (TREE_CODE (type) == COMPLEX_TYPE)
12748 {
12749 if (TYPE_MODE (type) == DCmode && align < 64)
12750 return 64;
12751 if (TYPE_MODE (type) == XCmode && align < 128)
12752 return 128;
12753 }
12754 else if ((TREE_CODE (type) == RECORD_TYPE
12755 || TREE_CODE (type) == UNION_TYPE
12756 || TREE_CODE (type) == QUAL_UNION_TYPE)
12757 && TYPE_FIELDS (type))
12758 {
12759 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12760 return 64;
12761 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12762 return 128;
12763 }
12764 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12765 || TREE_CODE (type) == INTEGER_TYPE)
12766 {
12767
12768 if (TYPE_MODE (type) == DFmode && align < 64)
12769 return 64;
12770 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12771 return 128;
12772 }
12773 return align;
12774}
12775
12776/* Emit RTL insns to initialize the variable parts of a trampoline.
12777 FNADDR is an RTX for the address of the function's pure code.
12778 CXT is an RTX for the static chain value for the function. */
12779void
12780x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12781{
12782 if (!TARGET_64BIT)
12783 {
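      /* The 10 bytes written below are, in effect:
	     b9 <cxt>		movl	$cxt, %ecx
	     e9 <disp>		jmp	fnaddr
	 where <disp> is relative to the end of the jmp, i.e. tramp + 10.  */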
12784 /* Compute offset from the end of the jmp to the target function. */
12785 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12786 plus_constant (tramp, 10),
12787 NULL_RTX, 1, OPTAB_DIRECT);
12788 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12789 gen_int_mode (0xb9, QImode));
12790 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12791 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12792 gen_int_mode (0xe9, QImode));
12793 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12794 }
12795 else
12796 {
12797 int offset = 0;
12798       /* Try to load the address using the shorter movl instead of movabs.
12799          We may want to support movq for kernel mode, but the kernel does not
12800          use trampolines at the moment. */
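      /* Roughly, the bytes emitted below form (movabs variant shown):
	     49 bb <imm64>	movabs	$fnaddr, %r11
	     49 ba <imm64>	movabs	$cxt, %r10
	     49 ff e3		jmp	*%r11
	 with "41 bb <imm32>  movl $fnaddr, %r11d" replacing the first
	 instruction when the address fits in 32 bits.  */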
12801 if (x86_64_zero_extended_value (fnaddr))
12802 {
12803 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12804 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12805 gen_int_mode (0xbb41, HImode));
12806 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12807 gen_lowpart (SImode, fnaddr));
12808 offset += 6;
12809 }
12810 else
12811 {
12812 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12813 gen_int_mode (0xbb49, HImode));
12814 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12815 fnaddr);
12816 offset += 10;
12817 }
12818 /* Load static chain using movabs to r10. */
12819 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12820 gen_int_mode (0xba49, HImode));
12821 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12822 cxt);
12823 offset += 10;
12824       /* Jump to r11.  */
12825 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12826 gen_int_mode (0xff49, HImode));
12827 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12828 gen_int_mode (0xe3, QImode));
12829 offset += 3;
12830 if (offset > TRAMPOLINE_SIZE)
12831 abort ();
12832 }
12833
12834#ifdef ENABLE_EXECUTE_STACK
12835 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12836 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12837#endif
12838}
12839
12840#define def_builtin(MASK, NAME, TYPE, CODE) \
12841do { \
12842 if ((MASK) & target_flags \
12843 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12844 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12845 NULL, NULL_TREE); \
12846} while (0)
12847
12848struct builtin_description
12849{
12850 const unsigned int mask;
12851 const enum insn_code icode;
12852 const char *const name;
12853 const enum ix86_builtins code;
12854 const enum rtx_code comparison;
12855 const unsigned int flag;
12856};
12857
12858static const struct builtin_description bdesc_comi[] =
12859{
12860 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12861 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12862 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12863 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12864 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12865 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12866 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12867 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12868 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12869 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12870 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12871 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12872 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12873 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12874 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12875 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12876 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12877 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12878 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12879 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12880 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12881 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12882 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12883 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12884};
12885
12886static const struct builtin_description bdesc_2arg[] =
12887{
12888 /* SSE */
12889 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12890 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12891 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12892 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12893 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12894 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12895 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12896 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12897
12898 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12899 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12900 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12901 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12902 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12903 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12904 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12905 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12906 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12907 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12908 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12909 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12910 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12911 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12912 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12913 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12914 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12915 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12916 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12917 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12918
12919 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12920 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12921 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12922 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12923
12924 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12925 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12926 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12927 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12928
12929 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12930 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12931 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12932 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12933 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12934
12935 /* MMX */
12936 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12937 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12938 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12939 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12940 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12941 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12942 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12943 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12944
12945 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12946 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12947 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12948 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12949 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12950 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12951 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12952 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12953
12954 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12955 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12956 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12957
12958 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12959 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12960 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12961 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12962
12963 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12964 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12965
12966 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12967 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12968 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12969 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12970 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12971 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12972
12973 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12974 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12975 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12976 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12977
12978 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12979 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12980 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12981 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12982 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12983 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12984
12985 /* Special. */
12986 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12987 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12988 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12989
12990 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12991 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12992 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12993
12994 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12995 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12996 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12997 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12998 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12999 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
13000
13001 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
13002 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
13003 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
13004 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
13005 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
13006 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
13007
13008 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
13009 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
13010 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
13011 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
13012
13013 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
13014 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
13015
13016 /* SSE2 */
13017 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
13025
13026 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
13027 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
13028 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
13029 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
13030 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
13031 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
13032 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
13033 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
13034 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
13035 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
13036 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
13037 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
13038 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13039 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13040 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13041 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13042 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
13043 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
13044 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
13045 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
13046
13047 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13051
13052 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13056
13057 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13060
13061 /* SSE2 MMX */
13062 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13063 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13065 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13067 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13068 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13069 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13070
13071 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13072 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13073 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13074 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13075 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13076 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13077 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13078 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13079
13080 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13081 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13082 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13083 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13084
13085 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13086 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13087 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13088 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13089
13090 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13091 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13092
13093 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13094 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13095 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13096 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13097 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13098 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13099
13100 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13101 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13102 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13103 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13104
13105 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13106 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13107 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13109 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13111 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13112 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13113
13114 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13115 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13116 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13117
13118 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13119 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13120
13121 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13122 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13124 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13125 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13126 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13127
13128 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13129 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13130 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13131 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13132 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13133 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13134
13135 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13136 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13137 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13138 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13139
13140 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13141
13142 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13143 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13144 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13145 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13146
13147 /* SSE3 MMX */
13148 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13149 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13150 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13151 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13152 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13153 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13154};
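
/* In the comparison rows above, the rtx code column gives the comparison
   that is actually emitted, and a nonzero final column tells
   ix86_expand_sse_compare (below) to swap the two operands first: cmpps
   has no GT/GE predicates, so e.g. cmpgtps is generated as LT with the
   operands interchanged.  */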
13155
13156static const struct builtin_description bdesc_1arg[] =
13157{
13158 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13159 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13160
13161 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13162 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13163 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13164
13165 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13166 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13167 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13168 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13169 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13170 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13171
13172 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13173 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13174 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13175 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13176
13177 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13178
13179 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13180 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13181
13182 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13183 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13184 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13185 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13186 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13187
13188 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13189
13190 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13191 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13192 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13193 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13194
13195 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13196 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13197 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13198
13199 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13200
13201 /* SSE3 */
13202 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13203 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13204 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13205};
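
/* Single-operand counterparts of bdesc_2arg; at expansion time these are
   normally routed through ix86_expand_unop_builtin below.  The name fields
   are all zero because the user-visible names and types are registered by
   hand in ix86_init_mmx_sse_builtins.  */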
13206
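/* Set up all the target-specific builtin functions.  This is the routine
   installed as the TARGET_INIT_BUILTINS hook elsewhere in this file.  */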
13207void
13208ix86_init_builtins (void)
13209{
13210 if (TARGET_MMX)
13211 ix86_init_mmx_sse_builtins ();
13212}
13213
13214/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13215 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13216 builtins. */
13217static void
13218ix86_init_mmx_sse_builtins (void)
13219{
13220 const struct builtin_description * d;
13221 size_t i;
13222
13223 tree pchar_type_node = build_pointer_type (char_type_node);
13224 tree pcchar_type_node = build_pointer_type (
13225 build_type_variant (char_type_node, 1, 0));
13226 tree pfloat_type_node = build_pointer_type (float_type_node);
13227 tree pcfloat_type_node = build_pointer_type (
13228 build_type_variant (float_type_node, 1, 0));
13229 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13230 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13231 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13232
13233 /* Comparisons. */
13234 tree int_ftype_v4sf_v4sf
13235 = build_function_type_list (integer_type_node,
13236 V4SF_type_node, V4SF_type_node, NULL_TREE);
13237 tree v4si_ftype_v4sf_v4sf
13238 = build_function_type_list (V4SI_type_node,
13239 V4SF_type_node, V4SF_type_node, NULL_TREE);
13240 /* MMX/SSE/integer conversions. */
13241 tree int_ftype_v4sf
13242 = build_function_type_list (integer_type_node,
13243 V4SF_type_node, NULL_TREE);
13244 tree int64_ftype_v4sf
13245 = build_function_type_list (long_long_integer_type_node,
13246 V4SF_type_node, NULL_TREE);
13247 tree int_ftype_v8qi
13248 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13249 tree v4sf_ftype_v4sf_int
13250 = build_function_type_list (V4SF_type_node,
13251 V4SF_type_node, integer_type_node, NULL_TREE);
13252 tree v4sf_ftype_v4sf_int64
13253 = build_function_type_list (V4SF_type_node,
13254 V4SF_type_node, long_long_integer_type_node,
13255 NULL_TREE);
13256 tree v4sf_ftype_v4sf_v2si
13257 = build_function_type_list (V4SF_type_node,
13258 V4SF_type_node, V2SI_type_node, NULL_TREE);
13259 tree int_ftype_v4hi_int
13260 = build_function_type_list (integer_type_node,
13261 V4HI_type_node, integer_type_node, NULL_TREE);
13262 tree v4hi_ftype_v4hi_int_int
13263 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13264 integer_type_node, integer_type_node,
13265 NULL_TREE);
13266 /* Miscellaneous. */
13267 tree v8qi_ftype_v4hi_v4hi
13268 = build_function_type_list (V8QI_type_node,
13269 V4HI_type_node, V4HI_type_node, NULL_TREE);
13270 tree v4hi_ftype_v2si_v2si
13271 = build_function_type_list (V4HI_type_node,
13272 V2SI_type_node, V2SI_type_node, NULL_TREE);
13273 tree v4sf_ftype_v4sf_v4sf_int
13274 = build_function_type_list (V4SF_type_node,
13275 V4SF_type_node, V4SF_type_node,
13276 integer_type_node, NULL_TREE);
13277 tree v2si_ftype_v4hi_v4hi
13278 = build_function_type_list (V2SI_type_node,
13279 V4HI_type_node, V4HI_type_node, NULL_TREE);
13280 tree v4hi_ftype_v4hi_int
13281 = build_function_type_list (V4HI_type_node,
13282 V4HI_type_node, integer_type_node, NULL_TREE);
13283 tree v4hi_ftype_v4hi_di
13284 = build_function_type_list (V4HI_type_node,
13285 V4HI_type_node, long_long_unsigned_type_node,
13286 NULL_TREE);
13287 tree v2si_ftype_v2si_di
13288 = build_function_type_list (V2SI_type_node,
13289 V2SI_type_node, long_long_unsigned_type_node,
13290 NULL_TREE);
13291 tree void_ftype_void
13292 = build_function_type (void_type_node, void_list_node);
13293 tree void_ftype_unsigned
13294 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13295 tree void_ftype_unsigned_unsigned
13296 = build_function_type_list (void_type_node, unsigned_type_node,
13297 unsigned_type_node, NULL_TREE);
13298 tree void_ftype_pcvoid_unsigned_unsigned
13299 = build_function_type_list (void_type_node, const_ptr_type_node,
13300 unsigned_type_node, unsigned_type_node,
13301 NULL_TREE);
13302 tree unsigned_ftype_void
13303 = build_function_type (unsigned_type_node, void_list_node);
13304 tree di_ftype_void
13305 = build_function_type (long_long_unsigned_type_node, void_list_node);
13306 tree v4sf_ftype_void
13307 = build_function_type (V4SF_type_node, void_list_node);
13308 tree v2si_ftype_v4sf
13309 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13310 /* Loads/stores. */
13311 tree void_ftype_v8qi_v8qi_pchar
13312 = build_function_type_list (void_type_node,
13313 V8QI_type_node, V8QI_type_node,
13314 pchar_type_node, NULL_TREE);
13315 tree v4sf_ftype_pcfloat
13316 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13317 /* @@@ the type is bogus */
13318 tree v4sf_ftype_v4sf_pv2si
13319 = build_function_type_list (V4SF_type_node,
13320 V4SF_type_node, pv2si_type_node, NULL_TREE);
13321 tree void_ftype_pv2si_v4sf
13322 = build_function_type_list (void_type_node,
13323 pv2si_type_node, V4SF_type_node, NULL_TREE);
13324 tree void_ftype_pfloat_v4sf
13325 = build_function_type_list (void_type_node,
13326 pfloat_type_node, V4SF_type_node, NULL_TREE);
13327 tree void_ftype_pdi_di
13328 = build_function_type_list (void_type_node,
13329 pdi_type_node, long_long_unsigned_type_node,
13330 NULL_TREE);
13331 tree void_ftype_pv2di_v2di
13332 = build_function_type_list (void_type_node,
13333 pv2di_type_node, V2DI_type_node, NULL_TREE);
13334 /* Normal vector unops. */
13335 tree v4sf_ftype_v4sf
13336 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13337
13338 /* Normal vector binops. */
13339 tree v4sf_ftype_v4sf_v4sf
13340 = build_function_type_list (V4SF_type_node,
13341 V4SF_type_node, V4SF_type_node, NULL_TREE);
13342 tree v8qi_ftype_v8qi_v8qi
13343 = build_function_type_list (V8QI_type_node,
13344 V8QI_type_node, V8QI_type_node, NULL_TREE);
13345 tree v4hi_ftype_v4hi_v4hi
13346 = build_function_type_list (V4HI_type_node,
13347 V4HI_type_node, V4HI_type_node, NULL_TREE);
13348 tree v2si_ftype_v2si_v2si
13349 = build_function_type_list (V2SI_type_node,
13350 V2SI_type_node, V2SI_type_node, NULL_TREE);
13351 tree di_ftype_di_di
13352 = build_function_type_list (long_long_unsigned_type_node,
13353 long_long_unsigned_type_node,
13354 long_long_unsigned_type_node, NULL_TREE);
13355
13356 tree v2si_ftype_v2sf
13357 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13358 tree v2sf_ftype_v2si
13359 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13360 tree v2si_ftype_v2si
13361 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13362 tree v2sf_ftype_v2sf
13363 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13364 tree v2sf_ftype_v2sf_v2sf
13365 = build_function_type_list (V2SF_type_node,
13366 V2SF_type_node, V2SF_type_node, NULL_TREE);
13367 tree v2si_ftype_v2sf_v2sf
13368 = build_function_type_list (V2SI_type_node,
13369 V2SF_type_node, V2SF_type_node, NULL_TREE);
13370 tree pint_type_node = build_pointer_type (integer_type_node);
13371 tree pcint_type_node = build_pointer_type (
13372 build_type_variant (integer_type_node, 1, 0));
13373 tree pdouble_type_node = build_pointer_type (double_type_node);
13374 tree pcdouble_type_node = build_pointer_type (
13375 build_type_variant (double_type_node, 1, 0));
13376 tree int_ftype_v2df_v2df
13377 = build_function_type_list (integer_type_node,
13378 V2DF_type_node, V2DF_type_node, NULL_TREE);
13379
13380 tree ti_ftype_void
13381 = build_function_type (intTI_type_node, void_list_node);
13382 tree v2di_ftype_void
13383 = build_function_type (V2DI_type_node, void_list_node);
13384 tree ti_ftype_ti_ti
13385 = build_function_type_list (intTI_type_node,
13386 intTI_type_node, intTI_type_node, NULL_TREE);
13387 tree void_ftype_pcvoid
13388 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13389 tree v2di_ftype_di
13390 = build_function_type_list (V2DI_type_node,
13391 long_long_unsigned_type_node, NULL_TREE);
13392 tree di_ftype_v2di
13393 = build_function_type_list (long_long_unsigned_type_node,
13394 V2DI_type_node, NULL_TREE);
13395 tree v4sf_ftype_v4si
13396 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13397 tree v4si_ftype_v4sf
13398 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13399 tree v2df_ftype_v4si
13400 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13401 tree v4si_ftype_v2df
13402 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13403 tree v2si_ftype_v2df
13404 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13405 tree v4sf_ftype_v2df
13406 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13407 tree v2df_ftype_v2si
13408 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13409 tree v2df_ftype_v4sf
13410 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13411 tree int_ftype_v2df
13412 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13413 tree int64_ftype_v2df
13414 = build_function_type_list (long_long_integer_type_node,
13415 V2DF_type_node, NULL_TREE);
13416 tree v2df_ftype_v2df_int
13417 = build_function_type_list (V2DF_type_node,
13418 V2DF_type_node, integer_type_node, NULL_TREE);
13419 tree v2df_ftype_v2df_int64
13420 = build_function_type_list (V2DF_type_node,
13421 V2DF_type_node, long_long_integer_type_node,
13422 NULL_TREE);
13423 tree v4sf_ftype_v4sf_v2df
13424 = build_function_type_list (V4SF_type_node,
13425 V4SF_type_node, V2DF_type_node, NULL_TREE);
13426 tree v2df_ftype_v2df_v4sf
13427 = build_function_type_list (V2DF_type_node,
13428 V2DF_type_node, V4SF_type_node, NULL_TREE);
13429 tree v2df_ftype_v2df_v2df_int
13430 = build_function_type_list (V2DF_type_node,
13431 V2DF_type_node, V2DF_type_node,
13432 integer_type_node,
13433 NULL_TREE);
13434 tree v2df_ftype_v2df_pv2si
13435 = build_function_type_list (V2DF_type_node,
13436 V2DF_type_node, pv2si_type_node, NULL_TREE);
13437 tree void_ftype_pv2si_v2df
13438 = build_function_type_list (void_type_node,
13439 pv2si_type_node, V2DF_type_node, NULL_TREE);
13440 tree void_ftype_pdouble_v2df
13441 = build_function_type_list (void_type_node,
13442 pdouble_type_node, V2DF_type_node, NULL_TREE);
13443 tree void_ftype_pint_int
13444 = build_function_type_list (void_type_node,
13445 pint_type_node, integer_type_node, NULL_TREE);
13446 tree void_ftype_v16qi_v16qi_pchar
13447 = build_function_type_list (void_type_node,
13448 V16QI_type_node, V16QI_type_node,
13449 pchar_type_node, NULL_TREE);
13450 tree v2df_ftype_pcdouble
13451 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13452 tree v2df_ftype_v2df_v2df
13453 = build_function_type_list (V2DF_type_node,
13454 V2DF_type_node, V2DF_type_node, NULL_TREE);
13455 tree v16qi_ftype_v16qi_v16qi
13456 = build_function_type_list (V16QI_type_node,
13457 V16QI_type_node, V16QI_type_node, NULL_TREE);
13458 tree v8hi_ftype_v8hi_v8hi
13459 = build_function_type_list (V8HI_type_node,
13460 V8HI_type_node, V8HI_type_node, NULL_TREE);
13461 tree v4si_ftype_v4si_v4si
13462 = build_function_type_list (V4SI_type_node,
13463 V4SI_type_node, V4SI_type_node, NULL_TREE);
13464 tree v2di_ftype_v2di_v2di
13465 = build_function_type_list (V2DI_type_node,
13466 V2DI_type_node, V2DI_type_node, NULL_TREE);
13467 tree v2di_ftype_v2df_v2df
13468 = build_function_type_list (V2DI_type_node,
13469 V2DF_type_node, V2DF_type_node, NULL_TREE);
13470 tree v2df_ftype_v2df
13471 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13472 tree v2df_ftype_double
13473 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13474 tree v2df_ftype_double_double
13475 = build_function_type_list (V2DF_type_node,
13476 double_type_node, double_type_node, NULL_TREE);
13477 tree int_ftype_v8hi_int
13478 = build_function_type_list (integer_type_node,
13479 V8HI_type_node, integer_type_node, NULL_TREE);
13480 tree v8hi_ftype_v8hi_int_int
13481 = build_function_type_list (V8HI_type_node,
13482 V8HI_type_node, integer_type_node,
13483 integer_type_node, NULL_TREE);
13484 tree v2di_ftype_v2di_int
13485 = build_function_type_list (V2DI_type_node,
13486 V2DI_type_node, integer_type_node, NULL_TREE);
13487 tree v4si_ftype_v4si_int
13488 = build_function_type_list (V4SI_type_node,
13489 V4SI_type_node, integer_type_node, NULL_TREE);
13490 tree v8hi_ftype_v8hi_int
13491 = build_function_type_list (V8HI_type_node,
13492 V8HI_type_node, integer_type_node, NULL_TREE);
13493 tree v8hi_ftype_v8hi_v2di
13494 = build_function_type_list (V8HI_type_node,
13495 V8HI_type_node, V2DI_type_node, NULL_TREE);
13496 tree v4si_ftype_v4si_v2di
13497 = build_function_type_list (V4SI_type_node,
13498 V4SI_type_node, V2DI_type_node, NULL_TREE);
13499 tree v4si_ftype_v8hi_v8hi
13500 = build_function_type_list (V4SI_type_node,
13501 V8HI_type_node, V8HI_type_node, NULL_TREE);
13502 tree di_ftype_v8qi_v8qi
13503 = build_function_type_list (long_long_unsigned_type_node,
13504 V8QI_type_node, V8QI_type_node, NULL_TREE);
13505 tree v2di_ftype_v16qi_v16qi
13506 = build_function_type_list (V2DI_type_node,
13507 V16QI_type_node, V16QI_type_node, NULL_TREE);
13508 tree int_ftype_v16qi
13509 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13510 tree v16qi_ftype_pcchar
13511 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13512 tree void_ftype_pchar_v16qi
13513 = build_function_type_list (void_type_node,
13514 pchar_type_node, V16QI_type_node, NULL_TREE);
13515 tree v4si_ftype_pcint
13516 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13517 tree void_ftype_pcint_v4si
13518 = build_function_type_list (void_type_node,
13519 pcint_type_node, V4SI_type_node, NULL_TREE);
13520 tree v2di_ftype_v2di
13521 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13522
13523 tree float80_type;
13524 tree float128_type;
13525
13526 /* The __float80 type. */
13527 if (TYPE_MODE (long_double_type_node) == XFmode)
13528 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13529 "__float80");
13530 else
13531 {
13532 /* The __float80 type. */
13533 float80_type = make_node (REAL_TYPE);
13534 TYPE_PRECISION (float80_type) = 96;
13535 layout_type (float80_type);
13536 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13537 }
13538
13539 float128_type = make_node (REAL_TYPE);
13540 TYPE_PRECISION (float128_type) = 128;
13541 layout_type (float128_type);
13542 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13543
13544 /* Add all builtins that are more or less simple operations on two
13545 operands. */
13546 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13547 {
13548 /* Use one of the operands; the target can have a different mode for
13549 mask-generating compares. */
13550 enum machine_mode mode;
13551 tree type;
13552
13553 if (d->name == 0)
13554 continue;
13555 mode = insn_data[d->icode].operand[1].mode;
13556
13557 switch (mode)
13558 {
13559 case V16QImode:
13560 type = v16qi_ftype_v16qi_v16qi;
13561 break;
13562 case V8HImode:
13563 type = v8hi_ftype_v8hi_v8hi;
13564 break;
13565 case V4SImode:
13566 type = v4si_ftype_v4si_v4si;
13567 break;
13568 case V2DImode:
13569 type = v2di_ftype_v2di_v2di;
13570 break;
13571 case V2DFmode:
13572 type = v2df_ftype_v2df_v2df;
13573 break;
13574 case TImode:
13575 type = ti_ftype_ti_ti;
13576 break;
13577 case V4SFmode:
13578 type = v4sf_ftype_v4sf_v4sf;
13579 break;
13580 case V8QImode:
13581 type = v8qi_ftype_v8qi_v8qi;
13582 break;
13583 case V4HImode:
13584 type = v4hi_ftype_v4hi_v4hi;
13585 break;
13586 case V2SImode:
13587 type = v2si_ftype_v2si_v2si;
13588 break;
13589 case DImode:
13590 type = di_ftype_di_di;
13591 break;
13592
13593 default:
13594 abort ();
13595 }
13596
13597 /* Override for comparisons. */
13598 if (d->icode == CODE_FOR_maskcmpv4sf3
13599 || d->icode == CODE_FOR_maskncmpv4sf3
13600 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13601 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13602 type = v4si_ftype_v4sf_v4sf;
13603
13604 if (d->icode == CODE_FOR_maskcmpv2df3
13605 || d->icode == CODE_FOR_maskncmpv2df3
13606 || d->icode == CODE_FOR_vmmaskcmpv2df3
13607 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13608 type = v2di_ftype_v2df_v2df;
13609
13610 def_builtin (d->mask, d->name, type, d->code);
13611 }
13612
13613 /* Add the remaining MMX insns with somewhat more complicated types. */
13614 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13615 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13616 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13617 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13618 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13619
13620 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13621 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13622 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13623
13624 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13625 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13626
13627 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13628 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13629
13630 /* comi/ucomi insns. */
13631 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13632 if (d->mask == MASK_SSE2)
13633 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13634 else
13635 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13636
13637 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13638 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13639 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13640
13641 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13642 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13643 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13644 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13645 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13646 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13647 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13648 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13649 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13650 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13651 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13652
13653 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13654 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13655
13656 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13657
13658 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13659 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13660 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13661 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13662 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13663 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13664
13665 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13666 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13667 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13668 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13669
13670 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13671 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13672 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13673 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13674
13675 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13676
13677 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13678
13679 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13680 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13681 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13682 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13683 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13684 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13685
13686 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13687
13688 /* Original 3DNow! */
13689 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13690 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13691 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13692 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13693 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13694 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13695 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13696 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13697 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13698 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13699 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13700 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13701 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13702 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13703 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13704 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13705 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13706 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13707 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13708 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13709
13710 /* 3DNow! extension as used in the Athlon CPU. */
13711 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13712 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13713 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13714 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13715 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13716 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13717
13718 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13719
13720 /* SSE2 */
13721 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13722 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13723
13724 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13725 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13726 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13727
13728 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13729 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13730 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13731 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13732 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13733 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13734
13735 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13736 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13737 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13738 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13739
13740 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13741 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13742 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13743 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13744 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13745
13746 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13747 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13748 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13749 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13750
13751 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13752 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13753
13754 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13755
13756 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13757 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13758
13759 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13760 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13761 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13762 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13763 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13764
13765 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13766
13767 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13768 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13769 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13770 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13771
13772 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13773 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13774 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13775
13776 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13777 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13778 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13779 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13780
13781 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13782 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13783 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13784 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13785 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13786 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13787 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13788
13789 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13790 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13791 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13792
13793 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13794 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13795 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13796 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13797 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13798 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13799 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13800
13801 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13802
13803 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13804 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13805 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13806
13807 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13808 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13809 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13810
13811 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13812 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13813
13814 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13815 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13816 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13817 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13818
13819 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13820 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13821 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13822 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13823
13824 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13825 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13826
13827 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13828
13829 /* Prescott New Instructions. */
13830 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13831 void_ftype_pcvoid_unsigned_unsigned,
13832 IX86_BUILTIN_MONITOR);
13833 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13834 void_ftype_unsigned_unsigned,
13835 IX86_BUILTIN_MWAIT);
13836 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13837 v4sf_ftype_v4sf,
13838 IX86_BUILTIN_MOVSHDUP);
13839 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13840 v4sf_ftype_v4sf,
13841 IX86_BUILTIN_MOVSLDUP);
13842 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13843 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13844 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13845 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13846 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13847 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13848}
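
/* A rough sketch of how the tables and registrations above are consumed:
   an intrinsic such as _mm_add_ps in xmmintrin.h is defined in terms of
   __builtin_ia32_addps; when such a call is expanded, ix86_expand_builtin
   looks the function code up in bdesc_2arg (or bdesc_1arg, bdesc_comi) and
   hands the recorded insn code to one of the helper expanders below, e.g.
   ix86_expand_binop_builtin, which emits the single SSE instruction.  */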
13849
13850/* Errors in the source file can cause expand_expr to return const0_rtx
13851 where we expect a vector. To avoid crashing, use one of the vector
13852 clear instructions. */
13853static rtx
13854safe_vector_operand (rtx x, enum machine_mode mode)
13855{
13856 if (x != const0_rtx)
13857 return x;
13858 x = gen_reg_rtx (mode);
13859
13860 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13861 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13862 : gen_rtx_SUBREG (DImode, x, 0)));
13863 else
13864 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13865 : gen_rtx_SUBREG (V4SFmode, x, 0),
13866 CONST0_RTX (V4SFmode)));
13867 return x;
13868}
13869
13870/* Subroutine of ix86_expand_builtin to take care of binop insns. */
13871
13872static rtx
13873ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13874{
13875 rtx pat;
13876 tree arg0 = TREE_VALUE (arglist);
13877 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13878 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13879 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13880 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13881 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13882 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13883
13884 if (VECTOR_MODE_P (mode0))
13885 op0 = safe_vector_operand (op0, mode0);
13886 if (VECTOR_MODE_P (mode1))
13887 op1 = safe_vector_operand (op1, mode1);
13888
13889 if (! target
13890 || GET_MODE (target) != tmode
13891 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13892 target = gen_reg_rtx (tmode);
13893
13894 if (GET_MODE (op1) == SImode && mode1 == TImode)
13895 {
13896 rtx x = gen_reg_rtx (V4SImode);
13897 emit_insn (gen_sse2_loadd (x, op1));
13898 op1 = gen_lowpart (TImode, x);
13899 }
13900
13901 /* The insn expects its input operands in modes mode0 and mode1; if
13902 what we were handed has some other (non-VOID) mode, abort.  */
13903 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13904 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13905 abort ();
13906
13907 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13908 op0 = copy_to_mode_reg (mode0, op0);
13909 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13910 op1 = copy_to_mode_reg (mode1, op1);
13911
13912 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13913 yet one of the two must not be a memory. This is normally enforced
13914 by expanders, but we didn't bother to create one here. */
13915 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13916 op0 = copy_to_mode_reg (mode0, op0);
13917
13918 pat = GEN_FCN (icode) (target, op0, op1);
13919 if (! pat)
13920 return 0;
13921 emit_insn (pat);
13922 return target;
13923}
13924
13925/* Subroutine of ix86_expand_builtin to take care of stores. */
13926
13927static rtx
13928ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13929{
13930 rtx pat;
13931 tree arg0 = TREE_VALUE (arglist);
13932 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13933 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13934 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13935 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13936 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13937
13938 if (VECTOR_MODE_P (mode1))
13939 op1 = safe_vector_operand (op1, mode1);
13940
13941 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13942 op1 = copy_to_mode_reg (mode1, op1);
13943
13944 pat = GEN_FCN (icode) (op0, op1);
13945 if (pat)
13946 emit_insn (pat);
13947 return 0;
13948}
13949
13950/* Subroutine of ix86_expand_builtin to take care of unop insns. */
13951
13952static rtx
13953ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13954 rtx target, int do_load)
13955{
13956 rtx pat;
13957 tree arg0 = TREE_VALUE (arglist);
13958 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13959 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13960 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13961
13962 if (! target
13963 || GET_MODE (target) != tmode
13964 || (do_load && GET_CODE (target) == MEM)
13964 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13965 target = gen_reg_rtx (tmode);
13966 if (do_load)
13967 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13968 else
13969 {
13970 if (VECTOR_MODE_P (mode0))
13971 op0 = safe_vector_operand (op0, mode0);
13972
13973 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13974 op0 = copy_to_mode_reg (mode0, op0);
13975 }
13976
13977 pat = GEN_FCN (icode) (target, op0);
13978 if (! pat)
13979 return 0;
13980 emit_insn (pat);
13981 return target;
13982}
13983
13984/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13985 sqrtss, rsqrtss, rcpss. */
13986
13987static rtx
13988ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13989{
13990 rtx pat;
13991 tree arg0 = TREE_VALUE (arglist);
13992 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13993 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13994 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13995
13996 if (! target
13997 || GET_MODE (target) != tmode
13998 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13999 target = gen_reg_rtx (tmode);
14000
14001 if (VECTOR_MODE_P (mode0))
14002 op0 = safe_vector_operand (op0, mode0);
14003
14004 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14005 op0 = copy_to_mode_reg (mode0, op0);
14006
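  /* These patterns combine the scalar result with a second source vector;
     pass the input for both operands so the upper elements are copied from
     the source, matching the intrinsic semantics.  */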
14007 op1 = op0;
14008 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
14009 op1 = copy_to_mode_reg (mode0, op1);
14010
14011 pat = GEN_FCN (icode) (target, op0, op1);
14012 if (! pat)
14013 return 0;
14014 emit_insn (pat);
14015 return target;
14016}
14017
14018/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
14019
14020static rtx
14021ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
14022 rtx target)
14023{
14024 rtx pat;
14025 tree arg0 = TREE_VALUE (arglist);
14026 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14027 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14028 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14029 rtx op2;
14030 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
14031 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
14032 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
14033 enum rtx_code comparison = d->comparison;
14034
14035 if (VECTOR_MODE_P (mode0))
14036 op0 = safe_vector_operand (op0, mode0);
14037 if (VECTOR_MODE_P (mode1))
14038 op1 = safe_vector_operand (op1, mode1);
14039
14040 /* Swap operands if we have a comparison that isn't available in
14041 hardware. */
14042 if (d->flag)
14043 {
14044 rtx tmp = gen_reg_rtx (mode1);
14045 emit_move_insn (tmp, op1);
14046 op1 = op0;
14047 op0 = tmp;
14048 }
14049
14050 if (! target
14051 || GET_MODE (target) != tmode
14052 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14053 target = gen_reg_rtx (tmode);
14054
14055 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14056 op0 = copy_to_mode_reg (mode0, op0);
14057 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14058 op1 = copy_to_mode_reg (mode1, op1);
14059
14060 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14061 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14062 if (! pat)
14063 return 0;
14064 emit_insn (pat);
14065 return target;
14066}
14067
14068/* Subroutine of ix86_expand_builtin to take care of comi insns. */
14069
14070static rtx
14071ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14072 rtx target)
14073{
14074 rtx pat;
14075 tree arg0 = TREE_VALUE (arglist);
14076 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14077 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14078 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14079 rtx op2;
14080 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14081 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14082 enum rtx_code comparison = d->comparison;
14083
14084 if (VECTOR_MODE_P (mode0))
14085 op0 = safe_vector_operand (op0, mode0);
14086 if (VECTOR_MODE_P (mode1))
14087 op1 = safe_vector_operand (op1, mode1);
14088
14089 /* Swap operands if we have a comparison that isn't available in
14090 hardware. */
14091 if (d->flag)
14092 {
14093 rtx tmp = op1;
14094 op1 = op0;
14095 op0 = tmp;
14096 }
14097
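  /* The comi patterns only set the flags.  Build the integer result by
     clearing an SImode register and then setting its low byte from the
     requested comparison of the flags.  */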
14098 target = gen_reg_rtx (SImode);
14099 emit_move_insn (target, const0_rtx);
14100 target = gen_rtx_SUBREG (QImode, target, 0);
14101
14102 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14103 op0 = copy_to_mode_reg (mode0, op0);
14104 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14105 op1 = copy_to_mode_reg (mode1, op1);
14106
14107 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14108 pat = GEN_FCN (d->icode) (op0, op1);
14109 if (! pat)
14110 return 0;
14111 emit_insn (pat);
14112 emit_insn (gen_rtx_SET (VOIDmode,
14113 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14114 gen_rtx_fmt_ee (comparison, QImode,
14115 SET_DEST (pat),
14116 const0_rtx)));
14117
14118 return SUBREG_REG (target);
14119}
14120
14121/* Expand an expression EXP that calls a built-in function,
14122 with result going to TARGET if that's convenient
14123 (and in mode MODE if that's convenient).
14124 SUBTARGET may be used as the target for computing one of EXP's operands.
14125 IGNORE is nonzero if the value is to be ignored. */
14126
14127rtx
14128ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14129 enum machine_mode mode ATTRIBUTE_UNUSED,
14130 int ignore ATTRIBUTE_UNUSED)
14131{
14132 const struct builtin_description *d;
14133 size_t i;
14134 enum insn_code icode;
14135 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14136 tree arglist = TREE_OPERAND (exp, 1);
14137 tree arg0, arg1, arg2;
14138 rtx op0, op1, op2, pat;
14139 enum machine_mode tmode, mode0, mode1, mode2;
14140 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14141
14142 switch (fcode)
14143 {
14144 case IX86_BUILTIN_EMMS:
14145 emit_insn (gen_emms ());
14146 return 0;
14147
14148 case IX86_BUILTIN_SFENCE:
14149 emit_insn (gen_sfence ());
14150 return 0;
14151
14152 case IX86_BUILTIN_PEXTRW:
14153 case IX86_BUILTIN_PEXTRW128:
14154 icode = (fcode == IX86_BUILTIN_PEXTRW
14155 ? CODE_FOR_mmx_pextrw
14156 : CODE_FOR_sse2_pextrw);
14157 arg0 = TREE_VALUE (arglist);
14158 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14159 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14160 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14161 tmode = insn_data[icode].operand[0].mode;
14162 mode0 = insn_data[icode].operand[1].mode;
14163 mode1 = insn_data[icode].operand[2].mode;
14164
14165 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14166 op0 = copy_to_mode_reg (mode0, op0);
14167 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14168 {
14169 error ("selector must be an integer constant in the range 0..%i",
14170 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14171 return gen_reg_rtx (tmode);
14172 }
14173 if (target == 0
14174 || GET_MODE (target) != tmode
14175 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14176 target = gen_reg_rtx (tmode);
14177 pat = GEN_FCN (icode) (target, op0, op1);
14178 if (! pat)
14179 return 0;
14180 emit_insn (pat);
14181 return target;
14182
14183 case IX86_BUILTIN_PINSRW:
14184 case IX86_BUILTIN_PINSRW128:
14185 icode = (fcode == IX86_BUILTIN_PINSRW
14186 ? CODE_FOR_mmx_pinsrw
14187 : CODE_FOR_sse2_pinsrw);
14188 arg0 = TREE_VALUE (arglist);
14189 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14190 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14191 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14192 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14193 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14194 tmode = insn_data[icode].operand[0].mode;
14195 mode0 = insn_data[icode].operand[1].mode;
14196 mode1 = insn_data[icode].operand[2].mode;
14197 mode2 = insn_data[icode].operand[3].mode;
14198
14199 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14200 op0 = copy_to_mode_reg (mode0, op0);
14201 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14202 op1 = copy_to_mode_reg (mode1, op1);
14203 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14204 {
14205 error ("selector must be an integer constant in the range 0..%i",
14206 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14207 return const0_rtx;
14208 }
14209 if (target == 0
14210 || GET_MODE (target) != tmode
14211 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14212 target = gen_reg_rtx (tmode);
14213 pat = GEN_FCN (icode) (target, op0, op1, op2);
14214 if (! pat)
14215 return 0;
14216 emit_insn (pat);
14217 return target;
14218
14219 case IX86_BUILTIN_MASKMOVQ:
14220 case IX86_BUILTIN_MASKMOVDQU:
14221 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14222 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14223 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14224 : CODE_FOR_sse2_maskmovdqu));
14225 /* Note the arg order is different from the operand order. */
14226 arg1 = TREE_VALUE (arglist);
14227 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14228 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14229 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14230 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14231 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14232 mode0 = insn_data[icode].operand[0].mode;
14233 mode1 = insn_data[icode].operand[1].mode;
14234 mode2 = insn_data[icode].operand[2].mode;
14235
14236 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14237 op0 = copy_to_mode_reg (mode0, op0);
14238 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14239 op1 = copy_to_mode_reg (mode1, op1);
14240 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14241 op2 = copy_to_mode_reg (mode2, op2);
14242 pat = GEN_FCN (icode) (op0, op1, op2);
14243 if (! pat)
14244 return 0;
14245 emit_insn (pat);
14246 return 0;
14247
14248 case IX86_BUILTIN_SQRTSS:
14249 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14250 case IX86_BUILTIN_RSQRTSS:
14251 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14252 case IX86_BUILTIN_RCPSS:
14253 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14254
14255 case IX86_BUILTIN_LOADAPS:
14256 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14257
14258 case IX86_BUILTIN_LOADUPS:
14259 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14260
14261 case IX86_BUILTIN_STOREAPS:
14262 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14263
14264 case IX86_BUILTIN_STOREUPS:
14265 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14266
14267 case IX86_BUILTIN_LOADSS:
14268 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14269
14270 case IX86_BUILTIN_STORESS:
14271 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14272
14273 case IX86_BUILTIN_LOADHPS:
14274 case IX86_BUILTIN_LOADLPS:
14275 case IX86_BUILTIN_LOADHPD:
14276 case IX86_BUILTIN_LOADLPD:
14277 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14278 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14279 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14280 : CODE_FOR_sse2_movsd);
14281 arg0 = TREE_VALUE (arglist);
14282 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14283 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14284 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14285 tmode = insn_data[icode].operand[0].mode;
14286 mode0 = insn_data[icode].operand[1].mode;
14287 mode1 = insn_data[icode].operand[2].mode;
14288
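      /* Operand 1 is the vector to merge into; operand 2 is a pointer to
	 the memory half, so wrap it in a MEM of the mode the pattern
	 expects.  */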
14289 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14290 op0 = copy_to_mode_reg (mode0, op0);
14291 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14292 if (target == 0
14293 || GET_MODE (target) != tmode
14294 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14295 target = gen_reg_rtx (tmode);
14296 pat = GEN_FCN (icode) (target, op0, op1);
14297 if (! pat)
14298 return 0;
14299 emit_insn (pat);
14300 return target;
14301
14302 case IX86_BUILTIN_STOREHPS:
14303 case IX86_BUILTIN_STORELPS:
14304 case IX86_BUILTIN_STOREHPD:
14305 case IX86_BUILTIN_STORELPD:
14306 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14307 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14308 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14309 : CODE_FOR_sse2_movsd);
14310 arg0 = TREE_VALUE (arglist);
14311 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14312 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14313 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14314 mode0 = insn_data[icode].operand[1].mode;
14315 mode1 = insn_data[icode].operand[2].mode;
14316
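      /* Here operand 0 is a pointer to the destination; the store forms
	 reuse the same patterns with the MEM as both the destination and
	 the merge source.  */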
14317 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14318 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14319 op1 = copy_to_mode_reg (mode1, op1);
14320
14321 pat = GEN_FCN (icode) (op0, op0, op1);
14322 if (! pat)
14323 return 0;
14324 emit_insn (pat);
14325 return 0;
14326
14327 case IX86_BUILTIN_MOVNTPS:
14328 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14329 case IX86_BUILTIN_MOVNTQ:
14330 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14331
14332 case IX86_BUILTIN_LDMXCSR:
14333 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
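      /* ldmxcsr only takes a memory operand, so spill the value to a
	 stack slot first.  */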
14334 target = assign_386_stack_local (SImode, 0);
14335 emit_move_insn (target, op0);
14336 emit_insn (gen_ldmxcsr (target));
14337 return 0;
14338
14339 case IX86_BUILTIN_STMXCSR:
14340 target = assign_386_stack_local (SImode, 0);
14341 emit_insn (gen_stmxcsr (target));
14342 return copy_to_mode_reg (SImode, target);
14343
14344 case IX86_BUILTIN_SHUFPS:
14345 case IX86_BUILTIN_SHUFPD:
14346 icode = (fcode == IX86_BUILTIN_SHUFPS
14347 ? CODE_FOR_sse_shufps
14348 : CODE_FOR_sse2_shufpd);
14349 arg0 = TREE_VALUE (arglist);
14350 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14351 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14352 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14353 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14354 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14355 tmode = insn_data[icode].operand[0].mode;
14356 mode0 = insn_data[icode].operand[1].mode;
14357 mode1 = insn_data[icode].operand[2].mode;
14358 mode2 = insn_data[icode].operand[3].mode;
14359
14360 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14361 op0 = copy_to_mode_reg (mode0, op0);
14362 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14363 op1 = copy_to_mode_reg (mode1, op1);
14364 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14365 {
14366 /* @@@ better error message */
14367 error ("mask must be an immediate");
14368 return gen_reg_rtx (tmode);
14369 }
14370 if (target == 0
14371 || GET_MODE (target) != tmode
14372 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14373 target = gen_reg_rtx (tmode);
14374 pat = GEN_FCN (icode) (target, op0, op1, op2);
14375 if (! pat)
14376 return 0;
14377 emit_insn (pat);
14378 return target;
14379
14380 case IX86_BUILTIN_PSHUFW:
14381 case IX86_BUILTIN_PSHUFD:
14382 case IX86_BUILTIN_PSHUFHW:
14383 case IX86_BUILTIN_PSHUFLW:
14384 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14385 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14386 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14387 : CODE_FOR_mmx_pshufw);
14388 arg0 = TREE_VALUE (arglist);
14389 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14390 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14391 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14392 tmode = insn_data[icode].operand[0].mode;
14393 mode1 = insn_data[icode].operand[1].mode;
14394 mode2 = insn_data[icode].operand[2].mode;
14395
14396 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14397 op0 = copy_to_mode_reg (mode1, op0);
14398 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14399 {
14400 /* @@@ better error message */
14401 error ("mask must be an immediate");
14402 return const0_rtx;
14403 }
14404 if (target == 0
14405 || GET_MODE (target) != tmode
14406 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14407 target = gen_reg_rtx (tmode);
14408 pat = GEN_FCN (icode) (target, op0, op1);
14409 if (! pat)
14410 return 0;
14411 emit_insn (pat);
14412 return target;
14413
14414 case IX86_BUILTIN_PSLLDQI128:
14415 case IX86_BUILTIN_PSRLDQI128:
14416 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14417 : CODE_FOR_sse2_lshrti3);
14418 arg0 = TREE_VALUE (arglist);
14419 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14420 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14421 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14422 tmode = insn_data[icode].operand[0].mode;
14423 mode1 = insn_data[icode].operand[1].mode;
14424 mode2 = insn_data[icode].operand[2].mode;
14425
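      /* The TImode shift patterns want a register operand; force the
	 vector into a register and view it as TImode through a subreg.  */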
14426 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14427 {
14428 op0 = copy_to_reg (op0);
14429 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14430 }
14431 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14432 {
14433 error ("shift must be an immediate");
14434 return const0_rtx;
14435 }
14436 target = gen_reg_rtx (V2DImode);
14437 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14438 if (! pat)
14439 return 0;
14440 emit_insn (pat);
14441 return target;
14442
14443 case IX86_BUILTIN_FEMMS:
14444 emit_insn (gen_femms ());
14445 return NULL_RTX;
14446
14447 case IX86_BUILTIN_PAVGUSB:
14448 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14449
14450 case IX86_BUILTIN_PF2ID:
14451 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14452
14453 case IX86_BUILTIN_PFACC:
14454 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14455
14456 case IX86_BUILTIN_PFADD:
14457 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14458
14459 case IX86_BUILTIN_PFCMPEQ:
14460 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14461
14462 case IX86_BUILTIN_PFCMPGE:
14463 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14464
14465 case IX86_BUILTIN_PFCMPGT:
14466 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14467
14468 case IX86_BUILTIN_PFMAX:
14469 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14470
14471 case IX86_BUILTIN_PFMIN:
14472 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14473
14474 case IX86_BUILTIN_PFMUL:
14475 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14476
14477 case IX86_BUILTIN_PFRCP:
14478 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14479
14480 case IX86_BUILTIN_PFRCPIT1:
14481 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14482
14483 case IX86_BUILTIN_PFRCPIT2:
14484 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14485
14486 case IX86_BUILTIN_PFRSQIT1:
14487 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14488
14489 case IX86_BUILTIN_PFRSQRT:
14490 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14491
14492 case IX86_BUILTIN_PFSUB:
14493 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14494
14495 case IX86_BUILTIN_PFSUBR:
14496 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14497
14498 case IX86_BUILTIN_PI2FD:
14499 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14500
14501 case IX86_BUILTIN_PMULHRW:
14502 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14503
14504 case IX86_BUILTIN_PF2IW:
14505 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14506
14507 case IX86_BUILTIN_PFNACC:
14508 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14509
14510 case IX86_BUILTIN_PFPNACC:
14511 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14512
14513 case IX86_BUILTIN_PI2FW:
14514 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14515
14516 case IX86_BUILTIN_PSWAPDSI:
14517 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14518
14519 case IX86_BUILTIN_PSWAPDSF:
14520 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14521
14522 case IX86_BUILTIN_SSE_ZERO:
14523 target = gen_reg_rtx (V4SFmode);
14524 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14525 return target;
14526
14527 case IX86_BUILTIN_MMX_ZERO:
14528 target = gen_reg_rtx (DImode);
14529 emit_insn (gen_mmx_clrdi (target));
14530 return target;
14531
14532 case IX86_BUILTIN_CLRTI:
14533 target = gen_reg_rtx (V2DImode);
14534 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14535 return target;
14536
14537
14538 case IX86_BUILTIN_SQRTSD:
14539 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14540 case IX86_BUILTIN_LOADAPD:
14541 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14542 case IX86_BUILTIN_LOADUPD:
14543 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14544
14545 case IX86_BUILTIN_STOREAPD:
14546 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14547 case IX86_BUILTIN_STOREUPD:
14548 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14549
14550 case IX86_BUILTIN_LOADSD:
14551 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14552
14553 case IX86_BUILTIN_STORESD:
14554 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14555
14556 case IX86_BUILTIN_SETPD1:
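      /* Spill the scalar to the stack, load it into the low element and
	 then duplicate it into both elements with shufpd.  */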
14557 target = assign_386_stack_local (DFmode, 0);
14558 arg0 = TREE_VALUE (arglist);
14559 emit_move_insn (adjust_address (target, DFmode, 0),
14560 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14561 op0 = gen_reg_rtx (V2DFmode);
14562 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14563 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14564 return op0;
14565
14566 case IX86_BUILTIN_SETPD:
14567 target = assign_386_stack_local (V2DFmode, 0);
14568 arg0 = TREE_VALUE (arglist);
14569 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14570 emit_move_insn (adjust_address (target, DFmode, 0),
14571 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14572 emit_move_insn (adjust_address (target, DFmode, 8),
14573 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14574 op0 = gen_reg_rtx (V2DFmode);
14575 emit_insn (gen_sse2_movapd (op0, target));
14576 return op0;
14577
14578 case IX86_BUILTIN_LOADRPD:
14579 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14580 gen_reg_rtx (V2DFmode), 1);
14581 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14582 return target;
14583
14584 case IX86_BUILTIN_LOADPD1:
14585 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14586 gen_reg_rtx (V2DFmode), 1);
14587 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14588 return target;
14589
14590 case IX86_BUILTIN_STOREPD1:
14591 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14592 case IX86_BUILTIN_STORERPD:
14593 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14594
14595 case IX86_BUILTIN_CLRPD:
14596 target = gen_reg_rtx (V2DFmode);
14597 emit_insn (gen_sse_clrv2df (target));
14598 return target;
14599
14600 case IX86_BUILTIN_MFENCE:
14601 emit_insn (gen_sse2_mfence ());
14602 return 0;
14603 case IX86_BUILTIN_LFENCE:
14604 emit_insn (gen_sse2_lfence ());
14605 return 0;
14606
14607 case IX86_BUILTIN_CLFLUSH:
14608 arg0 = TREE_VALUE (arglist);
14609 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14610 icode = CODE_FOR_sse2_clflush;
14611 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14612 op0 = copy_to_mode_reg (Pmode, op0);
14613
14614 emit_insn (gen_sse2_clflush (op0));
14615 return 0;
14616
14617 case IX86_BUILTIN_MOVNTPD:
14618 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14619 case IX86_BUILTIN_MOVNTDQ:
14620 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14621 case IX86_BUILTIN_MOVNTI:
14622 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14623
14624 case IX86_BUILTIN_LOADDQA:
14625 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14626 case IX86_BUILTIN_LOADDQU:
14627 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14628 case IX86_BUILTIN_LOADD:
14629 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14630
14631 case IX86_BUILTIN_STOREDQA:
14632 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14633 case IX86_BUILTIN_STOREDQU:
14634 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14635 case IX86_BUILTIN_STORED:
14636 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14637
14638 case IX86_BUILTIN_MONITOR:
14639 arg0 = TREE_VALUE (arglist);
14640 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14641 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14642 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14643 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14644 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14645 if (!REG_P (op0))
14646 op0 = copy_to_mode_reg (SImode, op0);
14647 if (!REG_P (op1))
14648 op1 = copy_to_mode_reg (SImode, op1);
14649 if (!REG_P (op2))
14650 op2 = copy_to_mode_reg (SImode, op2);
14651 emit_insn (gen_monitor (op0, op1, op2));
14652 return 0;
14653
14654 case IX86_BUILTIN_MWAIT:
14655 arg0 = TREE_VALUE (arglist);
14656 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14657 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14658 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14659 if (!REG_P (op0))
14660 op0 = copy_to_mode_reg (SImode, op0);
14661 if (!REG_P (op1))
14662 op1 = copy_to_mode_reg (SImode, op1);
14663 emit_insn (gen_mwait (op0, op1));
14664 return 0;
14665
14666 case IX86_BUILTIN_LOADDDUP:
14667 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14668
14669 case IX86_BUILTIN_LDDQU:
14670 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, 1);
14672
14673 default:
14674 break;
14675 }
14676
14677 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14678 if (d->code == fcode)
14679 {
14680 /* Compares are treated specially. */
14681 if (d->icode == CODE_FOR_maskcmpv4sf3
14682 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14683 || d->icode == CODE_FOR_maskncmpv4sf3
14684 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14685 || d->icode == CODE_FOR_maskcmpv2df3
14686 || d->icode == CODE_FOR_vmmaskcmpv2df3
14687 || d->icode == CODE_FOR_maskncmpv2df3
14688 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14689 return ix86_expand_sse_compare (d, arglist, target);
14690
14691 return ix86_expand_binop_builtin (d->icode, arglist, target);
14692 }
14693
14694 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14695 if (d->code == fcode)
14696 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14697
14698 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14699 if (d->code == fcode)
14700 return ix86_expand_sse_comi (d, arglist, target);
14701
14702 /* @@@ Should really do something sensible here. */
14703 return 0;
14704}
14705
14706/* Store OPERAND to memory after reload is completed. This means
14707 that we can't easily use assign_stack_local. */
14708rtx
14709ix86_force_to_memory (enum machine_mode mode, rtx operand)
14710{
14711 rtx result;
14712 if (!reload_completed)
14713 abort ();
14714 if (TARGET_RED_ZONE)
14715 {
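      /* With a red zone we can store directly below the stack pointer
	 without adjusting it.  */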
14716 result = gen_rtx_MEM (mode,
14717 gen_rtx_PLUS (Pmode,
14718 stack_pointer_rtx,
14719 GEN_INT (-RED_ZONE_SIZE)));
14720 emit_move_insn (result, operand);
14721 }
14722 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14723 {
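      /* Without a red zone, push the value with a pre-decrement store;
	 narrower modes are pushed as DImode.  */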
14724 switch (mode)
14725 {
14726 case HImode:
14727 case SImode:
14728 operand = gen_lowpart (DImode, operand);
14729 /* FALLTHRU */
14730 case DImode:
14731 emit_insn (
14732 gen_rtx_SET (VOIDmode,
14733 gen_rtx_MEM (DImode,
14734 gen_rtx_PRE_DEC (DImode,
14735 stack_pointer_rtx)),
14736 operand));
14737 break;
14738 default:
14739 abort ();
14740 }
14741 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14742 }
14743 else
14744 {
14745 switch (mode)
14746 {
14747 case DImode:
14748 {
14749 rtx operands[2];
14750 split_di (&operand, 1, operands, operands + 1);
14751 emit_insn (
14752 gen_rtx_SET (VOIDmode,
14753 gen_rtx_MEM (SImode,
14754 gen_rtx_PRE_DEC (Pmode,
14755 stack_pointer_rtx)),
14756 operands[1]));
14757 emit_insn (
14758 gen_rtx_SET (VOIDmode,
14759 gen_rtx_MEM (SImode,
14760 gen_rtx_PRE_DEC (Pmode,
14761 stack_pointer_rtx)),
14762 operands[0]));
14763 }
14764 break;
14765 case HImode:
14766 /* It is better to store HImode values as SImode. */
14767 if (!TARGET_PARTIAL_REG_STALL)
14768 operand = gen_lowpart (SImode, operand);
14769 /* FALLTHRU */
14770 case SImode:
14771 emit_insn (
14772 gen_rtx_SET (VOIDmode,
14773 gen_rtx_MEM (GET_MODE (operand),
14774 gen_rtx_PRE_DEC (SImode,
14775 stack_pointer_rtx)),
14776 operand));
14777 break;
14778 default:
14779 abort ();
14780 }
14781 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14782 }
14783 return result;
14784}
14785
14786/* Free the operand from memory. */
14787void
14788ix86_free_from_memory (enum machine_mode mode)
14789{
14790 if (!TARGET_RED_ZONE)
14791 {
14792 int size;
14793
14794 if (mode == DImode || TARGET_64BIT)
14795 size = 8;
14796 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14797 size = 2;
14798 else
14799 size = 4;
14800 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14801 to a pop or add instruction if registers are available. */
14802 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14803 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14804 GEN_INT (size))));
14805 }
14806}
14807
14808/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14809 QImode must go into class Q_REGS.
14810 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14811 movdf to do mem-to-mem moves through integer regs. */
14812enum reg_class
14813ix86_preferred_reload_class (rtx x, enum reg_class class)
14814{
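  /* Non-zero vector constants cannot be loaded as immediates into any
     register class; returning NO_REGS makes reload use memory instead.  */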
14815 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14816 return NO_REGS;
14817 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14818 {
14819 /* SSE can't load any constant directly yet. */
14820 if (SSE_CLASS_P (class))
14821 return NO_REGS;
14822 /* Floats can load 0 and 1. */
14823 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14824 {
14825 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14826 if (MAYBE_SSE_CLASS_P (class))
14827 return (reg_class_subset_p (class, GENERAL_REGS)
14828 ? GENERAL_REGS : FLOAT_REGS);
14829 else
14830 return class;
14831 }
14832 /* General regs can load everything. */
14833 if (reg_class_subset_p (class, GENERAL_REGS))
14834 return GENERAL_REGS;
14835 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14836 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14837 return NO_REGS;
14838 }
14839 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14840 return NO_REGS;
14841 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14842 return Q_REGS;
14843 return class;
14844}
14845
14846/* If we are copying between general and FP registers, we need a memory
14847 location. The same is true for SSE and MMX registers.
14848
14849 The macro can't work reliably when one of the CLASSES is a class containing
14850 registers from multiple units (SSE, MMX, integer). We avoid this by never
14851 combining those units in a single alternative in the machine description.
14852 Ensure that this constraint holds to avoid unexpected surprises.
14853
14854 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14855 enforce these sanity checks. */
14856int
14857ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14858 enum machine_mode mode, int strict)
14859{
14860 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14861 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14862 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14863 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14864 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14865 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14866 {
14867 if (strict)
14868 abort ();
14869 else
14870 return 1;
14871 }
14872 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14873 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14874 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14875 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14876 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14877}
14878/* Return the cost of moving data from a register in class CLASS1 to
14879 one in class CLASS2.
14880
14881 It is not required that the cost always equal 2 when FROM is the same as TO;
14882 on some machines it is expensive to move between registers if they are not
14883 general registers. */
14884int
14885ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14886 enum reg_class class2)
14887{
14888 /* In case we require secondary memory, compute the cost of the store
14889 followed by the load. In order to avoid bad register allocation choices,
14890 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14891
14892 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14893 {
14894 int cost = 1;
14895
14896 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14897 MEMORY_MOVE_COST (mode, class1, 1));
14898 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14899 MEMORY_MOVE_COST (mode, class2, 1));
14900
14901 /* In case of copying from a general purpose register we may emit multiple
14902 stores followed by a single load, causing a memory size mismatch stall.
14903 Count this as an arbitrarily high cost of 20. */
14904 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14905 cost += 20;
14906
14907 /* In the case of FP/MMX moves, the registers actually overlap, and we
14908 have to switch modes in order to treat them differently. */
14909 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14910 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14911 cost += 20;
14912
14913 return cost;
14914 }
14915
14916 /* Moves between the SSE/MMX units and the integer unit are expensive. */
14917 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14918 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14919 return ix86_cost->mmxsse_to_integer;
14920 if (MAYBE_FLOAT_CLASS_P (class1))
14921 return ix86_cost->fp_move;
14922 if (MAYBE_SSE_CLASS_P (class1))
14923 return ix86_cost->sse_move;
14924 if (MAYBE_MMX_CLASS_P (class1))
14925 return ix86_cost->mmx_move;
14926 return 2;
14927}
14928
14929/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14930int
14931ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14932{
14933 /* Flags, and only flags, can hold CCmode values. */
14934 if (CC_REGNO_P (regno))
14935 return GET_MODE_CLASS (mode) == MODE_CC;
14936 if (GET_MODE_CLASS (mode) == MODE_CC
14937 || GET_MODE_CLASS (mode) == MODE_RANDOM
14938 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14939 return 0;
14940 if (FP_REGNO_P (regno))
14941 return VALID_FP_MODE_P (mode);
14942 if (SSE_REGNO_P (regno))
14943 {
14944 /* HACK! We didn't change all of the constraints for SSE1 for the
14945 scalar modes on the branch. Fortunately, they're not required
14946 for ABI compatibility. */
14947 if (!TARGET_SSE2 && !VECTOR_MODE_P (mode))
14948 return VALID_SSE_REG_MODE (mode);
14949
14950 /* We implement the move patterns for all vector modes into and
14951 out of SSE registers, even when no operation instructions
14952 are available. */
14953 return (VALID_SSE_REG_MODE (mode)
14954 || VALID_SSE2_REG_MODE (mode)
14955 || VALID_MMX_REG_MODE (mode)
14956 || VALID_MMX_REG_MODE_3DNOW (mode));
14957 }
14958 if (MMX_REGNO_P (regno))
14959 {
14960 /* We implement the move patterns for 3DNOW modes even in MMX mode,
14961 so if the register is available at all, then we can move data of
14962 the given mode into or out of it. */
14963 return (VALID_MMX_REG_MODE (mode)
14964 || VALID_MMX_REG_MODE_3DNOW (mode));
14965 }
14966 /* We handle both integers and floats in the general purpose registers.
14967 In the future we should be able to handle vector modes as well. */
14968 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14969 return 0;
14970 /* Take care with QImode values - they can be in non-QI regs, but then
14971 they do cause partial register stalls. */
14972 if (regno < 4 || mode != QImode || TARGET_64BIT)
14973 return 1;
14974 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14975}
14976
14977/* Return the cost of moving data of mode M between a
14978 register and memory. A value of 2 is the default; this cost is
14979 relative to those in `REGISTER_MOVE_COST'.
14980
14981 If moving between registers and memory is more expensive than
14982 between two registers, you should define this macro to express the
14983 relative cost.
14984
14985 Also model the increased cost of moving QImode registers in
14986 non-Q_REGS classes.
14987 */
14988int
14989ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14990{
14991 if (FLOAT_CLASS_P (class))
14992 {
14993 int index;
14994 switch (mode)
14995 {
14996 case SFmode:
14997 index = 0;
14998 break;
14999 case DFmode:
15000 index = 1;
15001 break;
15002 case XFmode:
15003 index = 2;
15004 break;
15005 default:
15006 return 100;
15007 }
15008 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
15009 }
15010 if (SSE_CLASS_P (class))
15011 {
15012 int index;
15013 switch (GET_MODE_SIZE (mode))
15014 {
15015 case 4:
15016 index = 0;
15017 break;
15018 case 8:
15019 index = 1;
15020 break;
15021 case 16:
15022 index = 2;
15023 break;
15024 default:
15025 return 100;
15026 }
15027 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
15028 }
15029 if (MMX_CLASS_P (class))
15030 {
15031 int index;
15032 switch (GET_MODE_SIZE (mode))
15033 {
15034 case 4:
15035 index = 0;
15036 break;
15037 case 8:
15038 index = 1;
15039 break;
15040 default:
15041 return 100;
15042 }
15043 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
15044 }
15045 switch (GET_MODE_SIZE (mode))
15046 {
15047 case 1:
15048 if (in)
15049 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15050 : ix86_cost->movzbl_load);
15051 else
15052 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15053 : ix86_cost->int_store[0] + 4);
15054 break;
15055 case 2:
15056 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15057 default:
15058 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
15059 if (mode == TFmode)
15060 mode = XFmode;
15061 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15062 * (((int) GET_MODE_SIZE (mode)
15063 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
15064 }
15065}
15066
15067/* Compute a (partial) cost for rtx X. Return true if the complete
15068 cost has been computed, and false if subexpressions should be
15069 scanned. In either case, *TOTAL contains the cost result. */
15070
15071static bool
15072ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15073{
15074 enum machine_mode mode = GET_MODE (x);
15075
15076 switch (code)
15077 {
15078 case CONST_INT:
15079 case CONST:
15080 case LABEL_REF:
15081 case SYMBOL_REF:
15082 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
15083 *total = 3;
15084 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
15085 *total = 2;
15086 else if (flag_pic && SYMBOLIC_CONST (x)
15087 && (!TARGET_64BIT
15088 || (GET_CODE (x) != LABEL_REF
15089 && (GET_CODE (x) != SYMBOL_REF
15090 || !SYMBOL_REF_LOCAL_P (x)))))
15091 *total = 1;
15092 else
15093 *total = 0;
15094 return true;
15095
15096 case CONST_DOUBLE:
15097 if (mode == VOIDmode)
15098 *total = 0;
15099 else
15100 switch (standard_80387_constant_p (x))
15101 {
15102 case 1: /* 0.0 */
15103 *total = 1;
15104 break;
15105 default: /* Other constants */
15106 *total = 2;
15107 break;
15108 case 0:
15109 case -1:
15110 /* Start with (MEM (SYMBOL_REF)), since that's where
15111 it'll probably end up. Add a penalty for size. */
15112 *total = (COSTS_N_INSNS (1)
15113 + (flag_pic != 0 && !TARGET_64BIT)
15114 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15115 break;
15116 }
15117 return true;
15118
15119 case ZERO_EXTEND:
15120 /* The zero extension is often completely free on x86_64, so make
15121 it as cheap as possible. */
15122 if (TARGET_64BIT && mode == DImode
15123 && GET_MODE (XEXP (x, 0)) == SImode)
15124 *total = 1;
15125 else if (TARGET_ZERO_EXTEND_WITH_AND)
15126 *total = COSTS_N_INSNS (ix86_cost->add);
15127 else
15128 *total = COSTS_N_INSNS (ix86_cost->movzx);
15129 return false;
15130
15131 case SIGN_EXTEND:
15132 *total = COSTS_N_INSNS (ix86_cost->movsx);
15133 return false;
15134
15135 case ASHIFT:
15136 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15137 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15138 {
15139 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
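	  /* A shift left by 1 is as cheap as an add, and shifts by 2 or 3
	     can be done with lea when lea is no more expensive than a
	     constant shift.  */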
15140 if (value == 1)
15141 {
15142 *total = COSTS_N_INSNS (ix86_cost->add);
15143 return false;
15144 }
15145 if ((value == 2 || value == 3)
15146 && !TARGET_DECOMPOSE_LEA
15147 && ix86_cost->lea <= ix86_cost->shift_const)
15148 {
15149 *total = COSTS_N_INSNS (ix86_cost->lea);
15150 return false;
15151 }
15152 }
15153 /* FALLTHRU */
15154
15155 case ROTATE:
15156 case ASHIFTRT:
15157 case LSHIFTRT:
15158 case ROTATERT:
15159 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15160 {
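	  /* Without native 64-bit shifts, a DImode shift is split into a
	     pair of SImode shifts plus fixup code, so scale the costs.  */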
15161 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15162 {
15163 if (INTVAL (XEXP (x, 1)) > 32)
15164 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15165 else
15166 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15167 }
15168 else
15169 {
15170 if (GET_CODE (XEXP (x, 1)) == AND)
15171 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15172 else
15173 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15174 }
15175 }
15176 else
15177 {
15178 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15179 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15180 else
15181 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15182 }
15183 return false;
15184
15185 case MULT:
15186 if (FLOAT_MODE_P (mode))
15187 *total = COSTS_N_INSNS (ix86_cost->fmul);
15188 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15189 {
15190 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15191 int nbits;
15192
15193 for (nbits = 0; value != 0; value >>= 1)
15194 nbits++;
15195
15196 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15197 + nbits * ix86_cost->mult_bit);
15198 }
15199 else
15200 {
15201 /* This is arbitrary. */
15202 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15203 + 7 * ix86_cost->mult_bit);
15204 }
15205 return false;
15206
15207 case DIV:
15208 case UDIV:
15209 case MOD:
15210 case UMOD:
15211 if (FLOAT_MODE_P (mode))
15212 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15213 else
15214 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15215 return false;
15216
15217 case PLUS:
15218 if (FLOAT_MODE_P (mode))
15219 *total = COSTS_N_INSNS (ix86_cost->fadd);
15220 else if (!TARGET_DECOMPOSE_LEA
15221 && GET_MODE_CLASS (mode) == MODE_INT
15222 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15223 {
15224 if (GET_CODE (XEXP (x, 0)) == PLUS
15225 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15226 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15227 && CONSTANT_P (XEXP (x, 1)))
15228 {
15229 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15230 if (val == 2 || val == 4 || val == 8)
15231 {
15232 *total = COSTS_N_INSNS (ix86_cost->lea);
15233 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15234 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15235 outer_code);
15236 *total += rtx_cost (XEXP (x, 1), outer_code);
15237 return true;
15238 }
15239 }
15240 else if (GET_CODE (XEXP (x, 0)) == MULT
15241 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15242 {
15243 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15244 if (val == 2 || val == 4 || val == 8)
15245 {
15246 *total = COSTS_N_INSNS (ix86_cost->lea);
15247 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15248 *total += rtx_cost (XEXP (x, 1), outer_code);
15249 return true;
15250 }
15251 }
15252 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15253 {
15254 *total = COSTS_N_INSNS (ix86_cost->lea);
15255 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15256 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15257 *total += rtx_cost (XEXP (x, 1), outer_code);
15258 return true;
15259 }
15260 }
15261 /* FALLTHRU */
15262
15263 case MINUS:
15264 if (FLOAT_MODE_P (mode))
15265 {
15266 *total = COSTS_N_INSNS (ix86_cost->fadd);
15267 return false;
15268 }
15269 /* FALLTHRU */
15270
15271 case AND:
15272 case IOR:
15273 case XOR:
15274 if (!TARGET_64BIT && mode == DImode)
15275 {
15276 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15277 + (rtx_cost (XEXP (x, 0), outer_code)
15278 << (GET_MODE (XEXP (x, 0)) != DImode))
15279 + (rtx_cost (XEXP (x, 1), outer_code)
15280 << (GET_MODE (XEXP (x, 1)) != DImode)));
15281 return true;
15282 }
15283 /* FALLTHRU */
15284
15285 case NEG:
15286 if (FLOAT_MODE_P (mode))
15287 {
15288 *total = COSTS_N_INSNS (ix86_cost->fchs);
15289 return false;
15290 }
15291 /* FALLTHRU */
15292
15293 case NOT:
15294 if (!TARGET_64BIT && mode == DImode)
15295 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15296 else
15297 *total = COSTS_N_INSNS (ix86_cost->add);
15298 return false;
15299
15300 case FLOAT_EXTEND:
15301 if (!TARGET_SSE_MATH
15302 || mode == XFmode
15303 || (mode == DFmode && !TARGET_SSE2))
15304 *total = 0;
15305 return false;
15306
15307 case ABS:
15308 if (FLOAT_MODE_P (mode))
15309 *total = COSTS_N_INSNS (ix86_cost->fabs);
15310 return false;
15311
15312 case SQRT:
15313 if (FLOAT_MODE_P (mode))
15314 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15315 return false;
15316
15317 case UNSPEC:
15318 if (XINT (x, 1) == UNSPEC_TP)
15319 *total = 0;
15320 return false;
15321
15322 default:
15323 return false;
15324 }
15325}
15326
15327#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15328static void
15329ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15330{
15331 init_section ();
15332 fputs ("\tpushl $", asm_out_file);
15333 assemble_name (asm_out_file, XSTR (symbol, 0));
15334 fputc ('\n', asm_out_file);
15335}
15336#endif
15337
15338#if TARGET_MACHO
15339
15340static int current_machopic_label_num;
15341
15342/* Given a symbol name and its associated stub, write out the
15343 definition of the stub. */
15344
15345void
15346machopic_output_stub (FILE *file, const char *symb, const char *stub)
15347{
15348 unsigned int length;
15349 char *binder_name, *symbol_name, lazy_ptr_name[32];
15350 int label = ++current_machopic_label_num;
15351
15352 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15353 symb = (*targetm.strip_name_encoding) (symb);
15354
15355 length = strlen (stub);
15356 binder_name = alloca (length + 32);
15357 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15358
15359 length = strlen (symb);
15360 symbol_name = alloca (length + 32);
15361 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15362
15363 sprintf (lazy_ptr_name, "L%d$lz", label);
15364
15365 if (MACHOPIC_PURE)
15366 machopic_picsymbol_stub_section ();
15367 else
15368 machopic_symbol_stub_section ();
15369
15370 fprintf (file, "%s:\n", stub);
15371 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15372
15373 if (MACHOPIC_PURE)
15374 {
15375 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15376 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15377 fprintf (file, "\tjmp %%edx\n");
15378 }
15379 else
15380 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15381
15382 fprintf (file, "%s:\n", binder_name);
15383
15384 if (MACHOPIC_PURE)
15385 {
15386 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15387 fprintf (file, "\tpushl %%eax\n");
15388 }
15389 else
15390 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15391
15392 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15393
15394 machopic_lazy_symbol_ptr_section ();
15395 fprintf (file, "%s:\n", lazy_ptr_name);
15396 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15397 fprintf (file, "\t.long %s\n", binder_name);
15398}
15399#endif /* TARGET_MACHO */
15400
15401/* Order the registers for register allocator. */
15402
15403void
15404x86_order_regs_for_local_alloc (void)
15405{
15406 int pos = 0;
15407 int i;
15408
15409 /* First allocate the local general purpose registers. */
15410 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15411 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15412 reg_alloc_order [pos++] = i;
15413
15414 /* Global general purpose registers. */
15415 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15416 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15417 reg_alloc_order [pos++] = i;
15418
15419 /* x87 registers come first in case we are doing FP math
15420 using them. */
15421 if (!TARGET_SSE_MATH)
15422 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15423 reg_alloc_order [pos++] = i;
15424
15425 /* SSE registers. */
15426 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15427 reg_alloc_order [pos++] = i;
15428 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15429 reg_alloc_order [pos++] = i;
15430
15431 /* x87 registers. */
15432 if (TARGET_SSE_MATH)
15433 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15434 reg_alloc_order [pos++] = i;
15435
15436 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15437 reg_alloc_order [pos++] = i;
15438
15439 /* Initialize the rest of the array, as we do not allocate some registers
15440 at all. */
15441 while (pos < FIRST_PSEUDO_REGISTER)
15442 reg_alloc_order [pos++] = 0;
15443}
15444
15445#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15446#define TARGET_USE_MS_BITFIELD_LAYOUT 0
15447#endif
15448
15449/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15450 struct attribute_spec.handler. */
15451static tree
15452ix86_handle_struct_attribute (tree *node, tree name,
15453 tree args ATTRIBUTE_UNUSED,
15454 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15455{
15456 tree *type = NULL;
15457 if (DECL_P (*node))
15458 {
15459 if (TREE_CODE (*node) == TYPE_DECL)
15460 type = &TREE_TYPE (*node);
15461 }
15462 else
15463 type = node;
15464
15465 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15466 || TREE_CODE (*type) == UNION_TYPE)))
15467 {
15468 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15469 *no_add_attrs = true;
15470 }
15471
15472 else if ((is_attribute_p ("ms_struct", name)
15473 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15474 || ((is_attribute_p ("gcc_struct", name)
15475 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15476 {
15477 warning ("`%s' incompatible attribute ignored",
15478 IDENTIFIER_POINTER (name));
15479 *no_add_attrs = true;
15480 }
15481
15482 return NULL_TREE;
15483}
15484
15485static bool
15486ix86_ms_bitfield_layout_p (tree record_type)
15487{
15488 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15489 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15490 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15491}
15492
15493/* Returns an expression indicating where the this parameter is
15494 located on entry to the FUNCTION. */
15495
15496static rtx
15497x86_this_parameter (tree function)
15498{
15499 tree type = TREE_TYPE (function);
15500
15501 if (TARGET_64BIT)
15502 {
15503 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15504 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15505 }
15506
15507 if (ix86_function_regparm (type, function) > 0)
15508 {
15509 tree parm;
15510
15511 parm = TYPE_ARG_TYPES (type);
15512 /* Figure out whether or not the function has a variable number of
15513 arguments. */
15514 for (; parm; parm = TREE_CHAIN (parm))
15515 if (TREE_VALUE (parm) == void_type_node)
15516 break;
15517 /* If not, the this parameter is in the first argument. */
15518 if (parm)
15519 {
15520 int regno = 0;
15521 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15522 regno = 2;
15523 return gen_rtx_REG (SImode, regno);
15524 }
15525 }
15526
15527 if (aggregate_value_p (TREE_TYPE (type), type))
15528 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15529 else
15530 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15531}
15532
15533/* Determine whether x86_output_mi_thunk can succeed. */
15534
15535static bool
15536x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15537 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15538 HOST_WIDE_INT vcall_offset, tree function)
15539{
15540 /* 64-bit can handle anything. */
15541 if (TARGET_64BIT)
15542 return true;
15543
15544 /* For 32-bit, everything's fine if we have one free register. */
15545 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15546 return true;
15547
15548 /* Need a free register for vcall_offset. */
15549 if (vcall_offset)
15550 return false;
15551
15552 /* Need a free register for GOT references. */
15553 if (flag_pic && !(*targetm.binds_local_p) (function))
15554 return false;
15555
15556 /* Otherwise ok. */
15557 return true;
15558}
15559
15560/* Output the assembler code for a thunk function. THUNK_DECL is the
15561 declaration for the thunk function itself, FUNCTION is the decl for
15562 the target function. DELTA is an immediate constant offset to be
15563 added to THIS. If VCALL_OFFSET is nonzero, the word at
15564 *(*this + vcall_offset) should be added to THIS. */
15565
15566static void
15567x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15568 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15569 HOST_WIDE_INT vcall_offset, tree function)
15570{
15571 rtx xops[3];
15572 rtx this = x86_this_parameter (function);
15573 rtx this_reg, tmp;
15574
15575 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15576 pull it in now and let DELTA benefit. */
15577 if (REG_P (this))
15578 this_reg = this;
15579 else if (vcall_offset)
15580 {
15581 /* Put the this parameter into %eax. */
15582 xops[0] = this;
15583 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15584 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15585 }
15586 else
15587 this_reg = NULL_RTX;
15588
15589 /* Adjust the this parameter by a fixed constant. */
15590 if (delta)
15591 {
15592 xops[0] = GEN_INT (delta);
15593 xops[1] = this_reg ? this_reg : this;
15594 if (TARGET_64BIT)
15595 {
15596 if (!x86_64_general_operand (xops[0], DImode))
15597 {
15598 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15599 xops[1] = tmp;
15600 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15601 xops[0] = tmp;
15602 xops[1] = this;
15603 }
15604 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15605 }
15606 else
15607 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15608 }
15609
15610 /* Adjust the this parameter by a value stored in the vtable. */
15611 if (vcall_offset)
15612 {
15613 if (TARGET_64BIT)
15614 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15615 else
15616 {
15617 int tmp_regno = 2 /* ECX */;
15618 if (lookup_attribute ("fastcall",
15619 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15620 tmp_regno = 0 /* EAX */;
15621 tmp = gen_rtx_REG (SImode, tmp_regno);
15622 }
15623
15624 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15625 xops[1] = tmp;
15626 if (TARGET_64BIT)
15627 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15628 else
15629 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15630
15631 /* Adjust the this parameter. */
15632 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15633 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15634 {
15635 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15636 xops[0] = GEN_INT (vcall_offset);
15637 xops[1] = tmp2;
15638 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15639 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15640 }
15641 xops[1] = this_reg;
15642 if (TARGET_64BIT)
15643 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15644 else
15645 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15646 }
15647
15648 /* If necessary, drop THIS back to its stack slot. */
15649 if (this_reg && this_reg != this)
15650 {
15651 xops[0] = this_reg;
15652 xops[1] = this;
15653 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15654 }
15655
15656 xops[0] = XEXP (DECL_RTL (function), 0);
15657 if (TARGET_64BIT)
15658 {
15659 if (!flag_pic || (*targetm.binds_local_p) (function))
15660 output_asm_insn ("jmp\t%P0", xops);
15661 else
15662 {
15663 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15664 tmp = gen_rtx_CONST (Pmode, tmp);
15665 tmp = gen_rtx_MEM (QImode, tmp);
15666 xops[0] = tmp;
15667 output_asm_insn ("jmp\t%A0", xops);
15668 }
15669 }
15670 else
15671 {
15672 if (!flag_pic || (*targetm.binds_local_p) (function))
15673 output_asm_insn ("jmp\t%P0", xops);
15674 else
15675#if TARGET_MACHO
15676 if (TARGET_MACHO)
15677 {
15678 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15679 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15680 tmp = gen_rtx_MEM (QImode, tmp);
15681 xops[0] = tmp;
15682 output_asm_insn ("jmp\t%0", xops);
15683 }
15684 else
15685#endif /* TARGET_MACHO */
15686 {
15687 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15688 output_set_got (tmp);
15689
15690 xops[1] = tmp;
15691 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15692 output_asm_insn ("jmp\t{*}%1", xops);
15693 }
15694 }
15695}
15696
15697static void
15698x86_file_start (void)
15699{
15700 default_file_start ();
15701 if (X86_FILE_START_VERSION_DIRECTIVE)
15702 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15703 if (X86_FILE_START_FLTUSED)
15704 fputs ("\t.global\t__fltused\n", asm_out_file);
15705 if (ix86_asm_dialect == ASM_INTEL)
15706 fputs ("\t.intel_syntax\n", asm_out_file);
15707}
15708
15709int
15710x86_field_alignment (tree field, int computed)
15711{
15712 enum machine_mode mode;
15713 tree type = TREE_TYPE (field);
15714
15715 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15716 return computed;
15717 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15718 ? get_inner_array_type (type) : type);
15719 if (mode == DFmode || mode == DCmode
15720 || GET_MODE_CLASS (mode) == MODE_INT
15721 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15722 return MIN (32, computed);
15723 return computed;
15724}
15725
15726/* Output assembler code to FILE to increment profiler label # LABELNO
15727 for profiling a function entry. */
15728void
15729x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15730{
15731 if (TARGET_64BIT)
15732 if (flag_pic)
15733 {
15734#ifndef NO_PROFILE_COUNTERS
15735 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15736#endif
15737 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15738 }
15739 else
15740 {
15741#ifndef NO_PROFILE_COUNTERS
15742 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15743#endif
15744 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15745 }
15746 else if (flag_pic)
15747 {
15748#ifndef NO_PROFILE_COUNTERS
15749 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15750 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15751#endif
15752 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15753 }
15754 else
15755 {
15756#ifndef NO_PROFILE_COUNTERS
15757 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15758 PROFILE_COUNT_REGISTER);
15759#endif
15760 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15761 }
15762}
15763
15764/* We don't have exact information about the insn sizes, but we may assume
15765 quite safely that we are informed about all 1 byte insns and memory
15766 address sizes. This is enough to eliminate unnecessary padding in
15767 99% of cases. */
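/* Roughly, the estimate below works out to: 0 for alignment unspecs and
   jump-table insns, 5 for a direct (symbolic, non-sibling) call, 1 for
   insns already known to be one byte, and otherwise 2, or 1 plus the
   address length (forced to at least 4 when a symbol is mentioned).  */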
15768
15769static int
15770min_insn_size (rtx insn)
15771{
15772 int l = 0;
15773
15774 if (!INSN_P (insn) || !active_insn_p (insn))
15775 return 0;
15776
15777   /* Discard alignments we've emitted, and jump-table instructions.  */
15778 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15779 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15780 return 0;
15781 if (GET_CODE (insn) == JUMP_INSN
15782 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15783 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15784 return 0;
15785
15786   /* Important case - calls are always 5 bytes.
15787      It is common to have many calls in a row.  */
15788 if (GET_CODE (insn) == CALL_INSN
15789 && symbolic_reference_mentioned_p (PATTERN (insn))
15790 && !SIBLING_CALL_P (insn))
15791 return 5;
15792 if (get_attr_length (insn) <= 1)
15793 return 1;
15794
15795   /* For normal instructions we may rely on the sizes of addresses
15796      and the presence of a symbol to require 4 bytes of encoding.
15797      This is not the case for jumps, where references are PC relative.  */
15798 if (GET_CODE (insn) != JUMP_INSN)
15799 {
15800 l = get_attr_length_address (insn);
15801 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15802 l = 4;
15803 }
15804 if (l)
15805 return 1+l;
15806 else
15807 return 2;
15808}
15809
15810 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
15811    16-byte window.  */
15812
15813static void
15814k8_avoid_jump_misspredicts (void)
15815{
15816 rtx insn, start = get_insns ();
15817 int nbytes = 0, njumps = 0;
15818 int isjump = 0;
15819
15820   /* Look for all minimal intervals of instructions containing 4 jumps.
15821      The intervals are bounded by START and INSN.  NBYTES is the total
15822      size of the instructions in the interval, including INSN and not
15823      including START.  When NBYTES is smaller than 16, it is possible
15824      that the end of START and the end of INSN land in the same 16-byte window.
15825 
15826      The smallest offset in the window at which INSN can start is the case
15827      where START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
15828      We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
15829      */
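  /* A worked example with illustrative numbers: if the three previous
     jumps plus INSN add up to NBYTES = 10 and INSN itself is 2 bytes,
     the padding emitted in front of INSN below is 15 - 10 + 2 = 7 bytes,
     matching the padsize computation in the loop.  */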
15830 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15831 {
15832
15833 nbytes += min_insn_size (insn);
15834 if (rtl_dump_file)
15835 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15836 INSN_UID (insn), min_insn_size (insn));
15837 if ((GET_CODE (insn) == JUMP_INSN
15838 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15839 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15840 || GET_CODE (insn) == CALL_INSN)
15841 njumps++;
15842 else
15843 continue;
15844
15845 while (njumps > 3)
15846 {
15847 start = NEXT_INSN (start);
15848 if ((GET_CODE (start) == JUMP_INSN
15849 && GET_CODE (PATTERN (start)) != ADDR_VEC
15850 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15851 || GET_CODE (start) == CALL_INSN)
15852 njumps--, isjump = 1;
15853 else
15854 isjump = 0;
15855 nbytes -= min_insn_size (start);
15856 }
15857 if (njumps < 0)
15858 abort ();
15859 if (rtl_dump_file)
15860 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15861 INSN_UID (start), INSN_UID (insn), nbytes);
15862
15863 if (njumps == 3 && isjump && nbytes < 16)
15864 {
15865 int padsize = 15 - nbytes + min_insn_size (insn);
15866
15867 if (rtl_dump_file)
15868 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15869 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15870 }
15871 }
15872}
15873
15874 /* Implement machine specific optimizations.
15875    At the moment we implement a single transformation:  AMD Athlon works faster
15876    when RET is not the destination of a conditional jump or directly preceded
15877    by another jump instruction.  We avoid the penalty by inserting a NOP just
15878    before the RET instruction in such cases.  */
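/* Sketch of a case this catches (illustrative): when the insn directly
   before a hot "ret" is a conditional jump or a call, the return is
   replaced by the longer return_internal_long pattern below, so the
   penalty described above is avoided.  */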
15879static void
15880ix86_reorg (void)
15881{
15882 edge e;
15883
15884 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15885 return;
15886 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15887 {
15888 basic_block bb = e->src;
15889 rtx ret = BB_END (bb);
15890 rtx prev;
15891 bool replace = false;
15892
15893 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15894 || !maybe_hot_bb_p (bb))
15895 continue;
15896 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15897 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15898 break;
15899 if (prev && GET_CODE (prev) == CODE_LABEL)
15900 {
15901 edge e;
15902 for (e = bb->pred; e; e = e->pred_next)
15903 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15904 && !(e->flags & EDGE_FALLTHRU))
15905 replace = true;
15906 }
15907 if (!replace)
15908 {
15909 prev = prev_active_insn (ret);
15910 if (prev
15911 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15912 || GET_CODE (prev) == CALL_INSN))
15913 replace = true;
15914 	  /* Empty functions get a branch mispredict even when the jump destination
15915 	     is not visible to us.  */
15916 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15917 replace = true;
15918 }
15919 if (replace)
15920 {
15921 emit_insn_before (gen_return_internal_long (), ret);
15922 delete_insn (ret);
15923 }
15924 }
15925 k8_avoid_jump_misspredicts ();
15926}
15927
15928 /* Return nonzero when a QImode register that must be represented via a REX
15929    prefix is used.  */
15930bool
15931x86_extended_QIreg_mentioned_p (rtx insn)
15932{
15933 int i;
15934 extract_insn_cached (insn);
15935 for (i = 0; i < recog_data.n_operands; i++)
15936 if (REG_P (recog_data.operand[i])
15937 && REGNO (recog_data.operand[i]) >= 4)
15938 return true;
15939 return false;
15940}
15941
15942 /* Return nonzero when P points to a register encoded via a REX prefix.
15943    Called via for_each_rtx.  */
15944static int
15945extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15946{
15947 unsigned int regno;
15948 if (!REG_P (*p))
15949 return 0;
15950 regno = REGNO (*p);
15951 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15952}
15953
15954 /* Return true when INSN mentions a register that must be encoded using a
15955    REX prefix.  */
15956bool
15957x86_extended_reg_mentioned_p (rtx insn)
15958{
15959 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15960}
15961
15962/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15963 optabs would emit if we didn't have TFmode patterns. */
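/* In outline, the expansion below is (sketch): non-negative inputs are
   converted directly; for inputs with the sign bit set we compute
   i0 = (in >> 1) | (in & 1), convert that, and double the result, i.e.
   out = 2.0 * (FP) ((in >> 1) | (in & 1)), where the OR with the low bit
   keeps the rounding correct.  */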
15964
15965void
15966x86_emit_floatuns (rtx operands[2])
15967{
15968 rtx neglab, donelab, i0, i1, f0, in, out;
15969 enum machine_mode mode, inmode;
15970
15971 inmode = GET_MODE (operands[1]);
15972 if (inmode != SImode
15973 && inmode != DImode)
15974 abort ();
15975
15976 out = operands[0];
15977 in = force_reg (inmode, operands[1]);
15978 mode = GET_MODE (out);
15979 neglab = gen_label_rtx ();
15980 donelab = gen_label_rtx ();
15981 i1 = gen_reg_rtx (Pmode);
15982 f0 = gen_reg_rtx (mode);
15983
15984 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15985
15986 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15987 emit_jump_insn (gen_jump (donelab));
15988 emit_barrier ();
15989
15990 emit_label (neglab);
15991
15992 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15993 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15994 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15995 expand_float (f0, i0, 0);
15996 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15997
15998 emit_label (donelab);
15999}
16000
16001 /* Return true if we do not know how to pass TYPE solely in registers.  */
16002bool
16003ix86_must_pass_in_stack (enum machine_mode mode, tree type)
16004{
16005 if (default_must_pass_in_stack (mode, type))
16006 return true;
16007 return (!TARGET_64BIT && type && mode == TImode);
16008}
16009
16010/* Initialize vector TARGET via VALS. */
16011void
16012ix86_expand_vector_init (rtx target, rtx vals)
16013{
16014 enum machine_mode mode = GET_MODE (target);
16015 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16016 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
16017 int i;
16018
16019 for (i = n_elts - 1; i >= 0; i--)
16020 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
16021 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
16022 break;
16023
16024   /* A few special cases first...
16025      ... constants are best loaded from the constant pool.  */
16026 if (i < 0)
16027 {
16028 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16029 return;
16030 }
16031
16032   /* ... values where only the first field is non-constant are best loaded
16033      from the pool and overwritten via a move later.  */
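  /* For example (illustrative): to build the V2DF value { x, 0.0 } we
     load { 0.0, 0.0 } from the pool and then overwrite element 0 with x
     via the movsd emitted below.  */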
16034 if (!i)
16035 {
16036 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
16037 GET_MODE_INNER (mode), 0);
16038
16039 op = force_reg (mode, op);
16040 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
16041 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16042 switch (GET_MODE (target))
16043 {
16044 case V2DFmode:
16045 emit_insn (gen_sse2_movsd (target, target, op));
16046 break;
16047 case V4SFmode:
16048 emit_insn (gen_sse_movss (target, target, op));
16049 break;
16050 default:
16051 break;
16052 }
16053 return;
16054 }
16055
16056   /* Otherwise the general sequence, built from interleave (unpack) insns.  */
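  /* Illustration for the V4SF case: to build { a, b, c, d } the code
     below forms tmp1 = unpcklps (b, d) and tmp2 = unpcklps (a, c), whose
     low halves are { b, d } and { a, c }, and then unpcklps (tmp2, tmp1)
     interleaves them into { a, b, c, d }.  */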
16057 switch (GET_MODE (target))
16058 {
16059 case V2DFmode:
16060 {
16061 rtx vecop0 =
16062 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
16063 rtx vecop1 =
16064 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
16065
16066 vecop0 = force_reg (V2DFmode, vecop0);
16067 vecop1 = force_reg (V2DFmode, vecop1);
16068 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
16069 }
16070 break;
16071 case V4SFmode:
16072 {
16073 rtx vecop0 =
16074 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
16075 rtx vecop1 =
16076 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
16077 rtx vecop2 =
16078 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
16079 rtx vecop3 =
16080 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
16081 rtx tmp1 = gen_reg_rtx (V4SFmode);
16082 rtx tmp2 = gen_reg_rtx (V4SFmode);
16083
16084 vecop0 = force_reg (V4SFmode, vecop0);
16085 vecop1 = force_reg (V4SFmode, vecop1);
16086 vecop2 = force_reg (V4SFmode, vecop2);
16087 vecop3 = force_reg (V4SFmode, vecop3);
16088 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
16089 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
16090 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
16091 }
16092 break;
16093 default:
16094 abort ();
16095 }
16096}
16097
16098#include "gt-i386.h"
13965 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13966 target = gen_reg_rtx (tmode);
13967 if (do_load)
13968 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13969 else
13970 {
13971 if (VECTOR_MODE_P (mode0))
13972 op0 = safe_vector_operand (op0, mode0);
13973
13974 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13975 op0 = copy_to_mode_reg (mode0, op0);
13976 }
13977
13978 pat = GEN_FCN (icode) (target, op0);
13979 if (! pat)
13980 return 0;
13981 emit_insn (pat);
13982 return target;
13983}
13984
13985/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13986 sqrtss, rsqrtss, rcpss. */
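/* These scalar patterns take two vector operands (the upper elements of
   the result come from the first one), so the single builtin argument is
   passed twice below, as both op0 and op1.  */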
13987
13988static rtx
13989ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13990{
13991 rtx pat;
13992 tree arg0 = TREE_VALUE (arglist);
13993 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13994 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13995 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13996
13997 if (! target
13998 || GET_MODE (target) != tmode
13999 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14000 target = gen_reg_rtx (tmode);
14001
14002 if (VECTOR_MODE_P (mode0))
14003 op0 = safe_vector_operand (op0, mode0);
14004
14005 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14006 op0 = copy_to_mode_reg (mode0, op0);
14007
14008 op1 = op0;
14009 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
14010 op1 = copy_to_mode_reg (mode0, op1);
14011
14012 pat = GEN_FCN (icode) (target, op0, op1);
14013 if (! pat)
14014 return 0;
14015 emit_insn (pat);
14016 return target;
14017}
14018
14019/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
14020
14021static rtx
14022ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
14023 rtx target)
14024{
14025 rtx pat;
14026 tree arg0 = TREE_VALUE (arglist);
14027 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14028 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14029 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14030 rtx op2;
14031 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
14032 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
14033 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
14034 enum rtx_code comparison = d->comparison;
14035
14036 if (VECTOR_MODE_P (mode0))
14037 op0 = safe_vector_operand (op0, mode0);
14038 if (VECTOR_MODE_P (mode1))
14039 op1 = safe_vector_operand (op1, mode1);
14040
14041 /* Swap operands if we have a comparison that isn't available in
14042 hardware. */
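  /* For instance (illustrative), a "greater than" builtin is expanded here
     with the available "less than" pattern by swapping its two inputs.  */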
14043 if (d->flag)
14044 {
14045 rtx tmp = gen_reg_rtx (mode1);
14046 emit_move_insn (tmp, op1);
14047 op1 = op0;
14048 op0 = tmp;
14049 }
14050
14051 if (! target
14052 || GET_MODE (target) != tmode
14053 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14054 target = gen_reg_rtx (tmode);
14055
14056 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14057 op0 = copy_to_mode_reg (mode0, op0);
14058 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14059 op1 = copy_to_mode_reg (mode1, op1);
14060
14061 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14062 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14063 if (! pat)
14064 return 0;
14065 emit_insn (pat);
14066 return target;
14067}
14068
14069/* Subroutine of ix86_expand_builtin to take care of comi insns. */
14070
14071static rtx
14072ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14073 rtx target)
14074{
14075 rtx pat;
14076 tree arg0 = TREE_VALUE (arglist);
14077 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14078 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14079 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14080 rtx op2;
14081 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14082 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14083 enum rtx_code comparison = d->comparison;
14084
14085 if (VECTOR_MODE_P (mode0))
14086 op0 = safe_vector_operand (op0, mode0);
14087 if (VECTOR_MODE_P (mode1))
14088 op1 = safe_vector_operand (op1, mode1);
14089
14090 /* Swap operands if we have a comparison that isn't available in
14091 hardware. */
14092 if (d->flag)
14093 {
14094 rtx tmp = op1;
14095 op1 = op0;
14096 op0 = tmp;
14097 }
14098
14099 target = gen_reg_rtx (SImode);
14100 emit_move_insn (target, const0_rtx);
14101 target = gen_rtx_SUBREG (QImode, target, 0);
14102
14103 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14104 op0 = copy_to_mode_reg (mode0, op0);
14105 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14106 op1 = copy_to_mode_reg (mode1, op1);
14107
14108 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14109 pat = GEN_FCN (d->icode) (op0, op1);
14110 if (! pat)
14111 return 0;
14112 emit_insn (pat);
14113 emit_insn (gen_rtx_SET (VOIDmode,
14114 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14115 gen_rtx_fmt_ee (comparison, QImode,
14116 SET_DEST (pat),
14117 const0_rtx)));
14118
14119 return SUBREG_REG (target);
14120}
14121
14122/* Expand an expression EXP that calls a built-in function,
14123 with result going to TARGET if that's convenient
14124 (and in mode MODE if that's convenient).
14125 SUBTARGET may be used as the target for computing one of EXP's operands.
14126 IGNORE is nonzero if the value is to be ignored. */
14127
14128rtx
14129ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14130 enum machine_mode mode ATTRIBUTE_UNUSED,
14131 int ignore ATTRIBUTE_UNUSED)
14132{
14133 const struct builtin_description *d;
14134 size_t i;
14135 enum insn_code icode;
14136 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14137 tree arglist = TREE_OPERAND (exp, 1);
14138 tree arg0, arg1, arg2;
14139 rtx op0, op1, op2, pat;
14140 enum machine_mode tmode, mode0, mode1, mode2;
14141 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14142
14143 switch (fcode)
14144 {
14145 case IX86_BUILTIN_EMMS:
14146 emit_insn (gen_emms ());
14147 return 0;
14148
14149 case IX86_BUILTIN_SFENCE:
14150 emit_insn (gen_sfence ());
14151 return 0;
14152
14153 case IX86_BUILTIN_PEXTRW:
14154 case IX86_BUILTIN_PEXTRW128:
14155 icode = (fcode == IX86_BUILTIN_PEXTRW
14156 ? CODE_FOR_mmx_pextrw
14157 : CODE_FOR_sse2_pextrw);
14158 arg0 = TREE_VALUE (arglist);
14159 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14160 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14161 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14162 tmode = insn_data[icode].operand[0].mode;
14163 mode0 = insn_data[icode].operand[1].mode;
14164 mode1 = insn_data[icode].operand[2].mode;
14165
14166 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14167 op0 = copy_to_mode_reg (mode0, op0);
14168 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14169 {
14170 error ("selector must be an integer constant in the range 0..%i",
14171 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14172 return gen_reg_rtx (tmode);
14173 }
14174 if (target == 0
14175 || GET_MODE (target) != tmode
14176 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14177 target = gen_reg_rtx (tmode);
14178 pat = GEN_FCN (icode) (target, op0, op1);
14179 if (! pat)
14180 return 0;
14181 emit_insn (pat);
14182 return target;
14183
14184 case IX86_BUILTIN_PINSRW:
14185 case IX86_BUILTIN_PINSRW128:
14186 icode = (fcode == IX86_BUILTIN_PINSRW
14187 ? CODE_FOR_mmx_pinsrw
14188 : CODE_FOR_sse2_pinsrw);
14189 arg0 = TREE_VALUE (arglist);
14190 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14191 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14192 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14193 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14194 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14195 tmode = insn_data[icode].operand[0].mode;
14196 mode0 = insn_data[icode].operand[1].mode;
14197 mode1 = insn_data[icode].operand[2].mode;
14198 mode2 = insn_data[icode].operand[3].mode;
14199
14200 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14201 op0 = copy_to_mode_reg (mode0, op0);
14202 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14203 op1 = copy_to_mode_reg (mode1, op1);
14204 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14205 {
14206 error ("selector must be an integer constant in the range 0..%i",
14207 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14208 return const0_rtx;
14209 }
14210 if (target == 0
14211 || GET_MODE (target) != tmode
14212 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14213 target = gen_reg_rtx (tmode);
14214 pat = GEN_FCN (icode) (target, op0, op1, op2);
14215 if (! pat)
14216 return 0;
14217 emit_insn (pat);
14218 return target;
14219
14220 case IX86_BUILTIN_MASKMOVQ:
14221 case IX86_BUILTIN_MASKMOVDQU:
14222 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14223 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14224 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14225 : CODE_FOR_sse2_maskmovdqu));
14226 /* Note the arg order is different from the operand order. */
14227 arg1 = TREE_VALUE (arglist);
14228 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14229 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14230 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14231 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14232 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14233 mode0 = insn_data[icode].operand[0].mode;
14234 mode1 = insn_data[icode].operand[1].mode;
14235 mode2 = insn_data[icode].operand[2].mode;
14236
14237 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14238 op0 = copy_to_mode_reg (mode0, op0);
14239 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14240 op1 = copy_to_mode_reg (mode1, op1);
14241 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14242 op2 = copy_to_mode_reg (mode2, op2);
14243 pat = GEN_FCN (icode) (op0, op1, op2);
14244 if (! pat)
14245 return 0;
14246 emit_insn (pat);
14247 return 0;
14248
14249 case IX86_BUILTIN_SQRTSS:
14250 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14251 case IX86_BUILTIN_RSQRTSS:
14252 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14253 case IX86_BUILTIN_RCPSS:
14254 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14255
14256 case IX86_BUILTIN_LOADAPS:
14257 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14258
14259 case IX86_BUILTIN_LOADUPS:
14260 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14261
14262 case IX86_BUILTIN_STOREAPS:
14263 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14264
14265 case IX86_BUILTIN_STOREUPS:
14266 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14267
14268 case IX86_BUILTIN_LOADSS:
14269 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14270
14271 case IX86_BUILTIN_STORESS:
14272 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14273
14274 case IX86_BUILTIN_LOADHPS:
14275 case IX86_BUILTIN_LOADLPS:
14276 case IX86_BUILTIN_LOADHPD:
14277 case IX86_BUILTIN_LOADLPD:
14278 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14279 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14280 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14281 : CODE_FOR_sse2_movsd);
14282 arg0 = TREE_VALUE (arglist);
14283 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14284 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14285 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14286 tmode = insn_data[icode].operand[0].mode;
14287 mode0 = insn_data[icode].operand[1].mode;
14288 mode1 = insn_data[icode].operand[2].mode;
14289
14290 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14291 op0 = copy_to_mode_reg (mode0, op0);
14292 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14293 if (target == 0
14294 || GET_MODE (target) != tmode
14295 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14296 target = gen_reg_rtx (tmode);
14297 pat = GEN_FCN (icode) (target, op0, op1);
14298 if (! pat)
14299 return 0;
14300 emit_insn (pat);
14301 return target;
14302
14303 case IX86_BUILTIN_STOREHPS:
14304 case IX86_BUILTIN_STORELPS:
14305 case IX86_BUILTIN_STOREHPD:
14306 case IX86_BUILTIN_STORELPD:
14307 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14308 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14309 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14310 : CODE_FOR_sse2_movsd);
14311 arg0 = TREE_VALUE (arglist);
14312 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14313 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14314 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14315 mode0 = insn_data[icode].operand[1].mode;
14316 mode1 = insn_data[icode].operand[2].mode;
14317
14318 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14319 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14320 op1 = copy_to_mode_reg (mode1, op1);
14321
14322 pat = GEN_FCN (icode) (op0, op0, op1);
14323 if (! pat)
14324 return 0;
14325 emit_insn (pat);
14326 return 0;
14327
14328 case IX86_BUILTIN_MOVNTPS:
14329 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14330 case IX86_BUILTIN_MOVNTQ:
14331 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14332
14333 case IX86_BUILTIN_LDMXCSR:
14334 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14335 target = assign_386_stack_local (SImode, 0);
14336 emit_move_insn (target, op0);
14337 emit_insn (gen_ldmxcsr (target));
14338 return 0;
14339
14340 case IX86_BUILTIN_STMXCSR:
14341 target = assign_386_stack_local (SImode, 0);
14342 emit_insn (gen_stmxcsr (target));
14343 return copy_to_mode_reg (SImode, target);
14344
14345 case IX86_BUILTIN_SHUFPS:
14346 case IX86_BUILTIN_SHUFPD:
14347 icode = (fcode == IX86_BUILTIN_SHUFPS
14348 ? CODE_FOR_sse_shufps
14349 : CODE_FOR_sse2_shufpd);
14350 arg0 = TREE_VALUE (arglist);
14351 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14352 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14353 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14354 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14355 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14356 tmode = insn_data[icode].operand[0].mode;
14357 mode0 = insn_data[icode].operand[1].mode;
14358 mode1 = insn_data[icode].operand[2].mode;
14359 mode2 = insn_data[icode].operand[3].mode;
14360
14361 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14362 op0 = copy_to_mode_reg (mode0, op0);
14363 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14364 op1 = copy_to_mode_reg (mode1, op1);
14365 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14366 {
14367 /* @@@ better error message */
14368 error ("mask must be an immediate");
14369 return gen_reg_rtx (tmode);
14370 }
14371 if (target == 0
14372 || GET_MODE (target) != tmode
14373 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14374 target = gen_reg_rtx (tmode);
14375 pat = GEN_FCN (icode) (target, op0, op1, op2);
14376 if (! pat)
14377 return 0;
14378 emit_insn (pat);
14379 return target;
14380
14381 case IX86_BUILTIN_PSHUFW:
14382 case IX86_BUILTIN_PSHUFD:
14383 case IX86_BUILTIN_PSHUFHW:
14384 case IX86_BUILTIN_PSHUFLW:
14385 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14386 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14387 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14388 : CODE_FOR_mmx_pshufw);
14389 arg0 = TREE_VALUE (arglist);
14390 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14391 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14392 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14393 tmode = insn_data[icode].operand[0].mode;
14394 mode1 = insn_data[icode].operand[1].mode;
14395 mode2 = insn_data[icode].operand[2].mode;
14396
14397 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14398 op0 = copy_to_mode_reg (mode1, op0);
14399 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14400 {
14401 /* @@@ better error message */
14402 error ("mask must be an immediate");
14403 return const0_rtx;
14404 }
14405 if (target == 0
14406 || GET_MODE (target) != tmode
14407 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14408 target = gen_reg_rtx (tmode);
14409 pat = GEN_FCN (icode) (target, op0, op1);
14410 if (! pat)
14411 return 0;
14412 emit_insn (pat);
14413 return target;
14414
14415 case IX86_BUILTIN_PSLLDQI128:
14416 case IX86_BUILTIN_PSRLDQI128:
14417 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14418 : CODE_FOR_sse2_lshrti3);
14419 arg0 = TREE_VALUE (arglist);
14420 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14421 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14422 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14423 tmode = insn_data[icode].operand[0].mode;
14424 mode1 = insn_data[icode].operand[1].mode;
14425 mode2 = insn_data[icode].operand[2].mode;
14426
14427 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14428 {
14429 op0 = copy_to_reg (op0);
14430 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14431 }
14432 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14433 {
14434 error ("shift must be an immediate");
14435 return const0_rtx;
14436 }
14437 target = gen_reg_rtx (V2DImode);
14438 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14439 if (! pat)
14440 return 0;
14441 emit_insn (pat);
14442 return target;
14443
14444 case IX86_BUILTIN_FEMMS:
14445 emit_insn (gen_femms ());
14446 return NULL_RTX;
14447
14448 case IX86_BUILTIN_PAVGUSB:
14449 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14450
14451 case IX86_BUILTIN_PF2ID:
14452 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14453
14454 case IX86_BUILTIN_PFACC:
14455 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14456
14457 case IX86_BUILTIN_PFADD:
14458 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14459
14460 case IX86_BUILTIN_PFCMPEQ:
14461 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14462
14463 case IX86_BUILTIN_PFCMPGE:
14464 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14465
14466 case IX86_BUILTIN_PFCMPGT:
14467 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14468
14469 case IX86_BUILTIN_PFMAX:
14470 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14471
14472 case IX86_BUILTIN_PFMIN:
14473 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14474
14475 case IX86_BUILTIN_PFMUL:
14476 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14477
14478 case IX86_BUILTIN_PFRCP:
14479 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14480
14481 case IX86_BUILTIN_PFRCPIT1:
14482 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14483
14484 case IX86_BUILTIN_PFRCPIT2:
14485 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14486
14487 case IX86_BUILTIN_PFRSQIT1:
14488 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14489
14490 case IX86_BUILTIN_PFRSQRT:
14491 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14492
14493 case IX86_BUILTIN_PFSUB:
14494 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14495
14496 case IX86_BUILTIN_PFSUBR:
14497 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14498
14499 case IX86_BUILTIN_PI2FD:
14500 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14501
14502 case IX86_BUILTIN_PMULHRW:
14503 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14504
14505 case IX86_BUILTIN_PF2IW:
14506 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14507
14508 case IX86_BUILTIN_PFNACC:
14509 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14510
14511 case IX86_BUILTIN_PFPNACC:
14512 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14513
14514 case IX86_BUILTIN_PI2FW:
14515 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14516
14517 case IX86_BUILTIN_PSWAPDSI:
14518 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14519
14520 case IX86_BUILTIN_PSWAPDSF:
14521 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14522
14523 case IX86_BUILTIN_SSE_ZERO:
14524 target = gen_reg_rtx (V4SFmode);
14525 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14526 return target;
14527
14528 case IX86_BUILTIN_MMX_ZERO:
14529 target = gen_reg_rtx (DImode);
14530 emit_insn (gen_mmx_clrdi (target));
14531 return target;
14532
14533 case IX86_BUILTIN_CLRTI:
14534 target = gen_reg_rtx (V2DImode);
14535 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14536 return target;
14537
14538
14539 case IX86_BUILTIN_SQRTSD:
14540 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14541 case IX86_BUILTIN_LOADAPD:
14542 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14543 case IX86_BUILTIN_LOADUPD:
14544 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14545
14546 case IX86_BUILTIN_STOREAPD:
14547 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14548 case IX86_BUILTIN_STOREUPD:
14549 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14550
14551 case IX86_BUILTIN_LOADSD:
14552 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14553
14554 case IX86_BUILTIN_STORESD:
14555 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14556
14557 case IX86_BUILTIN_SETPD1:
14558 target = assign_386_stack_local (DFmode, 0);
14559 arg0 = TREE_VALUE (arglist);
14560 emit_move_insn (adjust_address (target, DFmode, 0),
14561 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14562 op0 = gen_reg_rtx (V2DFmode);
14563 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14564 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14565 return op0;
14566
14567 case IX86_BUILTIN_SETPD:
14568 target = assign_386_stack_local (V2DFmode, 0);
14569 arg0 = TREE_VALUE (arglist);
14570 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14571 emit_move_insn (adjust_address (target, DFmode, 0),
14572 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14573 emit_move_insn (adjust_address (target, DFmode, 8),
14574 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14575 op0 = gen_reg_rtx (V2DFmode);
14576 emit_insn (gen_sse2_movapd (op0, target));
14577 return op0;
14578
14579 case IX86_BUILTIN_LOADRPD:
14580 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14581 gen_reg_rtx (V2DFmode), 1);
14582 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14583 return target;
14584
14585 case IX86_BUILTIN_LOADPD1:
14586 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14587 gen_reg_rtx (V2DFmode), 1);
14588 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14589 return target;
14590
14591 case IX86_BUILTIN_STOREPD1:
14592 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14593 case IX86_BUILTIN_STORERPD:
14594 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14595
14596 case IX86_BUILTIN_CLRPD:
14597 target = gen_reg_rtx (V2DFmode);
14598 emit_insn (gen_sse_clrv2df (target));
14599 return target;
14600
14601 case IX86_BUILTIN_MFENCE:
14602 emit_insn (gen_sse2_mfence ());
14603 return 0;
14604 case IX86_BUILTIN_LFENCE:
14605 emit_insn (gen_sse2_lfence ());
14606 return 0;
14607
14608 case IX86_BUILTIN_CLFLUSH:
14609 arg0 = TREE_VALUE (arglist);
14610 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14611 icode = CODE_FOR_sse2_clflush;
14612 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14613 op0 = copy_to_mode_reg (Pmode, op0);
14614
14615 emit_insn (gen_sse2_clflush (op0));
14616 return 0;
14617
14618 case IX86_BUILTIN_MOVNTPD:
14619 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14620 case IX86_BUILTIN_MOVNTDQ:
14621 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14622 case IX86_BUILTIN_MOVNTI:
14623 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14624
14625 case IX86_BUILTIN_LOADDQA:
14626 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14627 case IX86_BUILTIN_LOADDQU:
14628 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14629 case IX86_BUILTIN_LOADD:
14630 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14631
14632 case IX86_BUILTIN_STOREDQA:
14633 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14634 case IX86_BUILTIN_STOREDQU:
14635 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14636 case IX86_BUILTIN_STORED:
14637 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14638
14639 case IX86_BUILTIN_MONITOR:
14640 arg0 = TREE_VALUE (arglist);
14641 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14642 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14643 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14644 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14645 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14646 if (!REG_P (op0))
14647 op0 = copy_to_mode_reg (SImode, op0);
14648 if (!REG_P (op1))
14649 op1 = copy_to_mode_reg (SImode, op1);
14650 if (!REG_P (op2))
14651 op2 = copy_to_mode_reg (SImode, op2);
14652 emit_insn (gen_monitor (op0, op1, op2));
14653 return 0;
14654
14655 case IX86_BUILTIN_MWAIT:
14656 arg0 = TREE_VALUE (arglist);
14657 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14658 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14659 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14660 if (!REG_P (op0))
14661 op0 = copy_to_mode_reg (SImode, op0);
14662 if (!REG_P (op1))
14663 op1 = copy_to_mode_reg (SImode, op1);
14664 emit_insn (gen_mwait (op0, op1));
14665 return 0;
14666
14667 case IX86_BUILTIN_LOADDDUP:
14668 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14669
14670 case IX86_BUILTIN_LDDQU:
14671 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14672 1);
14673
14674 default:
14675 break;
14676 }
14677
14678 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14679 if (d->code == fcode)
14680 {
14681 /* Compares are treated specially. */
14682 if (d->icode == CODE_FOR_maskcmpv4sf3
14683 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14684 || d->icode == CODE_FOR_maskncmpv4sf3
14685 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14686 || d->icode == CODE_FOR_maskcmpv2df3
14687 || d->icode == CODE_FOR_vmmaskcmpv2df3
14688 || d->icode == CODE_FOR_maskncmpv2df3
14689 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14690 return ix86_expand_sse_compare (d, arglist, target);
14691
14692 return ix86_expand_binop_builtin (d->icode, arglist, target);
14693 }
14694
14695 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14696 if (d->code == fcode)
14697 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14698
14699 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14700 if (d->code == fcode)
14701 return ix86_expand_sse_comi (d, arglist, target);
14702
14703 /* @@@ Should really do something sensible here. */
14704 return 0;
14705}
14706
14707 /* Store OPERAND to memory after reload is completed.  This means
14708    that we can't easily use assign_stack_local.  */
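/* Sketch of the strategy used below: with the red zone (x86-64) the value
   is stored just below the stack pointer without adjusting it; otherwise
   it is pushed with a pre-decrement store, and ix86_free_from_memory
   later releases the slot again.  */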
14709rtx
14710ix86_force_to_memory (enum machine_mode mode, rtx operand)
14711{
14712 rtx result;
14713 if (!reload_completed)
14714 abort ();
14715 if (TARGET_RED_ZONE)
14716 {
14717 result = gen_rtx_MEM (mode,
14718 gen_rtx_PLUS (Pmode,
14719 stack_pointer_rtx,
14720 GEN_INT (-RED_ZONE_SIZE)));
14721 emit_move_insn (result, operand);
14722 }
14723 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14724 {
14725 switch (mode)
14726 {
14727 case HImode:
14728 case SImode:
14729 operand = gen_lowpart (DImode, operand);
14730 /* FALLTHRU */
14731 case DImode:
14732 emit_insn (
14733 gen_rtx_SET (VOIDmode,
14734 gen_rtx_MEM (DImode,
14735 gen_rtx_PRE_DEC (DImode,
14736 stack_pointer_rtx)),
14737 operand));
14738 break;
14739 default:
14740 abort ();
14741 }
14742 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14743 }
14744 else
14745 {
14746 switch (mode)
14747 {
14748 case DImode:
14749 {
14750 rtx operands[2];
14751 split_di (&operand, 1, operands, operands + 1);
14752 emit_insn (
14753 gen_rtx_SET (VOIDmode,
14754 gen_rtx_MEM (SImode,
14755 gen_rtx_PRE_DEC (Pmode,
14756 stack_pointer_rtx)),
14757 operands[1]));
14758 emit_insn (
14759 gen_rtx_SET (VOIDmode,
14760 gen_rtx_MEM (SImode,
14761 gen_rtx_PRE_DEC (Pmode,
14762 stack_pointer_rtx)),
14763 operands[0]));
14764 }
14765 break;
14766 case HImode:
14767 	  /* It is better to store HImode values as SImode.  */
14768 if (!TARGET_PARTIAL_REG_STALL)
14769 operand = gen_lowpart (SImode, operand);
14770 /* FALLTHRU */
14771 case SImode:
14772 emit_insn (
14773 gen_rtx_SET (VOIDmode,
14774 gen_rtx_MEM (GET_MODE (operand),
14775 gen_rtx_PRE_DEC (SImode,
14776 stack_pointer_rtx)),
14777 operand));
14778 break;
14779 default:
14780 abort ();
14781 }
14782 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14783 }
14784 return result;
14785}
14786
14787 /* Free the operand from memory.  */
14788void
14789ix86_free_from_memory (enum machine_mode mode)
14790{
14791 if (!TARGET_RED_ZONE)
14792 {
14793 int size;
14794
14795 if (mode == DImode || TARGET_64BIT)
14796 size = 8;
14797 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14798 size = 2;
14799 else
14800 size = 4;
14801       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
14802 	 to a pop or add instruction if registers are available.  */
14803 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14804 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14805 GEN_INT (size))));
14806 }
14807}
14808
14809 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14810    QImode must go into class Q_REGS.
14811    Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and
14812    movdf to do mem-to-mem moves through integer regs.  */
14813enum reg_class
14814ix86_preferred_reload_class (rtx x, enum reg_class class)
14815{
14816 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14817 return NO_REGS;
14818 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14819 {
14820 /* SSE can't load any constant directly yet. */
14821 if (SSE_CLASS_P (class))
14822 return NO_REGS;
14823 /* Floats can load 0 and 1. */
14824 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14825 {
14826 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14827 if (MAYBE_SSE_CLASS_P (class))
14828 return (reg_class_subset_p (class, GENERAL_REGS)
14829 ? GENERAL_REGS : FLOAT_REGS);
14830 else
14831 return class;
14832 }
14833 /* General regs can load everything. */
14834 if (reg_class_subset_p (class, GENERAL_REGS))
14835 return GENERAL_REGS;
14836 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14837 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14838 return NO_REGS;
14839 }
14840 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14841 return NO_REGS;
14842 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14843 return Q_REGS;
14844 return class;
14845}
14846
14847/* If we are copying between general and FP registers, we need a memory
14848 location. The same is true for SSE and MMX registers.
14849
14850    The macro can't work reliably when one of the CLASSES is a class containing
14851    registers from multiple units (SSE, MMX, integer).  We avoid this by never
14852    combining those units in a single alternative in the machine description.
14853    Ensure that this constraint holds to avoid unexpected surprises.
14854
14855 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14856 enforce these sanity checks. */
14857int
14858ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14859 enum machine_mode mode, int strict)
14860{
14861 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14862 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14863 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14864 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14865 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14866 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14867 {
14868 if (strict)
14869 abort ();
14870 else
14871 return 1;
14872 }
14873 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14874 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14875 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14876 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14877 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14878}
14879/* Return the cost of moving data from a register in class CLASS1 to
14880 one in class CLASS2.
14881
14882 It is not required that the cost always equal 2 when FROM is the same as TO;
14883 on some machines it is expensive to move between registers if they are not
14884 general registers. */
14885int
14886ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14887 enum reg_class class2)
14888{
14889   /* In case we require secondary memory, compute the cost of the store
14890      followed by the load.  In order to avoid bad register allocation choices,
14891      we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
14892
14893 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14894 {
14895 int cost = 1;
14896
14897 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14898 MEMORY_MOVE_COST (mode, class1, 1));
14899 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14900 MEMORY_MOVE_COST (mode, class2, 1));
14901
14902       /* When copying from a general purpose register we may emit multiple
14903          stores followed by a single load, causing a memory size mismatch stall.
14904          Count this as an arbitrarily high cost of 20.  */
14905 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14906 cost += 20;
14907
14908 /* In the case of FP/MMX moves, the registers actually overlap, and we
14909 have to switch modes in order to treat them differently. */
14910 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14911 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14912 cost += 20;
14913
14914 return cost;
14915 }
14916
14917 /* Moves between SSE/MMX and integer unit are expensive. */
14918 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14919 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14920 return ix86_cost->mmxsse_to_integer;
14921 if (MAYBE_FLOAT_CLASS_P (class1))
14922 return ix86_cost->fp_move;
14923 if (MAYBE_SSE_CLASS_P (class1))
14924 return ix86_cost->sse_move;
14925 if (MAYBE_MMX_CLASS_P (class1))
14926 return ix86_cost->mmx_move;
14927 return 2;
14928}
14929
14930/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14931int
14932ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14933{
14934   /* Only the flags registers can hold CCmode values, and they can hold nothing else.  */
14935 if (CC_REGNO_P (regno))
14936 return GET_MODE_CLASS (mode) == MODE_CC;
14937 if (GET_MODE_CLASS (mode) == MODE_CC
14938 || GET_MODE_CLASS (mode) == MODE_RANDOM
14939 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14940 return 0;
14941 if (FP_REGNO_P (regno))
14942 return VALID_FP_MODE_P (mode);
14943 if (SSE_REGNO_P (regno))
14944 {
14945 /* HACK! We didn't change all of the constraints for SSE1 for the
14946 scalar modes on the branch. Fortunately, they're not required
14947 for ABI compatibility. */
14948 if (!TARGET_SSE2 && !VECTOR_MODE_P (mode))
14949 return VALID_SSE_REG_MODE (mode);
14950
14951 /* We implement the move patterns for all vector modes into and
14952 out of SSE registers, even when no operation instructions
14953 are available. */
14954 return (VALID_SSE_REG_MODE (mode)
14955 || VALID_SSE2_REG_MODE (mode)
14956 || VALID_MMX_REG_MODE (mode)
14957 || VALID_MMX_REG_MODE_3DNOW (mode));
14958 }
14959 if (MMX_REGNO_P (regno))
14960 {
14961 /* We implement the move patterns for 3DNOW modes even in MMX mode,
14962 so if the register is available at all, then we can move data of
14963 the given mode into or out of it. */
14964 return (VALID_MMX_REG_MODE (mode)
14965 || VALID_MMX_REG_MODE_3DNOW (mode));
14966 }
14967   /* We handle both integers and floats in the general purpose registers.
14968      In the future we should be able to handle vector modes as well.  */
14969 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14970 return 0;
14971   /* Take care of QImode values - they can be in non-QI regs, but then
14972      they do cause partial register stalls.  */
14973 if (regno < 4 || mode != QImode || TARGET_64BIT)
14974 return 1;
14975 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14976}
14977
14978/* Return the cost of moving data of mode M between a
14979 register and memory. A value of 2 is the default; this cost is
14980 relative to those in `REGISTER_MOVE_COST'.
14981
14982 If moving between registers and memory is more expensive than
14983 between two registers, you should define this macro to express the
14984 relative cost.
14985
14986    Also model the increased cost of moving QImode registers in
14987    non-Q_REGS classes.
14988  */
14989int
14990ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14991{
14992 if (FLOAT_CLASS_P (class))
14993 {
14994 int index;
14995 switch (mode)
14996 {
14997 case SFmode:
14998 index = 0;
14999 break;
15000 case DFmode:
15001 index = 1;
15002 break;
15003 case XFmode:
15004 index = 2;
15005 break;
15006 default:
15007 return 100;
15008 }
15009 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
15010 }
15011 if (SSE_CLASS_P (class))
15012 {
15013 int index;
15014 switch (GET_MODE_SIZE (mode))
15015 {
15016 case 4:
15017 index = 0;
15018 break;
15019 case 8:
15020 index = 1;
15021 break;
15022 case 16:
15023 index = 2;
15024 break;
15025 default:
15026 return 100;
15027 }
15028 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
15029 }
15030 if (MMX_CLASS_P (class))
15031 {
15032 int index;
15033 switch (GET_MODE_SIZE (mode))
15034 {
15035 case 4:
15036 index = 0;
15037 break;
15038 case 8:
15039 index = 1;
15040 break;
15041 default:
15042 return 100;
15043 }
15044 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
15045 }
15046 switch (GET_MODE_SIZE (mode))
15047 {
15048 case 1:
15049 if (in)
15050 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15051 : ix86_cost->movzbl_load);
15052 else
15053 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15054 : ix86_cost->int_store[0] + 4);
15055 break;
15056 case 2:
15057 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15058 default:
15059 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
15060 if (mode == TFmode)
15061 mode = XFmode;
15062 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15063 * (((int) GET_MODE_SIZE (mode)
15064 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
15065 }
15066}
15067
15068/* Compute a (partial) cost for rtx X. Return true if the complete
15069 cost has been computed, and false if subexpressions should be
15070 scanned. In either case, *TOTAL contains the cost result. */
15071
15072static bool
15073ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15074{
15075 enum machine_mode mode = GET_MODE (x);
15076
15077 switch (code)
15078 {
15079 case CONST_INT:
15080 case CONST:
15081 case LABEL_REF:
15082 case SYMBOL_REF:
15083 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
15084 *total = 3;
15085 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
15086 *total = 2;
15087 else if (flag_pic && SYMBOLIC_CONST (x)
15088 && (!TARGET_64BIT
15089 	       || (GET_CODE (x) != LABEL_REF
15090 && (GET_CODE (x) != SYMBOL_REF
15091 || !SYMBOL_REF_LOCAL_P (x)))))
15092 *total = 1;
15093 else
15094 *total = 0;
15095 return true;
15096
15097 case CONST_DOUBLE:
15098 if (mode == VOIDmode)
15099 *total = 0;
15100 else
15101 switch (standard_80387_constant_p (x))
15102 {
15103 case 1: /* 0.0 */
15104 *total = 1;
15105 break;
15106 default: /* Other constants */
15107 *total = 2;
15108 break;
15109 case 0:
15110 case -1:
15111 /* Start with (MEM (SYMBOL_REF)), since that's where
15112 it'll probably end up. Add a penalty for size. */
15113 *total = (COSTS_N_INSNS (1)
15114 + (flag_pic != 0 && !TARGET_64BIT)
15115 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15116 break;
15117 }
15118 return true;
15119
15120 case ZERO_EXTEND:
15121       /* The zero extension is often completely free on x86_64, so make
15122 	 it as cheap as possible.  */
15123 if (TARGET_64BIT && mode == DImode
15124 && GET_MODE (XEXP (x, 0)) == SImode)
15125 *total = 1;
15126 else if (TARGET_ZERO_EXTEND_WITH_AND)
15127 *total = COSTS_N_INSNS (ix86_cost->add);
15128 else
15129 *total = COSTS_N_INSNS (ix86_cost->movzx);
15130 return false;
15131
15132 case SIGN_EXTEND:
15133 *total = COSTS_N_INSNS (ix86_cost->movsx);
15134 return false;
15135
15136 case ASHIFT:
15137 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15138 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15139 {
15140 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15141 if (value == 1)
15142 {
15143 *total = COSTS_N_INSNS (ix86_cost->add);
15144 return false;
15145 }
15146 if ((value == 2 || value == 3)
15147 && !TARGET_DECOMPOSE_LEA
15148 && ix86_cost->lea <= ix86_cost->shift_const)
15149 {
15150 *total = COSTS_N_INSNS (ix86_cost->lea);
15151 return false;
15152 }
15153 }
15154 /* FALLTHRU */
15155
15156 case ROTATE:
15157 case ASHIFTRT:
15158 case LSHIFTRT:
15159 case ROTATERT:
15160 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15161 {
15162 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15163 {
15164 if (INTVAL (XEXP (x, 1)) > 32)
15165 		    *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
15166 		  else
15167 		    *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
15168 }
15169 else
15170 {
15171 if (GET_CODE (XEXP (x, 1)) == AND)
15172 		    *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
15173 		  else
15174 		    *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
15175 }
15176 }
15177 else
15178 {
15179 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15180 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15181 else
15182 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15183 }
15184 return false;
15185
15186 case MULT:
15187 if (FLOAT_MODE_P (mode))
15188 *total = COSTS_N_INSNS (ix86_cost->fmul);
15189 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15190 {
15191 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15192 int nbits;
15193
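	  /* NBITS ends up as the index of the most significant set bit plus
	     one, i.e. the number of bits VALUE actually occupies.  */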
15194 for (nbits = 0; value != 0; value >>= 1)
15195 nbits++;
15196
15197 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15198 + nbits * ix86_cost->mult_bit);
15199 }
15200 else
15201 {
15202 /* This is arbitrary */
15203 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15204 + 7 * ix86_cost->mult_bit);
15205 }
15206 return false;
15207
15208 case DIV:
15209 case UDIV:
15210 case MOD:
15211 case UMOD:
15212 if (FLOAT_MODE_P (mode))
15213 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15214 else
15215 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15216 return false;
15217
15218 case PLUS:
15219 if (FLOAT_MODE_P (mode))
15220 *total = COSTS_N_INSNS (ix86_cost->fadd);
15221 else if (!TARGET_DECOMPOSE_LEA
15222 && GET_MODE_CLASS (mode) == MODE_INT
15223 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15224 {
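	  /* Recognize (plus (plus (mult X c) Y) Z) and (plus (mult X c) Y)
	     with c in {2, 4, 8}, as well as (plus (plus X Y) Z); these fit
	     the x86 addressing modes and can be done with a single lea.  */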
15225 if (GET_CODE (XEXP (x, 0)) == PLUS
15226 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15227 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15228 && CONSTANT_P (XEXP (x, 1)))
15229 {
15230 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15231 if (val == 2 || val == 4 || val == 8)
15232 {
15233 *total = COSTS_N_INSNS (ix86_cost->lea);
15234 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15235 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15236 outer_code);
15237 *total += rtx_cost (XEXP (x, 1), outer_code);
15238 return true;
15239 }
15240 }
15241 else if (GET_CODE (XEXP (x, 0)) == MULT
15242 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15243 {
15244 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15245 if (val == 2 || val == 4 || val == 8)
15246 {
15247 *total = COSTS_N_INSNS (ix86_cost->lea);
15248 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15249 *total += rtx_cost (XEXP (x, 1), outer_code);
15250 return true;
15251 }
15252 }
15253 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15254 {
15255 *total = COSTS_N_INSNS (ix86_cost->lea);
15256 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15257 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15258 *total += rtx_cost (XEXP (x, 1), outer_code);
15259 return true;
15260 }
15261 }
15262 /* FALLTHRU */
15263
15264 case MINUS:
15265 if (FLOAT_MODE_P (mode))
15266 {
15267 *total = COSTS_N_INSNS (ix86_cost->fadd);
15268 return false;
15269 }
15270 /* FALLTHRU */
15271
15272 case AND:
15273 case IOR:
15274 case XOR:
15275 if (!TARGET_64BIT && mode == DImode)
15276 {
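	  /* The operation is split into two word-mode instructions; count
	     both halves, and double an operand's cost when that operand is
	     not itself DImode.  */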
15277 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15278 + (rtx_cost (XEXP (x, 0), outer_code)
15279 << (GET_MODE (XEXP (x, 0)) != DImode))
15280 + (rtx_cost (XEXP (x, 1), outer_code)
15281 << (GET_MODE (XEXP (x, 1)) != DImode)));
15282 return true;
15283 }
15284 /* FALLTHRU */
15285
15286 case NEG:
15287 if (FLOAT_MODE_P (mode))
15288 {
15289 *total = COSTS_N_INSNS (ix86_cost->fchs);
15290 return false;
15291 }
15292 /* FALLTHRU */
15293
15294 case NOT:
15295 if (!TARGET_64BIT && mode == DImode)
15296 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15297 else
15298 *total = COSTS_N_INSNS (ix86_cost->add);
15299 return false;
15300
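    /* A FLOAT_EXTEND that will be carried out on the x87 (no SSE math, or a
       mode the SSE unit cannot handle) is free; otherwise leave the caller's
       default cost in place.  */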
15301 case FLOAT_EXTEND:
15302 if (!TARGET_SSE_MATH
15303 || mode == XFmode
15304 || (mode == DFmode && !TARGET_SSE2))
15305 *total = 0;
15306 return false;
15307
15308 case ABS:
15309 if (FLOAT_MODE_P (mode))
15310 *total = COSTS_N_INSNS (ix86_cost->fabs);
15311 return false;
15312
15313 case SQRT:
15314 if (FLOAT_MODE_P (mode))
15315 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15316 return false;
15317
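    /* Accessing the thread pointer (UNSPEC_TP) is considered free.  */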
15318 case UNSPEC:
15319 if (XINT (x, 1) == UNSPEC_TP)
15320 *total = 0;
15321 return false;
15322
15323 default:
15324 return false;
15325 }
15326}
15327
15328#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15329static void
15330ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15331{
15332 init_section ();
15333 fputs ("\tpushl $", asm_out_file);
15334 assemble_name (asm_out_file, XSTR (symbol, 0));
15335 fputc ('\n', asm_out_file);
15336}
15337#endif
15338
15339#if TARGET_MACHO
15340
15341static int current_machopic_label_num;
15342
15343/* Given a symbol name and its associated stub, write out the
15344 definition of the stub. */
15345
15346void
15347machopic_output_stub (FILE *file, const char *symb, const char *stub)
15348{
15349 unsigned int length;
15350 char *binder_name, *symbol_name, lazy_ptr_name[32];
15351 int label = ++current_machopic_label_num;
15352
15353 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15354 symb = (*targetm.strip_name_encoding) (symb);
15355
15356 length = strlen (stub);
15357 binder_name = alloca (length + 32);
15358 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15359
15360 length = strlen (symb);
15361 symbol_name = alloca (length + 32);
15362 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15363
15364 sprintf (lazy_ptr_name, "L%d$lz", label);
15365
15366 if (MACHOPIC_PURE)
15367 machopic_picsymbol_stub_section ();
15368 else
15369 machopic_symbol_stub_section ();
15370
15371 fprintf (file, "%s:\n", stub);
15372 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15373
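  /* For the pure (PIC) case, obtain the stub's own address with a call/pop
     sequence and load the lazy pointer relative to it; otherwise jump
     through the lazy pointer directly.  */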
15374 if (MACHOPIC_PURE)
15375 {
15376 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15377 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15378 fprintf (file, "\tjmp %%edx\n");
15379 }
15380 else
15381 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15382
15383 fprintf (file, "%s:\n", binder_name);
15384
15385 if (MACHOPIC_PURE)
15386 {
15387 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15388 fprintf (file, "\tpushl %%eax\n");
15389 }
15390 else
15391 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15392
15393 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15394
15395 machopic_lazy_symbol_ptr_section ();
15396 fprintf (file, "%s:\n", lazy_ptr_name);
15397 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15398 fprintf (file, "\t.long %s\n", binder_name);
15399}
15400#endif /* TARGET_MACHO */
15401
15402/* Order the registers for register allocator. */
15403
15404void
15405x86_order_regs_for_local_alloc (void)
15406{
15407 int pos = 0;
15408 int i;
15409
15410 /* First allocate the local general purpose registers. */
15411 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15412 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15413 reg_alloc_order [pos++] = i;
15414
15415 /* Global general purpose registers. */
15416 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15417 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15418 reg_alloc_order [pos++] = i;
15419
15420 /* x87 registers come first in case we are doing FP math
15421 using them. */
15422 if (!TARGET_SSE_MATH)
15423 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15424 reg_alloc_order [pos++] = i;
15425
15426 /* SSE registers. */
15427 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15428 reg_alloc_order [pos++] = i;
15429 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15430 reg_alloc_order [pos++] = i;
15431
15432 /* x87 registers. */
15433 if (TARGET_SSE_MATH)
15434 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15435 reg_alloc_order [pos++] = i;
15436
15437 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15438 reg_alloc_order [pos++] = i;
15439
15440   /* Initialize the rest of the array, as we do not allocate some
15441      registers at all.  */
15442 while (pos < FIRST_PSEUDO_REGISTER)
15443 reg_alloc_order [pos++] = 0;
15444}
15445
15446#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15447#define TARGET_USE_MS_BITFIELD_LAYOUT 0
15448#endif
15449
15450/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15451 struct attribute_spec.handler. */
15452static tree
15453ix86_handle_struct_attribute (tree *node, tree name,
15454 tree args ATTRIBUTE_UNUSED,
15455 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15456{
15457 tree *type = NULL;
15458 if (DECL_P (*node))
15459 {
15460 if (TREE_CODE (*node) == TYPE_DECL)
15461 type = &TREE_TYPE (*node);
15462 }
15463 else
15464 type = node;
15465
15466 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15467 || TREE_CODE (*type) == UNION_TYPE)))
15468 {
15469 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15470 *no_add_attrs = true;
15471 }
15472
15473 else if ((is_attribute_p ("ms_struct", name)
15474 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15475 || ((is_attribute_p ("gcc_struct", name)
15476 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15477 {
15478 warning ("`%s' incompatible attribute ignored",
15479 IDENTIFIER_POINTER (name));
15480 *no_add_attrs = true;
15481 }
15482
15483 return NULL_TREE;
15484}
15485
15486static bool
15487ix86_ms_bitfield_layout_p (tree record_type)
15488{
15489 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15490 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15491 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15492}
15493
15494/* Returns an expression indicating where the this parameter is
15495 located on entry to the FUNCTION. */
15496
15497static rtx
15498x86_this_parameter (tree function)
15499{
15500 tree type = TREE_TYPE (function);
15501
15502 if (TARGET_64BIT)
15503 {
15504 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15505 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15506 }
15507
15508 if (ix86_function_regparm (type, function) > 0)
15509 {
15510 tree parm;
15511
15512 parm = TYPE_ARG_TYPES (type);
15513 /* Figure out whether or not the function has a variable number of
15514 arguments. */
15515 for (; parm; parm = TREE_CHAIN (parm))
15516 if (TREE_VALUE (parm) == void_type_node)
15517 break;
15518 /* If not, the this parameter is in the first argument. */
15519 if (parm)
15520 {
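	  /* The first argument lives in %eax by default, or in %ecx for
	     fastcall functions.  */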
15521 int regno = 0;
15522 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15523 regno = 2;
15524 return gen_rtx_REG (SImode, regno);
15525 }
15526 }
15527
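  /* Otherwise `this' is passed on the stack just above the return address;
     if the function returns an aggregate in memory, the hidden return slot
     pointer sits in between, pushing `this' up by another word.  */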
15528 if (aggregate_value_p (TREE_TYPE (type), type))
15529 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15530 else
15531 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15532}
15533
15534/* Determine whether x86_output_mi_thunk can succeed. */
15535
15536static bool
15537x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15538 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15539 HOST_WIDE_INT vcall_offset, tree function)
15540{
15541 /* 64-bit can handle anything. */
15542 if (TARGET_64BIT)
15543 return true;
15544
15545 /* For 32-bit, everything's fine if we have one free register. */
15546 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15547 return true;
15548
15549 /* Need a free register for vcall_offset. */
15550 if (vcall_offset)
15551 return false;
15552
15553 /* Need a free register for GOT references. */
15554 if (flag_pic && !(*targetm.binds_local_p) (function))
15555 return false;
15556
15557 /* Otherwise ok. */
15558 return true;
15559}
15560
15561/* Output the assembler code for a thunk function. THUNK_DECL is the
15562 declaration for the thunk function itself, FUNCTION is the decl for
15563 the target function. DELTA is an immediate constant offset to be
15564 added to THIS. If VCALL_OFFSET is nonzero, the word at
15565 *(*this + vcall_offset) should be added to THIS. */
15566
15567static void
15568x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15569 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15570 HOST_WIDE_INT vcall_offset, tree function)
15571{
15572 rtx xops[3];
15573 rtx this = x86_this_parameter (function);
15574 rtx this_reg, tmp;
15575
15576 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15577 pull it in now and let DELTA benefit. */
15578 if (REG_P (this))
15579 this_reg = this;
15580 else if (vcall_offset)
15581 {
15582 /* Put the this parameter into %eax. */
15583 xops[0] = this;
15584 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15585 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15586 }
15587 else
15588 this_reg = NULL_RTX;
15589
15590 /* Adjust the this parameter by a fixed constant. */
15591 if (delta)
15592 {
15593 xops[0] = GEN_INT (delta);
15594 xops[1] = this_reg ? this_reg : this;
15595 if (TARGET_64BIT)
15596 {
15597 if (!x86_64_general_operand (xops[0], DImode))
15598 {
15599 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15600 xops[1] = tmp;
15601 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15602 xops[0] = tmp;
15603 xops[1] = this;
15604 }
15605 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15606 }
15607 else
15608 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15609 }
15610
15611 /* Adjust the this parameter by a value stored in the vtable. */
15612 if (vcall_offset)
15613 {
15614 if (TARGET_64BIT)
15615 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15616 else
15617 {
15618 int tmp_regno = 2 /* ECX */;
15619 if (lookup_attribute ("fastcall",
15620 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15621 tmp_regno = 0 /* EAX */;
15622 tmp = gen_rtx_REG (SImode, tmp_regno);
15623 }
15624
15625 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15626 xops[1] = tmp;
15627 if (TARGET_64BIT)
15628 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15629 else
15630 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15631
15632 /* Adjust the this parameter. */
15633 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15634 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15635 {
15636 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15637 xops[0] = GEN_INT (vcall_offset);
15638 xops[1] = tmp2;
15639 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15640 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15641 }
15642 xops[1] = this_reg;
15643 if (TARGET_64BIT)
15644 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15645 else
15646 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15647 }
15648
15649 /* If necessary, drop THIS back to its stack slot. */
15650 if (this_reg && this_reg != this)
15651 {
15652 xops[0] = this_reg;
15653 xops[1] = this;
15654 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15655 }
15656
15657 xops[0] = XEXP (DECL_RTL (function), 0);
15658 if (TARGET_64BIT)
15659 {
15660 if (!flag_pic || (*targetm.binds_local_p) (function))
15661 output_asm_insn ("jmp\t%P0", xops);
15662 else
15663 {
15664 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15665 tmp = gen_rtx_CONST (Pmode, tmp);
15666 tmp = gen_rtx_MEM (QImode, tmp);
15667 xops[0] = tmp;
15668 output_asm_insn ("jmp\t%A0", xops);
15669 }
15670 }
15671 else
15672 {
15673 if (!flag_pic || (*targetm.binds_local_p) (function))
15674 output_asm_insn ("jmp\t%P0", xops);
15675 else
15676#if TARGET_MACHO
15677 if (TARGET_MACHO)
15678 {
15679 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15680 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15681 tmp = gen_rtx_MEM (QImode, tmp);
15682 xops[0] = tmp;
15683 output_asm_insn ("jmp\t%0", xops);
15684 }
15685 else
15686#endif /* TARGET_MACHO */
15687 {
15688 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15689 output_set_got (tmp);
15690
15691 xops[1] = tmp;
15692 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15693 output_asm_insn ("jmp\t{*}%1", xops);
15694 }
15695 }
15696}
15697
15698static void
15699x86_file_start (void)
15700{
15701 default_file_start ();
15702 if (X86_FILE_START_VERSION_DIRECTIVE)
15703 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15704 if (X86_FILE_START_FLTUSED)
15705 fputs ("\t.global\t__fltused\n", asm_out_file);
15706 if (ix86_asm_dialect == ASM_INTEL)
15707 fputs ("\t.intel_syntax\n", asm_out_file);
15708}
15709
15710int
15711x86_field_alignment (tree field, int computed)
15712{
15713 enum machine_mode mode;
15714 tree type = TREE_TYPE (field);
15715
15716 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15717 return computed;
15718 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15719 ? get_inner_array_type (type) : type);
15720 if (mode == DFmode || mode == DCmode
15721 || GET_MODE_CLASS (mode) == MODE_INT
15722 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15723 return MIN (32, computed);
15724 return computed;
15725}
15726
15727/* Output assembler code to FILE to increment profiler label # LABELNO
15728 for profiling a function entry. */
15729void
15730x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15731{
15732 if (TARGET_64BIT)
15733 if (flag_pic)
15734 {
15735#ifndef NO_PROFILE_COUNTERS
15736 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15737#endif
15738 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15739 }
15740 else
15741 {
15742#ifndef NO_PROFILE_COUNTERS
15743 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15744#endif
15745 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15746 }
15747 else if (flag_pic)
15748 {
15749#ifndef NO_PROFILE_COUNTERS
15750 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15751 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15752#endif
15753 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15754 }
15755 else
15756 {
15757#ifndef NO_PROFILE_COUNTERS
15758 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15759 PROFILE_COUNT_REGISTER);
15760#endif
15761 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15762 }
15763}
15764
15765/* We don't have exact information about the insn sizes, but we may assume
15766    quite safely that we are informed about all 1-byte insns and memory
15767 address sizes. This is enough to eliminate unnecessary padding in
15768 99% of cases. */
15769
15770static int
15771min_insn_size (rtx insn)
15772{
15773 int l = 0;
15774
15775 if (!INSN_P (insn) || !active_insn_p (insn))
15776 return 0;
15777
15778   /* Discard the alignment insns we have emitted, and jump table data.  */
15779 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15780 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15781 return 0;
15782 if (GET_CODE (insn) == JUMP_INSN
15783 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15784 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15785 return 0;
15786
15787 /* Important case - calls are always 5 bytes.
15788      It is common to have many calls in a row.  */
15789 if (GET_CODE (insn) == CALL_INSN
15790 && symbolic_reference_mentioned_p (PATTERN (insn))
15791 && !SIBLING_CALL_P (insn))
15792 return 5;
15793 if (get_attr_length (insn) <= 1)
15794 return 1;
15795
15796 /* For normal instructions we may rely on the sizes of addresses
15797      and the presence of a symbol to require 4 bytes of encoding.  This
15798      is not the case for jumps, where references are PC-relative.  */
15799 if (GET_CODE (insn) != JUMP_INSN)
15800 {
15801 l = get_attr_length_address (insn);
15802 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15803 l = 4;
15804 }
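  /* One opcode byte plus the address bytes if we found any; otherwise
     assume a two-byte instruction.  */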
15805 if (l)
15806 return 1+l;
15807 else
15808 return 2;
15809}
15810
15811 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
15812    16-byte window.  */
15813
15814static void
15815k8_avoid_jump_misspredicts (void)
15816{
15817 rtx insn, start = get_insns ();
15818 int nbytes = 0, njumps = 0;
15819 int isjump = 0;
15820
15821   /* Look for all minimal intervals of instructions containing 4 jumps.
15822      The intervals are bounded by START and INSN.  NBYTES is the total
15823      size of the instructions in the interval, including INSN but not
15824      including START.  When NBYTES is smaller than 16, it is possible
15825      that the ends of START and INSN land in the same 16-byte window.
15826
15827      The smallest offset at which INSN can start in that window occurs
15828      when START ends at offset 0; the offset of INSN is then
15829      NBYTES - sizeof (INSN).  We add a p2align to the 16-byte window
15830      with maxskip 17 - NBYTES + sizeof (INSN).  */
15831 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15832 {
15833
15834 nbytes += min_insn_size (insn);
15835 if (rtl_dump_file)
15836 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15837 INSN_UID (insn), min_insn_size (insn));
15838 if ((GET_CODE (insn) == JUMP_INSN
15839 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15840 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15841 || GET_CODE (insn) == CALL_INSN)
15842 njumps++;
15843 else
15844 continue;
15845
15846 while (njumps > 3)
15847 {
15848 start = NEXT_INSN (start);
15849 if ((GET_CODE (start) == JUMP_INSN
15850 && GET_CODE (PATTERN (start)) != ADDR_VEC
15851 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15852 || GET_CODE (start) == CALL_INSN)
15853 njumps--, isjump = 1;
15854 else
15855 isjump = 0;
15856 nbytes -= min_insn_size (start);
15857 }
15858 if (njumps < 0)
15859 abort ();
15860 if (rtl_dump_file)
15861 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15862 INSN_UID (start), INSN_UID (insn), nbytes);
15863
15864 if (njumps == 3 && isjump && nbytes < 16)
15865 {
15866 int padsize = 15 - nbytes + min_insn_size (insn);
15867
15868 if (rtl_dump_file)
15869 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15870 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15871 }
15872 }
15873}
15874
15875 /* Implement machine-specific optimizations.
15876    At the moment we implement a single transformation: AMD Athlon works
15877    faster when RET is not the destination of a conditional jump or directly
15878    preceded by another jump instruction.  We avoid the penalty by inserting
15879    a NOP just before the RET instruction in such cases.  */
15880static void
15881ix86_reorg (void)
15882{
15883 edge e;
15884
15885 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15886 return;
15887 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15888 {
15889 basic_block bb = e->src;
15890 rtx ret = BB_END (bb);
15891 rtx prev;
15892 bool replace = false;
15893
15894 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15895 || !maybe_hot_bb_p (bb))
15896 continue;
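      /* Walk back to the nearest active insn or code label preceding the
	 return.  */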
15897 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15898 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15899 break;
15900 if (prev && GET_CODE (prev) == CODE_LABEL)
15901 {
15902 edge e;
15903 for (e = bb->pred; e; e = e->pred_next)
15904 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15905 && !(e->flags & EDGE_FALLTHRU))
15906 replace = true;
15907 }
15908 if (!replace)
15909 {
15910 prev = prev_active_insn (ret);
15911 if (prev
15912 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15913 || GET_CODE (prev) == CALL_INSN))
15914 replace = true;
15915 	  /* Empty functions get a branch mispredict even when the jump destination
15916 is not visible to us. */
15917 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15918 replace = true;
15919 }
15920 if (replace)
15921 {
15922 emit_insn_before (gen_return_internal_long (), ret);
15923 delete_insn (ret);
15924 }
15925 }
15926 k8_avoid_jump_misspredicts ();
15927}
15928
15929 /* Return nonzero when a QImode register that must be represented via a REX
15930    prefix is used.  */
15931bool
15932x86_extended_QIreg_mentioned_p (rtx insn)
15933{
15934 int i;
15935 extract_insn_cached (insn);
15936 for (i = 0; i < recog_data.n_operands; i++)
15937 if (REG_P (recog_data.operand[i])
15938 && REGNO (recog_data.operand[i]) >= 4)
15939 return true;
15940 return false;
15941}
15942
15943 /* Return nonzero when P points to a register encoded via a REX prefix.
15944 Called via for_each_rtx. */
15945static int
15946extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15947{
15948 unsigned int regno;
15949 if (!REG_P (*p))
15950 return 0;
15951 regno = REGNO (*p);
15952 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15953}
15954
15955 /* Return true when INSN mentions a register that must be encoded using a
15956    REX prefix.  */
15957bool
15958x86_extended_reg_mentioned_p (rtx insn)
15959{
15960 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15961}
15962
15963/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15964 optabs would emit if we didn't have TFmode patterns. */
15965
15966void
15967x86_emit_floatuns (rtx operands[2])
15968{
15969 rtx neglab, donelab, i0, i1, f0, in, out;
15970 enum machine_mode mode, inmode;
15971
15972 inmode = GET_MODE (operands[1]);
15973 if (inmode != SImode
15974 && inmode != DImode)
15975 abort ();
15976
15977 out = operands[0];
15978 in = force_reg (inmode, operands[1]);
15979 mode = GET_MODE (out);
15980 neglab = gen_label_rtx ();
15981 donelab = gen_label_rtx ();
15982 i1 = gen_reg_rtx (Pmode);
15983 f0 = gen_reg_rtx (mode);
15984
15985 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15986
15987 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15988 emit_jump_insn (gen_jump (donelab));
15989 emit_barrier ();
15990
15991 emit_label (neglab);
15992
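  /* The input has its top bit set and would convert as a negative value.
     Halve it, folding the discarded low bit back in so rounding stays
     correct, convert, and then double the result.  */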
15993 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15994 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15995 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15996 expand_float (f0, i0, 0);
15997 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15998
15999 emit_label (donelab);
16000}
16001
16002 /* Return true if we do not know how to pass TYPE solely in registers.  */
16003bool
16004ix86_must_pass_in_stack (enum machine_mode mode, tree type)
16005{
16006 if (default_must_pass_in_stack (mode, type))
16007 return true;
16008 return (!TARGET_64BIT && type && mode == TImode);
16009}
16010
16011/* Initialize vector TARGET via VALS. */
16012void
16013ix86_expand_vector_init (rtx target, rtx vals)
16014{
16015 enum machine_mode mode = GET_MODE (target);
16016 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16017 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
16018 int i;
16019
16020 for (i = n_elts - 1; i >= 0; i--)
16021 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
16022 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
16023 break;
16024
16025   /* A few special cases first...
16026      ... constants are best loaded from the constant pool.  */
16027 if (i < 0)
16028 {
16029 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16030 return;
16031 }
16032
16033   /* ... values where only the first field is non-constant are best loaded
16034      from the pool and overwritten via a move later.  */
16035 if (!i)
16036 {
16037 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
16038 GET_MODE_INNER (mode), 0);
16039
16040 op = force_reg (mode, op);
16041 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
16042 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16043 switch (GET_MODE (target))
16044 {
16045 case V2DFmode:
16046 emit_insn (gen_sse2_movsd (target, target, op));
16047 break;
16048 case V4SFmode:
16049 emit_insn (gen_sse_movss (target, target, op));
16050 break;
16051 default:
16052 break;
16053 }
16054 return;
16055 }
16056
16057   /* Otherwise, build the vector with a sequence of unpack (interleave) insns.  */
16058 switch (GET_MODE (target))
16059 {
16060 case V2DFmode:
16061 {
16062 rtx vecop0 =
16063 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
16064 rtx vecop1 =
16065 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
16066
16067 vecop0 = force_reg (V2DFmode, vecop0);
16068 vecop1 = force_reg (V2DFmode, vecop1);
16069 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
16070 }
16071 break;
16072 case V4SFmode:
16073 {
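	/* Interleave with unpcklps: first combine elements 1/3 and 0/2,
	   then merge the two partial results to obtain {0, 1, 2, 3}.  */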
16074 rtx vecop0 =
16075 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
16076 rtx vecop1 =
16077 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
16078 rtx vecop2 =
16079 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
16080 rtx vecop3 =
16081 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
16082 rtx tmp1 = gen_reg_rtx (V4SFmode);
16083 rtx tmp2 = gen_reg_rtx (V4SFmode);
16084
16085 vecop0 = force_reg (V4SFmode, vecop0);
16086 vecop1 = force_reg (V4SFmode, vecop1);
16087 vecop2 = force_reg (V4SFmode, vecop2);
16088 vecop3 = force_reg (V4SFmode, vecop3);
16089 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
16090 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
16091 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
16092 }
16093 break;
16094 default:
16095 abort ();
16096 }
16097}
16098
16099#include "gt-i386.h"